原始数据如下
+----+-----+ |code|value| +----+-----+ | a| 1| | b| 1| | c| 1| | d| 1| | e| 1| | f| 1| +----+-----+
pivot函数效果
+----+----+----+----+----+----+----+----+ |code| a| b| c| d| e| f|temp| +----+----+----+----+----+----+----+----+ | f|null|null|null|null|null| 1| 1| | e|null|null|null|null| 1|null| 1| | d|null|null|null| 1|null|null| 1| | c|null|null| 1|null|null|null| 1| | b|null| 1|null|null|null|null| 1| | a| 1|null|null|null|null|null| 1| +----+----+----+----+----+----+----+----+ +---+---+---+---+---+---+ | a| b| c| d| e| f| +---+---+---+---+---+---+ | 1| 1| 1| 1| 1| 1| +---+---+---+---+---+---+
实现代码
package myspark.sql
import myspark.warehouse.common.base.getLocalSparkSession
import org.apache.spark.sql.functions.{col, lit}
object rowTurnColumn {
def main(args: Array[String]): Unit = {
val spark = getLocalSparkSession()
import spark.implicits._
val tuples = Seq(
("a", 1),
("b", 1),
("c", 1),
("d", 1),
("e", 1),
("f", 1)
)
val frame = tuples.toDF("code", "value")
frame.show()
val frame1 = frame.groupBy("code")
.pivot("code")
.sum("value")
.withColumn("temp", lit("1"))
frame1.show()
frame1.createOrReplaceTempView("frame1")
spark.sql(
s"""
|select
|${tuples.map(x=>{s"""sum(t1.${x._1}) as ${x._1}"""}).mkString(",")}
|from frame1 t1
|group by
|t1.temp
|""".stripMargin).show()
}
}
行转列 :stack函数应用
+---+---+---+---+---+---+ | a| b| c| d| e| f| +---+---+---+---+---+---+ | 1| 1| 1| 1| 1| 1| +---+---+---+---+---+---+ +----+-----+ |code|value| +----+-----+ | a| 1| | b| 1| | c| 1| | d| 1| | e| 1| | f| 1| +----+-----+
code如下
// SELECt stack(n,expr1, ..., exprk) AS (alias1, alias2, ...) FROM myTable; 语法格式
// 说明:stack(n, expr1, ..., exprk) - 会将expr1, ..., exprk 分割为n行.
spark.sql(
s"""
|select
|stack(${tuples.length},${tuples.map(x=>{s"'${x._1}',`${x._1}`"}).mkString(",")}) as (`code`,`value`)
|from frame2 t1
|""".stripMargin).show()
列转行:lateral view + explode()
参考资料:
https://www.cnblogs.com/hhelibeb/p/10310369.html



