这里自定义了一个转换大写的UDF函数,用scala建表时会出现toDF不能导入的问题,后面通过查看源码可以知道toDF的使用方法
正确代码
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
object ScalaTest {
  /**
   * Demonstrates registering a Scala UDF and calling it from Spark SQL.
   *
   * Creates a local SparkSession, registers an upper-casing UDF, builds a
   * temp view from an in-memory Seq via `toDF`, and runs two queries —
   * one without and one with a column alias for the UDF result.
   */
  def main(args: Array[String]): Unit = {
    val sparkConfig = new SparkConf().setMaster("local")
    val spark = SparkSession.builder().appName("test").config(sparkConfig).enableHiveSupport().getOrCreate()
    // `toDF` on a Seq is provided by implicit conversions; this import is required.
    import spark.implicits._
    // Register the UDF under a name so it can be referenced inside SQL text.
    spark.udf.register("to_uppercase", (s: String) => s.toUpperCase())
    Seq((1, "zhangsan"), (2, "lisi"), (3, "xiaoming")).toDF("id", "name").createOrReplaceTempView("table_tmp")
    // Without an alias, the result column is labelled "UDF:to_uppercase(name)".
    spark.sql("select id, to_uppercase(name) from table_tmp").show()
    // With an alias, the result column is labelled "new_name".
    spark.sql("select id, to_uppercase(name) new_name from table_tmp").show()
    // Fix: release session resources; the original sample never stopped the SparkSession.
    spark.stop()
  }
}
输出结果:
+---+----------------------+
| id|UDF:to_uppercase(name)|
+---+----------------------+
|  1|              ZHANGSAN|
|  2|                  LISI|
|  3|              XIAOMING|
+---+----------------------+

+---+--------+
| id|new_name|
+---+--------+
|  1|ZHANGSAN|
|  2|    LISI|
|  3|XIAOMING|
+---+--------+
toDF()源码介绍
@InterfaceStability.Stable
case class DatasetHolder[T] private[sql](private val ds: Dataset[T]) {

  // This is declared with parentheses to prevent the Scala compiler from treating
  // `rdd.toDS("1")` as invoking this toDS and then apply on the returned Dataset.
  def toDS(): Dataset[T] = ds

  // This is declared with parentheses to prevent the Scala compiler from treating
  // `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame.
  // Fix: the return type is `DataFrame` (Spark's alias for Dataset[Row]);
  // the original text's `Dataframe` is a typo and would not compile.
  def toDF(): DataFrame = ds.toDF()

  def toDF(colNames: String*): DataFrame = ds.toDF(colNames : _*)
}
再贴一个关于 Scala 隐式转换讲解的链接:Scala隐式详解



