两种查询方式:
代码:
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
/**
 * Demonstrates the two ways of querying a Spark DataFrame:
 * plain SQL against a temporary view, and the DataFrame DSL.
 */
object Demo_SparkSession {

  /**
   * Reads `data/person.txt` (one whitespace-separated `id name age` record per
   * line), turns it into a DataFrame with an explicit schema, and prints the
   * result of one SQL query and one DSL query.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Set up the environment.
    val session: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("xiaobai")
      .getOrCreate()

    try {
      val sc = session.sparkContext

      // Load the raw text data source.
      val data: RDD[String] = sc.textFile("data/person.txt")

      // Convert every non-blank line into a Row(id, name, age). Blank lines
      // (e.g. a trailing newline at the end of the file) are skipped so that
      // they do not fail the integer conversion below.
      val personRDD: RDD[Row] = data
        .map(_.trim)
        .filter(_.nonEmpty)
        .map { line =>
          // Split on any run of whitespace so repeated spaces or tabs are tolerated.
          val fields = line.split("\\s+")
          Row(fields(0).toInt, fields(1), fields(2).toInt)
        }

      // Column names and types matching the Row layout built above.
      val schema: StructType = StructType(List(
        StructField("id", IntegerType, nullable = false),
        StructField("name", StringType, nullable = false),
        StructField("age", IntegerType, nullable = false)
      ))

      val personDF: DataFrame = session.createDataFrame(personRDD, schema)

      // A temporary view is required before the data can be queried with SQL.
      personDF.createOrReplaceTempView("t_person")

      // SQL query.
      session.sql("select name from t_person").show()

      // DSL query: object-oriented SQL, no temporary view needed.
      // Filter first so that `id` is still part of the frame, then project `name`.
      personDF.where("id=1").select("name").show()
    } finally {
      // Always release the session, even when one of the queries fails.
      session.stop()
    }
  }
}
另附person.txt内容:
1 xiaobai 18
2 xiaowang 19
3 zhangzi 33
4 heqi 55
5 zhouzhanyang 99



