参考:
how to filter out a null value from spark dataframe
scala> df.show() +----+---+--------+ | age| id| name| +----+---+--------+ |null| 1| Michael| | 30| 1| Tom| | 19| 2| Justin| +----+---+--------+
过滤出每一列都不为 null 的行
// Build one predicate requiring EVERY column to be non-null, by AND-ing
// an `isNotNull` check per column.
// reduceOption guards the degenerate zero-column DataFrame, where plain
// reduce would throw UnsupportedOperationException; lit(true) then keeps
// every row. (The built-in equivalent for this exact task is df.na.drop().)
val filterCond: Column = df.columns
  .map(c => col(c).isNotNull)
  .reduceOption(_ && _)
  .getOrElse(lit(true))
val filteredDf = df.filter(filterCond)
filteredDf.show()
scala> filterCond res16: org.apache.spark.sql.Column = (((age IS NOT NULL) AND (id IS NOT NULL)) AND (name IS NOT NULL)) scala> filteredDf.show() +---+---+-------+ |age| id| name| +---+---+-------+ | 30| 1| Tom| | 19| 2| Justin| +---+---+-------+
过滤出 age、id 列的值大于 1,且 name 列不为 null 的行
// Same combined predicate, written without pattern matching:
// the "name" column must be non-null; every other column must be > 1.
val filterCond2 = df.columns
  .map { colName =>
    if (colName == "name") col(colName).isNotNull
    else col(colName) > 1
  }
  .reduce(_ && _)
val filteredDf2 = df.filter(filterCond2)
scala> val filterCond2=df.columns.map {
| case x@"name" => col(x).isNotNull
| case x => col(x) > 1
| }.reduce(_ && _)
filterCond2: org.apache.spark.sql.Column = (((age > 1) AND (id > 1)) AND (name IS NOT NULL))
scala> df.filter(filterCond2).show()
+---+---+------+
|age| id| name|
+---+---+------+
| 19| 2|Justin|
+---+---+------+



