希望这可以帮助!
from pyspark.sql.functions import monotonically_increasing_id, row_number
from pyspark.sql import Window

# Sample data: two string columns.
# BUGFIX: the PySpark API is createDataFrame (capital F), not createDataframe —
# the original raised AttributeError.
a = sqlContext.createDataFrame(
    [("Dog", "Cat"), ("Cat", "Dog"), ("Mouse", "Cat")],
    ["Animal", "Enemy"],
)
a.show()

# Convert a plain Python list into a single-column DataFrame.
rating = [5, 4, 1]
b = sqlContext.createDataFrame([(r,) for r in rating], ["Rating"])

# Add a 'sequential' 1..N index to both DataFrames and join on it.
# NOTE(review): row_number() over monotonically_increasing_id() yields a
# stable per-DataFrame index, but the row pairing between the two
# DataFrames depends on their original partition order — confirm that
# ordering matches what the caller expects.
a = a.withColumn(
    "row_idx",
    row_number().over(Window.orderBy(monotonically_increasing_id())),
)
b = b.withColumn(
    "row_idx",
    row_number().over(Window.orderBy(monotonically_increasing_id())),
)

# Join on the synthetic index, then drop it from the result.
final_df = a.join(b, a.row_idx == b.row_idx).drop("row_idx")
final_df.show()

# Input:
+------+-----+
|Animal|Enemy|
+------+-----+
|   Dog|  Cat|
|   Cat|  Dog|
| Mouse|  Cat|
+------+-----+
输出为:
+------+-----+------+
|Animal|Enemy|Rating|
+------+-----+------+
|   Cat|  Dog|     4|
|   Dog|  Cat|     5|
| Mouse|  Cat|     1|
+------+-----+------+



