import pyspark

conf = pyspark.SparkConf().setAppName("wordcount").setMaster("local[*]")
sc = pyspark.SparkContext(conf=conf)

# inputdata = sc.textFile("hdfs://linux1:9000/home/linux1/Desktop/spark/words.txt")  # read the file from HDFS
inputdata = sc.textFile("file:///home/linux1/Desktop/spark/data")  # read the file from the local filesystem

# Split each line on commas, then pair every word with an initial count of 1.
output = inputdata.flatMap(lambda x: x.split(",")).map(lambda x: (x, 1))
# Sum the counts for each distinct word.
outputarray = output.reduceByKey(lambda a, b: a + b)
result = outputarray.collect()
for i in result:
    print(i)
# Inspect the partitioning before shutting down; RDD methods fail after sc.stop().
print(output.getNumPartitions())

sc.stop()
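
# Optional reference: a minimal, self-contained sketch (the file path is assumed
# to be the same local data file as above; conf2/sc2/inputdata2 are illustrative
# names) showing that the partition count printed earlier can be influenced by
# passing minPartitions to textFile. Spark may still choose more partitions than
# requested, so treat the printed value as a lower bound.
#
# import pyspark
# conf2 = pyspark.SparkConf().setAppName("wordcount-partitions").setMaster("local[*]")
# sc2 = pyspark.SparkContext(conf=conf2)
# inputdata2 = sc2.textFile("file:///home/linux1/Desktop/spark/data", minPartitions=4)
# print(inputdata2.getNumPartitions())  # typically >= 4 in local mode
# sc2.stop()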