#!/usr/bin/python3
# coding=utf-8
# @Time   : 2021.9.11
# @Author : Coly
# @Version: V1.0
# @Des    : PySpark word-count learning example
import os

# Point the script at the local JDK before initialising findspark,
# so the JVM is launched against the intended Java installation.
os.environ['JAVA_HOME'] = '/usr/lib/jdk8/jdk1.8.0_301'

import findspark
findspark.init()  # make the pyspark package importable outside a full Spark distribution
import pyspark
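# Configure a local Spark application: "local[*]" runs one worker thread per CPU core.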
conf = pyspark.SparkConf().setAppName("wordcount").setMaster("local[*]")
sc = pyspark.SparkContext(conf=conf)
# inputdata = sc.textFile("hdfs://linux1:9000/home/linux1/Desktop/spark/words.txt")  # read from HDFS
inputdata = sc.textFile("file:///home/linux1/Desktop/spark/data")  # read from the local filesystem
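# Classic word count: split each line on commas into words, map every word to a
# (word, 1) pair, then sum the counts per word with reduceByKey (triggers a shuffle).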
output = inputdata.flatMap(lambda x: x.split(",")).map(lambda x: (x, 1))
outputarray = output.reduceByKey(lambda a, b: a + b)
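# Optional sketch (not in the original script): to list the most frequent words first,
# the standard RDD method sortBy could order the pairs by count before collecting, e.g.:
# sorted_counts = outputarray.sortBy(lambda kv: kv[1], ascending=False)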
result = outputarray.collect()  # bring all (word, count) pairs back to the driver
for i in result:
    print(i)
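# Alternative sketch (hypothetical output path): for large results, writing the pairs
# out with saveAsTextFile avoids pulling everything into driver memory via collect():
# outputarray.saveAsTextFile("file:///home/linux1/Desktop/spark/wordcount_out")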
# Query the RDD while the context is still alive; after sc.stop() the JVM backing it is gone.
print(output.getNumPartitions())
sc.stop()