# Word count program using PySpark.
# Expects an existing SparkSession bound to the name `spark`; it is not
# created in this snippet.

# Load the input text file as an RDD of lines.
r1 = spark.sparkContext.textFile("E:\\vow\\email.txt")

# Split every line into tokens, pair each token with 1, and sum the 1s per
# token to get (word, count) tuples.
# NOTE: splitting on a single space turns consecutive spaces and blank lines
# into empty-string tokens, so '' is counted as a "word" as well.
r2 = (
    r1.flatMap(lambda x: x.split(" "))
    .map(lambda x: (x, 1))
    .reduceByKey(lambda x, y: x + y)
)

# Bring the results to the driver and print one (word, count) tuple per line.
for j in r2.collect():
    print(j)

# coalesce(1) merges the result into a single partition before saving.
# NOTE(review): Spark treats the path as an output directory (holding part
# files), not a single file, despite the ".txt" suffix -- confirm this is
# the intended layout.
r2.coalesce(1).saveAsTextFile("E:\\vow\\emailwordcount.txt")
# Sample output:
# ('acebook', 1)
# ('Session', 1)
# ('Developers', 2)
# ('', 10)
# ('The', 1)
# ('world', 1)
# ('is', 3)
# .....
No comments:
Post a Comment