from pyspark.sql.functions import count
# Read the cancer CSV into a DataFrame. The first row is treated as the header
# and column types are inferred from the data. `spark` is expected to be an
# already-created SparkSession (e.g. the one provided by the pyspark shell).
df = (
    spark.read.format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .load("E:\\vow\\CancerData10.csv")
)

# Optional debugging aids -- uncomment to inspect the loaded data:
# df.printSchema()
# df.show(5)

# Keep only the four columns selected for this analysis.
df1 = df.select("ID", "Age", "State", "Sex")

# One output row per distinct "Sex" value, with the number of rows in that
# group exposed under the column name "Count".
df2 = (
    df1.groupBy("Sex")
    .agg(count("Sex").alias("Count"))
)

# Print the aggregated result to the console.
df2.show()
# Output of df2.show():
# +---+-----+
# |Sex|Count|
# +---+-----+
# |  F|    4|
# |  M|    5|
# +---+-----+
No comments:
Post a Comment