Wednesday, 26 December 2018

Case Class example in Spark

//create a text file with sample data
hadoop@hadoop:~/Desktop/vow$ cat > person.txt
Barack,Obama,53
George,Bush,68
Bill,Clinton,68

//copy the file into hdfs
hadoop@hadoop:~/Desktop/vow$ hdfs dfs -copyFromLocal person.txt /user/person.txt

//view the content
hadoop@hadoop:~/Desktop/vow$ hdfs dfs -cat /user/person.txt
Barack,Obama,53
George,Bush,68
Bill,Clinton,68

$ spark-shell

// case class definition: one Person per line of person.txt (firstName, lastName, age).
// The field names and types become the DataFrame column names and types after toDF().
scala> case class Person(firstName:String, lastName:String, age:Int)
defined class Person

// make RDD from file
scala> val personFile = sc.textFile("hdfs://localhost:9000/user/person.txt")
personFile: org.apache.spark.rdd.RDD[String] = hdfs://localhost:9000/user/person.txt MapPartitionsRDD[1] at textFile at <console>:24

// view the line count
scala> personFile.count
res0: Long = 3 

// Map each comma-separated line of the RDD ("firstName,lastName,age") to a Person.
// Fields are trimmed so stray whitespace around a comma does not make toInt fail.
// The lambda parameter is named `line` so it does not shadow the resulting val `p`.
scala> val p = personFile.map(line => {
     | val fields = line.split(",").map(_.trim)
     | val firstName = fields(0)
     | val lastName = fields(1)
     | val age = fields(2).toInt
     | Person(firstName, lastName, age)
     | })
p: org.apache.spark.rdd.RDD[Person] = MapPartitionsRDD[2] at map at <console>:27

// display all elements
scala> p.collect.foreach(println)
Person(Barack,Obama,53)
Person(George,Bush,68)
Person(Bill,Clinton,68)

// Convert the RDD[Person] into a DataFrame (columns are taken from the case class fields)
scala> val df = p.toDF()
df: org.apache.spark.sql.DataFrame = [firstName: string, lastName: string ... 1 more field]

//Display the schema
scala> df.printSchema
root
 |-- firstName: string (nullable = true)
 |-- lastName: string (nullable = true)
 |-- age: integer (nullable = false)


scala> df.show
+---------+--------+---+
|firstName|lastName|age|
+---------+--------+---+
|   Barack|   Obama| 53|
|   George|    Bush| 68|
|     Bill| Clinton| 68|
+---------+--------+---+

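// Register the DataFrame as a temporary view so it can be queried with SQL
scala> df.createOrReplaceTempView("tblPerson")

// Query the temporary view
scala> spark.sql("select * from tblPerson").show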
+---------+--------+---+
|firstName|lastName|age|
+---------+--------+---+
|   Barack|   Obama| 53|
|   George|    Bush| 68|
|     Bill| Clinton| 68|
+---------+--------+---+

scala> df.select("firstName","age").show
+---------+---+
|firstName|age|
+---------+---+
|   Barack| 53|
|   George| 68|
|     Bill| 68|
+---------+---+

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...