Input Data:
emp.csv
---------
empno,ename,designation,manager,hire_date,sal,deptno
7369,SMITH,CLERK,7902,12/17/1980,800,20
7499,ALLEN,SALESMAN,7698,2/20/1981,1600,30
7521,WARD,SALESMAN,7698,2/22/1981,1250,30
7566,TURNER,MANAGER,7839,4/2/1981,2975,20
7654,MARTIN,SALESMAN,7698,9/28/1981,1250,30
7698,MILLER,MANAGER,7839,5/1/1981,2850,30
7782,CLARK,MANAGER,7839,6/9/1981,2450,10
7788,SCOTT,ANALYST,7566,12/9/1982,3000,20
7839,KING,PRESIDENT,NULL,11/17/1981,5000,10
7844,TURNER,SALESMAN,7698,9/8/1981,1500,30
7876,ADAMS,CLERK,7788,1/12/1983,1100,20
7900,JAMES,CLERK,7698,12/3/1981,950,30
7902,FORD,ANALYST,7566,12/3/1981,3000,20
7934,MILLER,CLERK,7782,1/23/1982,1300,10
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object LoadCSV2RDDtoDF {

  case class Employee(empno: String, ename: String, designation: String, manager: String,
                      hire_date: String, sal: String, deptno: String)

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CSV Loader").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // Brings in toDF() and the $"col" column syntax used below.
    import sqlContext.implicits._
    /*
    // First approach: read the file as plain text and build the DataFrame by hand.
    val txtRDD = sc.textFile("/home/hadoop/IdeaProjects/SparkJob/src/main/resources/emp.csv")
    // txtRDD.foreach(println)

    // Drop the header row before splitting, otherwise the column names
    // end up as a data record. Note the file has seven fields per line;
    // ename is fields(1) and must not be skipped.
    val header = txtRDD.first()
    val empRDD = txtRDD.filter(_ != header).map { line =>
      val fields = line.split(",")
      val empno = fields(0)
      val ename = fields(1)
      val designation = fields(2)
      val manager = fields(3)
      val hire_date = fields(4)
      val sal = fields(5)
      val deptno = fields(6)
      (empno, ename, designation, manager, hire_date, sal, deptno)
    }

    val empDF = empRDD.toDF("#No", "Name", "Designation", "Manager", "Joining Date", "Salary", "Department#")
    empDF.show()
    */
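    // The commented-out block above builds each row by hand from an RDD of
    // strings; the reader API below gets the same result with less code by
    // taking the column names from the header and inferring the column types.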
    val eDF = sqlContext.read
      .format("csv")
      .option("header", "true")
      .option("inferSchema", "true")
      .load("/home/hadoop/IdeaProjects/SparkJob/src/main/resources/emp.csv")
    eDF.show()
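    // Optional sanity checks on the loaded DataFrame: with inferSchema,
    // empno, sal, and deptno should come back as integer columns.
    eDF.printSchema()
    // Example query: employees earning more than 2000.
    eDF.filter($"sal" > 2000).select("ename", "designation", "sal").show()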
  }
}
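The Employee case class is declared but never used. If the commented-out RDD approach is re-enabled, a minimal sketch of how the case class could replace the tuple, so the DataFrame picks up its column names from the field names automatically (employeeDF is a hypothetical name, and txtRDD/header come from that block):

    // Sketch only: map each record into the case class instead of a tuple.
    val employeeDF = txtRDD.filter(_ != header).map { line =>
      val f = line.split(",")
      Employee(f(0), f(1), f(2), f(3), f(4), f(5), f(6))
    }.toDF()
    employeeDF.show()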