// Create the Spark program using IntelliJ IDEA on Windows
package my

import org.apache.spark.sql.SparkSession

object my {
  def main(args: Array[String]): Unit = {
    // Build (or reuse) a SparkSession; local[2] runs Spark locally with 2 threads
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("Ex")
      .getOrCreate()

    // Read the JSON file from HDFS into a DataFrame
    val df = spark.read.format("json").load("hdfs://quickstart.cloudera:8020/user/cloudera/customers.json")

    df.printSchema()  // print the inferred schema
    df.show()         // display the first 20 rows
  }
}
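
Note that Spark's json reader expects line-delimited JSON by default: one complete JSON object per line. A customers.json in that shape would match the schema shown in the output further down; the record below is only an illustration, with a placeholder email address (the real values are truncated in the output):

{"userId": 1, "firstName": "Krish", "lastName": "Lee", "emailAddress": "krish@example.com", "phoneNumber": "123456"}

If the file were instead a pretty-printed JSON array spanning multiple lines, you would need to add .option("multiLine", "true") before load().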
build.sbt:
----------
name := "myspark"
version := "0.1"
scalaVersion := "2.11.12"
// https://mvnrepository.com/artifact/org.apache.spark/spark-core
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.6"
// https://mvnrepository.com/artifact/org.apache.spark/spark-sql
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.6"
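
Since spark-submit already supplies the Spark runtime on the cluster, these dependencies could also be marked "provided", so they are compiled against but not packaged. With plain sbt package this makes no practical difference (package never bundles dependencies), but it matters if you later switch to an assembly/fat jar. A sketch of that variant:

libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.6" % "provided"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.6" % "provided"

Note that with "provided" the program will no longer launch directly from IntelliJ unless the run configuration is set to include provided-scope dependencies on the classpath.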
// To build the jar file, run the package task from the sbt shell:
sbt> package

The jar file is created at: IdeaProjects\myspark\target\scala-2.11\myspark_2.11-0.1.jar

Copy myspark_2.11-0.1.jar from Windows to the Cloudera VM using WinSCP.
// Run spark-submit in the Cloudera VM
spark-submit --master local --driver-memory 2g --executor-memory 2g --class my.my myspark_2.11-0.1.jar

Output:
root
|-- emailAddress: string (nullable = true)
|-- firstName: string (nullable = true)
|-- lastName: string (nullable = true)
|-- phoneNumber: string (nullable = true)
|-- userId: long (nullable = true)
+--------------------+---------+--------+-----------+------+
| emailAddress|firstName|lastName|phoneNumber|userId|
+--------------------+---------+--------+-----------+------+
|krish.lee@learnin...| Krish| Lee| 123456| 1|
|racks.jacson@lear...| racks| jacson| 123456| 2|
|denial.roast@lear...| denial| roast| 33333333| 3|
|devid.neo@learnin...| devid| neo| 222222222| 4|
|jone.mac@learning...| jone| mac| 111111111| 5|
+--------------------+---------+--------+-----------+------+
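
Since main already accepts args, the HDFS path could be passed on the command line instead of being hard-coded, so the same jar works for different input files. A minimal sketch of that variant (the default path is the one used above):

package my

import org.apache.spark.sql.SparkSession

object my {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("Ex")
      .getOrCreate()

    // Use the first argument as the input path if supplied, else fall back to the original file
    val path =
      if (args.nonEmpty) args(0)
      else "hdfs://quickstart.cloudera:8020/user/cloudera/customers.json"

    val df = spark.read.format("json").load(path)
    df.printSchema()
    df.show()
  }
}

You would then append the path after the jar name, e.g. (other.json is a hypothetical file):
spark-submit --master local --class my.my myspark_2.11-0.1.jar hdfs://quickstart.cloudera:8020/user/cloudera/other.json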