Monday, 25 May 2020

Date and DateTime Objects in PySpark

# Demo: Python date objects inside Spark Row objects.
from pyspark.sql import SparkSession
from pyspark.sql import Row
import datetime

# Build (or reuse) a local Spark session for the demo.
spark = (SparkSession.builder
         .master("local")
         .appName("demoApp")
         .getOrCreate())

# Each Row carries a datetime.date value; Spark infers a `date` column
# for `doj` when the DataFrame is created.
employees = [
    Row(id=101, city="Hyderabad", doj=datetime.date(2014, 10, 23)),
    Row(id=102, city="Bangalore", doj=datetime.date(2018, 3, 20)),
]

df = spark.createDataFrame(employees)

# Inspect the inferred schema, then print the rows.
df.printSchema()
df.show()

root
 |-- city: string (nullable = true)
 |-- doj: date (nullable = true)
 |-- id: long (nullable = true)

+---------+----------+---+
|     city|       doj| id|
+---------+----------+---+
|Hyderabad|2014-10-23|101|
|Bangalore|2018-03-20|102|
+---------+----------+---+

# Demo: Python datetime objects inside Spark Row objects.

from pyspark.sql import SparkSession
from pyspark.sql import Row
import datetime

# Build (or reuse) a local Spark session for the demo.
spark = (SparkSession.builder
         .master("local")
         .appName("demoApp")
         .getOrCreate())

# Each Row carries a datetime.datetime value; Spark infers a
# `timestamp` column for `doj` when the DataFrame is created.
employees = [
    Row(id=101, city="Hyderabad", doj=datetime.datetime(2014, 10, 23, 23, 34, 45)),
    Row(id=102, city="Bangalore", doj=datetime.datetime(2018, 3, 20, 12, 45, 58)),
]

df = spark.createDataFrame(employees)

# Inspect the inferred schema, then print the rows.
df.printSchema()
df.show()

root
 |-- city: string (nullable = true)
 |-- doj: timestamp (nullable = true)
 |-- id: long (nullable = true)

+---------+-------------------+---+
|     city|                doj| id|
+---------+-------------------+---+
|Hyderabad|2014-10-23 23:34:45|101|
|Bangalore|2018-03-20 12:45:58|102|
+---------+-------------------+---+

No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...