Monday, 25 May 2020

withColumn and cast - Converting a Column's Data Type in PySpark

# withColumn and data type conversion example

 
from pyspark.sql import Row
from pyspark.sql import functions 
 

# Three sample rows; several fields are None.
# NOTE(review): the schema printed in this post lists the fields
# alphabetically rather than in the order given here. That looks like the
# pre-3.0 behaviour of Row(**kwargs) -- confirm on your Spark version.
rows = [
    Row(id=100, name='Sara', city='Nellai', pin=627001, doj='2014-12-23 23:34:45'),
    Row(id=102, name=None, city='Kovai', pin=None, doj=None),
    Row(id=None, name='Raji', city=None, pin=None, doj='2010-01-01 12:34:22'),
]

# `spark` is not defined in this snippet; it must already exist
# (presumably the session object provided by the pyspark shell).
df = spark.createDataFrame(rows)
# Uncomment to inspect the rows:
# df.select("id", "name", 'doj', "city", "pin").show()
df.printSchema()  # doj is still a string here

# df["field"] example: pick the column with item access, cast it to a
# timestamp, and overwrite the existing "doj" column via withColumn.
doj_as_timestamp = df["doj"].cast("timestamp")
df1 = df.withColumn("doj", doj_as_timestamp)
df1.printSchema()  # doj is now a timestamp


root
 |-- city: string (nullable = true)
 |-- doj: string (nullable = true)  #String
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- pin: long (nullable = true)

root
 |-- city: string (nullable = true)
 |-- doj: timestamp (nullable = true) #timestamp
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- pin: long (nullable = true)
 
 
 
# Same example with None values, this time referencing the column with col()
from pyspark.sql import Row
from pyspark.sql import functions 
from pyspark.sql.functions import col

# Sample data: one fully populated row and two rows containing None values.
sample_rows = [
    Row(id=100, name='Sara', city='Nellai', pin=627001, doj='2014-12-23 23:34:45'),
    Row(id=102, name=None, city='Kovai', pin=None, doj=None),
    Row(id=None, name='Raji', city=None, pin=None, doj='2010-01-01 12:34:22'),
]

# `spark` must already be defined before this snippet runs
# (presumably the pyspark shell session).
df = spark.createDataFrame(sample_rows)
# Uncomment to inspect the rows:
# df.select("id", "name", 'doj', "city", "pin").show()
df.printSchema()  # schema before the cast

# Using col: refer to the column by name through functions.col instead of
# indexing the DataFrame, then replace "doj" with its timestamp-cast version.
timestamp_doj = col("doj").cast("timestamp")
df1 = df.withColumn("doj", timestamp_doj)
df1.printSchema()  # schema after the cast


root
 |-- city: string (nullable = true)
 |-- doj: string (nullable = true)
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- pin: long (nullable = true)

root
 |-- city: string (nullable = true)
 |-- doj: timestamp (nullable = true)
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- pin: long (nullable = true)

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...