# withColumn() and converting a column's data type
# withColumn() returns a new DataFrame with the named column added or replaced;
# combined with cast(), it converts the string column doj to a timestamp.
from pyspark.sql import Row
# 'spark' below is the SparkSession provided by the pyspark shell
r1 = Row(id=100, name='Sara', city='Nellai', pin=627001, doj='2014-12-23 23:34:45')
r2 = Row(id=102, name=None, city='Kovai', pin=None, doj=None)
r3 = Row(id=None, name='Raji', city=None, pin=None, doj='2010-01-01 12:34:22')
df = spark.createDataFrame([r1, r2, r3])
#df.select("id", "name", "doj", "city", "pin").show()
df.printSchema()
#df["field"] example
df1 = df.withColumn("doj",df["doj"].cast("timestamp")) #withColumn and Changing data type
df1.printSchema()
# df.printSchema() output:
root
|-- city: string (nullable = true)
|-- doj: string (nullable = true)   # doj is a string before the cast
|-- id: long (nullable = true)
|-- name: string (nullable = true)
|-- pin: long (nullable = true)
# df1.printSchema() output:
root
|-- city: string (nullable = true)
|-- doj: timestamp (nullable = true)   # doj is a timestamp after the cast
|-- id: long (nullable = true)
|-- name: string (nullable = true)
|-- pin: long (nullable = true)
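# The same cast can also be written with an explicit DataType object, or with
# to_timestamp() from pyspark.sql.functions (Spark 2.2+). A minimal sketch,
# assuming the same df as above (df2/df3 are just illustrative names):
from pyspark.sql.types import TimestampType
from pyspark.sql.functions import to_timestamp
df2 = df.withColumn("doj", df["doj"].cast(TimestampType()))  # equivalent to cast("timestamp")
df3 = df.withColumn("doj", to_timestamp(df["doj"], "yyyy-MM-dd HH:mm:ss"))  # parse with an explicit format
df3.printSchema()  # doj: timestamp (nullable = true)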
# The same conversion using col(); the None values pass through the cast as nulls
from pyspark.sql import Row
from pyspark.sql.functions import col
r1 = Row(id=100, name='Sara', city='Nellai', pin=627001, doj='2014-12-23 23:34:45')
r2 = Row(id=102, name=None, city='Kovai', pin=None, doj=None)
r3 = Row(id=None, name='Raji', city=None, pin=None, doj='2010-01-01 12:34:22')
df = spark.createDataFrame([r1, r2, r3])
#df.select("id", "name", "doj", "city", "pin").show()
df.printSchema()
# using col()
df1 = df.withColumn("doj", col("doj").cast("timestamp"))
df1.printSchema()
# df.printSchema() output:
root
|-- city: string (nullable = true)
|-- doj: string (nullable = true)
|-- id: long (nullable = true)
|-- name: string (nullable = true)
|-- pin: long (nullable = true)
# df1.printSchema() output:
root
|-- city: string (nullable = true)
|-- doj: timestamp (nullable = true)
|-- id: long (nullable = true)
|-- name: string (nullable = true)
|-- pin: long (nullable = true)
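# Note that rows where doj was None come through the cast as null timestamps
# rather than raising an error. A quick check (a sketch, using the df1 built above):
df1.filter(col("doj").isNull()).select("id", "name").show()    # the row from r2
df1.filter(col("doj").isNotNull()).select("id", "doj").show()  # r1 and r3
# None values in other columns can be filled after the cast, e.g.:
df_filled = df1.na.fill({"pin": 0, "city": "unknown"})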