from pyspark.sql import Row

# Build three employee records as Rows with named fields.
# Every Row lists its fields in the same order (id, name, salary, city).
# PySpark 3.0+ no longer sorts Row keyword fields by name and matches the
# values to columns by position, so mixing the keyword order between rows
# would put values under the wrong columns there. On older versions, which
# sort the fields, a consistent order changes nothing.
r1 = Row(id=100, name='Sara', salary=50000, city='Bangalore')
r2 = Row(id=101, name='Siva', salary=63000, city='Chennai')
r3 = Row(id=103, name='Malar', salary=63222, city='Mumbai')

# `spark` is not defined in this snippet; presumably it is the SparkSession
# supplied by the pyspark shell / notebook.
# No schema is passed, so column names and types are inferred from the Rows.
df = spark.createDataFrame([r1, r2, r3])
df.show()
+---------+---+-----+------+
|     city| id| name|salary|
+---------+---+-----+------+
|Bangalore|100| Sara| 50000|
|  Chennai|101| Siva| 63000|
|   Mumbai|103|Malar| 63222|
+---------+---+-----+------+
# Show the same data with the columns in an explicitly chosen order.
column_order = ["id", "name", "salary", "city"]
df.select(*column_order).show()
+---+-----+------+---------+
| id| name|salary|     city|
+---+-----+------+---------+
|100| Sara| 50000|Bangalore|
|101| Siva| 63000|  Chennai|
|103|Malar| 63222|   Mumbai|
+---+-----+------+---------+
# Rows that contain None (missing) values.
from pyspark.sql import Row

r1 = Row(
    id=100,
    name='Sara',
    city='Nellai',
    pin=627001,
)
r2 = Row(
    id=102,
    name=None,
    city='Kovai',
    pin=None,
)
r3 = Row(
    id=None,
    name='Raji',
    city=None,
    pin=None,
)

# Each column has at least one non-None value across the three rows.
records = [r1, r2, r3]
df = spark.createDataFrame(records)

# Display the columns in a fixed, explicit order.
shown_columns = ["id", "name", "city", "pin"]
df.select(*shown_columns).show()
+----+----+------+------+
|  id|name|  city|   pin|
+----+----+------+------+
| 100|Sara|Nellai|627001|
| 102|null| Kovai|  null|
|null|Raji|  null|  null|
+----+----+------+------+
# Print the DataFrame's schema as a tree: one line per column with its
# type and whether it is nullable.
df.printSchema()
root
 |-- city: string (nullable = true)
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- pin: long (nullable = true)
# Replace nulls in two passes: -1 for all numeric fields first, then
# "NotProvided" for all string fields.
numeric_filled = df.fillna(-1)
fully_filled = numeric_filled.fillna("NotProvided")
fully_filled.show()
+-----------+---+-----------+------+
|       city| id|       name|   pin|
+-----------+---+-----------+------+
|     Nellai|100|       Sara|627001|
|      Kovai|102|NotProvided|    -1|
|NotProvided| -1|       Raji|    -1|
+-----------+---+-----------+------+
# Rows with a date-time column (doj) alongside None values.
# The doj values are written as plain 'YYYY-MM-DD HH:MM:SS' strings.
from pyspark.sql import Row

r1 = Row(
    id=100,
    name='Sara',
    city='Nellai',
    pin=627001,
    doj='2014-12-23 23:34:45',
)
r2 = Row(
    id=102,
    name=None,
    city='Kovai',
    pin=None,
    doj=None,
)
r3 = Row(
    id=None,
    name='Raji',
    city=None,
    pin=None,
    doj='2010-01-01 12:34:22',
)

records = [r1, r2, r3]
df = spark.createDataFrame(records)

# Display the columns in a fixed, explicit order.
shown_columns = ["id", "name", "doj", "city", "pin"]
df.select(*shown_columns).show()
+----+----+-------------------+------+------+
|  id|name|                doj|  city|   pin|
+----+----+-------------------+------+------+
| 100|Sara|2014-12-23 23:34:45|Nellai|627001|
| 102|null|               null| Kovai|  null|
|null|Raji|2010-01-01 12:34:22|  null|  null|
+----+----+-------------------+------+------+
df.printSchema()
root
 |-- city: string (nullable = true)
 |-- doj: string (nullable = true) -- doj is inferred as a string, not a timestamp, because its values were passed as plain strings
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- pin: long (nullable = true)