Tuesday, 19 May 2020

StructField, StructType Example in PySpark. Create Dataframe using in memory object in PySpark

#Create in memory object in Pyspark and create dataframe using it and export into MySQL.

import pyspark.sql.types as st

data = [
    (101,'Sankaranarayanan','1976-04-20','M','Bangalore','Married'),
    (102,'Anbusudha','1979-07-22','F','Bangalore','Married'),
(103,'Rahul','1989-07-15','M','Manachai','Bachelor')
]

print(data)

[(101, 'Sankaranarayanan', '1976-04-20', 'M', 'Bangalore', 'Married'), (102, 'Anbusudha', '1979-07-22', 'F', 'Bangalore', 'Married'), (103, 'Rahul', '1989-07-15', 'M', 'Manachai', 'Bachelor')]

for i in data:
    print(i)
(101, 'Sankaranarayanan', '1976-04-20', 'M', 'Bangalore', 'Married')
(102, 'Anbusudha', '1979-07-22', 'F', 'Bangalore', 'Married')
(103, 'Rahul', '1989-07-15', 'M', 'Manachai', 'Bachelor')

person_schema = st.StructType([
st.StructField('SNo', st.IntegerType(), True),
    st.StructField('Name', st.StringType(), True),
    st.StructField('DOB', st.StringType(), True),
    st.StructField('Gender', st.StringType(), True),
    st.StructField('City', st.StringType(), True),
    st.StructField('MarritalStatus', st.StringType(), True)
])

print(person_schema)

StructType(List(StructField(SNo,IntegerType,true),StructField(Name,StringType,true),StructField(DOB,StringType,true),StructField(Gender,StringType,true),StructField(City,StringType,true),StructField(MarritalStatus,StringType,true)))

user_df = spark.createDataFrame(data, person_schema)
 
user_df.printSchema()

root
 |-- SNo: integer (nullable = true)
 |-- Name: string (nullable = true)
 |-- DOB: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- City: string (nullable = true)
 |-- MarritalStatus: string (nullable = true)

user_df.show(5)

+---+----------------+----------+------+---------+--------------+
|SNo|            Name|       DOB|Gender|     City|MarritalStatus|
+---+----------------+----------+------+---------+--------------+
|101|Sankaranarayanan|1976-04-20|     M|Bangalore|       Married|
|102|       Anbusudha|1979-07-22|     F|Bangalore|       Married|
|103|           Rahul|1989-07-15|     M| Manachai|      Bachelor|
+---+----------------+----------+------+---------+--------------+ 

#Write the dataframe content into MySQL table
user_df.write.format("jdbc").\
option("url", "jdbc:mysql://localhost:3306/school").\
option("driver", "com.mysql.jdbc.Driver").\
option("dbtable", "user_table").\
option("user", "root").\
option("password", "Studi0Plus").save()
Goto Mysql :


use school;
show tables;
select * from user_table;

# SNo, Name, DOB, Gender, City, MarritalStatus
'103', 'Rahul', '1989-07-15', 'M', 'Manachai', 'Bachelor'
'102', 'Anbusudha', '1979-07-22', 'F', 'Bangalore', 'Married'
'101', 'Sankaranarayanan', '1976-04-20', 'M', 'Bangalore', 'Married'

No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...