Showing posts with label mongodb. Show all posts
Showing posts with label mongodb. Show all posts

Tuesday, 12 May 2020

Python with MongoDB - Sample Programs

python -m pip install pymongo


import pymongo
myclient = pymongo.MongoClient("mongodb://localhost:27017/")

print(myclient.list_database_names())
['admin', 'config', 'local']



dblist = myclient.list_database_names()
if "school" in dblist:
  print("The database exists.")
  
mydb = myclient["school"]

for p in dblist:
    print(p)
    
admin
config
local


 
 
#Display all the databases.
import pymongo
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
for p in myclient.list_database_names():
    print(p)
    
admin
config
local
import pymongo
myclient = pymongo.MongoClient("mongodb://localhost:27017/")

dblist = myclient.list_database_names()
if "school" in dblist:
  print("The database : {} exists.".format("school"))
    
The database : school exists.
#Display all the collections (tables) in a database:

import pymongo
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
collist = mydb.list_collection_names()
for collectionName in collist:
    print(collectionName)
    
student
import pymongo
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
collist = mydb.list_collection_names()

if "student"  in collist:
        print("Collection student exists in school database")

Collection student exists in school database



#insert a document (Row) in a collection (table)
import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

personDict = { "firstname":"Priya","lastname":"Balakumaran","city":"Kandanur" }

x = mycol.insert_one(personDict)
 
print(x.inserted_id) 


#insert multiple documents - Multi row insert
#autogenerated ids
import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

mydocs = [
  { "firstname":"Ravi","lastname":"Rahul","city":"Pallathur"},
  { "firstname":"Siva","lastname":"Prasad","city":"Vadagudi"},
  { "firstname":"Arun","lastname":"Kumar","city":"Kanadukathan"},
  { "firstname":"Awesome","lastname":"Nator","city":"London"}
]

x = mycol.insert_many(mydocs)

#print list of the _id values of the inserted documents:
print(x.inserted_ids)


#insert multiple documents with _id specified explicitly

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

mydocs = [
  {"_id": 101,"firstname":"Kalai","lastname":"Selvi","city":"Aathalur"},
  {"_id": 102,"firstname":"Anna","lastname":"Malai","city":"Singapore"},
  {"_id": 103,"firstname":"Vanakkam","lastname":"da mappla","city":"Theni"},
  {"_id": 104 ,"firstname":"Anbu","lastname":"Sudha","city":"Bangalore"}
]

x = mycol.insert_many(mydocs)

#print list of the _id values of the inserted documents:
print(x.inserted_ids)


[101, 102, 103, 104]


#Find a single document in a collection (find_one with no filter returns the first document)

import pymongo
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

x = mycol.find_one()
print(x)
{'_id': ObjectId('5eba5eff2a9545fa868c9aa8'), 'firstname': 'Raja', 'lastname': 'Raman', 'city': 'Kottaiyur'}



#find : Select all : equivalent to SQL "select * from student"



import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
docs = mycol.find()
for d in docs:
    print (d)
{'_id': ObjectId('5eba5eff2a9545fa868c9aa8'), 'firstname': 'Raja', 'lastname': 'Raman', 'city': 'Kottaiyur'}
{'_id': ObjectId('5eba5f522a9545fa868c9aac'), 'firstname': 'Priya', 'lastname': 'Balakumaran', 'city': 'Kandanur'}
{'_id': ObjectId('5eba60012a9545fa868c9aae'), 'firstname': 'Ravi', 'lastname': 'Rahul', 'city': 'Pallathur'}
{'_id': ObjectId('5eba60012a9545fa868c9aaf'), 'firstname': 'Siva', 'lastname': 'Prasad', 'city': 'Vadagudi'}
{'_id': ObjectId('5eba60012a9545fa868c9ab0'), 'firstname': 'Arun', 'lastname': 'Kumar', 'city': 'Kanadukathan'}
{'_id': ObjectId('5eba60012a9545fa868c9ab1'), 'firstname': 'Awesome', 'lastname': 'Nator', 'city': 'London'}
{'_id': 101, 'firstname': 'Kalai', 'lastname': 'Selvi', 'city': 'Aathalur'}
{'_id': 102, 'firstname': 'Anna', 'lastname': 'Malai', 'city': 'Singapore'}
{'_id': 103, 'firstname': 'Vanakkam', 'lastname': 'da mappla', 'city': 'Theni'}
{'_id': 104, 'firstname': 'Anbu', 'lastname': 'Sudha', 'city': 'Bangalore'}



#display specific columns

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

for x in mycol.find({},{ "_id": 0, "firstname": 1, "lastname": 1, "city":1 }):
  print(x)
  
  
  {'firstname': 'Raja', 'lastname': 'Raman', 'city': 'Kottaiyur'}
{'firstname': 'Priya', 'lastname': 'Balakumaran', 'city': 'Kandanur'}
{'firstname': 'Ravi', 'lastname': 'Rahul', 'city': 'Pallathur'}
{'firstname': 'Siva', 'lastname': 'Prasad', 'city': 'Vadagudi'}
{'firstname': 'Arun', 'lastname': 'Kumar', 'city': 'Kanadukathan'}
{'firstname': 'Awesome', 'lastname': 'Nator', 'city': 'London'}
{'firstname': 'Kalai', 'lastname': 'Selvi', 'city': 'Aathalur'}
{'firstname': 'Anna', 'lastname': 'Malai', 'city': 'Singapore'}
{'firstname': 'Vanakkam', 'lastname': 'da mappla', 'city': 'Theni'}
{'firstname': 'Anbu', 'lastname': 'Sudha', 'city': 'Bangalore'}


#display only the last name:

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

for x in mycol.find({},{ "_id": 0,"city":0,"firstname":0  }):
  print(x)
  
  {'lastname': 'Raman'}
{'lastname': 'Balakumaran'}
{'lastname': 'Rahul'}
{'lastname': 'Prasad'}
{'lastname': 'Kumar'}
{'lastname': 'Nator'}
{'lastname': 'Selvi'}
{'lastname': 'Malai'}
{'lastname': 'da mappla'}
{'lastname': 'Sudha'}



#Display all fields except _id and city:

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

for x in mycol.find({},{ "_id":0,"city":0 }):
  print(x)
  
  {'firstname': 'Raja', 'lastname': 'Raman'}
{'firstname': 'Priya', 'lastname': 'Balakumaran'}
{'firstname': 'Ravi', 'lastname': 'Rahul'}
{'firstname': 'Siva', 'lastname': 'Prasad'}
{'firstname': 'Arun', 'lastname': 'Kumar'}
{'firstname': 'Awesome', 'lastname': 'Nator'}
{'firstname': 'Kalai', 'lastname': 'Selvi'}
{'firstname': 'Anna', 'lastname': 'Malai'}
{'firstname': 'Vanakkam', 'lastname': 'da mappla'}
{'firstname': 'Anbu', 'lastname': 'Sudha'}


#Search specific city

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": "London" }
mydoc = mycol.find(myquery)
for x in mydoc:
  print(x)
  
  {'_id': ObjectId('5eba60012a9545fa868c9ab1'), 'firstname': 'Awesome', 'lastname': 'Nator', 'city': 'London'}





#Search city > "S" ($gt is strictly greater-than; use $gte for >=)

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

myquery = { "city": { "$gt": "S" } }
mydoc = mycol.find(myquery)
for x in mydoc:
  print(x)
  
  
{'_id': ObjectId('5eba60012a9545fa868c9aaf'), 'firstname': 'Siva', 'lastname': 'Prasad', 'city': 'Vadagudi'}
{'_id': 102, 'firstname': 'Anna', 'lastname': 'Malai', 'city': 'Singapore'}
{'_id': 103, 'firstname': 'Vanakkam', 'lastname': 'da mappla', 'city': 'Theni'}


#Regular Expression. city contains 'S' (the pattern "S" is unanchored; use "^S" to match only cities that start with 'S')


import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": { "$regex": "S" } }
mydoc = mycol.find(myquery)
for x in mydoc:
  print(x)
  
  {'_id': 102, 'firstname': 'Anna', 'lastname': 'Malai', 'city': 'Singapore'}


#Order by City Ascending

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": { "$regex": "S" } }
mydoc = mycol.find().sort("city")
for x in mydoc:
  print(x)
  
  
  {'_id': 101, 'firstname': 'Kalai', 'lastname': 'Selvi', 'city': 'Aathalur'}
{'_id': 104, 'firstname': 'Anbu', 'lastname': 'Sudha', 'city': 'Bangalore'}
{'_id': ObjectId('5eba60012a9545fa868c9ab0'), 'firstname': 'Arun', 'lastname': 'Kumar', 'city': 'Kanadukathan'}
{'_id': ObjectId('5eba5f522a9545fa868c9aac'), 'firstname': 'Priya', 'lastname': 'Balakumaran', 'city': 'Kandanur'}
{'_id': ObjectId('5eba5eff2a9545fa868c9aa8'), 'firstname': 'Raja', 'lastname': 'Raman', 'city': 'Kottaiyur'}
{'_id': ObjectId('5eba60012a9545fa868c9ab1'), 'firstname': 'Awesome', 'lastname': 'Nator', 'city': 'London'}
{'_id': ObjectId('5eba60012a9545fa868c9aae'), 'firstname': 'Ravi', 'lastname': 'Rahul', 'city': 'Pallathur'}
{'_id': 102, 'firstname': 'Anna', 'lastname': 'Malai', 'city': 'Singapore'}
{'_id': 103, 'firstname': 'Vanakkam', 'lastname': 'da mappla', 'city': 'Theni'}
{'_id': ObjectId('5eba60012a9545fa868c9aaf'), 'firstname': 'Siva', 'lastname': 'Prasad', 'city': 'Vadagudi'}




#Order by City descending

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": { "$regex": "S" } }
mydoc = mycol.find().sort("city",-1)
for x in mydoc:
  print(x)



{'_id': ObjectId('5eba60012a9545fa868c9aaf'), 'firstname': 'Siva', 'lastname': 'Prasad', 'city': 'Vadagudi'}
{'_id': 103, 'firstname': 'Vanakkam', 'lastname': 'da mappla', 'city': 'Theni'}
{'_id': 102, 'firstname': 'Anna', 'lastname': 'Malai', 'city': 'Singapore'}
{'_id': ObjectId('5eba60012a9545fa868c9aae'), 'firstname': 'Ravi', 'lastname': 'Rahul', 'city': 'Pallathur'}
{'_id': ObjectId('5eba60012a9545fa868c9ab1'), 'firstname': 'Awesome', 'lastname': 'Nator', 'city': 'London'}
{'_id': ObjectId('5eba5eff2a9545fa868c9aa8'), 'firstname': 'Raja', 'lastname': 'Raman', 'city': 'Kottaiyur'}
{'_id': ObjectId('5eba5f522a9545fa868c9aac'), 'firstname': 'Priya', 'lastname': 'Balakumaran', 'city': 'Kandanur'}
{'_id': ObjectId('5eba60012a9545fa868c9ab0'), 'firstname': 'Arun', 'lastname': 'Kumar', 'city': 'Kanadukathan'}
{'_id': 104, 'firstname': 'Anbu', 'lastname': 'Sudha', 'city': 'Bangalore'}
{'_id': 101, 'firstname': 'Kalai', 'lastname': 'Selvi', 'city': 'Aathalur'}


#Delete single doc

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": "London" }
mycol.delete_one(myquery)



#Delete multiple docs

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": { "$gt": "S" } }  # city > "S" ($gt is strictly greater-than)
x = mycol.delete_many(myquery)
print(x.deleted_count, " documents deleted.")

3  documents deleted.



#Delete all docs / rows in a collection

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": { "$gt": "S" } }
x = mycol.delete_many({})
print(x.deleted_count, " documents deleted.")

6  documents deleted.


#Drop a collection

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
mycol.drop()




#Adding documents again

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

mydocs = [
  {"_id": 101,"firstname":"Kalai","lastname":"Selvi","city":"Aathalur"},
  {"_id": 102,"firstname":"Anna","lastname":"Malai","city":"Singapore"},
  {"_id": 103,"firstname":"Vanakkam","lastname":"da mappla","city":"Theni"},
  {"_id": 104 ,"firstname":"Anbu","lastname":"Sudha","city":"Bangalore"}
]

x = mycol.insert_many(mydocs)

#print list of the _id values of the inserted documents:
print(x.inserted_ids)


import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

mydocs = [
  { "firstname":"Ravi","lastname":"Rahul","city":"Pallathur"},
  { "firstname":"Siva","lastname":"Prasad","city":"Vadagudi"},
  { "firstname":"Arun","lastname":"Kumar","city":"Kanadukathan"},
  { "firstname":"Awesome","lastname":"Nator","city":"London"}
]

x = mycol.insert_many(mydocs)

#print list of the _id values of the inserted documents:
print(x.inserted_ids)


#limit example.. display 3 records


import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]

myresult = mycol.find().limit(3)

#print the result:
for x in myresult:
  print(x)
  
  {'_id': ObjectId('5eba65732a9545fa868c9acf'), 'firstname': 'Ravi', 'lastname': 'Rahul', 'city': 'Pallathur'}
{'_id': ObjectId('5eba65732a9545fa868c9ad0'), 'firstname': 'Siva', 'lastname': 'Prasad', 'city': 'Vadagudi'}
{'_id': ObjectId('5eba65732a9545fa868c9ad1'), 'firstname': 'Arun', 'lastname': 'Kumar', 'city': 'Kanadukathan'}


#Update one record - Single record update
#London to Pudukkottai

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": "London" }
newvalues = { "$set": { "city": "Pudukkottai" } }
mycol.update_one(myquery, newvalues)

for x in mycol.find():
  print(x)


{'_id': ObjectId('5eba65732a9545fa868c9acf'), 'firstname': 'Ravi', 'lastname': 'Rahul', 'city': 'Pallathur'}
{'_id': ObjectId('5eba65732a9545fa868c9ad0'), 'firstname': 'Siva', 'lastname': 'Prasad', 'city': 'Vadagudi'}
{'_id': ObjectId('5eba65732a9545fa868c9ad1'), 'firstname': 'Arun', 'lastname': 'Kumar', 'city': 'Kanadukathan'}
{'_id': ObjectId('5eba65732a9545fa868c9ad2'), 'firstname': 'Awesome', 'lastname': 'Nator', 'city': 'Pudukkottai'}
{'_id': 101, 'firstname': 'Kalai', 'lastname': 'Selvi', 'city': 'Aathalur'}
{'_id': 102, 'firstname': 'Anna', 'lastname': 'Malai', 'city': 'Singapore'}
{'_id': 103, 'firstname': 'Vanakkam', 'lastname': 'da mappla', 'city': 'Theni'}
{'_id': 104, 'firstname': 'Anbu', 'lastname': 'Sudha', 'city': 'Bangalore'}


#Update multiple documents

import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["school"]
mycol = mydb["student"]
myquery = { "city": { "$gt": "S" } }
newvalues = { "$set": { "city": "Pillaiyarpatti" } }
x = mycol.update_many(myquery, newvalues)
print(x.modified_count, "documents updated.")


2 documents updated.

Thursday, 28 February 2019

Spark with MongoDB - Read / Write Operations

$ mongo // start CLI
MongoDB shell version v3.6.3
connecting to: mongodb://127.0.0.1:27017
MongoDB server version: 3.6.3

// Adding 2 records in person

> db.person.insert([{
   "id":100,
           "name":"Sankara",
           "salary":3000,
"city":"Pallathur"
         
        },
        {
           "id":101,
           "name":"Rasee",
           "salary":3100,
"city":"Kanadukathan"
        }])
BulkWriteResult({
"writeErrors" : [ ],
"writeConcernErrors" : [ ],
"nInserted" : 2,
"nUpserted" : 0,
"nMatched" : 0,
"nModified" : 0,
"nRemoved" : 0,
"upserted" : [ ]
})

// select
> db.person.find()
{ "_id" : ObjectId("5c780db0d44e3fc2ebd26d43"), "id" : 100, "name" : "Sankara", "salary" : 3000, "city" : "Pallathur" }
{ "_id" : ObjectId("5c780db0d44e3fc2ebd26d44"), "id" : 101, "name" : "Rasee", "salary" : 3100, "city" : "Kanadukathan" }

// beautify
> db.person.find().pretty()
{
"_id" : ObjectId("5c780db0d44e3fc2ebd26d43"),
"id" : 100,
"name" : "Sankara",
"salary" : 3000,
"city" : "Pallathur"
}
{
"_id" : ObjectId("5c780db0d44e3fc2ebd26d44"),
"id" : 101,
"name" : "Rasee",
"salary" : 3100,
"city" : "Kanadukathan"
}

//search with condition
> db.person.find({"name":"Sankara"})
{ "_id" : ObjectId("5c780db0d44e3fc2ebd26d43"), "id" : 100, "name" : "Sankara", "salary" : 3000, "city" : "Pallathur" }


// Spark starts here
spark-shell --packages org.mongodb.spark:mongo-spark-connector_2.11:2.4.0
 
import com.mongodb.spark._
import com.mongodb.spark.config.ReadConfig
import com.mongodb.spark.sql._
 import org.bson.Document
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("MongoPerson").master("local[*]").getOrCreate()
val readConfig = ReadConfig(Map("uri" -> "mongodb://127.0.0.1/", "database" -> "test", "collection" -> "person"))
val df = spark.read.mongo(readConfig)  // test.person content populated here


scala> df.show
+--------------------+------------+-----+-------+------+
|                 _id|        city|   id|   name|salary|
+--------------------+------------+-----+-------+------+
|[5c780db0d44e3fc2...|   Pallathur|100.0|Sankara|3000.0|
|[5c780db0d44e3fc2...|Kanadukathan|101.0|  Rasee|3100.0|
+--------------------+------------+-----+-------+------+


scala> df.select("city","id","name","salary").show
+------------+-----+-------+------+
|        city|   id|   name|salary|
+------------+-----+-------+------+
|   Pallathur|100.0|Sankara|3000.0|
|Kanadukathan|101.0|  Rasee|3100.0|
+------------+-----+-------+------+

scala> df.createOrReplaceTempView("tblPerson") // SparkSQL

scala> spark.sql("select * from tblPerson").show
+--------------------+------------+-----+-------+------+
|                 _id|        city|   id|   name|salary|
+--------------------+------------+-----+-------+------+
|[5c780db0d44e3fc2...|   Pallathur|100.0|Sankara|3000.0|
|[5c780db0d44e3fc2...|Kanadukathan|101.0|  Rasee|3100.0|
+--------------------+------------+-----+-------+------+

scala> spark.sql("select id,name,city,salary from tblPerson").filter("salary==3000").show
+-----+-------+---------+------+
|   id|   name|     city|salary|
+-----+-------+---------+------+
|100.0|Sankara|Pallathur|3000.0|
+-----+-------+---------+------+


// Making In-memory collection in Spark
scala> case class Person(id:Int, name:String, city:String, salary:Int)
defined class Person

scala> val ob1 = new Person(500,"Sathya","Bangalore",3000)
ob1: Person = Person(500,Sathya,Bangalore,3000)

scala> val ob2 = new Person(501,"Lakshmi","Chennai",3100)
ob2: Person = Person(501,Lakshmi,Chennai,3100)

scala> val ob3 = new Person(502,"Kalai","Perai",3200)
ob3: Person = Person(502,Kalai,Perai,3200)

scala> val r1 = sc.parallelize(List(ob1,ob2,ob3))
r1: org.apache.spark.rdd.RDD[Person] = ParallelCollectionRDD[40] at parallelize at <console>:40

scala> r1.collect.foreach(println)
Person(500,Sathya,Bangalore,3000)
Person(501,Lakshmi,Chennai,3100)
Person(502,Kalai,Perai,3200)

// Converting the in-memory collection into a DataFrame
scala> val dfCollection = r1.toDF
dfCollection: org.apache.spark.sql.DataFrame = [id: int, name: string ... 2 more fields]

scala> dfCollection.printSchema
root
 |-- id: integer (nullable = false)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- salary: integer (nullable = false)


scala> dfCollection.show
+---+-------+---------+------+
| id|   name|     city|salary|
+---+-------+---------+------+
|500| Sathya|Bangalore|  3000|
|501|Lakshmi|  Chennai|  3100|
|502|  Kalai|    Perai|  3200|
+---+-------+---------+------+



// Going to write in-memory collection into Mongo
scala> import com.mongodb.spark.config.{ReadConfig, WriteConfig}
import com.mongodb.spark.config.{ReadConfig, WriteConfig}

// Writer Configuration : test.person
scala> val writeConfig = WriteConfig(Map("uri" -> "mongodb://127.0.0.1/test.person"))
writeConfig: com.mongodb.spark.config.WriteConfig.Self = WriteConfig(test,person,Some(mongodb://127.0.0.1/test.person),true,512,15,WriteConcernConfig(None,None,None,None),None,false,true)

scala> MongoSpark.save(dfCollection.write.mode("append"), writeConfig)


// Back to Mongo
db.person.find().pretty()
{
"_id" : ObjectId("5c7815d1d44e3fc2ebd26d47"),
"id" : 100,
"name" : "Sankara",
"salary" : 3000,
"city" : "Pallathur"
}
{
"_id" : ObjectId("5c7815d1d44e3fc2ebd26d48"),
"id" : 101,
"name" : "Rasee",
"salary" : 3100,
"city" : "Kanadukathan"
}
{
"_id" : ObjectId("5c7816f165e80e7efcae207d"),
"id" : 500,
"name" : "Sathya",
"city" : "Bangalore",
"salary" : 3000
}
{
"_id" : ObjectId("5c7816f165e80e7efcae207e"),
"id" : 501,
"name" : "Lakshmi",
"city" : "Chennai",
"salary" : 3100
}
{
"_id" : ObjectId("5c7816f165e80e7efcae207f"),
"id" : 502,
"name" : "Kalai",
"city" : "Perai",
"salary" : 3200
}

// Back to Spark
scala> df.show
+--------------------+------------+---+-------+------+
|                 _id|        city| id|   name|salary|
+--------------------+------------+---+-------+------+
|[5c7815d1d44e3fc2...|   Pallathur|100|Sankara|  3000|
|[5c7815d1d44e3fc2...|Kanadukathan|101|  Rasee|  3100|
|[5c7816f165e80e7e...|   Bangalore|500| Sathya|  3000|
|[5c7816f165e80e7e...|     Chennai|501|Lakshmi|  3100|
|[5c7816f165e80e7e...|       Perai|502|  Kalai|  3200|
+--------------------+------------+---+-------+------+

// Dropping in Mongo
>db.person.drop()
true

// Dataframe is empty
scala> df.show
+---+----+---+----+------+
|_id|city| id|name|salary|
+---+----+---+----+------+
+---+----+---+----+------+

Spark with MongoDB integration


<!-- https://mvnrepository.com/artifact/org.mongodb.spark/mongo-spark-connector -->
<dependency>
    <groupId>org.mongodb.spark</groupId>
    <artifactId>mongo-spark-connector_2.11</artifactId>
    <version>2.4.0</version>
</dependency>



spark-shell --packages org.mongodb.spark:mongo-spark-connector_2.11:2.4.0


scala> import com.mongodb.spark._
import com.mongodb.spark._

scala> import com.mongodb.spark.config.ReadConfig
import com.mongodb.spark.config.ReadConfig

scala> import com.mongodb.spark.sql._
import com.mongodb.spark.sql._

scala> import org.bson.Document
import org.bson.Document

scala> import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession

scala> import org.apache.spark.sql.functions.{max, min}
import org.apache.spark.sql.functions.{max, min}

scala> val spark = SparkSession.builder().appName("MongoPlayers").master("local[*]").getOrCreate()

scala> val readConfig = ReadConfig(Map("uri" -> "mongodb://127.0.0.1/", "database" -> "test", "collection" -> "players"))

scala>  val df = spark.read.mongo(readConfig)

scala>  df.printSchema()
root
 |-- _id: struct (nullable = true)
 |    |-- oid: string (nullable = true)
 |-- age: double (nullable = true)
 |-- birthdate: string (nullable = true)
 |-- birthplace: string (nullable = true)
 |-- height: string (nullable = true)
 |-- id: double (nullable = true)
 |-- imageUrl: string (nullable = true)
 |-- name: string (nullable = true)
 |-- number: double (nullable = true)
 |-- position: string (nullable = true)
 |-- twitterHandle: string (nullable = true)
 |-- twitterURL: string (nullable = true)
 |-- weight: double (nullable = true)


scala> df.show()

Playing with MongoDB

$ mongo // start CLI
MongoDB shell version v3.6.3
connecting to: mongodb://127.0.0.1:27017
MongoDB server version: 3.6.3

// create a json object
 post = { "position":"Right Wing", "id": 84, "weight":200, "height":"6' 0\"", "imageurl" : "url", "birthplace" : "Seria,BRN", "age":37, "name":"Craig Adams", "birthdate":"April 26,1977", "number":27 }

// insert single
db.players.insert(post); 

//insert multiple
db.players.insert([{ 
         "position":"Right Wing",
         "id":8465166,
         "weight":200,
         "height":"6' 0\"",
         "imageUrl":"http://1.cdn.nhle.com/photos/mugs/8465166.jpg",
         "birthplace":"Seria, BRN",
         "age":37,
         "name":"Craig Adams",
         "birthdate":"April 26, 1977",
         "number":27
      },
      { 
         "position":"Right Wing",
         "id":8475761,
         "weight":195,
         "height":"6' 2\"",
         "imageUrl":"http://1.cdn.nhle.com/photos/mugs/8475761.jpg",
         "birthplace":"Gardena, CA, USA",
         "age":23,
         "name":"Beau Bennett",
         "birthdate":"November 27, 1991",
         "number":19
      }])

BulkWriteResult({
"writeErrors" : [ ],
"writeConcernErrors" : [ ],
"nInserted" : 2,
"nUpserted" : 0,
"nMatched" : 0,
"nModified" : 0,
"nRemoved" : 0,
"upserted" : [ ]
})

// select all
db.players.find()
{ "_id" : ObjectId("5c77b1dbd44e3fc2ebd26d2a"), "position" : "Right Wing", "id" : 8465166, "weight" : 200, "height" : "6' 0\"", "imageUrl" : "http://1.cdn.nhle.com/photos/mugs/8465166.jpg", "birthplace" : "Seria, BRN", "age" : 37, "name" : "Craig Adams", "birthdate" : "April 26, 1977", "number" : 27 }
{ "_id" : ObjectId("5c77b1dbd44e3fc2ebd26d2b"), "position" : "Right Wing", "id" : 8475761, "weight" : 195, "height" : "6' 2\"", "imageUrl" : "http://1.cdn.nhle.com/photos/mugs/8475761.jpg", "birthplace" : "Gardena, CA, USA", "age" : 23, "name" : "Beau Bennett", "birthdate" : "November 27, 1991", "number" : 19 }
{ "_id" : ObjectId("5c77b1dbd44e3fc2ebd26d2c"), "position" : "Left Wing", "id" : 8471260, "weight" : 202, "height" : "6' 1\"", "imageUrl" : "http://3.cdn.nhle.com/photos/mugs/8471260.jpg", "birthplace" : "Meadow Lake, SK, CAN", "age" : 29, "name" : "Blake Comeau", "birthdate" : "February 18, 1986", "number" : 17 }

// select with condition
>  db.players.find(
... {"position":"Goalie"}
... )
{ "_id" : ObjectId("5c77b1dbd44e3fc2ebd26d41"), "position" : "Goalie", "id" : 8470594, "weight" : 180, "height" : "6' 2\"", "imageUrl" : "http://3.cdn.nhle.com/photos/mugs/8470594.jpg", "birthplace" : "Sorel, QC, CAN", "age" : 30, "name" : "Marc-Andre Fleury", "birthdate" : "November 28, 1984", "number" : 29 }
{ "_id" : ObjectId("5c77b1dbd44e3fc2ebd26d42"), "position" : "Goalie", "id" : 8471306, "weight" : 220, "height" : "6' 1\"", "imageUrl" : "http://1.cdn.nhle.com/photos/mugs/8471306.jpg", "birthplace" : "Fussen, DEU", "age" : 29, "name" : "Thomas Greiss", "birthdate" : "January 29, 1986", "number" : 1 }


// display the db
> db
test

// display it in pretty way
>  db.players.find().pretty()
{
"_id" : ObjectId("5c77b1dbd44e3fc2ebd26d2a"),
"position" : "Right Wing",
"id" : 8465166,
"weight" : 200,
"height" : "6' 0\"",
"imageUrl" : "http://1.cdn.nhle.com/photos/mugs/8465166.jpg",
"birthplace" : "Seria, BRN",
"age" : 37,
"name" : "Craig Adams",
"birthdate" : "April 26, 1977",
"number" : 27
}
{
"_id" : ObjectId("5c77b1dbd44e3fc2ebd26d2b"),
"position" : "Right Wing",
"id" : 8475761,
"weight" : 195,
"height" : "6' 2\"",
"imageUrl" : "http://1.cdn.nhle.com/photos/mugs/8475761.jpg",
"birthplace" : "Gardena, CA, USA",
"age" : 23,
"name" : "Beau Bennett",
"birthdate" : "November 27, 1991",
"number" : 19
}

// show all collections (the MongoDB equivalent of tables)
>  show collections
players



>  db.players.findOne();
{
"_id" : ObjectId("5c77b1dbd44e3fc2ebd26d2a"),
"position" : "Right Wing",
"id" : 8465166,
"weight" : 200,
"height" : "6' 0\"",
"imageUrl" : "http://1.cdn.nhle.com/photos/mugs/8465166.jpg",
"birthplace" : "Seria, BRN",
"age" : 37,
"name" : "Craig Adams",
"birthdate" : "April 26, 1977",
"number" : 27
}

// remove a record
> db.players.remove({"id":8465166 })
WriteResult({ "nRemoved" : 1 })

// drop the entire collection (all documents and indexes)
> db.players.drop()
true

MongoDB Installation steps in Ubuntu 18.10

MongoDB Installation:
-----------------------
$ sudo apt update

$ sudo apt install mongodb

$ sudo systemctl status mongodb

$ sudo systemctl start / stop / restart / disable / enable mongodb

$ sudo systemctl start mongodb // start it

$ mongo  // to start with CLI
MongoDB shell version v3.6.3
connecting to: mongodb://127.0.0.1:27017
MongoDB server version: 3.6.3



Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...