Sankara's Big Data Notes: logic

Showing posts with label logic. Show all posts

Sunday, 7 April 2019

Find 2nd Maximum Salary from employee dataframe in Spark with Scala

scala> dfEmp.orderBy(desc("salary")).show
+---+-------+------+------+------+
| id| name|gender|salary|deptid|
+---+-------+------+------+------+
|106|Ayeesha| f| 4000| 10|
|105| Priya| f| 3600| 12|
|104| Rashee| f| 3500| 11|
|109| Vinay| m| 3200| 10|
|102| Suresh| m| 3000| 12|
|108| Arushi| f| 2800| 12|
|111| Shilpa| f| 2600| 12|
|110| Kalai| f| 2550| 11|
|107| Aruvi| f| 2500| 11|
|101| Rani| f| 2000| 11|
|103| Rahul| m| 1250| 10|
|100| Ravi| m| 1000| 10|
+---+-------+------+------+------+

scala> dfEmp.where($"salary" < dfEmp.agg(max("salary")).first().getInt(0)).orderBy(desc("salary")).show(1)
+---+-----+------+------+------+
| id| name|gender|salary|deptid|
+---+-----+------+------+------+
|105|Priya| f| 3600| 12|
+---+-----+------+------+------+
only showing top 1 row

// Employee with the 2nd highest salary
hive> select * from emp where salary not in (select max(salary) from emp ) order by salary desc limit 1;
105 Priya f 3600 12

hive> select * from (select * from emp sort by salary desc limit 2) result sort by salary limit 1;

105 Priya f 3600 12

scala> dfEmp.orderBy(desc("Salary")).limit(2).orderBy("salary").show(1);
+---+-----+------+------+------+
| id| name|gender|salary|deptid|
+---+-----+------+------+------+
|105|Priya| f| 3600| 12|
+---+-----+------+------+------+
only showing top 1 row

scala> dfEmp.orderBy(desc("Salary")).take(2)
res87: Array[org.apache.spark.sql.Row] = Array([106,Ayeesha,f,4000,10], [105,Priya,f,3600,12])

scala> dfEmp.orderBy(desc("Salary")).take(2)(1);
res91: org.apache.spark.sql.Row = [105,Priya,f,3600,12]

Thursday, 4 April 2019

Calculate the square root of the sum of squares of the numbers in a given file - using a UDF that uses Option / Some / None

Calculate the square root of the sum of squares of the numbers in a given file - using a UDF that uses Option / Some / None

// Exclude all non-numeric fields from each line, then find the square root of the sum of the squares of the remaining numbers

$ cat charsAndNumbers.txt
1,a,b,c,2,3,4
2,3,4,x,y,z
s,t,u,5,2
m,n,8,10
5,2,1,a,x,y
7,a,x,2,6,h

scala> val r1 = sc.textFile("/home/hadoop/Desktop/vow/charsAndNumbers.txt")

// user defined function to extract only integers and exclude all characters
/**
 * Parses `s` as an `Int`.
 *
 * @param s text of a single field, e.g. "12" or "x"
 * @return `Some(n)` when `s` is a valid integer literal that fits in an `Int`,
 *         `None` when it is not a number
 */
def toInt(s: String): Option[Int] =
  try {
    Some(s.toInt)
  } catch {
    // Only a parse failure means "not a number"; any other exception should surface
    // instead of being silently turned into None.
    case _: NumberFormatException => None
  }

// For every input line: split on commas, keep the fields that parse as integers
// and add up their squares. Non-numeric fields contribute nothing to the sum.
val r2 = r1.map { line =>
  line
    .split(",")
    .map(field => toInt(field).getOrElse(0)) // calling UDF; non-numeric fields become 0
    .filter(_ != 0)
    .map(n => n * n)
    .sum
}

scala> r2.collect
res1: Array[Int] = Array(30, 29, 29, 164, 30, 89)

scala> val result = r2.reduce(_+_)
result: Int = 371

scala> val finalResult = scala.math.sqrt(result)
finalResult: Double = 19.261360284258224

scala> scala.math.sqrt( (1*1) + (2*2) + (3*3) + (4*4)
| + (2*2) + (3*3) + (4*4)
| + (5*5) + (2*2)
| + (8*8) + (10*10)
| + (5*5) + (2*2) + (1*1)
| + (7*7) + (2*2) + (6*6))
res8: Double = 19.261360284258224

Find the square root of the sum of squares of the numbers in a file using Spark with Scala

// Exclude all non-numeric fields from each line, then find the square root of the sum of the squares of the remaining numbers

// The given input file has characters and numbers separated by commas
$ cat charsAndNumbers.txt
1,a,b,c,2,3,4
2,3,4,x,y,z
s,t,u,5,2
m,n,8,10
5,2,1,a,x,y
7,a,x,2,6,h

scala.math.sqrt( (1*1) + (2*2) + (3*3) + (4*4)
+ (2*2) + (3*3) + (4*4)
+ (5*5) + (2*2)
+ (8*8) + (10*10)
+ (5*5) + (2*2) + (1*1)
+ (7*7) + (2*2) + (6*6))

scala> val r1 = sc.textFile("/home/hadoop/Desktop/vow/charsAndNumbers.txt")

scala> r1.foreach(println)
1,a,b,c,2,3,4
2,3,4,x,y,z
s,t,u,5,2
m,n,8,10
5,2,1,a,x,y
7,a,x,2,6,h

// For every input line: split on commas and sum the squares of the fields that
// are integers. Each field is parsed once; a field that is not a valid integer
// is skipped by leaving the running total unchanged.
val r2 = r1.map { line =>
  line.split(",").foldLeft(0) { (total, field) =>
    try {
      val n = field.toInt
      total + n * n
    } catch {
      // Only a parse failure means "not a number"; other exceptions must not be hidden.
      case _: NumberFormatException => total
    }
  }
}

scala> r2.collect
res1: Array[Int] = Array(30, 29, 29, 164, 30, 89)

scala> (1*1) + (2*2) + (3*3) + (4*4)
res2: Int = 30

scala> (2*2) + (3*3) + (4*4)
res3: Int = 29

scala> (5*5) + (2*2)
res4: Int = 29

scala> (8*8) + (10*10)
res5: Int = 164

scala> (5*5) + (2*2) + (1*1)
res6: Int = 30

scala> (7*7) + (2*2) + (6*6)
res7: Int = 89

scala> r2.foreach(println)
30
29
29
164
30
89

scala> val result = r2.reduce(_+_)
result: Int = 371

scala> val finalResult = scala.math.sqrt(result)
finalResult: Double = 19.261360284258224

scala> scala.math.sqrt( (1*1) + (2*2) + (3*3) + (4*4)
| + (2*2) + (3*3) + (4*4)
| + (5*5) + (2*2)
| + (8*8) + (10*10)
| + (5*5) + (2*2) + (1*1)
| + (7*7) + (2*2) + (6*6))
res8: Double = 19.261360284258224

Wednesday, 27 March 2019

Compare 2 different strings: find the matching characters and count their occurrences

// Here we compare 2 different strings: for each distinct character of the first string, count how many times it occurs in the second

val firstString = "uuai aao ioaau eieoiou"
val secondString = "i love india and singapore"

// Distinct characters of firstString in ascending order.
// Working on the string directly (instead of copying it into a fixed 50-slot
// Array[Char]) removes the length limit and avoids the '\u0000' padding that
// would otherwise show up as a bogus entry in the result.
val leftString = firstString.distinct.sorted

// For each distinct character of firstString: the number of times it occurs in
// secondString. Keys and counts are kept as strings. The space character is a
// character of firstString too, so it is counted like any other.
val output: Map[String, String] =
  leftString.map(c => c.toString -> secondString.count(_ == c).toString).toMap

println(output)
// Entries printed (Map iteration order is unspecified):
//   " " -> 4, a -> 3, e -> 2, i -> 4, o -> 2, u -> 0