#Series example: the default indexes are 0, 1, 2, 3, ...
import pandas as pd
s1 = pd.Series([1,2,3,4,5,6,7,8,9,10])
print(s1)
0 1
1 2
2 3
3 4
4 5
5 6
6 7
7 8
8 9
9 10
dtype: int64
type(s1)
pandas.core.series.Series
#Series example with Custom indexes
import pandas as pd
s1 = pd.Series([86,63,85,81,90],index=["Tamil","English","Maths","Science","Social"])
print(s1)
Tamil 86
English 63
Maths 85
Science 81
Social 90
dtype: int64
#Passing dictionary object to the Series
#Keys of a given Dictionary will become Indexes
import pandas as pd
subjectDict = {"Tamil":85, "English":63, "Maths":85, "Science":81, "Social":90}
s1 = pd.Series(subjectDict)
print(s1)
Tamil 85
English 63
Maths 85
Science 81
Social 90
dtype: int64
# 'b' and 'd' are not keys of the given dictionary, so NaN is assigned to them; 'e' is dropped because it is not in the index list
import pandas as pd
s1 = pd.Series({"a":10,"c":30,"e":40},index=["b","c","d","a"])
print(s1)
b NaN
c 30.0
d NaN
a 10.0
dtype: float64
s1 = pd.Series([5,7,3,2,88,22,-1,0,33])
print(s1[3])
2
print(s1[:2])
0 5
1 7
dtype: int64
print(s1[-1:])
8 33
dtype: int64
print(s1[:6])
0 5
1 7
2 3
3 2
4 88
5 22
dtype: int64
#Arithmetic operations
s1 = pd.Series([10,20,30,40])
s2 = pd.Series([11,22,33,44])
s3 = s1 + s2
print(s3)
0 21
1 42
2 63
3 84
dtype: int64
s1 = pd.Series([11,66,77,55])
s2 = pd.Series([5,22,22,44])
s3 = s1 - s2
print(s3)
0 6
1 44
2 55
3 11
dtype: int64
print(s1+15)
0 26
1 81
2 92
3 70
dtype: int64
print(s2 ** 1.3)
0 8.103283
1 55.609563
2 55.609563
3 136.926807
dtype: float64
s1 = pd.Series([1,2,3])
s2 = pd.Series([6,7,8])
print(s1,s2)
0 1
1 2
2 3
dtype: int64 0 6
1 7
2 8
dtype: int64
print(s1+s2)
0 7
1 9
2 11
dtype: int64
print(s1*s2)
0 6
1 14
2 24
dtype: int64
print(s1-s2, s2-s1)
0 -5
1 -5
2 -5
dtype: int64 0 5
1 5
2 5
dtype: int64
#DataFrame Example
import pandas as pd
subjectDict = {"Subjects":["Tamil","English","Maths","Science","Social"],"Marks":[86,63,85,81,90]}
df = pd.DataFrame(subjectDict)
print(df)
Subjects Marks
0 Tamil 86
1 English 63
2 Maths 85
3 Science 81
4 Social 90
import pandas as pd
subjectDict = {"Names":["Arjun","Ram","Biswa","Kalai","Nila"],"Age":[78,37,88,43,93]}
df = pd.DataFrame(subjectDict)
print(df)
Names Age
0 Arjun 78
1 Ram 37
2 Biswa 88
3 Kalai 43
4 Nila 93
df = pd.read_csv("https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv")
df.head()
df = pd.read_csv("E:\\PyExa\\iris.csv")
df.head()
sepal.length sepal.width petal.length petal.width variety
0 5.1 3.5 1.4 0.2 Setosa
1 4.9 3.0 1.4 0.2 Setosa
2 4.7 3.2 1.3 0.2 Setosa
3 4.6 3.1 1.5 0.2 Setosa
4 5.0 3.6 1.4 0.2 Setosa
df.tail()
sepal.length sepal.width petal.length petal.width variety
145 6.7 3.0 5.2 2.3 Virginica
146 6.3 2.5 5.0 1.9 Virginica
147 6.5 3.0 5.2 2.0 Virginica
148 6.2 3.4 5.4 2.3 Virginica
149 5.9 3.0 5.1 1.8 Virginica
print(df.shape)
(150, 5) # 150 X 5 ==> 150 Rows X 5 columns
df.describe()
sepal.length sepal.width petal.length petal.width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.057333 3.758000 1.199333
std 0.828066 0.435866 1.765298 0.762238
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
df.iloc[0:3,0:2]
#1st 3 Rows and 1st 2 Columns
sepal.length sepal.width
0 5.1 3.5
1 4.9 3.0
2 4.7 3.2
df.iloc[0:4,0:4]
#1st 4 Rows and 1st 4 Columns
sepal.length sepal.width petal.length petal.width
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
df.loc[0:7,("petal.length","petal.width","variety")]
#First 8 rows (labels 0 through 7 -- .loc slicing includes the end label) and the specified columns
petal.length petal.width variety
0 1.4 0.2 Setosa
1 1.4 0.2 Setosa
2 1.3 0.2 Setosa
3 1.5 0.2 Setosa
4 1.4 0.2 Setosa
5 1.7 0.4 Setosa
6 1.4 0.3 Setosa
7 1.5 0.2 Setosa
#Drop variety column in the dataframe
s1 = df.drop("variety",axis=1)
print (s1.head())
sepal.length sepal.width petal.length petal.width
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
#Drop the 3 rows labelled 1, 2 and 3
s1 = df.drop([1,2,3],axis=0)
print(s1.head())
sepal.length sepal.width petal.length petal.width variety
0 5.1 3.5 1.4 0.2 Setosa
4 5.0 3.6 1.4 0.2 Setosa
5 5.4 3.9 1.7 0.4 Setosa
6 4.6 3.4 1.4 0.3 Setosa
7 5.0 3.4 1.5 0.2 Setosa
# Average only the numeric columns: "variety" holds strings, and pandas >= 2.0
# raises TypeError instead of silently skipping non-numeric columns.
df.mean(numeric_only=True)
sepal.length 5.843333
sepal.width 3.057333
petal.length 3.758000
petal.width 1.199333
dtype: float64
# Take the median of only the numeric columns: "variety" holds strings, and
# pandas >= 2.0 raises TypeError instead of silently skipping such columns.
df.median(numeric_only=True)
sepal.length 5.80
sepal.width 3.00
petal.length 4.35
petal.width 1.30
dtype: float64
df.min()
sepal.length 4.3
sepal.width 2
petal.length 1
petal.width 0.1
variety Setosa
dtype: object
df.max()
sepal.length 7.9
sepal.width 4.4
petal.length 6.9
petal.width 2.5
variety Virginica
dtype: object
#applying user defined function
def half(s):
    """Return ``s`` multiplied by 0.5.

    ``s`` may be a plain number or any object that supports ``* 0.5``;
    when used with ``DataFrame.apply`` each column is halved elementwise.
    """
    return s * 0.5
s1 = df[["sepal.length","petal.length"]].apply(half) #half is the udf
print(df[["sepal.length","petal.length"]].head())
print(s1.head())
sepal.length petal.length
0 5.1 1.4
1 4.9 1.4
2 4.7 1.3
3 4.6 1.5
4 5.0 1.4
sepal.length petal.length
0 2.55 0.70
1 2.45 0.70
2 2.35 0.65
3 2.30 0.75
4 2.50 0.70
#user defined function to double the dataframe values
def double_make(s):
    """Return ``s`` multiplied by 2.

    ``s`` may be a plain number or any object that supports ``* 2``;
    when used with ``DataFrame.apply`` each column is doubled elementwise.
    """
    return s * 2
print(df[["sepal.width","petal.width"]].head(5))
s1 = df[["sepal.width","petal.width"]].apply(double_make)
print(s1.head())
sepal.width petal.width
0 3.5 0.2
1 3.0 0.2
2 3.2 0.2
3 3.1 0.2
4 3.6 0.2
sepal.width petal.width
0 7.0 0.4
1 6.0 0.4
2 6.4 0.4
3 6.2 0.4
4 7.2 0.4
#Count how many times each distinct value occurs in a particular column
s1 = df["variety"].value_counts()
print(s1)
Virginica 50
Setosa 50
Versicolor 50
Name: variety, dtype: int64
#Sort the rows in ascending order of sepal.length
df.sort_values(by="sepal.length").head()
sepal.length sepal.width petal.length petal.width variety
13 4.3 3.0 1.1 0.1 Setosa
42 4.4 3.2 1.3 0.2 Setosa
38 4.4 3.0 1.3 0.2 Setosa
8 4.4 2.9 1.4 0.2 Setosa
41 4.5 2.3 1.3 0.3 Setosa
No comments:
Post a Comment