Notebook PYTHON DATA SCIENCE
Notebook PYTHON DATA SCIENCE
Name,Email,Phone Number,Address
Bob Smith,[email protected],123-456-7890,123 Fake Street
Mike Jones,[email protected],098-765-4321,321 Fake venue
[ ]: import csv
[ ]: import json
1
print(row)
[ ]: import os
[ ]: os.makedirs('Test.csv')
[ ]: os.rmdir('/content/Test.csv')
3 PANDAS
[ ]: import pandas
print('Now Pandas is imported and ready to use')
[ ]: import pandas as pd
[ ]: print(pd.__version__)
1.1.5
Series
DataFrame
[ ]: ms=pd.Series(data, index)
0 11
1 17
2 22
dtype: int64
1 11
2 17
3 22
dtype: int64
[ ]: print(data['a'])
11
[ ]: series1+series2
[ ]: series1-series2
[ ]: series1*series2
[ ]: series1/series2
DATAFRAME
pd.DataFrame(data,index)
Name Age
0 Abhishek 30
1 Ram 28
2 Kartik 3
[ ]: v.loc[0]
[ ]: Name Abhishek
Age 30
Name: 0, dtype: object
[ ]: v.iloc[0]
3
[ ]: Name Abhishek
Age 30
Name: 0, dtype: object
[ ]: print(v.loc[3])
Name Abhishek
Age 30
Name: 3, dtype: object
[ ]: v.iloc[3]
Name Age
0 Abhishek 30
1 Ram 28
2 Kartik 3
[ ]: v.iloc[[0,1]]
[ ]: Name Age
0 Abhishek 30
1 Ram 28
[ ]: v.iloc[[0,2]]
4
[ ]: Name Age
0 Abhishek 30
2 Kartik 3
[ ]: v.iloc[0:3]
[ ]: Name Age
0 Abhishek 30
1 Ram 28
2 Kartik 3
[ ]: v.loc[['a','b'],['Name', 'Age']]
[ ]: Name Age
a Abhishek 30
b Ram 28
[ ]: v.shape
[ ]: (3, 3)
[ ]: pd.concat([df1,df2], axis=0)
[ ]: pd.concat([df1,df2], axis=1)
5
[ ]: left =pd.DataFrame({'KEY': ['A0','A1', 'A2', 'A3'],
'B': ['B0','B1', 'B2', 'B3'],
'C': ['C0','C1', 'C2', 'C3']})
right =pd.DataFrame({'KEY': ['A0','A1', 'A2', 'A4'],
'D': ['B0','B1', 'B6', 'B3'],
'E': ['C0','C5', 'C2', 'C3']})
[ ]: KEY B C D E
0 A0 B0 C0 B0 C0
1 A1 B1 C1 B1 C5
2 A2 B2 C2 B6 C2
[ ]: KEY B C D E
0 A0 B0 C0 B0 C0
1 A1 B1 C1 B1 C5
2 A2 B2 C2 B6 C2
3 A3 B3 C3 NaN NaN
4 A4 NaN NaN B3 C3
[ ]: left=pd.DataFrame({
'B': ['B0','B1', 'B2', 'B3'],
'C': ['C0','C1', 'C2', 'C3'],
}, index = ['A0','A1', 'A2' ,'A3'])
right=pd.DataFrame({
'D': ['D4','D5', 'D6', 'D7'],
'E' : ['E4', 'E5', 'E6', 'E7']}, index = ['A0','A1', 'A2' ,'A4'])
[ ]: print(left)
B C
A0 B0 C0
A1 B1 C1
A2 B2 C2
A3 B3 C3
[ ]: left.join(right)
[ ]: B C D E
A0 B0 C0 D4 E4
A1 B1 C1 D5 E5
A2 B2 C2 D6 E6
A3 B3 C3 NaN NaN
6
[ ]: right.join(left)
[ ]: D E B C
A0 D4 E4 B0 C0
A1 D5 E5 B1 C1
A2 D6 E6 B2 C2
A4 D7 E7 NaN NaN
[ ]: B C D E
A0 B0 C0 D4 E4
A1 B1 C1 D5 E5
A2 B2 C2 D6 E6
A3 B3 C3 NaN NaN
A4 NaN NaN D7 E7
[ ]: data
[ ]: B C
A0 1 2
A1 2 1
A2 1 2
A3 3 4
[ ]: data['C'].unique()
[ ]: array([2, 1, 4])
[ ]: data['B'].nunique()
[ ]: 3
[ ]: data['C'].value_counts()
[ ]: 2 2
1 1
4 1
Name: C, dtype: int64
[ ]: data['B'].apply(lambda x: x*x)
[ ]: A0 1
A1 4
A2 1
A3 9
Name: B, dtype: int64
[ ]: data.columns
[ ]: data.index
[ ]: data.sort_values('B')
[ ]: B C
A0 1 2
A2 1 2
A1 2 1
A3 3 4
[ ]: import pandas as pd
[ ]: data = pd.read_csv('/content/SAMPLE.csv')
print(data)
[ ]: data = pd.read_csv('/content/SAMPLE.csv')
print(data.to_string())
[ ]: len(data)
[ ]: 12
[ ]: nd=pd.read_json('/content/sample_data/anscombe.json')
print(nd.to_string())
8
[ ]: pd.read_excel('/content/MID SEMESTER EXAMINATION.xlsx')
Name age
0 Abhishek 31
1 Pawan 27
2 Kartik 3
[ ]: v.to_csv('WRITECSV')
[ ]: view=pd.read_csv('/content/WRITECSV')
print(view)
[ ]: v.to_csv('WRITECSV', index=False)
[ ]: view=pd.read_csv('/content/WRITECSV')
view
[ ]: v.to_excel('WRITEEXCEL.xlsx', index=False)
[ ]: view=pd.read_excel('/content/WRITEEXCEL.xlsx')
view
[ ]: data = pd.read_csv('/content/SAMPLE.csv')
print(data.head(6))
[ ]: data = pd.read_csv('/content/SAMPLE.csv')
print(data.tail(6))
[ ]: data = pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
print(data.to_string())
9
60.0
5 6 0901IT181006 ALAKH NIRANJAN THAKURIYA 05-25-2021 11.0
55.0
6 7 0901IT181007 ALOK KUMAR 05-25-2021 13.0
65.0
7 8 0901IT181008 AMAN DIXIT 25-05-2021 NaN
NaN
8 9 0901IT181009 AMIT BAMNIYA 25-05-2021 12.0
60.0
9 10 0901IT181010 ANKIT KUMAR 25-05-2021 12.5
62.5
10 11 0901IT181011 ANKIT RAJ TIRKEY 25-May-21 13.0
65.0
11 12 0901IT181011 ANKIT RAJ TIRKEY 25-May-21 13.0
65.0
[ ]: data = pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
newdata=data.dropna()
print(newdata.to_string())
[ ]: data = pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
newdata=data.dropna(axis=1)
print(newdata.to_string())
10
[ ]: data = pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
data.fillna(12)
[ ]: data = pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
data['MARKS'].fillna(12)
[ ]: 0 15.0
1 12.0
2 16.0
3 17.0
4 12.0
5 11.0
6 13.0
7 12.0
8 12.0
9 12.5
10 13.0
11 13.0
Name: MARKS, dtype: float64
[ ]: data = pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
data['MARKS'].fillna(12, inplace=True)
data
[ ]: data = pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
data = data.rename(columns= {'STUDENT_NAME':'NAME_STUDENT'})
print(data.to_string())
[ ]: data =pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
x = data["MARKS"].mean()
data["MARKS"].fillna(x, inplace=True)
data
[ ]: data =pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
x = data["MARKS"].median()
data["MARKS"].fillna(x, inplace=True)
data
[ ]: data =pd.read_csv('/content/SAMPLE_FOR_CLEANING.csv')
x = data["MARKS"].mode()[0]
data["MARKS"].fillna(x, inplace=True)
data
[ ]: data.describe()
[ ]: data.info()
12
[2]: import pandas as pd
13
4 0901IT181005 AKSHAT KOTHAVADE 25-05-2021 12.0 60.0
5 0901IT181006 ALAKH NIRANJAN THAKURIYA 05-25-2021 11.0 55.0
6 0901IT181007 ALOK KUMAR 05-25-2021 13.0 65.0
7 0901IT181008 AMAN DIXIT 25-05-2021 21.0 NaN
8 0901IT181009 AMIT BAMNIYA 25-05-2021 12.0 60.0
9 0901IT181010 ANKIT KUMAR 25-05-2021 12.5 62.5
10 0901IT181011 ANKIT RAJ TIRKEY 25-May-21 13.0 65.0
11 0901IT181011 ANKIT RAJ TIRKEY 25-May-21 13.0 65.0
[ ]: print(data.duplicated())
[ ]: data.drop_duplicates()
[ ]: data.corr()
14
7 0901IT181008 AMAN DIXIT 25-05-2021 NaN NaN
8 0901IT181009 AMIT BAMNIYA 25-05-2021 12.0 60.0
9 0901IT181010 ANKIT KUMAR 25-05-2021 12.5 62.5
10 0901IT181011 ANKIT RAJ TIRKEY 25-May-21 13.0 65.0
11 0901IT181011 ANKIT RAJ TIRKEY 25-May-21 13.0 65.0
[21]: data.plot()
plt.show()
15
11 0901IT181011 ANKIT RAJ TIRKEY 25-May-21 13.0 65.0
[ ]: data.plot()
plt.show()
[24]: data.plot(kind='bar')
plt.show()
16