In [3]:
import pandas as pd
In [4]: import numpy as np
titanic = pd.read_csv(r'C:\Users\hp\Documents\Gen AI and Data Science\11Nov cla
In [5]:
In [6]: [Link]()
Out[6]
PassengerId Survived Pclass Name Sex
:
Age SibSp Parch Ticket Far
Montvila
,
886 887 0 Rev male 27.0 0 0 211536
2 . 13.0
Juoza
s
Graham,
Miss. Margaret
887 888 1 1 Edith female19.0 0 0 112053 30.0
Johnston,
Miss
888 889 0 . W./
female NaN 1 2 C.
3 Catherin
e 660
Hele 7
n
"Carri
23.
e"
4
30.0
Behr, Mr.
889 890 1 1Karl Howell male26.0 0 0 111369
890 891 0
3
Doole
y, male 32.0 0 0 370376 7.7
Mr
.
Patric
k
In [7]:
Out[7]: PassengerId Survived Pclass Age SibSp Parch
[Link]()
count 891.00000 891.00000 714.00000 891.00000 891.00000 891.0
0 0 0 0 0 0
891.000000
mean 0.383838 2.308642 29.699118 0.523008 0.381594 32.20
446.000000
std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.69
min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.00
25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.91
50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.45
75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.00
max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.3
2
In [8]: del titanic["Name"]
[Link]()
Out[8]: Passenger Survive Pclas Sex Ag SibS Parch Ticket Fare
Id d s e p Cabin
A/5
0 1 0 3 male 22. 1 0 7.2500
0 NaN
2117
1
1 2 1 1 femal 38. 1 0 PC 17599 71.2833 C85
e 0
STON/O2.
2 3 1 3 femal 26. 0 0 7.9250
e 0 NaN
3101282
3 4 1 1 femal 35. 1 0 113803 53.1000
e 0 C123
4 5 0 3 male 35. 0 0 373450 8.0500 NaN
0
In [9]: del titanic["Ticket"]
[Link]()
Out[9]: Passenger Survive Pclas Sex Ag SibS Parc Fare Cabi Embarke
Id d s e p h n d
0 1 0 3 male 22. 1 0 7.2500 NaN S
0
1 2 1 1 femal 38. 1 0 71.283 C85 C
e 0 3
2 3 1 3 femal 26. 0 0 7.9250 NaN S
e 0
3 4 1 1 femal 35. 1 0 53.100 C123 S
e 0 0
4 5 0 3 male 35. 0 0 8.0500 NaN S
0
In [10]: del titanic["Fare"]
[Link]()
Out[10] Passenger Survive Pclas Sex Ag SibS Parc Cabi Embarke
: Id d s e p h n d
0 1 0 3 male 22. 1 0 NaN S
0
1 2 1 1 femal 38. 1 0 C85 C
e 0
2 3 1 3 femal 26. 0 0 NaN S
e 0
3 4 1 1 femal 35. 1 0 C123 S
e 0
4 5 0 3 male 35. 0 0 NaN S
0
In [11]: del titanic["Cabin"]
[Link]()
Out[11] Passenge Survive Pclas Sex Ag SibS Parc Embarke
: rId d s e p h d
0 1 0 3 male 22. 1 0 S
0
1 2 1 1 femal 38. 1 0 C
e 0
2 3 1 3 femal 26. 0 0 S
e 0
3 4 1 1 femal 35. 1 0 S
e 0
4 5 0 3 male 35. 0 0 S
0
In de getNumber(str):
[12]: f if str
=="male":
titanic["Gender"]=titanic["Sex"].apply(getNumber)
return 1 [Link]()
else:
return 2
Out[12] Passenger Survive Pclas Sex Ag SibS Parc Embarke Gende
: Id d s e p h d r
0 1 0 3 male 22. 1 0 S 1
0
1 2 1 1 femal 38. 1 0 C 2
e 0
2 3 1 3 femal 26. 0 0 S 2
e 0
3 4 1 1 femal 35. 1 0 S 2
e 0
4 5 0 3 male 35. 0 0 S 1
0
In [14]: del titanic["Sex"]
[Link]()
Out[14] PassengerId Survive Pclas Ag SibS Parc Embarke Gende
: d s e p h d r
0 1 0 3 22. 1 0 S 1
0
1 2 1 1 38. 1 0 C 2
0
2 3 1 3 26. 0 0 S 2
0
3 4 1 1 35. 1 0 S 2
0
4 5 0 3 35. 0 0 S 1
0
In [20]: [Link]()
Out[20] Passenger Survive Pclas Age SibS Parc Embarke Gende
: Id d s p h d r
0 False False False Fals False False False False
e
1 False False False Fals False False False False
e
2 False False False Fals False False False False
e
3 False False False Fals False False False False
e
4 False False False Fals False False False False
e
... ... ... ... ... ... ... ... ...
886 False False False Fals False False False False
e
887 False False False Fals False False False False
e
888 False False False True False False False False
889 False False False Fals False False False False
e
890 False False False Fals False False False False
e
891 rows × 8 columns
In [22]: [Link]().sum()
Out[22] PassengerId 0
:
Survived 0
Pclass 0
Age 177
SibSp 0
Parch 0
Embarked 2
Gender 0
dtype: int64
meanS = titanic[[Link]==1].[Link]()
In
[24]:
meanS
In
28.343689655172415
[26]:
Out[26]
:
titanic["age"] = [Link]([Link]([Link]) & titanic["Survived"]==1, meanS tit
In
[28]:
Out[28] PassengerId Survive Pclas Ag SibS Parc Embarke Gende age
: d s e p h d r
0 1 0 3 22. 1 0 S 1 22.
0 0
1 2 1 1 38. 1 0 C 2 38.
0 0
2 3 1 3 26. 0 0 S 2 26.
0 0
3 4 1 1 35. 1 0 S 2 35.
0 0
4 5 0 3 35. 0 0 S 1 35.
0 0
In [30]: [Link]().sum()
Out[30] PassengerId 0
:
Survived 0
Pclass 0
Age 177
SibSp 0
Parch 0
Embarked 2
Gender 0
age 125
dtype: int64
In [32]: meaNS = titanic[[Link]==0].[Link]()
meaNS
Out[32]: 30.62617924528302
In [36]: [Link](meaNS, inplace = True)
[Link]()
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]:
FutureWarning: A value is trying to be set on a copy of a DataFrame or
Series through chained assi gnment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never
work becau se the intermediate object on which we are setting values
always behaves as a cop y.
For example, when doing 'df[col].method(value, inplace=True)', try using
'[Link] od({col: value}, inplace=True)' or df[col] =
df[col].method(value) instead, to pe rform the operation inplace on the
original object.
[Link](meaNS, inplace = True)
Out[36] PassengerId Survive Pclas Ag SibS Parc Embarke Gende age
: d s e p h d r
0 1 0 3 22. 1 0 S 1 22.
0 0
1 2 1 1 38. 1 0 C 2 38.
0 0
2 3 1 3 26. 0 0 S 2 26.
0 0
3 4 1 1 35. 1 0 S 2 35.
0 0
4 5 0 3 35. 0 0 S 1 35.
0 0
In [38]: [Link]().sum()
Out[38] PassengerId 0
:
Survived 0
Pclass 0
Age 177
SibSp 0
Parch 0
Embarked 2
Gender 0
age 0
dtype: int64
In [40]: del titanic["Age"]
[Link]()
Out[40] PassengerId Survive Pclas SibS Parc Embarke Gende age
: d s p h d r
0 1 0 3 1 0 S 1 22.
0
1 2 1 1 1 0 C 2 38.
0
2 3 1 3 0 0 S 2 26.
0
3 4 1 1 1 0 S 2 35.
0
4 5 0 3 0 0 S 1 35.
0
In [42]: survivedQ = titanic[[Link] == 'Q'][[Link] ==
1].shape[0] survivedC = titanic[[Link] == 'C']
[[Link] == 1].shape[0] survivedS = titanic[[Link]
== 'S'][[Link] == 1].shape[0] print(survivedQ)
print(survivedC)
print(survivedS)
30
93
217
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
survivedQ = titanic[[Link] == 'Q'][[Link] ==
1].shape[0]
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
survivedC = titanic[[Link] == 'C'][[Link] ==
1].shape[0]
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
In [44]: survivedQ = titanic[[Link] == 'Q'][[Link] ==
0].shape[0] survivedC = titanic[[Link] == 'C']
[[Link] == 0].shape[0] survivedS = titanic[[Link]
== 'S'][[Link] == 0].shape[0] print(survivedQ)
print(survivedC)
print(survivedS)
47
75
427
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
survivedQ = titanic[[Link] == 'Q'][[Link] ==
0].shape[0]
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
survivedC = titanic[[Link] == 'C'][[Link] ==
0].shape[0]
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
In [46]: [Link](inplace=True)
[Link]()
Out[46] PassengerId Survive Pclas SibS Parc Embarke Gende age
: d s p h d r
0 1 0 3 1 0 S 1 22.
0
1 2 1 1 1 0 C 2 38.
0
2 3 1 3 0 0 S 2 26.
0
3 4 1 1 1 0 S 2 35.
0
4 5 0 3 0 0 S 1 35.
0
In [48]: [Link]().sum()
Out[48] PassengerId 0
:
Survived 0
Pclass 0
SibSp 0
Parch 0
Embarked 0
Gender 0
age 0
dtype: int64
[Link](columns = {'age':'Age'}, inplace=True)
In
[Link]()
[50]:
Out[50] PassengerId Survive Pclas SibS Parc Embarke Gende Ag
: d s p h d r e
0 1 0 3 1 0 S 1 22.
0
1 2 1 1 1 0 C 2 38.
0
2 3 1 3 0 0 S 2 26.
0
3 4 1 1 1 0 S 2 35.
0
4 5 0 3 0 0 S 1 35.
0
In [52]: [Link](columns={'Gender':'Sex'}, inplace=True)
[Link]()
Out[52] PassengerId Survive Pclas SibS Parc Embarke Sex Age
: d s p h d
0 1 0 3 1 0 S 1 22.0
1 2 1 1 1 0 C 2 38.0
2 3 1 3 0 0 S 2 26.0
3 4 1 1 1 0 S 2 35.0
4 5 0 3 0 0 S 1 35.0
In def getEmb(str):
[54]:
if str=="S":
return 1
elif str=='Q':
return 2
else:
return 3
titanic["Embark"]=titanic["Embarked"].apply(getEmb) [Link]()
Out[54] PassengerId Survive Pclas SibS Parc Embarke Sex Age Embar
: d s p h d k
0 1 0 3 1 0 S 1 22.0 1
1 2 1 1 1 0 C 2 38.0 3
2 3 1 3 0 0 S 2 26.0 1
3 4 1 1 1 0 S 2 35.0 1
4 5 0 3 0 0 S 1 35.0 1
In [56]: del titanic['Embarked']
[Link](columns={'Embark':'Embarked'}, inplace=True)
[Link]()
Out[56] PassengerId Survive Pclas SibS Parc Sex Age Embarke
: d s p h d
0 1 0 3 1 0 1 22.0 1
1 2 1 1 1 0 2 38.0 3
2 3 1 3 0 0 2 26.0 1
3 4 1 1 1 0 2 35.0 1
4 5 0 3 0 0 1 35.0 1
In [58]: import [Link] as plt
from matplotlib import style
males = (titanic['Sex'] == 1).sum()
females = (titanic['Sex'] ==
2).sum() print(males)
print(females)
p = [males,
females]
[Link](p,
labels = ['Male', 'Female'],
colors = ['green',
'yellow'], explode =
(0.15, 0),
startangle =
0) [Link]('equal')
577
312
In [60]: MaleS=titanic[[Link]==1][[Link]==1].shape[0]
print(MaleS)
MaleN=titanic[[Link]==1][[Link]==0].shape[0]
print(MaleN)
FemaleS=titanic[[Link]==2][[Link]==1].shape[0]
print(FemaleS)
FemaleN=titanic[[Link]==2][[Link]==0].shape[0]
print(FemaleN)
109
468
231
81
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
MaleS=titanic[[Link]==1][[Link]==1].shape[0]
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
MaleN=titanic[[Link]==1][[Link]==0].shape[0]
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
FemaleS=titanic[[Link]==2][[Link]==1].shape[0]
C:\Users\hp\AppData\Local\Temp\ipykernel_14236\[Link]: UserWarning:
Bool ean Series key will be reindexed to match DataFrame index.
FemaleN=titanic[[Link]==2][[Link]==0].shape[0]
In [62]: chart=[MaleS,MaleN,FemaleS,FemaleN]
colors=['lightskyblue','yellowgreen','Yellow','Orange']
labels=["Survived Male","Not Survived Male","Survived Female","Not
Survived Fema explode=[0,0.05,0,0.1]
[Link](chart,labels=labels,colors=colors,explode=explode,startangle=100,c
ounter [Link]("equal")
[Link]()
In [ ]: