#QUESTION 1
# (a) Mean, standard deviation and variance of a random 2-D array, each
#     computed along axis=1 (one result per row).
import numpy as np

# NOTE(review): the generator call was lost in extraction; np.random.rand is
# the reconstruction that fits the bare (2, 3) shape arguments -- confirm.
ARR1 = np.random.rand(2, 3)
print(ARR1)
print("MEAN:", np.mean(ARR1, axis=1))
print("STANDAR DEV:", np.std(ARR1, axis=1))
print("VARIANCE:", np.var(ARR1, axis=1))
#------------------------------------------------------------------------------
# (b) Build an a x b array of random integers in [1, 100), report some of its
#     properties, then reshape it to b x a.
import numpy as np

# int() rather than eval(): the prompts expect whole numbers, and eval() would
# execute arbitrary expressions typed by the user.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))
Arr = np.random.randint(1, 100, (a, b))
print(Arr)
# NOTE(review): the two attribute names below were lost in extraction; shape
# and dtype are the likely originals (they bracket the type() call) -- confirm.
print(Arr.shape)
print(type(Arr))
print(Arr.dtype)
r = np.reshape(Arr, (b, a))
print("Array After reshape:\n", r)
#------------------------------------------------------------------------------
# (c) Report the indices of the zero, non-zero and NaN entries of a 1-D array.
import numpy as np

a = np.array([0, 2, 3, 0, 4, 5, np.nan])
print(np.where(a == 0))
# NaN compares unequal to everything, so the NaN position is also listed here.
print(np.where(a != 0))
print(np.where(np.isnan(a)))
#------------------------------------------------------------------------------
# (d) Covariance and correlation of Array1 against two derived arrays:
#     Array4 = Array2 - Array3 and Array5 = 2 * Array1.
import numpy as np

# NOTE(review): the generator call was lost in extraction; np.random.randint
# (6 integers in [1, 10)) fits the (1, 10, 6) arguments -- confirm.
Array1 = np.random.randint(1, 10, 6)
Array2 = np.random.randint(1, 10, 6)
Array3 = np.random.randint(1, 10, 6)
print("Array1 = ", Array1)
print("Array2 = ", Array2)
print("Array3 = ", Array3)
Array4 = Array2 - Array3
print("Array4 = ", Array4)
Array5 = Array1 * 2
print("Array5 = ", Array5)
# np.cov / np.corrcoef each return the full 2x2 matrix for the pair.
print("Covariance of Array1 and Array4=\n", np.cov(Array1, Array4))
print("Covariance of Array1 and Array5=\n", np.cov(Array1, Array5))
print("Corealation of Array1 and Array4=\n", np.corrcoef(Array1, Array4))
print("Corealation of Array1 and Array5=\n", np.corrcoef(Array1, Array5))
#------------------------------------------------------------------------------
# (e) Element-wise sum of the first halves and element-wise product of the
#     second halves of two random arrays of length 10.
import numpy as np

# NOTE(review): call names were lost in extraction; randint, add and multiply
# are reconstructed from the arguments and the printed labels -- confirm.
Array1 = np.random.randint(1, 10, 10)
Array2 = np.random.randint(1, 10, 10)
print("Sum is:", np.add(Array1[:5], Array2[:5]))
print("Product is:", np.multiply(Array1[5:10], Array2[5:10]))
#------------------------------------------------------------------------------
# Swap two rows of a random integer array, then reverse one of its columns.
# Uses the `np` alias imported earlier in this script.
# Fixed sample input, kept for manual testing:
#a = np.array([[4,3, 1],[5 ,7, 0],[9, 9, 3],[8, 2, 4]])

# int() rather than eval(): sizes and indices must be whole numbers, and
# eval() would execute arbitrary expressions typed by the user.
a = int(input("Enter the no. of rows:"))
b = int(input("Enter the no. of columns:"))
Arr = np.random.randint(1, 100, (a, b))
print(Arr)
c = int(input("Enter the rows1 to interchange:"))
d = int(input("Enter the row2 to interchange:"))
# Fancy indexing on the right-hand side copies both rows before assignment,
# so the swap does not overwrite one row with the other.
Arr[[c, d], :] = Arr[[d, c], :]
print("Array After swapping")
print(Arr)
c1 = int(input("Column No. to flip"))
# NOTE(review): call name lost in extraction; np.flip matches the "reversing
# column" message printed below -- confirm.
Arr[:, c1] = np.flip(Arr[:, c1])
print("Array After reversing column")
print(Arr)
#Question 3
# A 50x3 DataFrame of random numbers with 10% of its cells set to NaN,
# followed by missing-value, sorting, de-duplication and binning exercises.
import pandas as pd
import numpy as np

a = pd.DataFrame(np.random.rand(50, 3), columns=['A', 'B', 'C'])
print(a)
null_val = int(0.1 * a.size)
print(null_val)
# replace=False: sampling with replacement can repeat a position, which would
# leave fewer than null_val cells blank.
ind_null_val = np.random.choice(a.size, null_val, replace=False)
# Build a boolean mask from the flat positions and apply it with mask();
# writing through a.values is not guaranteed to modify the frame.
null_mask = np.zeros(a.shape, dtype=bool)
null_mask.flat[ind_null_val] = True
a = a.mask(null_mask)
print(a)
#------------------------------------------------------------------------------
# thresh=45 with axis=1 keeps only columns that have at least 45 non-null
# values, i.e. drops any column with more than 5 nulls.
col = a.dropna(thresh=45, axis=1)
print(col)
#------------------------------------------------------------------------------
print("No. of missing values:", a.isnull().sum().sum())
#------------------------------------------------------------------------------
print(a.sort_values(by=['A']))
#------------------------------------------------------------------------------
print(a.drop_duplicates("A"))
#------------------------------------------------------------------------------
print("Covariance of Column1 and Column2=\n", a['A'].cov(a['B']))
# The original called .cov() on columns B and C under this "Corelation of
# Column1 and Column2" label; compute what the label states.
print("Corelation of Column1 and Column2=\n", a['A'].corr(a['B']))
#------------------------------------------------------------------------------
# NOTE(review): call name lost in extraction; pd.cut (5 equal-width bins) is
# the reconstruction that fits the (series, 5, precision=2) arguments.
print(pd.cut(a['B'], 5, precision=2))
#------------------------------------------------------------------------------
#Question 7
# Family income exercise: per-family totals and extremes, low-income filter,
# average female income, and removal of below-average earners.
# Uses the `pd` alias imported earlier in this script.
data = {
    "Family Name": ['Shah', 'Vats', 'Vats', 'Kumar', 'Vats',
                    'Kumar', 'Shah', 'Shah', 'Kumar', 'Vats'],
    "Gender": ['Male', 'Male', 'Female', 'Female', 'Female',
               'Male', 'Male', 'Female', 'Female', 'Male'],
    "Income": [44000, 65000, 43150, 66500, 255000,
               103000, 55000, 112400, 81030, 71900],
}
df = pd.DataFrame(data)
print(df)
# Gross income per family.
print(df.groupby('Family Name')['Income'].sum())
# Highest and lowest income within each family.
print(df.groupby('Family Name')['Income'].agg(['max', 'min']))
print(df[df['Income'] < 80000])
females = df[df['Gender'] == 'Female']
# The original bound Avg_income to the whole female sub-frame; compute the
# average the name promises and report it.
Avg_income = females['Income'].mean()
print("Average income of female members:", Avg_income)
# Drop every row whose income is below the overall mean.
df2 = df.drop(df[df['Income'] < df['Income'].mean()].index)
print(df2)
#IRIS PRACTICLE
# Exploratory analysis of an Iris spreadsheet: dataset info, missing values,
# several plots and per-feature summary statistics.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): the workbook file name was lost in extraction ("[Link]" is a
# placeholder) -- restore the real file name before running.
a = pd.read_excel(r"C:\Users\HP\Downloads\[Link]")

# a. Display data types and info on the dataset
print(a.info())

# b. Find number of missing values in each column
missing_values = a.isnull().sum()
print("Missing Values:\n", missing_values)

# c. Plot bar chart for frequency of each class label
plt.figure(figsize=(6, 4))
# NOTE(review): the plotting call on the next line was lost in extraction and
# the class-label column name is not visible anywhere in this script, so it
# cannot be reconstructed safely. "[Link]" is a placeholder -- restore it.
[Link](width=2)
plt.xlabel('Class Label')
plt.ylabel('Frequency')
plt.title('Frequency of Each Class Label')
plt.show()

# d. Scatter plot for Petal Length vs Sepal Length with regression line
plt.figure(figsize=(8, 6))
sns.regplot(x='Sepal_length', y='Petal_length', data=a)
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.title('Scatter plot: Petal Length vs Sepal Length with Regression Line')
plt.show()

# e. Density distribution for feature Petal Width
plt.figure(figsize=(8, 6))
# fill= is the current name of the deprecated shade= argument.
sns.kdeplot(data=a['Petal_width'], fill=True)
plt.xlabel('Petal Width')
plt.ylabel('Density')
plt.title('Density Distribution of Petal Width')
plt.show()

# f. Pair plot for pairwise bivariate distribution
sns.pairplot(a)
plt.show()

# g. Heatmap for any two numeric attributes
#    (e.g., Sepal Length and Petal Width)
numeric_attributes = ['Sepal_length', 'Petal_width']
sns.heatmap(a[numeric_attributes].corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Numeric Attributes')
plt.show()

# h. Compute statistics for each numeric feature
# sem(), std() and corr() raise on non-numeric columns in pandas 2.x, so they
# are computed on the numeric columns only.
numeric_features = a.select_dtypes(include='number')
statistics = a.describe()
mode = a.mode().iloc[0]
std_error = numeric_features.sem()
# Half-width of a 95% normal-approximation interval: 1.96 * std / sqrt(n).
confidence_interval = 1.96 * (numeric_features.std() / (len(a) ** 0.5))

# Print computed statistics
print("Statistics for each numeric feature:\n", statistics)
print("\nMode for each numeric feature:\n", mode)
print("\nStandard Error for each numeric feature:\n", std_error)
print("\nConfidence Interval for each numeric feature:\n",
      confidence_interval)

# i. Compute correlation coefficients between each pair of features and plot
#    heatmap
correlation_matrix = numeric_features.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for Iris Dataset')
plt.show()
#TITANIC PRACTICLE
# Exploratory analysis of a Titanic spreadsheet: cleaning, simple aggregates
# and a handful of plots.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): the workbook file name was lost in extraction ("[Link]" is a
# placeholder) -- restore the real file name before running.
a = pd.read_excel(r"C:\Users\HP\Downloads\[Link]")
# Work on a deep copy so the cleaning step leaves `a` untouched.
df = a.copy(deep=True)

# a. Clean the data by dropping the column with the largest number of missing
#    values
missing_values = df.isnull().sum()
column_to_drop = missing_values.idxmax()
df.drop(column_to_drop, axis=1, inplace=True)
print(df)

# b. Find the total number of passengers with age more than 30
passengers_over_30 = a[a['Age'] > 30]
total_passengers_over_30 = len(passengers_over_30)
print("No. of passengers over 30 :", total_passengers_over_30)

# c. Find the total fare paid by passengers of the second class
total_fare_second_class = a[a['Pclass'] == 2]['Fare'].sum()
print("Total fare of second class : ", total_fare_second_class)

# d. Compare the number of survivors of each passenger class
survivors_per_class = a.groupby('Pclass')['Survived'].sum()
print("No. of Survivors of each class\n", survivors_per_class)

# e. Compute descriptive statistics for age attribute gender-wise
descriptive_stats_age_gender = a.groupby('Sex')['Age'].describe()
print("Descriptive statistics for age attribute gender wise\n",
      descriptive_stats_age_gender)

# f. Draw a scatter plot for passenger fare paid by Female and Male passengers
#    separately
plt.figure(figsize=(8, 6))
sns.scatterplot(data=a, x='Fare', y='Sex', hue='Sex')
plt.title('Scatter plot of Fare Paid by Gender')
plt.xlabel('Fare')
plt.ylabel('Gender')
plt.show()

# g. Compare density distribution for features age and passenger fare
plt.figure(figsize=(10, 6))
# fill= is the current name of the deprecated shade= argument.
sns.kdeplot(data=a['Age'], label='Age', fill=True)
sns.kdeplot(data=a['Fare'], label='Fare', fill=True)
plt.title('Density Distribution of Age and Fare')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.show()

# h. Draw a pie chart for three passenger classes
class_counts = a['Pclass'].value_counts()
plt.figure(figsize=(6, 6))
# value_counts() orders by frequency, so take the labels from its index
# instead of hard-coding an order that only holds for one particular dataset.
class_labels = ['Class {}'.format(pclass) for pclass in class_counts.index]
plt.pie(class_counts, labels=class_labels,
        autopct='%1.1f%%', colors=['skyblue', 'lightgreen', 'lightcoral'])
plt.title('Passenger Class Distribution')
plt.show()

# i. Find % of survived passengers for each class and analyze
# The mean of the Survived column, scaled by 100, is printed as the
# survival percentage per class.
survived_per_class = a.groupby('Pclass')['Survived'].mean() * 100
print("% of survived passengers for each class\n", survived_per_class)
#question4
# Combine two spreadsheets that share a 'name' column: stack them, count
# distinct names, describe a (name, Date) multi-index, and compare membership.
import pandas as pd

# NOTE(review): both workbook file names were lost in extraction ("[Link]" is
# a placeholder) -- restore the real file names before running.
a = pd.read_excel(r"C:\Users\HP\Documents\[Link]")
print(a)
b = pd.read_excel(r"C:\Users\HP\Documents\[Link]")
print(b)

print("c")
# Stack both sheets row-wise, then count the distinct names.
f4 = pd.concat([a, b])
f5 = f4.drop_duplicates(["name"])
print(len(f5))

print("d")
# Descriptive statistics of the stacked data indexed by (name, Date).
index = f4.set_index(['name', 'Date'])
print(index.describe())

print("a")
# Inner merge on 'name': rows for names present in both sheets.
c = pd.merge(a, b, on='name')
print(c)

print("b")
# Rows whose name is NOT in the merge result, i.e. names found in only one
# of the two sheets.
print(f4[~f4['name'].isin(c['name'])])