Project 3 - Diabetes Prediction.ipynb - Colab
Project 3 - Diabetes Prediction.ipynb - Colab
ipynb - Colab
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
# Show the documentation for pandas.read_csv. The original used the
# IPython-only "name?" shortcut, which is a SyntaxError in plain Python;
# help() works both in notebooks and in ordinary scripts.
help(pd.read_csv)
1 1 85 66 29 0 26.6 0.351 31 0
3 1 89 66 23 94 28.1 0.167 21 0
(768, 9)
count 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000
mean 3.845052 120.894531 69.105469 20.536458 79.799479 31.992578 0.471876 33.240885 0.348958
std 3.369578 31.972618 19.355807 15.952218 115.244002 7.884160 0.331329 11.760232 0.476951
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.078000 21.000000 0.000000
25% 1.000000 99.000000 62.000000 0.000000 0.000000 27.300000 0.243750 24.000000 0.000000
50% 3.000000 117.000000 72.000000 23.000000 30.500000 32.000000 0.372500 29.000000 0.000000
75% 6.000000 140.250000 80.000000 32.000000 127.250000 36.600000 0.626250 41.000000 1.000000
max 17.000000 199.000000 122.000000 99.000000 846.000000 67.100000 2.420000 81.000000 1.000000
# Count how many rows fall into each Outcome class (class balance check).
# In a notebook the result is displayed as the cell output.
diabetes_dataset['Outcome'].value_counts()
0 500
1 268
Name: Outcome, dtype: int64
0 --> Non-Diabetic
1 --> Diabetic
# Per-class averages: mean of every column, computed separately for each
# Outcome value, to compare the two groups.
diabetes_dataset.groupby('Outcome').mean()
https://colab.research.google.com/drive/1oxnhMTlomJ4HVhPuowpPFyMt1mwuOuQo?usp=sharing#printMode=true 1/4
11/28/24, 10:18 PM Project 3 - Diabetes Prediction.ipynb - Colab
Outcome
# Display X and Y for a quick visual check.
# NOTE(review): X and Y are assigned outside this excerpt — presumably X holds
# the feature columns and Y the Outcome labels; confirm against the full notebook.
print(X)
print(Y)
0 1
1 0
2 1
3 0
4 1
..
763 0
764 0
765 0
766 1
767 0
Name: Outcome, Length: 768, dtype: int64
Data Standardization
# Standardize the features: StandardScaler rescales each column of X to
# zero mean and unit variance (its default behaviour).
# NOTE(review): the scaler is fitted on the whole of X; if a train/test split
# follows, fitting on the training part only would avoid leaking test
# statistics — confirm against the rest of the notebook.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(X)
print(standardized_data)

# From here on X is the standardized feature array and Y the Outcome labels.
X, Y = standardized_data, diabetes_dataset['Outcome']
print(X)
print(Y)
https://colab.research.google.com/drive/1oxnhMTlomJ4HVhPuowpPFyMt1mwuOuQo?usp=sharing#printMode=true 2/4
11/28/24, 10:18 PM Project 3 - Diabetes Prediction.ipynb - Colab
[-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078
-0.19067191]
[ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732
-0.10558415]
...
[ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336
-0.27575966]
[-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101
1.17073215]
[-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505
-0.87137393]]
0 1
1 0
2 1
3 0
4 1
..
763 0
764 0
765 0
766 1
767 0
Name: Outcome, Length: 768, dtype: int64
# Support-vector classifier with a linear kernel. This line only constructs
# the model; it is not fitted here.
classifier = svm.SVC(kernel='linear')
Model Evaluation
Accuracy Score
# One sample to classify: eight raw (unscaled) feature values.
# NOTE(review): presumably ordered like the dataset's feature columns — confirm.
input_data = (5,166,72,19,175,25.8,0.587,51)
https://colab.research.google.com/drive/1oxnhMTlomJ4HVhPuowpPFyMt1mwuOuQo?usp=sharing#printMode=true 3/4
11/28/24, 10:18 PM Project 3 - Diabetes Prediction.ipynb - Colab
# Predict the class of the single sample held in `input_data`.
#
# NOTE(review): the assignments of `input_data_as_numpy_array` and `std_data`
# were missing, so this code raised NameError. They are reconstructed here
# from the variable names — confirm against the original notebook.

# Convert the feature tuple to a NumPy array so it can be reshaped.
input_data_as_numpy_array = np.asarray(input_data)

# The model expects a 2-D array: one row (this sample) by n feature columns.
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Apply the same scaling that was fitted on the training features, because
# the classifier works on standardized values.
std_data = scaler.transform(input_data_reshaped)

prediction = classifier.predict(std_data)
print(prediction)

# Label meaning: 0 --> non-diabetic, 1 --> diabetic.
if prediction[0] == 0:
    print('The person is not diabetic')
else:
    print('The person is diabetic')
https://colab.research.google.com/drive/1oxnhMTlomJ4HVhPuowpPFyMt1mwuOuQo?usp=sharing#printMode=true 4/4