Housing prices linear regression
Housing prices linear regression
import pandas as pd
# Load the housing dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="Housing.csv")
data  # notebook display of the loaded DataFrame
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
... ... ... ... ... ... ... ... ... ... ... ...
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
data.head(n=10)  # preview the first ten rows
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
data.shape  # (number of rows, number of columns) of the loaded DataFrame
(545, 13)
data.info()  # prints each column's name, non-null count and dtype, plus the frame's memory usage
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 545 non-null int64
1 area 545 non-null int64
2 bedrooms 545 non-null int64
3 bathrooms 545 non-null int64
4 stories 545 non-null int64
5 mainroad 545 non-null object
6 guestroom 545 non-null object
7 basement 545 non-null object
8 hotwaterheating 545 non-null object
9 airconditioning 545 non-null object
10 parking 545 non-null int64
11 prefarea 545 non-null object
12 furnishingstatus 545 non-null object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB
# sklearn.preprocessing turns raw feature values into a representation that is
# more suitable for downstream estimators: LabelEncoder maps category labels
# to integer codes, MinMaxScaler rescales numeric columns to a fixed range.
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Encode "mainroad" with the shared `le` instance so that `le` stays fitted on
# that column, exactly as before.
le = LabelEncoder()
data["mainroad"] = le.fit_transform(data["mainroad"])

# data.info() reports several more object-dtype (text) columns. The scaling
# and regression steps later in the script need numeric input, so encode the
# remaining ones as well. A fresh encoder per column keeps `le` untouched;
# columns that are already numeric are skipped by the dtype filter.
for column in data.select_dtypes(include="object").columns:
    data[column] = LabelEncoder().fit_transform(data[column])

data  # notebook display of the encoded DataFrame
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
... ... ... ... ... ... ... ... ... ... ... ...
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 0 0 0 1 2
1 12250000 8960 4 4 4 1 0 0 0 1 3
2 12250000 9960 3 2 2 1 0 1 0 0 2
3 12215000 7500 4 2 2 1 0 1 0 1 3
4 11410000 7420 4 1 2 1 1 1 0 1 2
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 0 0 0 1 2
1 12250000 8960 4 4 4 1 0 0 0 1 3
2 12250000 9960 3 2 2 1 0 1 0 0 2
3 12215000 7500 4 2 2 1 0 1 0 1 3
4 11410000 7420 4 1 2 1 1 1 0 1 2
# These names are used below but were never imported in the script.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Features are every column except the target. The target is reshaped to a
# single-column 2-D array because MinMaxScaler works on 2-D input.
x = data.drop(columns=["price"])
y = data["price"].values.reshape(-1, 1)

# Use one scaler per array: re-fitting a single scaler on `y` would discard
# the feature fit, making it impossible to transform new feature rows later.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
x = x_scaler.fit_transform(x)
y = y_scaler.fit_transform(y)
# The script previously ended with `scaler` fitted on the price column; keep
# that name bound to the target scaler so later cells that call
# scaler.inverse_transform(...) on predictions keep working.
scaler = y_scaler

lr = LinearRegression()

# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split, and therefore the printed metrics, reproducible between runs.
# NOTE(review): both scalers are fitted on the full dataset before the split;
# consider fitting them on the training rows only if the model is changed to
# one that is sensitive to feature scale (e.g. a regularized regressor).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

lr.fit(x_train, y_train)
y_predict = lr.predict(x_test)

# Metrics are computed on the min-max scaled target, not on raw prices.
mae = mean_absolute_error(y_test, y_predict)
mse = mean_squared_error(y_test, y_predict)
r2 = r2_score(y_test, y_predict)
print(mae, mse, r2)