Developing a Linear Regression Model Using Scikit-Learn
Lab Task Guideline
Understanding Linear Regression
Linear regression is a statistical method that models the relationship between a dependent
variable (y) and one or more independent variables (X) by fitting a linear equation.
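Equation: $y = b_0 + b_1 X_1 + b_2 X_2 + \dots + b_n X_n$, where $b_0$ is the intercept and $b_1, \dots, b_n$ are the coefficients of the features $X_1, \dots, X_n$.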
Lab Task Steps
Step 1: Import Required Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt # For visualization
Step 2: Prepare/Generate Data
You can either:
Load a dataset (e.g., CSV file)
Create synthetic data for practice
Option 1: Create synthetic data
# Reproducible synthetic dataset: y = 2 + 3x plus standard-normal noise
np.random.seed(42)
n_samples = 100
X = np.random.rand(n_samples, 1)  # one feature column, uniform on [0, 1)
noise = np.random.randn(n_samples, 1)
y = 2 + 3 * X + noise  # target keeps the same (n_samples, 1) shape as X
Option 2: Load a dataset (example with the scikit-learn Diabetes dataset)
# Use the diabetes dataset that ships with scikit-learn
from sklearn.datasets import load_diabetes
data = load_diabetes()
X, y = data.data, data.target  # features and target taken from the loaded bunch
Step 3: Split Data into Training and Test Sets
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
Step 4: Create and Train the Linear Regression Model
# Instantiate the estimator and fit it on the training split in one step
# (fit() returns the fitted estimator itself, so the result can be bound directly).
model = LinearRegression().fit(X_train, y_train)
Step 5: Make Predictions
# Apply the fitted model to the held-out features
y_pred = model.predict(X=X_test)
Step 6: Evaluate the Model
# Calculate metrics on the held-out test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared Score: {r2:.2f}")

# For simple linear regression (1 feature), you can visualize the fit.
# The plotting calls must be indented so they belong to the `if` body;
# with more than one feature the plot is skipped.
if X.shape[1] == 1:
    plt.scatter(X_test, y_test, color='blue', label='Actual')
    plt.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted')
    plt.xlabel('Feature')
    plt.ylabel('Target')
    plt.title('Linear Regression Fit')
    plt.legend()
    plt.show()
Step 7: Interpret the Results
# Print coefficients
# The fitted intercept_ is a scalar and coef_ is 1-D when the target is 1-D
# (Option 2), but both are array-valued when the target is a column vector
# (Option 1). Flattening both handles either case for a single-target model
# without keying on the number of features.
intercept = np.ravel(model.intercept_)[0]
print(f"Intercept (b0): {intercept:.2f}")
for i, coef in enumerate(np.ravel(model.coef_), start=1):
    print(f"Coefficient for X{i}: {coef:.2f}")
3. Complete Code (Simple Linear Regression)
# Import libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# Build a reproducible toy dataset: y = 2 + 3x plus standard-normal noise
np.random.seed(42)
X = np.random.rand(100, 1)
noise = np.random.randn(100, 1)
y = 2 + 3 * X + noise

# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the linear regression model on the training split
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Score the predictions on the held-out split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the metrics followed by the learned parameters; y is a column
# vector here, so intercept_ and coef_ are indexed as arrays.
report_lines = (
    f"Mean Squared Error: {mse:.2f}",
    f"R-squared Score: {r2:.2f}",
    f"Intercept: {model.intercept_[0]:.2f}",
    f"Coefficient: {model.coef_[0][0]:.2f}",
)
for report_line in report_lines:
    print(report_line)

# Plot the test points (blue) with the fitted line (red) drawn on top
plt.scatter(X_test, y_test, color='blue')
plt.plot(X_test, y_pred, color='red')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Simple Linear Regression')
plt.show()