coe-projects
February 8, 2024
[ ]: # Predictive Analytics for Sales Forecasting
# Fit a linear-regression baseline that predicts sales from tabular features,
# report the mean squared error on a held-out split, and plot actual vs.
# predicted sales against one feature.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# NOTE(review): the CSV filename was lost when the notebook was exported (it
# appeared as '[Link]'); 'sales_data.csv' is an assumption -- confirm.
SALES_CSV = 'sales_data.csv'
# Placeholder feature names carried over from the original template (its
# literal '...' entry was dropped because it is not a column name).
# Replace with the real column names of the dataset.
FEATURE_COLUMNS = ['Feature1', 'Feature2']
TARGET_COLUMN = 'Sales'

# load data
sales_data = pd.read_csv(SALES_CSV)
print(sales_data.head())

# Fail early with a readable message if the placeholders were not replaced.
missing_columns = [
    column for column in FEATURE_COLUMNS + [TARGET_COLUMN]
    if column not in sales_data.columns
]
if missing_columns:
    raise KeyError(f'Columns missing from {SALES_CSV}: {missing_columns}')

features = sales_data[FEATURE_COLUMNS]
target = sales_data[TARGET_COLUMN]

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# linear regression model
model = LinearRegression()
# Training the model
model.fit(X_train, y_train)

# Predict on the held-out rows and score the fit
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')

# Visualizing with matplotlib.
# A scatter plot needs a single x series; passing the whole multi-column
# X_test raises a size-mismatch error, so plot against the first feature only.
plot_column = FEATURE_COLUMNS[0]
fig, ax = plt.subplots()
ax.scatter(X_test[plot_column], y_test, color='blue', label='Actual Sales')
ax.scatter(X_test[plot_column], predictions, color='red', label='Predicted Sales')
ax.set_xlabel('Feature')
ax.set_ylabel('Sales')
ax.legend()
plt.show()
1
[ ]: #sentiment analysis on social media data
# Label each sample comment as Positive / Negative / Neutral from the sign of
# its sentiment score.
from textblob import TextBlob


def polarity_label(polarity):
    """Return 'Positive' for a score > 0, 'Negative' for < 0, else 'Neutral'."""
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'


# Sample social media comments/reviews
comments = [
    "I love this product! It's amazing!",
    "The service was terrible, never using it again.",
    "I'm not sure about this brand, needs improvement.",
    "Best experience ever, highly recommend!",
]

# Perform sentiment analysis on each comment
for comment in comments:
    blob = TextBlob(comment)
    # NOTE(review): the attribute access was garbled in the export ('[Link]');
    # `blob.sentiment.polarity` is the reconstruction -- confirm.
    sentiment = blob.sentiment.polarity
    print(f"'{comment}' - {polarity_label(sentiment)}")
'I love this product! It's amazing!' - Positive
'The service was terrible, never using it again.' - Negative
'I'm not sure about this brand, needs improvement.' - Negative
'Best experience ever, highly recommend!' - Positive
[ ]: # Healthcare analytics: disease prediction
# Train a random-forest classifier to predict the `Disease` label, print
# accuracy and a per-class report, and plot predicted vs. actual class counts.
# important libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# libraries for modeling & prediction
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer

# NOTE(review): the CSV filename was lost when the notebook was exported (it
# appeared as '[Link]'); 'health_data.csv' is an assumption -- confirm.
HEALTH_CSV = 'health_data.csv'
# Placeholder feature names carried over from the original template (its
# literal '...' entry was dropped because it is not a column name).
# Replace with the real column names of the dataset.
FEATURE_COLUMNS = ['Feature1', 'Feature2']
TARGET_COLUMN = 'Disease'

# Loading file
health_data = pd.read_csv(HEALTH_CSV)
print(health_data.head())

# Rows without a label cannot be used for supervised learning. Dropping them
# (instead of mean-imputing the label, as the whole-frame imputation did)
# keeps the target a valid set of classes.
labelled_data = health_data.dropna(subset=[TARGET_COLUMN])

# relevant features for the model
features = labelled_data[FEATURE_COLUMNS]
target = labelled_data[TARGET_COLUMN]

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Handle missing feature values with SimpleImputer. The imputer is fit on the
# training rows only so that test-set statistics do not leak into training.
# Mean imputation assumes the feature columns are numeric -- TODO confirm.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train), columns=FEATURE_COLUMNS, index=X_train.index
)
X_test = pd.DataFrame(
    imputer.transform(X_test), columns=FEATURE_COLUMNS, index=X_test.index
)

# Random Forest prediction
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# prediction of data
predictions = model.predict(X_test)

# model evaluation
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_report(y_test, predictions))

# Visualize using seaborn and matplotlib.
# Two countplots drawn on the same axes cover each other; a long-form frame
# with a `hue` column draws predicted and actual bars side by side and
# produces the legend automatically.
class_counts = pd.concat(
    [
        pd.DataFrame({'Disease': predictions, 'Source': 'Predicted'}),
        pd.DataFrame({'Disease': y_test.to_numpy(), 'Source': 'Actual'}),
    ],
    ignore_index=True,
)
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=class_counts, x='Disease', hue='Source', palette='Set2', ax=ax)
ax.set_xlabel('Disease')
ax.set_ylabel('Count')
ax.set_title('Disease Prediction: Predicted vs. Actual')
plt.show()
[ ]: # Recommendation system for e-commerce
# Train a user-based KNN collaborative filter on a ratings file, report RMSE
# on a held-out split, and print the top-N items for one user ranked by
# estimated rating.
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# NOTE(review): the ratings filename was lost when the notebook was exported
# (it appeared as '[Link]'); 'ratings.csv' is an assumption -- confirm.
RATINGS_FILE = 'ratings.csv'

# Load data
reader = Reader(line_format='user item rating', sep=',', rating_scale=(1, 5))
data = Dataset.load_from_file(RATINGS_FILE, reader=reader)

# Split data into train and test sets (seeded so the reported RMSE is reproducible)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Build and train the model
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)
model.fit(trainset)

# Evaluate the model
predictions = model.test(testset)
rmse(predictions)

# Make recommendations for a user
user_id = '123'
top_n = 10

# Surprise re-indexes users and items with its own inner ids, so the raw id
# from the file must be translated; int(user_id) is not the inner id.
try:
    inner_uid = trainset.to_inner_uid(user_id)
except ValueError:
    # to_inner_uid raises ValueError when the user is not in the training split.
    inner_uid = None

if inner_uid is None:
    items_to_ignore = set()
    recommendations = []
    print(f'User {user_id!r} is not in the training set; nothing to recommend.')
else:
    # Inner ids of the items this user already rated in the training split.
    items_to_ignore = {inner_iid for inner_iid, _rating in trainset.ur[inner_uid]}
    # Estimate a rating for every item the user has not rated yet.
    # model.predict takes raw ids, hence the to_raw_iid translation.
    candidates = []
    for inner_iid in trainset.all_items():
        if inner_iid in items_to_ignore:
            continue
        raw_iid = trainset.to_raw_iid(inner_iid)
        candidates.append((raw_iid, model.predict(user_id, raw_iid).est))
    # List of (raw item id, estimated rating), best first.
    recommendations = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

print(recommendations)
[1]: # natural language processing for text classification
# Train a TF-IDF + Multinomial Naive Bayes text classifier on a handful of
# labelled sentences and print accuracy and a classification report.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Sample data - replace with your own dataset
documents = [
    ("This product is great", "positive"),
    ("The service was terrible", "negative"),
    ("The delivery was fast", "positive"),
    ("I'm never buying from them again", "negative"),
    ("Worst experience ever", "negative"),
]

# Split data into features and labels
X = [text for text, _label in documents]
y = [label for _text, label in documents]

# Split data into training and testing sets.
# With only five samples, test_size=0.2 leaves a single test document, so the
# metrics below are illustrative only; use a real dataset for meaningful scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature extraction using TF-IDF vectorization.
# The vocabulary is learned from the training texts only and then reused
# to transform the test texts.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Build and train a Multinomial Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_vec, y_train)

# Predictions
y_pred = classifier.predict(X_test_vec)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Classification report.
# zero_division=0 keeps the reported 0.0 values for classes with no predicted
# or no true samples, but without the flood of UndefinedMetricWarning
# messages that the default setting emits on such a tiny test set.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))
Accuracy: 0.00
Classification Report:
precision recall f1-score support
negative 0.00 0.00 0.00 1.0
positive 0.00 0.00 0.00 0.0
accuracy 0.00 1.0
macro avg 0.00 0.00 0.00 1.0
weighted avg 0.00 0.00 0.00 1.0
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to
0.0 in labels with no predicted samples. Use `zero_division` parameter to
control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0
in labels with no true samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to
0.0 in labels with no predicted samples. Use `zero_division` parameter to
control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0
in labels with no true samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))
5
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to
0.0 in labels with no predicted samples. Use `zero_division` parameter to
control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0
in labels with no true samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))
[ ]: # time series analysis for stock price prediction
# Predict the closing price from a lagged simple moving average with a random
# forest, report train/test RMSE on a chronological split, and plot actual
# vs. predicted prices.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load historical stock price data
# Replace 'stock_data.csv' with your dataset file path or API call to fetch data
stock_raw = pd.read_csv('stock_data.csv')

# Calculate financial indicators (e.g., moving averages, RSI, MACD, etc.)
# Here, we'll use a simple moving average as an example.
# shift(1) makes each row's SMA cover only the *previous* `window` closes;
# without it the feature would contain the same-day Close, i.e. the target.
window = 30
sma = stock_raw['Close'].rolling(window=window).mean().shift(1)

# Drop rows with missing values (including the warm-up rows where the SMA is
# undefined). Building a new frame instead of mutating in place keeps the
# cell safe to re-run.
stock_data = stock_raw.assign(SMA=sma).dropna()
if len(stock_data) < 2:
    raise ValueError(
        f'Need more than {window + 1} complete rows to train and test; '
        f'got {len(stock_data)} after dropping missing values.'
    )

# Define features and target variable
features = ['SMA']  # Add more financial indicators as needed
target = 'Close'

# Split data into features and target variable
X = stock_data[features]
y = stock_data[target]

# Split data into training and testing sets.
# shuffle=False keeps row order: the model trains on the earlier rows and is
# tested on the later ones (no look-ahead), and train + test predictions
# concatenated below line up with stock_data.index.
# Assumes the rows of the file are already sorted by date -- TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Train a Random Forest regressor
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)

# Make predictions
y_pred_train = rf_regressor.predict(X_train)
y_pred_test = rf_regressor.predict(X_test)

# Evaluate the model
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f"Train RMSE: {train_rmse:.2f}")
print(f"Test RMSE: {test_rmse:.2f}")

# Plot actual vs. predicted prices.
# NOTE(review): the x-axis is the frame's index; it only shows dates if the
# CSV is loaded with a date index -- confirm against the dataset.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(stock_data.index, stock_data['Close'], label='Actual Price')
ax.plot(
    stock_data.index,
    np.concatenate((y_pred_train, y_pred_test)),
    label='Predicted Price',
)
ax.set_title('Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()