SENTIMENT ANALYSIS
PROGRAM:
import pandas as pd
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Ensure necessary NLTK resources are downloaded.
# 'punkt' covers older NLTK releases; recent releases (3.9+) load the
# word_tokenize models from 'punkt_tab' instead, so both are fetched to keep
# tokenization working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
# Simple Sentiment Analysis Function using TextBlob
def simple_sentiment_analysis(text):
    """Label *text* as 'Positive', 'Negative' or 'Neutral'.

    The label follows the sign of the polarity score TextBlob reports for
    the text: above zero is 'Positive', below zero is 'Negative', and
    exactly zero is 'Neutral'.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'
# Text Preprocessing for Advanced Analysis
_ENGLISH_STOPWORDS = None  # populated on first use by _english_stopwords()


def _english_stopwords():
    """Return the NLTK English stop-word list as a cached frozenset."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        # Built lazily (not at import time) so the stopwords corpus only has
        # to be available by the time text is actually preprocessed.
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def preprocess_text(text):
    """Normalize *text* into a space-separated string of content words.

    Steps, in order:
      1. tokenize with ``word_tokenize``;
      2. keep only purely alphabetic tokens and lower-case them;
      3. drop tokens found in the NLTK English stop-word list.

    Args:
        text: The raw string to clean.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        nothing survives).
    """
    # The stop-word collection does not depend on the token being tested, so
    # it is fetched once here instead of once per token, and kept as a set
    # for constant-time membership tests.
    stop_words = _english_stopwords()
    lowered = [word.lower() for word in word_tokenize(text) if word.isalpha()]
    return ' '.join(word for word in lowered if word not in stop_words)
# Advanced Sentiment Analysis with scikit-learn
def advanced_sentiment_analysis(df, *, test_size=0.2, random_state=42):
    """Train a TF-IDF + linear SVM classifier on *df* and print its scores.

    Args:
        df: DataFrame with a 'text' column (raw documents) and a 'label'
            column (target classes). A 'cleaned_text' column holding the
            output of ``preprocess_text`` is added to it in place.
        test_size: Forwarded to ``train_test_split``; defaults to the
            previously hard-coded 0.2.
        random_state: Forwarded to ``train_test_split``; defaults to the
            previously hard-coded 42.

    Returns:
        None. The accuracy and the classification report for the held-out
        split are printed to stdout.
    """
    # Preprocess texts
    df['cleaned_text'] = df['text'].apply(preprocess_text)

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(
        df['cleaned_text'],
        df['label'],
        test_size=test_size,
        random_state=random_state,
    )

    # Vectorization: fit the vocabulary on the training split only, then
    # reuse it to transform the test split.
    vectorizer = TfidfVectorizer()
    X_train_tfidf = vectorizer.fit_transform(X_train)
    X_test_tfidf = vectorizer.transform(X_test)

    # Training a Linear SVM classifier
    classifier = LinearSVC()
    classifier.fit(X_train_tfidf, y_train)

    # Making predictions and evaluating the model
    y_pred = classifier.predict(X_test_tfidf)
    print("Advanced Analysis Accuracy:", accuracy_score(y_test, y_pred))
    # zero_division=0 reports the same 0.0 scores for labels that have no
    # predicted or no true samples, without emitting an
    # UndefinedMetricWarning for each of them.
    print(
        "Classification Report:\n",
        classification_report(y_test, y_pred, zero_division=0),
    )
# Example Text for Simple Sentiment Analysis
text_example = "This is an amazing library for natural language processing!"
simple_result = simple_sentiment_analysis(text_example)
print("Simple Sentiment Analysis Result:", simple_result)

# Sample Data for Advanced Sentiment Analysis, written as (text, label) pairs
# so each sentence sits next to its own label.
labelled_samples = [
    ("I love this phone, its camera is amazing!", 'Positive'),
    ("This movie was boring and too long.", 'Negative'),
    ("What a beautiful day!", 'Positive'),
    ("I'm not sure how I feel about this.", 'Neutral'),
]
sample_texts, sample_labels = zip(*labelled_samples)
data = {'text': list(sample_texts), 'label': list(sample_labels)}
df = pd.DataFrame(data)

# Perform Advanced Sentiment Analysis
advanced_sentiment_analysis(df)
OUTPUT:
[nltk_data] Downloading package punkt to /root/nltk_data... [nltk_data] Unzipping
tokenizers/punkt.zip. [nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data]
Unzipping corpora/stopwords.zip.
Simple Sentiment Analysis Result: Positive Advanced Analysis Accuracy: 0.0 Classification
Report:
precision recall f1-score support
Negative 0.00 0.00 0.00 1.0
Positive 0.00 0.00 0.00 0.0
accuracy 0.00 1.0
macro avg 0.00 0.00 0.00 1.0
weighted avg 0.00 0.00 0.00 1.0
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels
with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with
no true samples. Use `zero_division` parameter to control this behavior. _warn_prf(average,
modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels
with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with
no true samples. Use `zero_division` parameter to control this behavior. _warn_prf(average,
modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels
with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344:
UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with
no true samples. Use `zero_division` parameter to control this behavior. _warn_prf(average,
modifier, msg_start, len(result))