Practical 1: Document Sentiment Classification with TF-IDF and Logistic Regression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
# Sample texts and labels
texts = ["I love this product!", "This is the worst experience ever.", "It's okay, not great but
not bad either."]
labels = ["Positive", "Negative", "Neutral"]
# Data preprocessing
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)
y = labels
# Train-test split (with only three toy samples this split is purely illustrative)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# Model training
model = LogisticRegression()
model.fit(X_train, y_train)
# Prediction
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
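# Scoring a new, unseen review with the fitted pipeline (a minimal sketch; the
# review text is made up for illustration). Inference uses transform(), not
# fit_transform(), so the vocabulary learned during training is reused.
new_review = ["Absolutely fantastic, would buy again!"]
print("Predicted label:", model.predict(vectorizer.transform(new_review))[0])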
Practical 2: Aspect-Based Sentiment Analysis
import spacy
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
# Sample texts
text = "The battery life of this phone is great, but the camera quality is poor."
# Aspect extraction: treat nominal subjects and direct objects as candidate aspects
doc = nlp(text)
aspects = []
for token in doc:
    if token.dep_ in ['nsubj', 'dobj']:
        aspects.append(token.text)
print("Aspects:", aspects)
Practical 3: Sentiment Rating Prediction with Regression Models
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
# Sample data
texts = ["I love this product!", "This is the worst experience ever."]
ratings = [5, 1]
# Data preprocessing
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)
# Train-test split (with only two toy samples this split is purely illustrative)
X_train, X_test, y_train, y_test = train_test_split(X, ratings, test_size=0.33, random_state=42)
# Model training
model = SVR(kernel='linear')
model.fit(X_train, y_train)
# Prediction and Evaluation
y_pred = model.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
Practical 4: Unsupervised Sentiment Analysis Using Lexicon-Based Methods
from textblob import TextBlob
# Sample texts
texts = ["I love this product!", "This is the worst experience ever."]
# TextBlob's default PatternAnalyzer is lexicon-based: it looks up word
# polarities in a sentiment lexicon, so no labeled training data is required
for text in texts:
    polarity = TextBlob(text).sentiment.polarity
    label = 'Positive' if polarity > 0 else 'Negative' if polarity < 0 else 'Neutral'
    print(f"Text: {text}, Sentiment: {label}, Polarity: {polarity:.2f}")
Practical 5: Sentence Subjectivity and Sentiment Classification
from textblob import TextBlob
# Sample sentences
sentences = ["I love this product!", "The weather is nice today.", "I am not happy with the
service."]
for sentence in sentences:
blob = TextBlob(sentence)
subjectivity = blob.sentiment.subjectivity
sentiment = 'Positive' if blob.sentiment.polarity > 0 else 'Negative' if
blob.sentiment.polarity < 0 else 'Neutral'
print(f"Sentence: '{sentence}', Subjectivity: {'Subjective' if subjectivity > 0.5 else
'Objective'}, Sentiment: {sentiment}")
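# In a full pipeline, sentiment is usually assigned only to subjective
# sentences, with objective ones filtered out first (a minimal sketch; the
# added factual sentence is made up for illustration).
for sentence in sentences + ["The package arrived on Tuesday."]:
    blob = TextBlob(sentence)
    if blob.sentiment.subjectivity <= 0.5:
        print(f"'{sentence}' -> filtered out as objective")
    else:
        print(f"'{sentence}' -> polarity {blob.sentiment.polarity:.2f}")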
Practical 6: Sentence Subjectivity Classification Using Supervised Learning
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
# Sample data
texts = ["I love this product!", "The weather is nice today.", "I am not happy with the
service."]
labels = ["Subjective", "Objective", "Subjective"]
# Data preprocessing
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)
y = labels
# Model training
model = LogisticRegression()
model.fit(X, y)
# Prediction (on the training data itself, so this only demonstrates the API;
# report scores on a held-out set in practice)
y_pred = model.predict(X)
print(classification_report(y, y_pred))
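# Classifying new, unseen sentences with the fitted model (a minimal sketch;
# the sentences are made up, and with only three training examples the
# predictions are not reliable).
new_sentences = ["The train leaves at 9 am.", "What a wonderful day!"]
print(model.predict(vectorizer.transform(new_sentences)))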
Practical 7: Frequency-Based Aspect Extraction
from nltk import FreqDist, word_tokenize
from nltk.corpus import stopwords
import nltk
nltk.download('punkt')
nltk.download('stopwords')
# Sample text
reviews = ["The battery life is great.", "The camera quality is poor. Battery life is not good.",
           "The phone is fast but the battery drains quickly."]
# Tokenization and frequency analysis; stopwords and punctuation are filtered
# out so that frequent content words surface as candidate aspects
stop_words = set(stopwords.words('english'))
all_words = []
for review in reviews:
    tokens = word_tokenize(review.lower())
    all_words.extend(t for t in tokens if t.isalpha() and t not in stop_words)
freq_dist = FreqDist(all_words)
most_common_aspects = freq_dist.most_common(10)
print("Most Common Aspects:", most_common_aspects)
Practical 8: Implementing Rules of Sentiment Composition
from textblob import TextBlob
# Sample text
text = "I love the product, but the delivery was terrible."
# Sentiment analysis
blob = TextBlob(text)
sentences = blob.sentences
# Aggregating sentiment scores
total_polarity = sum(sentence.sentiment.polarity for sentence in sentences)
average_polarity = total_polarity / len(sentences)
sentiment = 'Positive' if average_polarity > 0 else 'Negative' if average_polarity < 0 else 'Neutral'
print(f"Overall Sentiment: {sentiment}")
Practical 9: Handling Negation in Sentiment Analysis
from textblob import TextBlob
# Sample text
text = "I do not like this product."
# Sentiment analysis: TextBlob's pattern-based analyzer recognizes negators
# such as "not" and inverts/dampens the polarity of the word that follows
blob = TextBlob(text)
polarity = blob.sentiment.polarity
sentiment = 'Positive' if polarity > 0 else 'Negative' if polarity < 0 else 'Neutral'
print(f"Text: '{text}', Sentiment: {sentiment}")
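# Handling negation explicitly with a lexicon (a minimal sketch; the tiny
# lexicon is an illustrative assumption): a sentiment word's score is flipped
# when one of the two preceding tokens is a negator.
lexicon = {'like': 1, 'love': 1, 'hate': -1, 'terrible': -1}
negators = {'not', 'no', 'never'}
tokens = text.lower().rstrip('.!?').split()
score = 0
for i, tok in enumerate(tokens):
    value = lexicon.get(tok, 0)
    if value and any(t in negators for t in tokens[max(0, i - 2):i]):
        value = -value
    score += value
print(f"Lexicon score with negation handling: {score}")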
Practical 10: Aspect and Entity Extraction
import spacy
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
# Sample text
text = "The battery life of this phone is fantastic, but the camera quality is poor."
# Entity extraction (spaCy's NER finds named entities; note that this sample
# sentence may yield none, since "battery" and "camera" are common nouns)
doc = nlp(text)
for ent in doc.ents:
    print(f"Entity: {ent.text}, Label: {ent.label_}")
# Aspect extraction: noun chunks are a common source of candidate aspects
for chunk in doc.noun_chunks:
    print(f"Aspect candidate: {chunk.text}")
Practical 11: Creating a Sentiment Lexicon from Scratch Using Text Data
from nltk.corpus import movie_reviews
from nltk import FreqDist
import nltk
# Download movie_reviews dataset
nltk.download('movie_reviews')
# Count word frequencies separately in positive and negative reviews
pos_freq = FreqDist(w.lower() for f in movie_reviews.fileids('pos') for w in movie_reviews.words(f))
neg_freq = FreqDist(w.lower() for f in movie_reviews.fileids('neg') for w in movie_reviews.words(f))
# A word enters the lexicon as positive (or negative) when it is reasonably
# frequent and appears at least twice as often in that class as in the other
positive_words = [w for w in pos_freq if w.isalpha() and pos_freq[w] > 50 and pos_freq[w] > 2 * neg_freq[w]]
negative_words = [w for w in neg_freq if w.isalpha() and neg_freq[w] > 50 and neg_freq[w] > 2 * pos_freq[w]]
print("Positive Words:", positive_words[:10])
print("Negative Words:", negative_words[:10])
Practical 12: Dictionary-Based Sentiment Analysis
# Sample text and sentiment lexicon
text = "I love the food but the service was terrible."
lexicon = {'love': 1, 'terrible': -1}  # Simple sentiment lexicon
# Dictionary-based sentiment analysis: strip punctuation so that tokens such
# as "terrible." still match their lexicon entries
words = [w.strip('.,!?') for w in text.lower().split()]
sentiments = [lexicon.get(word, 0) for word in words]
overall_sentiment = sum(sentiments)
print(f"Sentiment Score: {overall_sentiment}")
Practical 13: Corpus-Based Sentiment Analysis Approach
from nltk.corpus import movie_reviews
from nltk import FreqDist, bigrams
import nltk
nltk.download('movie_reviews')
# Build bigram co-occurrence counts over the corpus
corpus = [list(movie_reviews.words(fileid)) for fileid in movie_reviews.fileids()]
word_pairs = [pair for review in corpus for pair in bigrams(review)]
pair_freq = FreqDist(word_pairs)
# The most frequent pairs are dominated by function words; the seed-word
# co-occurrence sketch below is what carries actual sentiment signal
print(pair_freq.most_common(10))
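# Turney-style corpus heuristic (a minimal sketch): estimate a word's polarity
# from how often it co-occurs with the seed words "good" versus "bad" in the
# bigram counts built above. Real systems use PMI over wider context windows.
def seed_score(word):
    good = pair_freq[(word, 'good')] + pair_freq[('good', word)]
    bad = pair_freq[(word, 'bad')] + pair_freq[('bad', word)]
    return good - bad
for w in ['very', 'really', 'so']:
    print(w, seed_score(w))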
Practical 14: Sentiment Word Embedding
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')
# Sample text data
text = "I am very happy with the product but the delivery was delayed."
# Tokenize text
tokens = word_tokenize(text.lower())
# Train a small Word2Vec model; the API expects a list of tokenized sentences,
# so the single sentence is wrapped in an outer list
model = Word2Vec([tokens], vector_size=100, window=5, min_count=1, sg=1)
# Example: inspect the learned word vectors
happy_vector = model.wv['happy']
delayed_vector = model.wv['delayed']
print("Happy Vector:", happy_vector)
print("Delayed Vector:", delayed_vector)
Practical 15: Analysis of Comparative Opinions
import spacy
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
# Sample text with comparative opinions
text = "The new model is better than the old one, but the price is higher."
# Analyze text: comparative adjectives and adverbs carry the POS tags JJR/RBR
doc = nlp(text)
comparatives = [token for token in doc if token.tag_ in ('JJR', 'RBR')]
print(f"Comparative Terms: {[comp.text for comp in comparatives]}")
Practical 16: Identifying Comparative Sentences
import spacy
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
# Sample text
text = "The new phone is much better than the old one."
# Identify comparative sentences: flag a sentence when it contains a
# comparative adjective or adverb (POS tags JJR/RBR)
doc = nlp(text)
comparative_sentences = [sent.text for sent in doc.sents if any(tok.tag_ in ('JJR', 'RBR') for tok in sent)]
print("Comparative Sentences:", comparative_sentences)
Practical 17: Identifying the Preferred Entity Set
import spacy
# Load spaCy model
nlp = spacy.load("en_core_web_sm")
# Sample comparative text
text = "The iPhone 12 is better than the Samsung Galaxy S21 in terms of performance."
# Analyze text
doc = nlp(text)
entities = [ent.text for ent in doc.ents]
# The preferred entity is hard-coded here for illustration; the sketch below
# shows one way to infer it from the comparative structure
preferred_entity = "iPhone 12"
print(f"Entities: {entities}")
print(f"Preferred Entity: {preferred_entity}")