NLP (1)
PROGRAM 1
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Program 1: split a sample text into sentences and into word tokens.
# NOTE(review): requires nltk.download('punkt') on first use.
text = " If you drive to the sun at 55 mph, it would take you about 193 years"
print(sent_tokenize(text))
print(word_tokenize(text))
OUTPUT :
PROGRAM 2
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Program 2: remove English stopwords from a sample sentence.
# NOTE(review): the original sample sentence was lost in extraction;
# this is a conventional stand-in from the NLTK stopword tutorial.
example_sent = "This is a sample sentence, showing off the stop words filtration."

stop_words = set(stopwords.words('english'))
word_tokens = word_tokenize(example_sent)

# Keep every token that is not a stopword (stopword list is lowercase,
# so capitalized tokens pass through unchanged — same as the original loop).
filtered_sentence = [w for w in word_tokens if w not in stop_words]

print(word_tokens)
print(filtered_sentence)
OUTPUT :
PROGRAM 3
lemmatizer = WordNetLemmatizer()
OUTPUT :
PROGRAM 4
from nltk.stem import PorterStemmer

# Program 4: stem related word forms with the Porter algorithm and
# print each word next to its stem.
ps = PorterStemmer()
words = ["likes", "liked", "likely", "liking"]
for w in words:
    print(w, ":", ps.stem(w))
OUTPUT :
PROGRAM 5
from nltk.corpus import wordnet

# Program 5: maximum Wu-Palmer similarity between two words.
# NOTE(review): the definitions of the two words and the loop over the first
# word's synsets were lost in extraction; reconstructed after the classic
# NLTK example (ship vs boat) — confirm against the original handout.
word1, word2 = "ship", "boat"

scores = []  # renamed from `list` to stop shadowing the builtin
wordFromList2 = wordnet.synsets(word2)[0]
for wordFromList1 in wordnet.synsets(word1):
    s = wordFromList1.wup_similarity(wordFromList2)
    # wup_similarity can return None when no common ancestor exists;
    # skip those so max() does not compare None with floats.
    if s is not None:
        scores.append(s)
print(max(scores))
OUTPUT :
PROGRAM 6
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk


def extract_named_entities(text):
    """Tokenize, POS-tag, and chunk *text*; return the NLTK entity tree."""
    words = word_tokenize(text)
    pos_tags = pos_tag(words)
    named_entities = ne_chunk(pos_tags)
    return named_entities


# Sample text reproduced byte-for-byte from the handout (typos included).
text = "Albert Einsten was born in Germany. He was awareded the nobel prize in physics in 1921"
named_entities = extract_named_entities(text)
named_entities.draw()  # opens a Tk window rendering the entity tree
OUTPUT :
PROGRAM 7
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer


def analyse_sentiment(text):
    """Return VADER polarity scores (neg/neu/pos/compound) for *text*.

    NOTE(review): requires nltk.download('vader_lexicon') on first use.
    """
    sid = SentimentIntensityAnalyzer()
    sentiment_score = sid.polarity_scores(text)
    return sentiment_score


# NOTE(review): the sample `text` and the print of the result were lost in
# extraction; a stand-in is supplied so the script actually runs and shows
# output, as the OUTPUT section implies.
text = "NLTK makes sentiment analysis easy and fun!"
sentiment_scores = analyse_sentiment(text)
print(sentiment_scores)
OUTPUT :
PROGRAM 8
import os
from gtts import gTTS


def text_to_speech(text):
    """Synthesize *text* to output.mp3 with gTTS and play it.

    NOTE(review): the function header and gTTS construction were lost in
    extraction; only the save/play lines survived. Reconstructed from the
    standard gTTS usage pattern — confirm language choice against handout.
    """
    tts = gTTS(text=text, lang="en")
    tts.save("output.mp3")
    os.system("start output.mp3")  # 'start' is Windows-only


if __name__ == "__main__":
    # NOTE(review): the original sample text was lost in extraction.
    text = "Hello, this is a text to speech demonstration."
    text_to_speech(text)
OUTPUT :
PROGRAM 9
import speech_recognition as sr


def audio_file_to_text(audio_file_path):
    """Transcribe a WAV file to text via the Google Web Speech API."""
    recognizer = sr.Recognizer()
    # NOTE(review): the `with sr.AudioFile(...)` line was lost in extraction;
    # it is required to produce `source` for recognizer.record().
    with sr.AudioFile(audio_file_path) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data)
        print('transcription:{}'.format(text))
    except sr.UnknownValueError:
        # Recognizer could not make sense of the audio.
        print("Could not understand the audio")
    except sr.RequestError:
        # API was unreachable or refused the request.
        print("Could not request results from the service")


audio_file_path = "Y:/ttsMP3.com_VoiceText_2025-1-29_11-40-18.wav"
audio_file_to_text(audio_file_path)
OUTPUT :
PROGRAM 10
import nltk
from nltk.chat.util import Chat, reflections

# Program 10: tiny pattern-matching chatbot.
# NOTE(review): most of the pairs list and the Chat construction were lost in
# extraction; the surviving patterns (greeting, well-being reply, "sorry",
# "quit") are reconstructed in the standard nltk.chat [regex, [responses]]
# format — confirm the exact responses against the original handout.
pairs = [
    [
        r"hi|hello|hey",
        ["Hello!", "Hi there!"],
    ],
    [
        r"how are you ?",
        ["I'm doing well, thank you!", "I'm great, how about you?"],
    ],
    [
        r"sorry (.*)",
        ["It's alright.", "No problem at all."],
    ],
    [
        r"quit",
        ["Bye! Take care."],
    ],
]

chatbot = Chat(pairs, reflections)
chatbot.converse()
OUTPUT :
PROGRAM 12
import re


def filter_spam(emails, keywords):
    """Return the emails whose text matches any keyword, case-insensitively.

    An email counts as spam if re.search finds at least one of the keyword
    patterns anywhere in it (IGNORECASE); order of the input is preserved.
    """
    return [
        email
        for email in emails
        if any(re.search(keyword, email, re.IGNORECASE) for keyword in keywords)
    ]


# NOTE(review): the original sample emails and keyword list were lost in
# extraction; these stand-ins demonstrate the same filter.
emails = [
    "Congratulations! You have won a free lottery prize",
    "Meeting scheduled for tomorrow at 10 AM",
    "Claim your free reward now",
]
keywords = ["free", "lottery", "prize", "reward"]

print("Spam Emails:")
for email in filter_spam(emails, keywords):
    print(email)
OUTPUT :
PROGRAM 13
import nltk
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Program 13: end-to-end text preprocessing pipeline.
# NOTE(review): the sample text, the punctuation-removal line, and the
# stemming/lemmatization list builds were lost in extraction; each is
# reconstructed from the variable names the surviving prints reference.

# Sample text
text = "Natural Language Processing with NLTK is fun and educational!"
print(text)

# Lowercase
text = text.lower()

# Remove punctuation (keep word characters and whitespace only)
removepun_text = re.sub(r'[^\w\s]', '', text)

# Tokenize
tokenize_text = word_tokenize(text)

# Remove stopwords
stop_words = set(stopwords.words('english'))
filtered_text = [w for w in tokenize_text if w not in stop_words]

# Stemming
ps = PorterStemmer()
word = ["Likes", "Liking", "Liked"]
stemmed_words = [ps.stem(w) for w in word]

# Lemmatization
lemmatizer = WordNetLemmatizer()
word1 = ["rocks", "corpora"]
lemmatized_words = [lemmatizer.lemmatize(w) for w in word1]

print("Lower Text:", text)
print("Tokenization:", tokenize_text)
print("Remove Punctuation:", removepun_text)
print("Stemmed Words:", stemmed_words)
print("Lemmatized Words:", lemmatized_words)
OUTPUT :
PROGRAM 14
import nltk
from nltk import word_tokenize, pos_tag


def pos_tagging(sentence):
    """Return (word, POS-tag) pairs for *sentence* using NLTK's tagger."""
    words = word_tokenize(sentence)
    tagged_words = pos_tag(words)
    return tagged_words


sentence = "The quick brown fox jumps over the lazy dog."
tagged_sentence = pos_tagging(sentence)
# NOTE(review): the print of the result was lost in extraction; the OUTPUT
# section implies the tags were displayed.
print(tagged_sentence)
OUTPUT :
PROGRAM 15
import nltk
import re
from nltk import word_tokenize, pos_tag, ne_chunk
from nltk.tree import Tree


def extract_entities(sent):
    """Return (entity_name, entity_type) pairs found in one sentence."""
    chunked = ne_chunk(pos_tag(word_tokenize(sent)))
    entities = []
    current_entity = []
    # NOTE(review): the loop header and the entity_name construction were
    # lost in extraction; this is the standard pattern for flattening NE
    # subtrees of an ne_chunk tree.
    for subtree in chunked:
        if type(subtree) == Tree:
            entity_name = " ".join(token for token, pos in subtree.leaves())
            entity_type = subtree.label()
            entities.append((entity_name, entity_type))
    return entities


def extract_relations(text):
    """Pair the first two entities of each sentence as a naive relation."""
    sentences = nltk.sent_tokenize(text)
    relations = []
    for sent in sentences:
        entities = extract_entities(sent)
        if len(entities) >= 2:
            # NOTE(review): the exact `relation` tuple built here was lost in
            # extraction; pairing the first two entities is the usual demo —
            # confirm against the original handout.
            relation = (entities[0], entities[1])
            relations.append(relation)
    return relations


# Sample text
text = "Barack Obama was born in Hawaii. He was elected president of the United States in 2008."
relations = extract_relations(text)
print(relations)
OUTPUT :
PROGRAM 16
import nltk
import gensim
from gensim import corpora
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')
nltk.download('punkt')

# Program 16: build a gensim Dictionary from preprocessed documents.
documents = [
    "Natural language processing techniques are used to analyze and generate human language.",
    "Deep learning models have significantly improved the accuracy of speech recognition systems.",
]

stop_words = set(stopwords.words('english'))


def preprocess(doc):
    """Lowercase, tokenize, and drop stopwords / non-alphabetic tokens."""
    tokens = word_tokenize(doc.lower())
    # NOTE(review): the stopword/punctuation filter was lost in extraction;
    # this is the conventional step before building a gensim dictionary.
    tokens = [t for t in tokens if t.isalpha() and t not in stop_words]
    return tokens


# NOTE(review): the line building processed_docs was lost in extraction;
# the Dictionary call below requires it.
processed_docs = [preprocess(doc) for doc in documents]
dictionary = corpora.Dictionary(processed_docs)
print(dictionary)
OUTPUT :