
NLP
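
All of the following programs assume that NLTK (and, where used, gensim, gTTS and SpeechRecognition) is installed and that the required NLTK data packages have been downloaded. A one-time setup sketch (the identifiers below are the standard NLTK download names):

import nltk
nltk.download('punkt')                       # tokenizers (Programs 1, 2, 13, 14, 15)
nltk.download('stopwords')                   # stop word lists (Programs 2, 13, 16)
nltk.download('wordnet')                     # WordNet for lemmatization and similarity (Programs 3, 5, 13)
nltk.download('omw-1.4')                     # multilingual WordNet data used by the lemmatizer
nltk.download('averaged_perceptron_tagger')  # POS tagger (Programs 6, 14, 15)
nltk.download('maxent_ne_chunker')           # named entity chunker (Programs 6, 15)
nltk.download('words')                       # word list used by the NE chunker
nltk.download('vader_lexicon')               # VADER sentiment lexicon (Program 7)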

PROGRAM 1

import nltk

from nltk.tokenize import sent_tokenize, word_tokenize

text = " If you drive to the sun at 55 mph, it would take you about 193 years"

print(sent_tokenize(text))

print(word_tokenize(text))

OUTPUT :
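
A run of this program should print roughly the following (using NLTK's default punkt tokenizer):

['If you drive to the sun at 55 mph, it would take you about 193 years']
['If', 'you', 'drive', 'to', 'the', 'sun', 'at', '55', 'mph', ',', 'it', 'would', 'take', 'you', 'about', '193', 'years']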

PROGRAM 2

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize

example_sent = """This is a sample sentence,

showing off the stop words filtration."""

stop_words = set(stopwords.words('english'))

word_tokens = word_tokenize(example_sent)

filtered_sentence = [w for w in word_tokens if not w.lower() in stop_words]

filtered_sentence = []

for w in word_tokens:

if w not in stop_words:

filtered_sentence.append(w)

print(word_tokens)

print(filtered_sentence)

OUTPUT :
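
Expected output, approximately (the final filtered list comes from the case-sensitive loop, so the capitalized 'This' is kept):

['This', 'is', 'a', 'sample', 'sentence', ',', 'showing', 'off', 'the', 'stop', 'words', 'filtration', '.']
['This', 'sample', 'sentence', ',', 'showing', 'stop', 'words', 'filtration', '.']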
PROGRAM 3

from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

print("rocks :", lemmatizer.lemmatize("rocks"))

print("corpora :", lemmatizer.lemmatize("corpora"))

print("better :", lemmatizer.lemmatize("better", pos="a"))

OUTPUT :
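
Expected output:

rocks : rock
corpora : corpus
better : good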

PROGRAM 4

from nltk.stem import PorterStemmer

from nltk.tokenize import word_tokenize

ps = PorterStemmer()

words = ["likes","liked","likely","liking"]

for w in words:
    print(w, ":", ps.stem(w))

OUTPUT :
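
Expected output:

likes : like
liked : like
likely : like
liking : like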

PROGRAM 5

from nltk.corpus import wordnet

list1 = ['Compare', 'require']

list2 = ['choose', 'copy']

scores = []

for word1 in list1:
    for word2 in list2:
        wordFromList1 = wordnet.synsets(word1)[0]
        wordFromList2 = wordnet.synsets(word2)[0]
        s = wordFromList1.wup_similarity(wordFromList2)
        scores.append(s)

print(max(scores))

OUTPUT :

PROGRAM 6

import nltk

from nltk import word_tokenize, pos_tag, ne_chunk

def extract_named_entities(text):
    words = word_tokenize(text)
    pos_tags = pos_tag(words)
    named_entities = ne_chunk(pos_tags)
    return named_entities

text = "Albert Einstein was born in Germany. He was awarded the Nobel Prize in Physics in 1921."

named_entities = extract_named_entities(text)

named_entities.draw()

OUTPUT :
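
named_entities.draw() opens a separate Tkinter window showing the NE chunk tree rather than printing anything to the console. On a machine without a display, printing the tree is a convenient alternative (a minimal sketch):

print(extract_named_entities(text))   # prints the chunk tree as bracketed text instead of drawing it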
PROGRAM 7

import nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer

def analyse_sentiment(text):
    sid = SentimentIntensityAnalyzer()
    sentiment_score = sid.polarity_scores(text)
    return sentiment_score

text = "I love this product! it is a"

sentiment_scores = analyse_sentiment(text)

print(f"Sentiment scores : {sentiment_scores}")

OUTPUT :

PROGRAM 8

from gtts import gTTS

import os

def text_to_speech(text, lang='en'):
    tts = gTTS(text=text, lang=lang)
    tts.save("output.mp3")
    os.system("start output.mp3")  # "start" is Windows-specific; see the note under OUTPUT below

if __name__ == "__main__":
    text = "Hello, this is a text-to-speech conversion example."
    text_to_speech(text)

OUTPUT :
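
This program produces no console output; it saves the synthesized speech to output.mp3 and launches it with the Windows-only start command. A hedged cross-platform playback sketch (the helper name play_audio is illustrative, not part of gTTS):

import platform
import subprocess

def play_audio(path="output.mp3"):
    system = platform.system()
    if system == "Windows":
        os.startfile(path)                    # open with the associated Windows player
    elif system == "Darwin":
        subprocess.call(["afplay", path])     # built-in macOS audio player
    else:
        subprocess.call(["xdg-open", path])   # default handler on most Linux desktops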
PROGRAM 9

import speech_recognition as sr

recognizer = sr.Recognizer()

def audio_file_to_text(file_path):
    with sr.AudioFile(file_path) as source:
        audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
            print('Transcription: {}'.format(text))
        except sr.UnknownValueError:
            print("Sorry, I couldn't understand")
        except sr.RequestError:
            print("Couldn't request results from the Google Speech Recognition service")

audio_file_path ="Y:/ttsMP3.com_VoiceText_2025-1-29_11-40-18.wav"

audio_file_to_text(audio_file_path)

OUTPUT :

PROGRAM 10

import nltk

from nltk.chat.util import Chat, reflections

pairs = [
    [
        r"my name is (.*)",
        ["Hello %1, how can I help you today?"]
    ],
    [
        r"hi|hello|hey",
        ["Hello!", "Hi there!", "Hey!"]
    ],
    [
        r"what is your name?",
        ["I am a chatbot created using NLTK."]
    ],
    [
        r"how are you?",
        ["I'm doing well, thank you!", "I'm great, how about you?"]
    ],
    [
        r"sorry (.*)",
        ["It's okay, no worries.", "No problem at all."]
    ],
    [
        r"quit",
        ["Goodbye! Have a great day!"]
    ],
]

chatbot = Chat(pairs, reflections)

print("Hi! I'm a chatbot. Type 'quit' to exit.")

chatbot.converse()

OUTPUT :
PROGRAM 12

import re

emails = [
    "1.Congratulation you've won a free iPhone click here to claim now",
    "2.Dear customer, your bank account needs verification",
    "3.Meeting at 5pm, please be on time",
    "4.Win $10,000 easily! just sign up now"
]

spam_keywords = ["click here", "win", "verification"]

def filter_spam(emails, keywords):
    return [email for email in emails
            if any(re.search(keyword, email, re.IGNORECASE) for keyword in keywords)]

spam_emails = filter_spam(emails, spam_keywords)

print("Spam Emails:")

for email in spam_emails:

print(email)

OUTPUT :
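
Expected output (emails 1, 2 and 4 each match at least one keyword; email 3 matches none):

Spam Emails:
1.Congratulation you've won a free iPhone click here to claim now
2.Dear customer, your bank account needs verification
4.Win $10,000 easily! just sign up now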

PROGRAM 13

import nltk

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize

from nltk.stem import PorterStemmer, WordNetLemmatizer

import re

# Sample text

text = "This is a Sample Sentence.!!"

print(text)

# Lowercase
text = text.lower()

# Remove punctuation

removepun_text = re.sub(r'[^\w\s]', '', text)

# Tokenize

tokenize_text = word_tokenize(text)

# Remove stopwords

stop_words = set(stopwords.words('english'))

words = [word for word in tokenize_text if word not in stop_words]

# Stemming

ps = PorterStemmer()

word = ["Likes","Liking","Liked"]

stemmed_words = [ps.stem(word) for word in word]

# Lemmatization

lemmatizer = WordNetLemmatizer()

word1 = ["rocks","corpora"]

lemmatized_words = [lemmatizer.lemmatize(word) for word in word1]

print("Lower Text:",text)

print("Tokenization:",tokenize_text)

print("Stop Word Removal:",words)

print("Remove Punctuation:",removepun_text)

print("Stemmed Words:",stemmed_words)

print("Lemmatized Words:",lemmatized_words)

OUTPUT :
PROGRAM 14

import nltk

from nltk.tokenize import word_tokenize

from nltk.tag import pos_tag

def pos_tagging(sentence):
    words = word_tokenize(sentence)
    tagged_words = pos_tag(words)
    return tagged_words

sentence = "The quick brown fox jumps over the lazy dog."

tagged_sentence = pos_tagging(sentence)

print("Tagged Sentence:", tagged_sentence)

OUTPUT :
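
With NLTK's default averaged-perceptron tagger the output should look roughly like this (individual tags can differ between tagger versions):

Tagged Sentence: [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]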

PROGRAM 15

import nltk

import re

from nltk import word_tokenize, pos_tag, ne_chunk

from nltk.tree import Tree

def extract_entities(sent):
    chunked = ne_chunk(pos_tag(word_tokenize(sent)))
    entities = []
    for subtree in chunked:
        if isinstance(subtree, Tree):
            entity_name = " ".join(token for token, pos in subtree.leaves())
            entity_type = subtree.label()
            entities.append((entity_name, entity_type))
    return entities

def extract_relations(text):
    sentences = nltk.sent_tokenize(text)
    relations = []
    for sent in sentences:
        entities = extract_entities(sent)
        if len(entities) >= 2:
            for i, ent1 in enumerate(entities):
                for ent2 in entities[i + 1:]:
                    relation = (ent1[0], ent1[1], ent2[0], ent2[1])
                    relations.append(relation)
    return relations

# Sample text

text = "Barack Obama was born in Hawaii. He was elected president of the United States in 2008."

relations = extract_relations(text)

for relation in relations:

print(f"Entity 1: {relation[0]} (Type: {relation[1]}) - Entity 2: {relation[2]} (Type: {relation[3]})")

OUTPUT :

PROGRAM 16

import nltk

import gensim

from gensim import corpora

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize

# Download necessary NLTK data files

nltk.download('stopwords')
nltk.download('punkt')

# Sample text data

documents = [
    "Artificial intelligence and machine learning are revolutionizing various industries.",
    "Natural language processing techniques are used to analyze and generate human language.",
    "Deep learning models have significantly improved the accuracy of speech recognition systems.",
    "Computer vision enables machines to interpret and understand visual information.",
    "Neural networks are a key component of many modern AI systems."
]

# Preprocess the text data

stop_words = set(stopwords.words('english'))

def preprocess(doc):
    tokens = word_tokenize(doc.lower())
    tokens = [word for word in tokens if word.isalnum()]
    tokens = [word for word in tokens if word not in stop_words]
    return tokens

processed_docs = [preprocess(doc) for doc in documents]

# Create a dictionary and corpus

dictionary = corpora.Dictionary(processed_docs)

corpus = [dictionary.doc2bow(doc) for doc in processed_docs]

# Train the LDA model
lda_model = gensim.models.ldamodel.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=10)

# Print the topics
for idx, topic in lda_model.print_topics(-1):
    print(f"Topic {idx}: {topic}")

OUTPUT :
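
The topics printed here vary between runs because LDA training is stochastic. gensim's LdaModel accepts a random_state argument, so fixing a seed makes the output repeatable (a minimal sketch, otherwise identical to the call above):

# Fix the seed so repeated runs produce the same topics
lda_model = gensim.models.ldamodel.LdaModel(corpus, num_topics=2, id2word=dictionary,
                                            passes=10, random_state=42)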
