0% found this document useful (0 votes)
24 views · 1 page

Gen AI VTUCircle

The document contains various code snippets related to natural language processing and machine learning, utilizing libraries such as Gensim, Scikit-learn, and Transformers. It includes functionality for loading pretrained word embeddings, training Word2Vec on a small medical corpus, finding similar words, visualizing word embeddings using PCA and t-SNE, and performing sentiment analysis and summarization. Additionally, it demonstrates how to generate a paragraph from words similar to a seed word and how to enrich prompts with similar words before sending them to OpenAI's API.

Uploaded by

kariw44224
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views · 1 page

Gen AI VTUCircle

The document contains various code snippets related to natural language processing and machine learning, utilizing libraries such as Gensim, Scikit-learn, and Transformers. It includes functionality for loading pretrained word embeddings, training Word2Vec on a small medical corpus, finding similar words, visualizing word embeddings using PCA and t-SNE, and performing sentiment analysis and summarization. Additionally, it demonstrates how to generate a paragraph from words similar to a seed word and how to enrich prompts with similar words before sending them to OpenAI's API.

Uploaded by

kariw44224
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

EXP 01 EXP 02 EXP 03 EXP 04 EXP 05

import gensim.downloader as api import gensim.downloader as api import matplotlib.pyplot as plt plt.show() import gensim.downloader as api import gensim.downloader as api
from scipy.spatial.distance import cosine from sklearn.decomposition import PCA from sklearn.manifold import TSNE def find_similar_words(input_word, top_n=5): import openai from gensim.models import KeyedVectors
import matplotlib.pyplot as plt import numpy as np try: import random import random
print("Loading Word2Vec model...") model = api.load("word2vec-google-news-300") from gensim.models import Word2Vec similar_words = import os model = api.load("glove-wiki-gigaword-100")
model = api.load("word2vec-google-news-300") words = ['computer', 'internet', 'software', medical_corpus = [ model.wv.most_similar(input_word, topn=top_n) word_vectors = api.load("glove-wiki-gigaword- def generate_similar_words(seed_word, topn=10):
print("Model loaded successfully.\n") 'hardware', 'keyboard', 'mouse', 'server', 'network', "The patient was diagnosed with diabetes and print(f"Words similar to '{input_word}':") 100") if seed_word in model:
'programming', 'database'] hypertension.", for word, similarity in similar_words: def get_similar_words(word, topn=3): return [word for word, _ in
vector = model['king'] vectors = [model[word] for word in words] "MRI scans reveal abnormalities in the brain print(f" {word} ({similarity:.2f})") similar = word_vectors.most_similar(word, model.most_similar(seed_word, topn=topn)]
print("First 10 dimensions of 'king' vector:") pca = PCA(n_components=2) tissue.", except KeyError: topn=topn) else:
print(vector[:10], "\n") reduced = pca.fit_transform(vectors) "The treatment involves antibiotics and regular print(f"'{input_word}' not found in return [w[0] for w in similar] return []
input_word = 'computer' monitoring.", vocabulary.") def enrich_prompt(prompt): def create_paragraph(seed_word):
print("Top 10 words most similar to 'king':") similar_words = model.most_similar(input_word, "Symptoms include fever, fatigue, and muscle find_similar_words("treatment") words = prompt.split() similar_words =
for word, similarity in model.most_similar('king'): topn=5) pain.", find_similar_words("vaccine") new_prompt = [] generate_similar_words(seed_word, topn=10)
print(f"{word}: {similarity:.4f}") print(f"Top 5 words similar to '{input_word}':") "The vaccine is effective against several viral for word in words: if not similar_words:
print() for word, score in similar_words: infections.", if word in word_vectors: return f"No similar words found for
print(f"{word}: {score:.4f}") "Doctors recommend physical therapy for EXP 06 similar_words = get_similar_words(word) '{seed_word}'."
result = model.most_similar(positive=['king', plt.figure(figsize=(8, 6)) recovery.", from transformers import pipeline enriched = f"{word} ({', random.shuffle(similar_words)
'woman'], negative=['man'], topn=1) for i, word in enumerate(words): "The clinical trial results were published in the sentiment_pipeline = pipeline("sentiment- '.join(similar_words)})" selected_words = similar_words[:5]
print("Analogy - 'king' - 'man' + 'woman' ≈ ?") plt.scatter(reduced[i, 0], reduced[i, 1]) journal.", analysis") new_prompt.append(enriched)
print(f"Result: {result[0][0]} (Similarity: {result[0] plt.annotate(word, (reduced[i, 0], reduced[i, 1])) "The surgeon performed a minimally invasive input_sentences = [ else: paragraph = f"In a world defined by {seed_word},
[1]:.4f})\n") plt.title("PCA Visualization of Technology Word procedure.", "The new phone I bought is absolutely new_prompt.append(word) "
Embeddings") "The prescription includes pain relievers and anti- amazing!", return " ".join(new_prompt) paragraph += f"people found themselves
print("Analogy - 'paris' + 'italy' - 'france' ≈ ?") plt.xlabel("PC1") inflammatory drugs.", "Worst customer service ever. I'm never coming openai.api_key = os.getenv("OPENAI_API_KEY") surrounded by concepts like {',
for word, similarity in plt.ylabel("PC2") "The diagnosis confirmed a rare genetic back.", def generate_response(prompt): '.join(selected_words[:-1])}, and {selected_words[-
model.most_similar(positive=['paris', 'italy'], plt.grid(True) disorder." "The experience was average, nothing special.", response = openai.ChatCompletion.create( 1]}. "
negative=['france']): plt.show() ] "Fast delivery and the packaging was perfect.", model="gpt-3.5-turbo", paragraph += f"These ideas shaped the way they
print(f"{word}: {similarity:.4f}") processed_corpus = [sentence.lower().split() for "The product broke within two days. Very messages=[ thought, acted, and dreamed. Every step forward in
print() sentence in medical_corpus] disappointed." {"role": "user", "content": prompt} their journey reflected the essence of
model = Word2Vec(sentences=processed_corpus, ] ] '{seed_word}', "
print("Analogy - 'walking' + 'swimming' - 'walk' ≈ ?") EXP 07 vector_size=100, window=5, min_count=1, results = sentiment_pipeline(input_sentences) ) paragraph += f"bringing them closer to
for word, similarity in from transformers import pipeline workers=4, epochs=50) print("Sentiment Analysis Results:\n") return response['choices'][0]['message'] understanding the true meaning of
model.most_similar(positive=['walking', summarizer = pipeline("summarization", words = list(model.wv.index_to_key) for sentence, result in zip(input_sentences, results): ['content'] {selected_words[0]}."
'swimming'], negative=['walk']): model="t5-small") embeddings = np.array([model.wv[word] for word print(f"Input Sentence: {sentence}") original_prompt = "Describe the future of artificial return paragraph
print(f"{word}: {similarity:.4f}") text = """ in words]) print(f"Predicted Sentiment: {result['label']}, intelligence in healthcare." seed = "freedom"
print() Generated Some Text tsne = TSNE(n_components=2, random_state=42, Confidence Score: {result['score']:.2f}\n") enriched_prompt = print(create_paragraph(seed))
""" perplexity=5) enrich_prompt(original_prompt)
similarity = 1 - cosine(model['king'], summary = summarizer(text, max_length=60, tsne_result = tsne.fit_transform(embeddings) print("Original Prompt:", original_prompt)
model['queen']) min_length=30, do_sample=False) plt.figure(figsize=(10, 8)) original_response =
print(f"Cosine similarity between 'king' and plt.scatter(tsne_result[:, 0], tsne_result[:, 1], generate_response(original_prompt)
'queen': {similarity:.4f}") print(summary[0]['summary_text']) color="blue") print("\nOriginal Response:", original_response)
for i, word in enumerate(words): print("\nEnriched Prompt:", enriched_prompt)
plt.text(tsne_result[i, 0] + 0.02, tsne_result[i, 1] + enriched_response =
0.02, word, fontsize=12) generate_response(enriched_prompt)
plt.title("Word Embeddings Visualization (Medical print("\nEnriched Response:", enriched_response)
Domain)")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.grid(True)

EXP 01 EXP 02 EXP 03 EXP 04 EXP 05


import gensim.downloader as api import gensim.downloader as api import matplotlib.pyplot as plt plt.show() import gensim.downloader as api import gensim.downloader as api
from scipy.spatial.distance import cosine from sklearn.decomposition import PCA from sklearn.manifold import TSNE def find_similar_words(input_word, top_n=5): import openai from gensim.models import KeyedVectors
import matplotlib.pyplot as plt import numpy as np try: import random import random
print("Loading Word2Vec model...") model = api.load("word2vec-google-news-300") from gensim.models import Word2Vec similar_words = import os model = api.load("glove-wiki-gigaword-100")
model = api.load("word2vec-google-news-300") words = ['computer', 'internet', 'software', medical_corpus = [ model.wv.most_similar(input_word, topn=top_n) word_vectors = api.load("glove-wiki-gigaword- def generate_similar_words(seed_word, topn=10):
print("Model loaded successfully.\n") 'hardware', 'keyboard', 'mouse', 'server', 'network', "The patient was diagnosed with diabetes and print(f"Words similar to '{input_word}':") 100") if seed_word in model:
'programming', 'database'] hypertension.", for word, similarity in similar_words: def get_similar_words(word, topn=3): return [word for word, _ in
vector = model['king'] vectors = [model[word] for word in words] "MRI scans reveal abnormalities in the brain print(f" {word} ({similarity:.2f})") similar = word_vectors.most_similar(word, model.most_similar(seed_word, topn=topn)]
print("First 10 dimensions of 'king' vector:") pca = PCA(n_components=2) tissue.", except KeyError: topn=topn) else:
print(vector[:10], "\n") reduced = pca.fit_transform(vectors) "The treatment involves antibiotics and regular print(f"'{input_word}' not found in return [w[0] for w in similar] return []
input_word = 'computer' monitoring.", vocabulary.") def enrich_prompt(prompt): def create_paragraph(seed_word):
print("Top 10 words most similar to 'king':") similar_words = model.most_similar(input_word, "Symptoms include fever, fatigue, and muscle find_similar_words("treatment") words = prompt.split() similar_words =
for word, similarity in model.most_similar('king'): topn=5) pain.", find_similar_words("vaccine") new_prompt = [] generate_similar_words(seed_word, topn=10)
print(f"{word}: {similarity:.4f}") print(f"Top 5 words similar to '{input_word}':") "The vaccine is effective against several viral for word in words: if not similar_words:
print() for word, score in similar_words: infections.", if word in word_vectors: return f"No similar words found for
print(f"{word}: {score:.4f}") "Doctors recommend physical therapy for EXP 06 similar_words = get_similar_words(word) '{seed_word}'."
result = model.most_similar(positive=['king', plt.figure(figsize=(8, 6)) recovery.", from transformers import pipeline enriched = f"{word} ({', random.shuffle(similar_words)
'woman'], negative=['man'], topn=1) for i, word in enumerate(words): "The clinical trial results were published in the sentiment_pipeline = pipeline("sentiment- '.join(similar_words)})" selected_words = similar_words[:5]
print("Analogy - 'king' - 'man' + 'woman' ≈ ?") plt.scatter(reduced[i, 0], reduced[i, 1]) journal.", analysis") new_prompt.append(enriched)
print(f"Result: {result[0][0]} (Similarity: {result[0] plt.annotate(word, (reduced[i, 0], reduced[i, 1])) "The surgeon performed a minimally invasive input_sentences = [ else: paragraph = f"In a world defined by {seed_word},
[1]:.4f})\n") plt.title("PCA Visualization of Technology Word procedure.", "The new phone I bought is absolutely new_prompt.append(word) "
Embeddings") "The prescription includes pain relievers and anti- amazing!", return " ".join(new_prompt) paragraph += f"people found themselves
print("Analogy - 'paris' + 'italy' - 'france' ≈ ?") plt.xlabel("PC1") inflammatory drugs.", "Worst customer service ever. I'm never coming openai.api_key = os.getenv("OPENAI_API_KEY") surrounded by concepts like {',
for word, similarity in plt.ylabel("PC2") "The diagnosis confirmed a rare genetic back.", def generate_response(prompt): '.join(selected_words[:-1])}, and {selected_words[-
model.most_similar(positive=['paris', 'italy'], plt.grid(True) disorder." "The experience was average, nothing special.", response = openai.ChatCompletion.create( 1]}. "
negative=['france']): plt.show() ] "Fast delivery and the packaging was perfect.", model="gpt-3.5-turbo", paragraph += f"These ideas shaped the way they
print(f"{word}: {similarity:.4f}") processed_corpus = [sentence.lower().split() for "The product broke within two days. Very messages=[ thought, acted, and dreamed. Every step forward in
print() sentence in medical_corpus] disappointed." {"role": "user", "content": prompt} their journey reflected the essence of
model = Word2Vec(sentences=processed_corpus, ] ] '{seed_word}', "
print("Analogy - 'walking' + 'swimming' - 'walk' ≈ ?") EXP 07 vector_size=100, window=5, min_count=1, results = sentiment_pipeline(input_sentences) ) paragraph += f"bringing them closer to
for word, similarity in from transformers import pipeline workers=4, epochs=50) print("Sentiment Analysis Results:\n") return response['choices'][0]['message'] understanding the true meaning of
model.most_similar(positive=['walking', summarizer = pipeline("summarization", words = list(model.wv.index_to_key) for sentence, result in zip(input_sentences, results): ['content'] {selected_words[0]}."
'swimming'], negative=['walk']): model="t5-small") embeddings = np.array([model.wv[word] for word print(f"Input Sentence: {sentence}") original_prompt = "Describe the future of artificial return paragraph
print(f"{word}: {similarity:.4f}") text = """ in words]) print(f"Predicted Sentiment: {result['label']}, intelligence in healthcare." seed = "freedom"
print() Generated Some Text tsne = TSNE(n_components=2, random_state=42, Confidence Score: {result['score']:.2f}\n") enriched_prompt = print(create_paragraph(seed))
""" perplexity=5) enrich_prompt(original_prompt)
similarity = 1 - cosine(model['king'], summary = summarizer(text, max_length=60, tsne_result = tsne.fit_transform(embeddings) print("Original Prompt:", original_prompt)
model['queen']) min_length=30, do_sample=False) plt.figure(figsize=(10, 8)) original_response =
print(f"Cosine similarity between 'king' and plt.scatter(tsne_result[:, 0], tsne_result[:, 1], generate_response(original_prompt)
'queen': {similarity:.4f}") print(summary[0]['summary_text']) color="blue") print("\nOriginal Response:", original_response)
for i, word in enumerate(words): print("\nEnriched Prompt:", enriched_prompt)
plt.text(tsne_result[i, 0] + 0.02, tsne_result[i, 1] + enriched_response =
0.02, word, fontsize=12) generate_response(enriched_prompt)
plt.title("Word Embeddings Visualization (Medical print("\nEnriched Response:", enriched_response)
Domain)")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.grid(True)

EXP 01 EXP 02 EXP 03 EXP 04 EXP 05


import gensim.downloader as api import gensim.downloader as api import matplotlib.pyplot as plt plt.show() import gensim.downloader as api import gensim.downloader as api
from scipy.spatial.distance import cosine from sklearn.decomposition import PCA from sklearn.manifold import TSNE def find_similar_words(input_word, top_n=5): import openai from gensim.models import KeyedVectors
import matplotlib.pyplot as plt import numpy as np try: import random import random
print("Loading Word2Vec model...") model = api.load("word2vec-google-news-300") from gensim.models import Word2Vec similar_words = import os model = api.load("glove-wiki-gigaword-100")
model = api.load("word2vec-google-news-300") words = ['computer', 'internet', 'software', medical_corpus = [ model.wv.most_similar(input_word, topn=top_n) word_vectors = api.load("glove-wiki-gigaword- def generate_similar_words(seed_word, topn=10):
print("Model loaded successfully.\n") 'hardware', 'keyboard', 'mouse', 'server', 'network', "The patient was diagnosed with diabetes and print(f"Words similar to '{input_word}':") 100") if seed_word in model:
'programming', 'database'] hypertension.", for word, similarity in similar_words: def get_similar_words(word, topn=3): return [word for word, _ in
vector = model['king'] vectors = [model[word] for word in words] "MRI scans reveal abnormalities in the brain print(f" {word} ({similarity:.2f})") similar = word_vectors.most_similar(word, model.most_similar(seed_word, topn=topn)]
print("First 10 dimensions of 'king' vector:") pca = PCA(n_components=2) tissue.", except KeyError: topn=topn) else:
print(vector[:10], "\n") reduced = pca.fit_transform(vectors) "The treatment involves antibiotics and regular print(f"'{input_word}' not found in return [w[0] for w in similar] return []
input_word = 'computer' monitoring.", vocabulary.") def enrich_prompt(prompt): def create_paragraph(seed_word):
print("Top 10 words most similar to 'king':") similar_words = model.most_similar(input_word, "Symptoms include fever, fatigue, and muscle find_similar_words("treatment") words = prompt.split() similar_words =
for word, similarity in model.most_similar('king'): topn=5) pain.", find_similar_words("vaccine") new_prompt = [] generate_similar_words(seed_word, topn=10)
print(f"{word}: {similarity:.4f}") print(f"Top 5 words similar to '{input_word}':") "The vaccine is effective against several viral for word in words: if not similar_words:
print() for word, score in similar_words: infections.", if word in word_vectors: return f"No similar words found for
print(f"{word}: {score:.4f}") "Doctors recommend physical therapy for EXP 06 similar_words = get_similar_words(word) '{seed_word}'."
result = model.most_similar(positive=['king', plt.figure(figsize=(8, 6)) recovery.", from transformers import pipeline enriched = f"{word} ({', random.shuffle(similar_words)
'woman'], negative=['man'], topn=1) for i, word in enumerate(words): "The clinical trial results were published in the sentiment_pipeline = pipeline("sentiment- '.join(similar_words)})" selected_words = similar_words[:5]
print("Analogy - 'king' - 'man' + 'woman' ≈ ?") plt.scatter(reduced[i, 0], reduced[i, 1]) journal.", analysis") new_prompt.append(enriched)
print(f"Result: {result[0][0]} (Similarity: {result[0] plt.annotate(word, (reduced[i, 0], reduced[i, 1])) "The surgeon performed a minimally invasive input_sentences = [ else: paragraph = f"In a world defined by {seed_word},
[1]:.4f})\n") plt.title("PCA Visualization of Technology Word procedure.", "The new phone I bought is absolutely new_prompt.append(word) "
Embeddings") "The prescription includes pain relievers and anti- amazing!", return " ".join(new_prompt) paragraph += f"people found themselves
print("Analogy - 'paris' + 'italy' - 'france' ≈ ?") plt.xlabel("PC1") inflammatory drugs.", "Worst customer service ever. I'm never coming openai.api_key = os.getenv("OPENAI_API_KEY") surrounded by concepts like {',
for word, similarity in plt.ylabel("PC2") "The diagnosis confirmed a rare genetic back.", def generate_response(prompt): '.join(selected_words[:-1])}, and {selected_words[-
model.most_similar(positive=['paris', 'italy'], plt.grid(True) disorder." "The experience was average, nothing special.", response = openai.ChatCompletion.create( 1]}. "
negative=['france']): plt.show() ] "Fast delivery and the packaging was perfect.", model="gpt-3.5-turbo", paragraph += f"These ideas shaped the way they
print(f"{word}: {similarity:.4f}") processed_corpus = [sentence.lower().split() for "The product broke within two days. Very messages=[ thought, acted, and dreamed. Every step forward in
print() sentence in medical_corpus] disappointed." {"role": "user", "content": prompt} their journey reflected the essence of
model = Word2Vec(sentences=processed_corpus, ] ] '{seed_word}', "
print("Analogy - 'walking' + 'swimming' - 'walk' ≈ ?") EXP 07 vector_size=100, window=5, min_count=1, results = sentiment_pipeline(input_sentences) ) paragraph += f"bringing them closer to
for word, similarity in from transformers import pipeline workers=4, epochs=50) print("Sentiment Analysis Results:\n") return response['choices'][0]['message'] understanding the true meaning of
model.most_similar(positive=['walking', summarizer = pipeline("summarization", words = list(model.wv.index_to_key) for sentence, result in zip(input_sentences, results): ['content'] {selected_words[0]}."
'swimming'], negative=['walk']): model="t5-small") embeddings = np.array([model.wv[word] for word print(f"Input Sentence: {sentence}") original_prompt = "Describe the future of artificial return paragraph
print(f"{word}: {similarity:.4f}") text = """ in words]) print(f"Predicted Sentiment: {result['label']}, intelligence in healthcare." seed = "freedom"
print() Generated Some Text tsne = TSNE(n_components=2, random_state=42, Confidence Score: {result['score']:.2f}\n") enriched_prompt = print(create_paragraph(seed))
""" perplexity=5) enrich_prompt(original_prompt)
similarity = 1 - cosine(model['king'], summary = summarizer(text, max_length=60, tsne_result = tsne.fit_transform(embeddings) print("Original Prompt:", original_prompt)
model['queen']) min_length=30, do_sample=False) plt.figure(figsize=(10, 8)) original_response =
print(f"Cosine similarity between 'king' and plt.scatter(tsne_result[:, 0], tsne_result[:, 1], generate_response(original_prompt)
'queen': {similarity:.4f}") print(summary[0]['summary_text']) color="blue") print("\nOriginal Response:", original_response)
for i, word in enumerate(words): print("\nEnriched Prompt:", enriched_prompt)
plt.text(tsne_result[i, 0] + 0.02, tsne_result[i, 1] + enriched_response =
0.02, word, fontsize=12) generate_response(enriched_prompt)
plt.title("Word Embeddings Visualization (Medical print("\nEnriched Response:", enriched_response)
Domain)")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.grid(True)

You might also like