【word2vec实例2】加载模型

本文展示了如何加载预训练的word2vec模型,并利用该模型进行相似度计算。通过遍历词汇列表,对每个单词找到与之最相似的类别,使用cosine相似度作为相似度指标。此外,还提供了平均向量的计算方法。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# Resolve the model path relative to the process's current working directory
# (not the directory of this source file).
current_dir = os.path.abspath('.')
w2v_file = os.path.join(current_dir, 'w2v_file_sg')
# Load the previously saved gensim Word2Vec model and keep it on the instance;
# the other snippets read word vectors from it via `self.dic.wv`.
self.dic = gensim.models.Word2Vec.load(w2v_file)

# Collect the best-matching category label for every in-vocabulary word.
for word in word_list:
    # Membership is tested on the KeyedVectors object itself: this works on
    # both gensim 3.x and 4.x, whereas `wv.vocab` was removed in gensim 4.0.
    if word not in self.dic.wv:
        continue
    result = self.get_max_similar(word, sim_value)
    # An empty string means no category reached the similarity threshold.
    if result != '':
        ret.append(result)
def get_max_similar(self, word, sim_value):
    """Return the category label most similar to *word*, or '' if none qualifies.

    The word's vector is compared (via ``self.cos_sim``) against each
    category vector supplied by ``self.getKey()``, which is expected to
    return a pair ``(category_vectors, category_labels)`` of parallel
    sequences.

    Args:
        word: A word present in the loaded model's vocabulary.
        sim_value: Minimum similarity the best match must reach.

    Returns:
        The label of the best-scoring category, or ``''`` when the best
        score is below ``sim_value`` (or no category scores above 0.0).
    """
    word_vec = self.dic.wv[word]
    category_vectors, category_labels = self.getKey()[:2]

    # Start at 0.0 so that only strictly positive similarities can win.
    best_score = 0.0
    result = ''
    for category_vec, label in zip(category_vectors, category_labels):
        score = self.cos_sim(word_vec, category_vec)
        if score > best_score:
            best_score = score
            result = label

    # Diagnostic output; the text is kept exactly as the original emitted it.
    print(word + "】与【类别:" + result + ",相似度为===" + str(best_score))
    if best_score < sim_value:
        result = ''
    return result
def cos_sim(self, arrA, arrB):
    """Return the cosine similarity between two vectors.

    Args:
        arrA: First vector (a NumPy array).
        arrB: Second vector, with the same length as ``arrA``.

    Returns:
        ``arrA · arrB / (‖arrA‖ · ‖arrB‖)``. When either vector has zero
        norm the similarity is undefined; ``0.0`` is returned instead of
        dividing by zero and producing NaN.
    """
    norm_product = np.linalg.norm(arrA) * np.linalg.norm(arrB)
    if norm_product == 0:
        # Avoid 0/0 -> NaN (and NumPy's RuntimeWarning) for all-zero vectors.
        return 0.0
    return arrA.dot(arrB) / norm_product

def average(self, key_list):
    """Return the element-wise mean of the word vectors for *key_list*.

    Args:
        key_list: A sized collection of words, each looked up in
            ``self.dic.wv``.

    Returns:
        The sum of the looked-up vectors divided by ``len(key_list)``.

    Raises:
        ZeroDivisionError: If ``key_list`` is empty.
    """
    # Accumulate under a name that does not shadow the builtin `sum`.
    total = 0.0
    for key in key_list:
        total = total + self.dic.wv[key]

    return total / len(key_list)