建议先看:
《如何使用 GloVe、fastText 等词库进行 word embedding?(原理篇)》
再看本篇。
先睹为快:本文会用到的全部代码:
def get_coefs(word, *arr):
    """Pair a token with its embedding vector.

    Meant to be called as ``get_coefs(*fields)`` on the space-separated
    fields of one line of a text-format embedding file.

    Args:
        word: The token (first field of the line).
        *arr: The remaining fields, each convertible to a float.

    Returns:
        A ``(word, vector)`` tuple, where ``vector`` is a 1-D ``float32``
        NumPy array holding one element per value in ``arr``.

    Raises:
        ValueError: If a value in ``arr`` cannot be converted to a float.
    """
    return word, np.asarray(arr, dtype='float32')
def load_embeddings(path):
    """Load a text-format word-embedding file into a dict.

    Every data line is expected to look like ``token v1 v2 ... vN`` with
    single spaces between fields. If a token occurs more than once, the
    last occurrence wins.

    Args:
        path: Path of the embedding text file.

    Returns:
        A dict mapping each token to its ``float32`` NumPy vector, as
        produced by ``get_coefs``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a data line contains a non-numeric value.
    """
    embeddings = {}
    # Read as UTF-8 explicitly: the locale default encoding is
    # platform-dependent and can fail or mis-decode non-ASCII tokens.
    with open(path, encoding='utf-8') as f:
        for line_no, line in enumerate(f):
            fields = line.strip().split(' ')
            if fields == ['']:
                # Blank line: nothing to parse.
                continue
            if (
                line_no == 0
                and len(fields) == 2
                and fields[0].isdigit()
                and fields[1].isdigit()
            ):
                # fastText ``.vec`` files start with a "<count> <dim>" header
                # line; it is metadata, not an embedding.
                continue
            word, vector = get_coefs(*fields)
            embeddings[word] = vector
    return embeddings
def build_matrix(word_index, path, embedding_dim=300):
    """Build an embedding matrix for a vocabulary from a pretrained file.

    Args:
        word_index: Mapping from token to integer row index. Indices must be
            in ``range(len(word_index) + 1)``; presumably this is a Keras
            ``Tokenizer.word_index`` (1-based) -- verify against the caller.
        path: Path of the pretrained embedding text file, passed to
            ``load_embeddings``.
        embedding_dim: Length of each embedding vector. Defaults to 300,
            the value that was previously hard-coded.

    Returns:
        A NumPy array of shape ``(len(word_index) + 1, embedding_dim)``.
        Row ``i`` holds the pretrained vector of the token mapped to ``i``;
        rows for tokens missing from the file stay all zeros.
    """
    embedding_index = load_embeddings(path)
    # One extra row so that index len(word_index) is addressable.
    embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
    for word, i in word_index.items():
        vector = embedding_index.get(word)
        if vector is not None:
            embedding_matrix[i] = vector
        # Out-of-vocabulary tokens deliberately keep their zero row.
    return embedding_matrix
# `f` is the path of the pretrained GloVe/fastText vector file you downloaded.
embedding_matrix = build_matrix(word_index=tokenizer.word_index, path=f)
#下面是你的模型。我这里是乱写的。
def build_model(embedding_matrix,...):
words = Input(shape=(MAX_LEN,))
x = Embedding(*embedding_matrix.shape, weights=[embedding_matrix], trainable=False)(words)
lstm = CuDNNLSTM(128)(x)
predictions = Dense(2)(lstm)
model = Model(inputs=words, outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer='adam'