Table of Contents
135 - Introduction to LSTM Networks
LSTM Networks
Long Short-Term Memory networks (LSTM) are a special type of RNN that can learn long-term dependencies. LSTMs have achieved considerable success on a wide range of problems and are widely used; they are the de facto standard RNN.
The trick of the LSTM is a self-connection with a fixed weight of 1 and a linear activation, so its local partial derivative is exactly 1. Errors can therefore propagate across time steps without vanishing or exploding.
The LSTM controls the flow of information with gates: a gate is a mechanism for letting information through selectively. Through its gates, the LSTM can block information entirely, let it through completely, or let only part of it through.
LSTM network structure diagram
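To make the gating concrete, here is a minimal sketch (added for illustration; it is not part of the course notebook) that computes a single nn.LSTMCell step by hand from the cell's own weight matrices and checks it against the module. The three sigmoid gates squash values into [0, 1], so each gate can block information entirely, pass it entirely, or pass a fraction of it; the cell-state update c1 = f * c0 + i * g is the gated self-connection described above.
import torch
from torch import nn

torch.manual_seed(0)
cell = nn.LSTMCell(input_size=4, hidden_size=3)
x  = torch.randn(1, 4)                     # one time step, batch of 1
h0 = torch.zeros(1, 3)                     # previous hidden state
c0 = torch.zeros(1, 3)                     # previous cell state

# PyTorch packs the gate weights in the order: input, forget, cell candidate, output
gates = x @ cell.weight_ih.T + cell.bias_ih + h0 @ cell.weight_hh.T + cell.bias_hh
i, f, g, o = gates.chunk(4, dim=1)
i, f, o = torch.sigmoid(i), torch.sigmoid(f), torch.sigmoid(o)   # the three gates
g = torch.tanh(g)                                                # candidate cell value

c1 = f * c0 + i * g                        # forget part of the old memory, write the new candidate
h1 = o * torch.tanh(c1)                    # the output gate decides what is exposed

h_ref, c_ref = cell(x, (h0, c0))
print(torch.allclose(h1, h_ref, atol=1e-6), torch.allclose(c1, c_ref, atol=1e-6))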
136 - LSTM in Practice
[Reference notebook] 17-2LSTM网络.ipynb
[Exported code]
# %% [markdown]
# # 136 - Text classification with LSTMCell
# %% [markdown]
# 
# %% [markdown]
# * Three inputs: the current input plus the previous states h_0 and c_0
#
# * Two outputs: the updated states h_1 and c_1
#
# * h_1, c_1 = lstm_cell(input, (h_0, c_0)), where lstm_cell is an nn.LSTMCell instance
# %%
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import glob
import numpy as np
import torch
from torch import nn
from torch.utils import data
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
import torch.nn.functional as F
import torchtext
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import GloVe
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
from lxml import etree
from matplotlib.patches import Rectangle # for drawing rectangles
# %%
nn.LSTMCell
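# %% [markdown]
# A quick shape check (added here for illustration; not part of the original export): calling an
# LSTMCell instance takes the current input together with the previous (h, c) pair and returns
# the updated (h, c) pair.
# %%
demo_cell = nn.LSTMCell(input_size=8, hidden_size=5)
demo_x = torch.randn(2, 8)                        # (batch, input_size) for a single time step
demo_h0 = torch.zeros(2, 5)                       # (batch, hidden_size)
demo_c0 = torch.zeros(2, 5)
demo_h1, demo_c1 = demo_cell(demo_x, (demo_h0, demo_c0))
demo_h1.shape, demo_c1.shape                      # both torch.Size([2, 5])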
# %% [markdown]
# ## Load the dataset
# %%
train_iter, test_iter = torchtext.datasets.IMDB()
train_data, test_data = list(train_iter), list(test_iter)
# %%
all_classes = set([label for (label, text) in train_data])
num_class = len(all_classes)
# %% [markdown]
# ## Text preprocessing
# %%
from torchtext.data.utils import get_tokenizer # tokenizer utility
from torchtext.vocab import build_vocab_from_iterator # vocabulary builder
# %%
tokenizer = get_tokenizer('basic_english')
# %%
def yield_tokens(data):
    for _, text in data:
        yield tokenizer(text)
vocab = build_vocab_from_iterator(yield_tokens(train_data), specials=["<pad>", "<unk>"], min_freq=3)
# %%
vocab.set_default_index(vocab["<unk>"]) # default index returned for out-of-vocabulary words
# %% [markdown]
# Create the text and label pipelines
# %%
text_pipeline = lambda x: vocab(tokenizer(x)) # 1 - tokenize, 2 - map tokens to vocabulary indices
label_pipeline = lambda x: int(x == 1)        # map the raw label to 0/1 (label == 1 -> 1, else 0)
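# %% [markdown]
# Illustrative check (not part of the original export): text_pipeline turns a raw string into a
# list of vocabulary indices, and label_pipeline maps the raw label to 0/1. The exact indices
# depend on the vocabulary built above.
# %%
text_pipeline('this movie is great'), label_pipeline(1), label_pipeline(2)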
# %% [markdown]
# ## Batch collate function
# %%
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# %%
def collate_batch(batch):
    label_list, text_list = [], []
    for (_label, _text) in batch:  # each sample is a (label, text) tuple
        label_list.append(label_pipeline(_label))  # label of this review
        # Truncate each review to 100 tokens: the step-by-step cell cannot build very long dependencies
        pre_text = torch.tensor(text_pipeline(_text), dtype=torch.int64)[:100]  # review -> token indices -> tensor
        text_list.append(pre_text)  # tensor of int indices for this review
    label_list = torch.tensor(label_list)
    # Reviews differ in length, so their tensors differ in length; batched training needs equal lengths
    text_list = torch.nn.utils.rnn.pad_sequence(text_list)  # pad to the longest review in the batch
    return label_list.to(device), text_list.to(device)
# %%
# Hand the raw data to collate_batch in batches of 512
BATCHSIZE = 512
train_dl = DataLoader(dataset=train_data,
                      batch_size=BATCHSIZE,
                      shuffle=True,
                      collate_fn=collate_batch)  # batch collate function
test_dl = DataLoader(dataset=test_data,
                     batch_size=BATCHSIZE,
                     collate_fn=collate_batch)
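# %% [markdown]
# Illustrative check (not part of the original export): pull one batch to confirm the shapes.
# pad_sequence defaults to batch_first=False, so the text tensor is (seq_len, batch_size).
# %%
demo_labels, demo_texts = next(iter(train_dl))
demo_labels.shape, demo_texts.shape               # (512,) and (longest_review_in_batch, 512)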
# %% [markdown]
# ## Unroll the sequence over the LSTMCell
# %%
vocab_size = len(vocab)
embeding_dim = 300
hidden = 128
# %%
class LSTM_encode(nn.Module):
    def __init__(self, input_dim, hidden):
        super(LSTM_encode, self).__init__()
        # input_dim is the feature size of each time step (here: the embedding dim)
        self.hidden = hidden
        self.lstm_cell = nn.LSTMCell(input_size=input_dim, hidden_size=hidden)
    def forward(self, inputs):
        # inputs: (seq_len, batch, input_dim); unroll the cell one time step at a time
        batch_s = inputs.shape[1]
        ht = torch.zeros(batch_s, self.hidden).to(device)
        ct = torch.zeros(batch_s, self.hidden).to(device)
        for word in inputs:
            ht, ct = self.lstm_cell(word, (ht, ct))
        return ht, ct
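# %% [markdown]
# Illustrative check (not part of the original export): feed a random "embedded" sequence through
# the encoder; regardless of sequence length it returns a single (batch, hidden) pair of final states.
# %%
demo_encoder = LSTM_encode(embeding_dim, hidden).to(device)
demo_seq = torch.randn(20, 4, embeding_dim).to(device)   # (seq_len=20, batch=4, embeding_dim)
demo_ht, demo_ct = demo_encoder(demo_seq)
demo_ht.shape, demo_ct.shape                              # both torch.Size([4, 128])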
# %% [markdown]
# ## Build the classification model
# %%
class LSTM_Net(nn.Module):
    def __init__(self, vocab_size, embeding_dim, hidden_unit):
        super(LSTM_Net, self).__init__()
        # Embedding layer: vocab_size -> embeding_dim (the input size of the LSTM encoder)
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embeding_dim)
        self.lstmencode = LSTM_encode(embeding_dim, hidden_unit)  # the LSTM encoder defined above
        # Linear output layers
        self.fc1 = nn.Linear(in_features=hidden_unit, out_features=256)
        self.fc2 = nn.Linear(256, 2)  # two classes: negative / positive
    def forward(self, inputs):
        x = self.embedding(inputs)    # (seq_len, batch) -> (seq_len, batch, embeding_dim)
        _, x = self.lstmencode(x)     # use the final cell state as the sequence representation
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# %% [markdown]
# ## Model training
# %%
model = LSTM_Net(vocab_size, embeding_dim, hidden).to(device)
# %%
model
# %%
loss_fn = nn.CrossEntropyLoss() # the model outputs 2 logits, one per class
from torch.optim import lr_scheduler
optimizer = torch.optim.Adam(model.parameters(), betas=(0.5, 0.5), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)
# %%
def train(dataloader):
    total_acc, total_count, total_loss = 0, 0, 0
    model.train()
    for label, text in dataloader:
        label, text = label.to(device), text.to(device)
        predited_label = model(text)
        loss = loss_fn(predited_label, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            total_acc += (predited_label.argmax(1) == label).sum().item()
            total_count += label.size(0)
            total_loss += loss.item()*label.size(0)
    return total_loss/total_count, total_acc/total_count
# %%
def test(dataloader):
    total_acc, total_count, total_loss = 0, 0, 0
    model.eval()
    with torch.no_grad():
        for label, text in dataloader:
            predited_label = model(text)
            loss = loss_fn(predited_label, label)
            total_acc += (predited_label.argmax(1) == label).sum().item()
            total_count += label.size(0)
            total_loss += loss.item()*label.size(0)
    return total_loss/total_count, total_acc/total_count
# %%
def fit(epochs, train_dl, test_dl):
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    for epoch in range(epochs):
        epoch_loss, epoch_acc = train(train_dl)
        epoch_test_loss, epoch_test_acc = test(test_dl)
        train_loss.append(epoch_loss)
        train_acc.append(epoch_acc)
        test_loss.append(epoch_test_loss)
        test_acc.append(epoch_test_acc)
        exp_lr_scheduler.step()
        template = ("epoch: {:2d}, train_loss: {:.5f}, train_acc: {:.1f}%, "
                    "test_loss: {:.5f}, test_acc: {:.1f}%")
        print(template.format(epoch, epoch_loss, epoch_acc*100, epoch_test_loss, 100*epoch_test_acc))
    print("Done!")
    return train_loss, train_acc, test_loss, test_acc
# %%
EPOCHS = 10
train_loss, train_acc, test_loss, test_acc = fit(epochs=EPOCHS, train_dl=train_dl, test_dl=test_dl)
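# %% [markdown]
# The lists returned by fit can be plotted to inspect training. This plotting cell is added as an
# illustration and was not part of the original export.
# %%
plt.plot(range(1, EPOCHS + 1), train_loss, label='train_loss')
plt.plot(range(1, EPOCHS + 1), test_loss, label='test_loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()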
137 - GRU Networks
The GRU, a variant of the LSTM
Compared with the LSTM, the GRU (gated recurrent unit) has a simpler structure. It has an update gate, which determines how much of the previous internal state is blended with the new candidate state. In short, compared with an LSTM, a GRU is simpler to build, requires less computation, and performs comparably.
GRU structure diagram
[Exported code]
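To see the update gate's mixing role concretely, the following sketch (added for illustration; it is not part of the course notebook) computes one nn.GRUCell step by hand and compares it with the module. In PyTorch's formulation the new state is h1 = (1 - z) * n + z * h0, so the update gate z decides how much of the old state is kept versus how much of the candidate n is written.
import torch
from torch import nn

torch.manual_seed(0)
gru = nn.GRUCell(input_size=4, hidden_size=3)
x, h0 = torch.randn(1, 4), torch.zeros(1, 3)

gi = x @ gru.weight_ih.T + gru.bias_ih     # input contributions, gate order: reset, update, new
gh = h0 @ gru.weight_hh.T + gru.bias_hh    # hidden contributions, same order
i_r, i_z, i_n = gi.chunk(3, dim=1)
h_r, h_z, h_n = gh.chunk(3, dim=1)

r = torch.sigmoid(i_r + h_r)               # reset gate
z = torch.sigmoid(i_z + h_z)               # update gate
n = torch.tanh(i_n + r * h_n)              # candidate state
h1 = (1 - z) * n + z * h0                  # the update gate mixes old state and candidate

print(torch.allclose(h1, gru(x, h0), atol=1e-6))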
# Unroll the sequence over the GRUCell
class GRU_encode(nn.Module):
    def __init__(self, embeding_dim, hidden_size):
        super(GRU_encode, self).__init__()
        self.hidden_size = hidden_size
        self.gru_cell = nn.GRUCell(input_size=embeding_dim, hidden_size=hidden_size)
    def forward(self, inputs):
        # inputs: (seq_len, batch, embeding_dim); the GRU keeps a single hidden state ht
        batch_size = inputs.shape[1]
        ht = torch.zeros(batch_size, self.hidden_size).to(device)
        for word in inputs:
            ht = self.gru_cell(word, ht)
        return ht
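A short usage sketch (assumed setup, not part of the exported code): GRU_encode would replace LSTM_encode inside the classification model; since it returns a single hidden state rather than an (h, c) pair, the caller uses its output directly.
encoder = GRU_encode(embeding_dim, hidden).to(device)   # same embedding/hidden sizes as above
seq = torch.randn(20, 4, embeding_dim).to(device)        # (seq_len, batch, embeding_dim)
ht = encoder(seq)                                         # final hidden state, shape (4, hidden)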