import numpy as np
import tensorflow as tf
from tensorflow import layers
from tensorflow.python.ops import array_ops
from tensorflow.contrib import seq2seq
from tensorflow.contrib.seq2seq import BahdanauAttention
from tensorflow.contrib.seq2seq import LuongAttention
from tensorflow.contrib.seq2seq import AttentionWrapper
from tensorflow.contrib.seq2seq import BeamSearchDecoder
from tensorflow.contrib.rnn import LSTMCell
from tensorflow.contrib.rnn import GRUCell
from tensorflow.contrib.rnn import MultiRNNCell
from tensorflow.contrib.rnn import DropoutWrapper
from tensorflow.contrib.rnn import ResidualWrapper
from word_sequence import WordSequence
from data_utils import _get_embed_device
"""
__init__:基本参数的保存、验证
build_model:模型构建
init_placeholders:初始化变量的占位符
build_encoder:初始化编码器
init_optimizer:初始化优化器
train:训练一个batch
predict:预测一个batch
"""
class SequenceToSequence(object):
def __init__(self,
input_vocab_size,
target_vocab_size,
batch_size=32,
embedding_size=300,
mode='train',
hidden_units=256,
depth=1,
beam_width=0,
cell_type='lstm',
dropout=0.2,
use_dropout=False,
use_residual=False,
optimizer='adam',
learning_rate=0.001,
min_learning_rate=0.000001,
decay_steps=50000,
max_gradient_norm=5.0,
max_decode_step=None,
attention_type='Bahdanau',
bidirectional=False,
time_major=False,
seed=0,
parallel_iterations=None,
share_embedding=False,
pretrained_embedding=False):
self.input_vocab_size = input_vocab_size
self.target_vocab_size = target_vocab_size
self.batch_size = batch_size
self.embedding_size = embedding_size
self.hidden_units = hidden_units
self.depth = depth
self.cell_type = cell_type.lower()
self.use_dropout = use_dropout
self.use_residual = use_residual
self.attention_type = attention_type
self.mode = mode
self.optimizer = optimizer
self.learning_rate = learning_rate
self.min_learning_rate = min_learning_rate
self.decay_steps = decay_steps
self.max_gradient_norm = max_gradient_norm
self.keep_prob = 1.0 - dropout
self.bidirectional = bidirectional
self.seed = seed
        self.pretrained_embedding = pretrained_embedding
if isinstance(parallel_iterations, int):
self.parallel_iterations = parallel_iterations
else:
self.parallel_iterations = batch_size
self.time_major = time_major
self.share_embedding = share_embedding
        self.initializer = tf.random_uniform_initializer(-0.05, 0.05, dtype=tf.float32)
        assert self.cell_type in ('gru', 'lstm'), 'cell_type must be "gru" or "lstm"'
        if share_embedding:
            assert input_vocab_size == target_vocab_size, \
                'input and target vocab sizes must be equal when share_embedding is True'
        assert mode in ('train', 'decode'), 'mode must be "train" or "decode"'
        assert 0.0 <= dropout <= 1.0, 'dropout must be in the range [0.0, 1.0]'
        assert attention_type.lower() in ('bahdanau', 'luong'), 'attention_type must be "Bahdanau" or "Luong"'
        assert beam_width < target_vocab_size, 'beam_width must be smaller than target_vocab_size'
self.keep_prob_placeholder = tf.placeholder(tf.float32, shape=[], name='keep_prob')
self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.beam_width = beam_width
        self.use_beamsearch_decode = self.beam_width > 0
self.max_decode_step = max_decode_step
        assert self.optimizer.lower() in (
            'adadelta', 'adam', 'rmsprop', 'momentum', 'sgd'), \
            'optimizer must be one of: adadelta, adam, rmsprop, momentum, sgd'
self.build_model()
def build_model(self):
"""
1、初始化训练、预测所需变量
2、构建编码器encoder
3、构建解码器decoder
4、构建优化器optimizer
5、保存
"""
self.init_placeholders()
encoder_outputs, encoder_state = self.build_encoder()
self.build_decoder(encoder_outputs, encoder_state)
if self.mode == 'train':
self.init_optimizer()
self.saver = tf.train.Saver()
def init_placeholders(self):
        self.add_loss = tf.placeholder(dtype=tf.float32, name='add_loss')
        self.encoder_inputs = tf.placeholder(dtype=tf.int32, shape=(self.batch_size, None), name='encoder_inputs')
        self.encoder_inputs_length = tf.placeholder(dtype=tf.int32, shape=(self.batch_size,),
                                                    name='encoder_inputs_length')
        if self.mode == 'train':
            self.decoder_inputs = tf.placeholder(dtype=tf.int32, shape=(self.batch_size, None), name='decoder_inputs')
            self.rewards = tf.placeholder(dtype=tf.float32, shape=(self.batch_size, 1), name='rewards')
            self.decoder_inputs_length = tf.placeholder(dtype=tf.int32, shape=(self.batch_size,),
                                                        name='decoder_inputs_length')
            # Prepend START so the decoder sees the gold previous token at
            # every step (teacher forcing).
            self.decoder_start_token = tf.ones(shape=(self.batch_size, 1), dtype=tf.int32) * WordSequence.START
            self.decoder_inputs_train = tf.concat([self.decoder_start_token, self.decoder_inputs], axis=1)
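    # Example with hypothetical values (assuming WordSequence.START == 1):
    #     decoder_inputs       = [[4, 5, 6]]
    #     decoder_inputs_train = [[1, 4, 5, 6]]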
"""构建一个单独的rnn单元"""
    def build_single_cell(self, n_hidden, use_residual):
        if self.cell_type == 'gru':
            cell_type = GRUCell
        else:
            cell_type = LSTMCell
        cell = cell_type(n_hidden)
        if self.use_dropout:
            cell = DropoutWrapper(cell, dtype=tf.float32, output_keep_prob=self.keep_prob_placeholder, seed=self.seed)
        if use_residual:
            cell = ResidualWrapper(cell)
        return cell
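    # The wrappers above compose inside-out: DropoutWrapper masks the raw
    # cell output first, and ResidualWrapper then adds the cell's input to
    # that (possibly dropped-out) output.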
"""构建单独的编码cell"""
def build_encoder_cell(self):
return MultiRNNCell(
[self.build_single_cell(self.hidden_units, use_residual=self.use_residual) for _ in range(self.depth)])
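    # With depth > 1, MultiRNNCell stacks `depth` identical cells, each with
    # its own hidden state; use_residual adds a skip connection around every
    # layer.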
"""构建编码器"""
def build_encoder(self):
with tf.variable_scope('encoder'):
encoder_cell = self.build_encoder_cell()
with tf.device(_get_embed_device(self.input_vocab_size)):
                if self.pretrained_embedding:
self.encoder_embeddings = tf.Variable(
tf.constant(0.0, shape=(self.input_vocab_size, self.embedding_size)), trainable=True,
name='embedding')
self.encoder_embeddings_placeholder = tf.placeholder(tf.float32,
(self.input_vocab_size, self.embedding_size))
self.encoder_embeddings_init = self.encoder_embeddings.assign(self.encoder_embeddings_placeholder)
else:
self.encoder_embeddings = tf.get_variable(name='embedding',
shape=(self.input_vocab_size, self.embedding_size),
initializer=self.initializer, dtype=tf.float32)
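            # Usage sketch for the pretrained path (illustrative; assumes
            # `embedding_matrix` is a float32 numpy array of shape
            # (input_vocab_size, embedding_size)):
            #     sess.run(model.encoder_embeddings_init,
            #              feed_dict={model.encoder_embeddings_placeholder:
            #                         embedding_matrix})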
self.encoder_inputs_embedded = tf.nn.embedding_lookup(params=self.encoder_embeddings,
ids=self.encoder_inputs)
            if self.use_residual:
                # Assumed completion of the truncated source: project the
                # embedded inputs up to hidden_units so ResidualWrapper can
                # add input and output tensors of the same size.
                self.encoder_inputs_embedded = layers.dense(self.encoder_inputs_embedded,
                                                            self.hidden_units,
                                                            use_bias=False,
                                                            name='encoder_residual_projection')