基于lstm+transformer机器翻译-中藏翻译-完整代码数据

项目视频讲解:

基于lstm+transformer机器翻译-中藏翻译_哔哩哔哩_bilibili

数据展示:

 

 

# coding:utf-8
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 示例数据(重复数据以增加样本量)
# data = [ ... ] # 原始示例数据,已注释掉

# Load the parallel corpus: each line of the file holds a source sentence and
# a target sentence separated by '%%'.
data = []
with open('测试集.txt', 'r', encoding='utf-8') as f:
    for line in f.read().splitlines():
        parts = line.split('%%')
        # Skip blank or malformed lines that lack the '%%' separator; indexing
        # parts[1] on them would raise IndexError.
        if len(parts) < 2:
            continue
        # The last two characters of the target field are dropped, exactly as
        # the original code did -- presumably a trailing marker in the data
        # file; TODO confirm against the corpus format.
        data.append([parts[0], parts[1][:-2]])
print(data)

# Separate the [source, target] pairs into two parallel lists.
source_texts = [pair[0] for pair in data]
target_texts = [pair[1] for pair in data]

# Hold out 20% of the sentence pairs for testing; the fixed random_state keeps
# the split reproducible between runs.
source_train, source_test, target_train, target_test = train_test_split(
    source_texts,
    target_texts,
    test_size=0.2,
    random_state=42,
)

# NOTE(review): Tokenizer() is used with its defaults, which split text on
# spaces -- confirm the corpus is already word-segmented, otherwise whole
# sentences may end up as single tokens.

# Source-language tokenizer: the vocabulary is fitted on the training split
# only and then applied to both splits.
source_tokenizer = Tokenizer()
source_tokenizer.fit_on_texts(source_train)
source_train_seq, source_test_seq = (
    source_tokenizer.texts_to_sequences(texts)
    for texts in (source_train, source_test)
)

# Target-language tokenizer, built the same way.
target_tokenizer = Tokenizer()
target_tokenizer.fit_on_texts(target_train)
target_train_seq, target_test_seq = (
    target_tokenizer.texts_to_sequences(texts)
    for texts in (target_train, target_test)
)

# Pad the sequences. The target length for each language is the longest
# sequence found in its training split; padding='post' places the padding
# after the tokens. The test splits reuse the training lengths so both splits
# have the same width.
max_source_length = max(len(seq) for seq in source_train_seq)
max_target_length = max(len(seq) for seq in target_train_seq)

source_train_seq = pad_sequences(source_train_seq, maxlen=max_source_length, padding='post')
print('source_train_seq.shape', source_train_seq.shape)
source_test_seq = pad_sequences(source_test_seq, maxlen=max_source_length, padding='post')
# Report the test array here; the original repeated the training shape, which
# looks like a copy-paste slip.
print('source_test_seq.shape', source_test_seq.shape)

target_train_seq = pad_sequences(target_train_seq, maxlen=max_target_length, padding='post')
target_test_seq = pad_sequences(target_test_seq, maxlen=max_target_length, padding='post')

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed, Bidirectional,

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

甜辣uu

谢谢关注再接再厉

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值