For example, take one word in a sentence, look at the words around it, and pick one of those neighbours at random; we want the network to output a probability that tells us how close the picked word is to the original one. Take the sentence 'A dog is playing with a ball': if the chosen word is 'ball', then 'playing' should score as closer to it than 'dog'.
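To make the window idea concrete, a small helper (hypothetical, not part of the tutorial code) can enumerate the context words around a chosen centre word together with their distances:

```python
def context_pairs(sentence, center, window=5):
    """Hypothetical helper: (context word, distance) pairs within `window` of `center`."""
    words = sentence.lower().split()
    i = words.index(center)
    return [(words[j], abs(j - i))
            for j in range(max(0, i - window), min(len(words), i + window + 1))
            if j != i]

# 'playing' sits at distance 3 from 'ball', while 'dog' sits at distance 5
print(context_pairs('A dog is playing with a ball', 'ball'))
```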
```python
import torch
from torch import nn, optim
import jieba

context_size = 2
embedding_size = 10

# Corpus: a poem Tao Xingzhi wrote for his wife
txt = """天也欢喜,地也欢喜,人也欢喜,
欢喜我遇到了你,你也遇到了我。
当时是你心里有了一个我,
我心里也有了一个你,
从今后是朝朝暮暮在一起。
地久天长,同心比翼,相敬相爱相扶持,
偶然发脾气,也要规劝勉励。
在工作中学习,在服务上努力,
追求真理,抗战到底。
为着大我忘却小己,直等到最后胜利。
再生一两个孩子,一半儿像我,一半儿像你."""

word_list = jieba.lcut(txt)
# strip punctuation and whitespace tokens
for i in [',', ' ', '\n', '。']:
    while i in word_list:
        word_list.remove(i)

# build ((two context words), target word) trigrams
trigram = [((word_list[i], word_list[i + 1]), word_list[i + 2])
           for i in range(len(word_list) - 2)]

vocb = set(word_list)
word_idx = {word: i for i, word in enumerate(vocb)}
inx_to_word = {word_idx[word]: word for word in word_idx}
```
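The training loop below references `net`, `criterion`, and `optimizer`, none of which the excerpt defines. Here is a minimal sketch of the missing pieces, assuming the usual embedding-plus-linear n-gram design (the class name `n_gram` and the hidden size 128 are assumptions, not from the original):

```python
class n_gram(nn.Module):
    # assumed model: embed the two context words, flatten, classify over the vocabulary
    def __init__(self, vocab_size, context_size=context_size, n_dim=embedding_size):
        super(n_gram, self).__init__()
        self.embed = nn.Embedding(vocab_size, n_dim)
        self.classify = nn.Sequential(
            nn.Linear(context_size * n_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, vocab_size),
        )

    def forward(self, x):
        voc_embed = self.embed(x)          # (context_size, n_dim)
        voc_embed = voc_embed.view(1, -1)  # flatten the context embeddings
        return self.classify(voc_embed)    # (1, vocab_size)

net = n_gram(len(word_idx)).cuda()  # the loop's .cuda() calls imply a GPU model
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-2)
```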
```python
for e in range(100):
    train_loss = 0
    for word, label in trigram:
        word = torch.LongTensor([word_idx[i] for i in word]).cuda()   # the two context words as input
        label = torch.LongTensor([word_idx[label]]).cuda()
        # forward pass
        out = net(word)
        loss = criterion(out, label)
        train_loss += loss.item()
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (e + 1) % 20 == 0:
        print('epoch: {}, Loss: {:.6f}'.format(e + 1, train_loss / len(trigram)))
```
```python
net = net.eval()
word, label = trigram[30]
print('input: {}'.format(word))
print('label: {}'.format(label))
print()

word = torch.LongTensor([word_idx[i] for i in word]).cuda()
out = net(word)
pred_label_idx = out.max(1)[1].item()   # index of the most probable next word
predict_word = inx_to_word[pred_label_idx]
print('real word is {}, predicted word is {}'.format(label, predict_word))
```
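Since the word embeddings are the real payoff of this exercise, the learned vectors can be read straight out of the embedding layer. A short sketch, assuming the hypothetical `n_gram` model above with its `embed` attribute, and picking a word such as '欢喜' that jieba should have produced:

```python
# look up the trained vector of one word (assumes the sketched `n_gram` model)
idx = torch.LongTensor([word_idx['欢喜']]).cuda()
print(net.embed(idx).shape)  # torch.Size([1, 10]), since embedding_size = 10
```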
The same embedding idea extends to sequence labelling. Below is an LSTM part-of-speech tagger that combines word embeddings with a character-level LSTM.

```python
import torch
from torch import nn
from torch.autograd import Variable
import numpy as np
```
The toy training set pairs each sentence with its tag sequence (DET = determiner, NN = noun, V = verb).

```python
training_data = [("The dog ate the apple".split(),
                  ["DET", "NN", "V", "DET", "NN"]),
                 ("Everybody read that book".split(),
                  ["NN", "V", "DET", "NN"])]
```
```python
word_to_idx = {}
tag_to_idx = {}
for context, tag in training_data:
    for word in context:
        if word.lower() not in word_to_idx:
            word_to_idx[word.lower()] = len(word_to_idx)
    for label in tag:
        if label.lower() not in tag_to_idx:
            tag_to_idx[label.lower()] = len(tag_to_idx)
print(word_to_idx)
print(tag_to_idx)
```
```python
alphabet = 'abcdefghijklmnopqrstuvwxyz'
char_to_idx = {}
for i in range(len(alphabet)):
    char_to_idx[alphabet[i]] = i
```
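The tagger below calls two pieces that never appear in the excerpt: a `make_sequence` helper that turns tokens into index tensors, and a character-level `char_lstm` module. Here is a minimal sketch of both, inferred from the shapes the tagger expects (`char_lstm` must return the last hidden state per word):

```python
def make_sequence(x, dic):
    """Map a list of tokens to a LongTensor of indices (lower-cased)."""
    idx = [dic[i.lower()] for i in x]
    return torch.LongTensor(idx)

class char_lstm(nn.Module):
    def __init__(self, n_char, char_dim, char_hidden):
        super(char_lstm, self).__init__()
        self.char_embed = nn.Embedding(n_char, char_dim)
        self.lstm = nn.LSTM(char_dim, char_hidden)

    def forward(self, x):
        x = self.char_embed(x)   # (seq, batch, char_dim)
        out, _ = self.lstm(x)    # (seq, batch, char_hidden)
        return out[-1]           # last step: (batch, char_hidden)
```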
```python
class lstm_tagger(nn.Module):
    def __init__(self, n_word, n_char, char_dim, word_dim,
                 char_hidden, word_hidden, n_tag):
        super(lstm_tagger, self).__init__()
        self.word_embed = nn.Embedding(n_word, word_dim)
        self.char_lstm = char_lstm(n_char, char_dim, char_hidden)
        self.word_lstm = nn.LSTM(word_dim + char_hidden, word_hidden)
        self.classify = nn.Linear(word_hidden, n_tag)

    def forward(self, x, word):
        char = []
        for w in word:  # run the character LSTM over each word
            char_list = make_sequence(w, char_to_idx)
            char_list = char_list.unsqueeze(1)  # (seq, batch, feature), as the LSTM expects
            char_infor = self.char_lstm(Variable(char_list))  # (batch, char_hidden)
            char.append(char_infor)
        char = torch.stack(char, dim=0)  # (seq, batch, char_hidden)

        x = self.word_embed(x)  # (batch, seq, word_dim)
        x = x.permute(1, 0, 2)  # reorder to (seq, batch, word_dim)
        # concatenate each word's embedding with its character-LSTM output
        # along the feature dimension: (seq, batch, word_dim + char_hidden)
        x = torch.cat((x, char), dim=2)
        x, _ = self.word_lstm(x)
        out = self.classify(x)
        return out

net = lstm_tagger(len(word_to_idx), len(char_to_idx), 10, 100, 50, 128, len(tag_to_idx))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2)

# training
for e in range(300):
    train_loss = 0
    for word, tag in training_data:
        word_list = make_sequence(word, word_to_idx).unsqueeze(0)  # add a batch dimension
        tag = make_sequence(tag, tag_to_idx)
        word_list = Variable(word_list)
        tag = Variable(tag)
        # forward pass
        out = net(word_list, word)
        out = out.reshape(-1, out.size(2))  # flatten (seq, batch, n_tag) for the loss
        loss = criterion(out, tag)
        train_loss += loss.item()
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (e + 1) % 50 == 0:
        print('Epoch: {}, Loss: {:.5f}'.format(e + 1, train_loss / len(training_data)))
```
```python
net = net.eval()
test_sent = 'Everybody ate the apple'
test = make_sequence(test_sent.split(), word_to_idx).unsqueeze(0)
out = net(Variable(test), test_sent.split())
out = out.detach().cpu().numpy()  # (seq, batch, n_tag)
test_word = test_sent.split()

def get_key(d, value):
    """Reverse lookup: all keys in d that map to value."""
    return [k for k, v in d.items() if v == value]

result = {}
for i in range(out.shape[0]):
    max_idx = np.argmax(out[i])  # most probable tag for word i
    result[test_word[i]] = get_key(tag_to_idx, max_idx)
print(result)
```