Standard RNN
A standard RNN can be created by calling torch.nn.RNN() directly. Its per-step update is:
$$
h_t = \tanh(W_{ih} x_t + b_{ih} + W_{hh} h_{t-1} + b_{hh})
$$
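As a quick sanity check of this formula, a single step of nn.RNN can be reproduced by hand from its weight matrices (a minimal sketch; the sizes here are arbitrary):

```python
import torch
from torch import nn

rnn = nn.RNN(input_size=3, hidden_size=4, num_layers=1)
x = torch.randn(1, 1, 3)   # (seq=1, batch=1, feature=3)
h_0 = torch.zeros(1, 1, 4)
out, h_1 = rnn(x, h_0)

# the same step computed directly from the formula above
manual = torch.tanh(x[0] @ rnn.weight_ih_l0.T + rnn.bias_ih_l0
                    + h_0[0] @ rnn.weight_hh_l0.T + rnn.bias_hh_l0)
print(torch.allclose(out[0], manual))  # True
```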
Parameters:
input_size: the feature dimension of the input $x_t$.
hidden_size: the feature dimension of the output $h_t$.
num_layers: the number of stacked recurrent layers.
nonlinearity: the activation function, 'tanh' by default.
bias: whether to use bias terms, True by default.
batch_first: whether the input is shaped $(batch, seq, feature)$; False by default, in which case batch is the second dimension.
dropout: takes a value between 0 and 1; applies dropout to the outputs of every layer except the last.
bidirectional: False by default; if set to True, the network is a bidirectional RNN.
Network inputs:
The network takes a sequence $x_t$ and an initial hidden state $h_0$:
$x_t: (seq, batch, feature)$
$h_0: (layers \times direction, batch, hidden)$
direction is the number of directions: 2 for a bidirectional RNN, otherwise 1.
hidden is the output feature dimension (hidden_size).
Network outputs:
The network returns $output$ and $h_n$:
$output: (seq, batch, hidden \times direction)$
$h_n: (layers \times direction, batch, hidden)$
```python
import torch
from torch import nn

basic_rnn = nn.RNN(input_size=20, hidden_size=50, num_layers=2)
print(basic_rnn.weight_ih_l0.shape)  # torch.Size([50, 20]): (hidden_size, input_size)

input_test = torch.randn(100, 32, 20)  # (seq, batch, feature)
h_0 = torch.randn(2, 32, 50)           # (layers * direction, batch, hidden)
output_test, h_n = basic_rnn(input_test, h_0)
print(output_test.shape)  # torch.Size([100, 32, 50])
print(h_n.shape)          # torch.Size([2, 32, 50])
```
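To make the direction and batch_first conventions concrete, here is a small shape check with a bidirectional, batch-first RNN (a sketch with arbitrary sizes):

```python
import torch
from torch import nn

bi_rnn = nn.RNN(input_size=20, hidden_size=50, num_layers=2,
                batch_first=True, bidirectional=True)
input_test = torch.randn(32, 100, 20)  # (batch, seq, feature) since batch_first=True
output, h_n = bi_rnn(input_test)       # h_0 defaults to zeros when omitted
print(output.shape)  # torch.Size([32, 100, 100]): hidden * direction = 50 * 2
print(h_n.shape)     # torch.Size([4, 32, 50]):    layers * direction = 2 * 2
```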
LSTM
```python
lstm = nn.LSTM(input_size=20, hidden_size=50, num_layers=2)
input_test = torch.randn(100, 32, 20)
lstm_out, (h_n, c_n) = lstm(input_test)  # h_0 and c_0 default to zeros
print(lstm.weight_ih_l0.shape)  # torch.Size([200, 20]): 4 gates * hidden_size
print(lstm_out.shape)  # torch.Size([100, 32, 50])
print(h_n.shape)       # torch.Size([2, 32, 50])
print(c_n.shape)       # torch.Size([2, 32, 50])
```
An LSTM has 4× the parameters of a standard RNN, one set for each of its four gated transformations; see the parameter count check after the GRU example below.
GRU
```python
gru = nn.GRU(input_size=20, hidden_size=50, num_layers=2)
input_test = torch.randn(100, 32, 20)
gru_out, h_n = gru(input_test)  # unlike LSTM, GRU returns only h_n (no cell state)
print(gru.weight_ih_l0.shape)  # torch.Size([150, 20]): 3 gates * hidden_size
print(gru_out.shape)  # torch.Size([100, 32, 50])
print(h_n.shape)      # torch.Size([2, 32, 50])
```
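The gate counts behind these weight shapes can be confirmed by counting parameters directly (a minimal sketch):

```python
import torch
from torch import nn

def n_params(m):
    # total number of trainable parameters in a module
    return sum(p.numel() for p in m.parameters())

rnn_n = n_params(nn.RNN(20, 50))
lstm_n = n_params(nn.LSTM(20, 50))
gru_n = n_params(nn.GRU(20, 50))
print(lstm_n / rnn_n)  # 4.0: the LSTM has four gated transformations
print(gru_n / rnn_n)   # 3.0: the GRU has three
```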
LSTM for MNIST image classification
```python
import os, sys
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from visdom import Visdom

# live loss curve in visdom
viz = Visdom()
viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
os.chdir(sys.path[0])

class MyRnn(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_layer, n_class) -> None:
        super(MyRnn, self).__init__()
        self.n_layer = n_layer
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(in_dim, hidden_dim, n_layer, batch_first=True)
        self.classifier = nn.Linear(hidden_dim, n_class)

    def forward(self, x):
        out, (h_n, c_n) = self.lstm(x)
        out = out[:, -1, :]  # keep only the last time step
        return self.classifier(out)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyRnn(784, 50, 2, 10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
epochs = 30

data_tf = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.5], [0.5])])
train_data = datasets.MNIST(root='./data', train=True, transform=data_tf, download=True)
test_data = datasets.MNIST(root='./data', train=False, transform=data_tf)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

global_step = 0
for epoch in range(epochs):
    for img, label in train_loader:
        optimizer.zero_grad()
        img, label = img.to(device), label.to(device)
        img = img.reshape(img.size(0), img.size(1), 784)  # (batch, seq=1, feature=784)
        global_step += 1
        output = model(img)
        loss = criterion(output, label)
        viz.line([loss.item()], [global_step], win='train_loss', update='append')
        loss.backward()
        optimizer.step()
    print('epoch:{}, loss:{:.6f}'.format(epoch, loss.item()))

model.eval()
eval_loss = 0
eval_acc = 0
with torch.no_grad():
    for img, label in test_loader:
        img, label = img.to(device), label.to(device)
        img = img.reshape(img.size(0), img.size(1), 784)
        out = model(img)
        eval_loss += criterion(out, label).item() * label.size(0)
        pred = torch.max(out, dim=1)[1]
        eval_acc += (pred == label).sum().item()
print('Test loss:{:.6f}, ACC:{:.6f}'.format(eval_loss / len(test_data), eval_acc / len(test_data)))
```
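Note that the script above feeds each image as a sequence of length 1 with 784 features, so the LSTM never actually iterates over time. A common alternative, shown here as a hypothetical sketch rather than what the script does, is to treat each 28×28 image as 28 time steps of 28-dimensional rows:

```python
import torch
from torch import nn

# hypothetical variant: each of the 28 image rows is one time step
lstm = nn.LSTM(input_size=28, hidden_size=50, num_layers=2, batch_first=True)
classifier = nn.Linear(50, 10)

img = torch.randn(32, 1, 28, 28)        # a dummy MNIST-like batch
seq = img.reshape(img.size(0), 28, 28)  # (batch, seq=28, feature=28)
out, _ = lstm(seq)
logits = classifier(out[:, -1, :])      # classify from the last time step
print(logits.shape)  # torch.Size([32, 10])
```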
RNNs are not well suited to image data: first, images carry little strong sequential structure; second, an RNN must finish computing one step before it can start the next, which is very slow for large images.
RNN use case: sequence prediction
Predict $s(k+1)$ from $s(k-3), s(k-2), s(k-1), s(k)$:
```python
import os, sys
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import matplotlib.pyplot as plt

class MyRnn(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_layer, n_class) -> None:
        super(MyRnn, self).__init__()
        self.n_layer = n_layer
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(in_dim, hidden_dim, n_layer)
        self.classifier = nn.Linear(hidden_dim, n_class)

    def forward(self, x):
        out, (h_n, c_n) = self.lstm(x)
        return self.classifier(out)  # a prediction at every time step

os.chdir(sys.path[0])
data = pd.read_csv(r'author\code-of-learn-deep-learning-with-pytorch\chapter5_RNN\time-series\data.csv',
                   usecols=[1])
data1 = data.dropna()
data = np.array(data1.iloc[:, 0]).astype('float32')
max_data = max(data)
min_data = min(data)
data = data / (max_data - min_data)  # scale by the data range

def create_dataset(dataset, look_back=2):
    # sliding window: each sample is `look_back` values, the label is the next one
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:(i + look_back)])
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)

dataX, dataY = create_dataset(data, look_back=4)
len_train = int(len(data) * 0.7)
train_data = dataX[:len_train]
train_label = dataY[:len_train]
test_data = dataX[len_train:]
test_label = dataY[len_train:]

# reshape to (seq, batch=1, feature=4) for the LSTM
dataX = dataX.reshape(-1, 1, 4)
train_data = train_data.reshape(-1, 1, 4)
train_label = train_label.reshape(-1, 1, 1)
test_data = test_data.reshape(-1, 1, 4)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataX = torch.from_numpy(dataX).float().to(device)
train_data = torch.from_numpy(train_data).float().to(device)
train_label = torch.from_numpy(train_label).float().to(device)
test_data = torch.from_numpy(test_data).float().to(device)

model = MyRnn(4, 50, 2, 1).to(device)
cri = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
epochs = 10000
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(train_data)
    loss = cri(output, train_label)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print('epoch:{}, loss:{:.6f}'.format(epoch, loss.item()))

model.eval()
pred = model(dataX)
pred = pred.reshape(-1).cpu().detach().numpy() * (max_data - min_data)  # undo the scaling
print(pred)
plt.plot(pred, 'r', label='prediction')
plt.plot(data1, 'b', label='real')
plt.legend(loc='best')
plt.show()
```
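To see exactly what the sliding window produces, here is the same create_dataset helper applied to a toy sequence (a minimal, standalone sketch):

```python
import numpy as np

def create_dataset(dataset, look_back=2):
    # identical to the helper in the script above
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:(i + look_back)])
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)

toy = np.arange(8, dtype='float32')     # 0, 1, 2, ..., 7
X, Y = create_dataset(toy, look_back=4)
print(X)  # [[0 1 2 3] [1 2 3 4] [2 3 4 5] [3 4 5 6]]
print(Y)  # [4 5 6 7]
```

Each row of X corresponds to $s(k-3), \dots, s(k)$, and the matching entry of Y is $s(k+1)$.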