Example of classification with a fully connected neural network.
AUTHOR : Jean-Luc Parouty (CNRS/SIMaP), adapted to PyTorch by Laurent Risser (CNRS/IMT)
The MNIST dataset (Modified National Institute of Standards and Technology) is a must for Deep Learning.
It consists of 60,000 small images of handwritten numbers for learning and 10,000 for testing.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision #to get the MNIST dataset
import numpy as np
import matplotlib.pyplot as plt
import sys,os
sys.path.append('./MISC/fidle/')
import fidle_pwk_reduced as ooo
from fidle_pwk_additional import convergence_history_CrossEntropyLoss
MNIST is one of the most famous historic datasets.
It is included in the torchvision datasets.
# Training split: images converted to float64 tensors, labels taken as stored in the dataset
mnist_trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=None)
x_train = mnist_trainset.data.double()
y_train = mnist_trainset.targets

# Test split, prepared exactly like the training split
mnist_testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=None)
x_test = mnist_testset.data.double()
y_test = mnist_testset.targets

# Report the shape of every tensor that was just loaded
print("Size of the train and test observations")
for _label, _tensor in (
    ("x_train", x_train),
    ("y_train", y_train),
    ("x_test", x_test),
    ("y_test", y_test),
):
    print(" -> {} : ".format(_label), _tensor.shape)

# Report the element types of the training tensors
print("\nRemark that we work with torch tensors and not numpy arrays:")
print(" -> x_train.dtype = ", x_train.dtype)
print(" -> y_train.dtype = ", y_train.dtype)
Que représentent les différentes dimensions dans la taille de x_train (les 60000, 28 et 28) ?
# Rescale pixel values by the maximum of the training images;
# the same factor is applied to the test images.
print(f'Before normalization : Min={x_train.min()}, max={x_train.max()}')
xmax = x_train.max()
x_train, x_test = x_train / xmax, x_test / xmax
print(f'After normalization : Min={x_train.min()}, max={x_train.max()}')

# ooo.plot_images is given numpy arrays, so the tensors are converted first
np_x_train = x_train.numpy().astype(np.float64)
np_y_train = y_train.numpy().astype(np.uint8)

# Observation 27 alone with a colorbar, then observations 5 to 40 on 12 columns
ooo.plot_images(np_x_train, np_y_train, [27], x_size=5, y_size=5, colorbar=True)
ooo.plot_images(np_x_train, np_y_train, range(5, 41), columns=12)
class MyModel(nn.Module):
    """
    Basic fully connected neural-network

    Two hidden layers of 100 units with ReLU activations, followed by a
    10-unit output layer (one unit per class).

    ``forward`` returns raw class scores (logits) of shape (batch, 10).
    No softmax is applied here: ``nn.CrossEntropyLoss`` already applies a
    log-softmax to its input, and a softmax over ``dim=0`` would normalize
    across the observations of the batch instead of across the classes.
    Use ``F.softmax(logits, dim=1)`` to obtain class probabilities.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        hidden1 = 100
        hidden2 = 100
        self.hidden1 = nn.Linear(784, hidden1)
        self.hidden2 = nn.Linear(hidden1, hidden2)
        self.hidden3 = nn.Linear(hidden2, 10)

    def forward(self, x):
        """Return the (batch, 10) class scores for a batch of 28x28 images."""
        # Flatten each image into a vector of 784 values
        x = x.view(-1, 784)
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        # Raw scores: the prediction of one observation must not depend on
        # the other observations of the batch
        return self.hidden3(x)
# Instantiate the fully connected network and print its layers
model = MyModel()
print(model)
</font>
def fit(model,X_train,Y_train,X_test,Y_test, EPOCHS = 5, BATCH_SIZE = 32):
    """
    Train `model` by mini-batch gradient descent and return its convergence history.

    The optimizer is Adam with a learning rate of 1e-3 and the loss is
    nn.CrossEntropyLoss. At each epoch the training observations are
    visited once, in a random order, by batches of BATCH_SIZE observations.
    The last batch of an epoch may be smaller than BATCH_SIZE, so that no
    observation is skipped.

    Parameters:
        model: the torch module to train (updated in place).
        X_train, Y_train: training observations (indexed along their first
            axis) and their labels.
        X_test, Y_test: test observations and labels, only passed to the
            convergence history.
        EPOCHS: number of passes over the training data.
        BATCH_SIZE: number of observations per mini-batch.

    Returns:
        The convergence_history_CrossEntropyLoss object, updated once before
        training and once after each epoch.
    """
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # lr is the learning rate
    model.train()

    history = convergence_history_CrossEntropyLoss()
    history.update(model, X_train, Y_train, X_test, Y_test)  # state before any training

    n = X_train.shape[0]  # number of observations in the training data

    # stochastic gradient descent
    for epoch in range(EPOCHS):
        epoch_shuffler = np.arange(n)
        np.random.shuffle(epoch_shuffler)  # remark that 'utilsData.DataLoader' could be used instead

        # range() also yields the start of the last (possibly shorter) batch,
        # which a strict `batch_start + BATCH_SIZE < n` test would skip
        for batch_start in range(0, n, BATCH_SIZE):
            # get mini-batch observations
            mini_batch_observations = epoch_shuffler[batch_start:batch_start + BATCH_SIZE]
            X_batch = X_train[mini_batch_observations].float()  # the model works in float32
            Y_batch = Y_train[mini_batch_observations]

            # gradient descent step
            optimizer.zero_grad()  # set the parameters gradients to 0
            Y_pred_batch = model(X_batch)  # predict y with the current NN parameters
            curr_loss = loss(Y_pred_batch, Y_batch)  # compute the current loss
            curr_loss.backward()  # compute the loss gradient w.r.t. all NN parameters
            optimizer.step()  # update the NN parameters

        history.update(model, X_train, Y_train, X_test, Y_test)

    return history
# Train a fresh fully connected network
model = MyModel()
batch_size = 512
epochs = 32
history = fit(model, x_train, y_train, x_test, y_test, EPOCHS=epochs, BATCH_SIZE=batch_size)

# Evaluate on the test set. No gradient is needed for inference, so autograd
# is switched off; plain tensors replace the deprecated Variable wrapper.
var_x_test = x_test.float()
var_y_test = y_test[:]
loss = nn.CrossEntropyLoss()
with torch.no_grad():
    y_pred = model(var_x_test)
    curr_loss = loss(y_pred, var_y_test)
val_loss = curr_loss.item()
# The predicted label of an observation is the index of its highest score
val_accuracy = float((torch.argmax(y_pred, dim=1) == var_y_test).float().mean())

print('Test loss :', val_loss)
print('Test accuracy :', val_accuracy)

ooo.plot_history(history, figsize=(6,4))

# The predictions computed above are reused here: the model has not changed
# since, so a second forward pass would give the same result.
np_y_pred_label = torch.argmax(y_pred, dim=1).numpy().astype(np.uint8)

np_x_test = x_test.numpy().astype(np.float64)
np_y_test = y_test.numpy().astype(np.uint8)

ooo.plot_images(np_x_test, np_y_test, range(0,60), columns=12, x_size=1, y_size=1, y_pred=np_y_pred_label)
Quel est l'intérêt de *torch.argmax(y_pred, dim= 1)* ?
# Positions where the predicted label differs from the true one; at most 24 are kept
errors = np.flatnonzero(np_y_pred_label != np_y_test).tolist()[:24]

# Plot the first 15 misclassified images, then the confusion matrix over the 10 classes
ooo.plot_images(np_x_test, np_y_test, errors[:15], columns=6, x_size=2, y_size=2, y_pred=np_y_pred_label)
ooo.display_confusion_matrix(np_y_test, np_y_pred_label, range(10))
Vous trouverez ci-dessous deux architectures de réseaux convolutionnels. Vérifiez s'ils vous permettent d'améliorer la précision des résultats.
class CNN(nn.Module):
    """
    Basic convolutional neural network

    One 3x3 convolution (1 -> 6 channels), one 2x2 max-pooling and two
    fully connected layers. ``forward`` returns the raw scores of the 10
    classes; no activation is applied to the last layer.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel, 6 output channels; padding=1 keeps the 28x28 size
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, stride=1, padding=1)
        # 2x2 window with stride 2: each spatial dimension is halved
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 6 * 14 * 14 = 1176 input features, 64 output features
        self.fc1 = nn.Linear(6 * 14 * 14, 64)
        # 64 input features, one output feature per class
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        # Force the batch shape to (nbBatchObs, 1, 28, 28)
        images = x.view(-1, 1, 28, 28)
        # (1, 28, 28) -> (6, 28, 28) by the convolution, then (6, 14, 14) by the pooling
        feature_maps = self.pool(F.relu(self.conv1(images)))
        # One vector of 1176 values per observation; -1 infers the batch size
        flat = feature_maps.view(-1, 6 * 14 * 14)
        # 1176 -> 64 with ReLU, then 64 -> 10 without activation
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
# Instantiate the basic convolutional network and print its layers
cnn = CNN()
print(cnn)
class CNN2(nn.Module):
    """
    Deeper convolutional neural network than CNN

    Three 3x3 convolutions with 6 channels each, a 2x2 max-pooling after
    the first two, then two fully connected layers. ``forward`` returns the
    raw scores of the 10 classes; no activation is applied to the last layer.
    """

    def __init__(self):
        super().__init__()
        # padding=1 with a 3x3 kernel keeps the spatial size of every convolution
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(6, 6, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(6, 6, kernel_size=3, stride=1, padding=1)
        # 2x2 window with stride 2: each spatial dimension is halved
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 6 * 7 * 7 = 294 input features, 64 output features
        self.fc1 = nn.Linear(6 * 7 * 7, 64)
        # 64 input features, one output feature per class
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        # Force the batch shape to (nbBatchObs, 1, 28, 28)
        images = x.view(-1, 1, 28, 28)
        # (1, 28, 28) -> (6, 28, 28) -> (6, 14, 14)
        stage1 = self.pool(F.relu(self.conv1(images)))
        # (6, 14, 14) -> (6, 14, 14) -> (6, 7, 7)
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # Last convolution on the 6x7x7 maps, without pooling
        stage3 = F.relu(self.conv3(stage2))
        # One vector of 294 values per observation; -1 infers the batch size
        flat = stage3.view(-1, 6 * 7 * 7)
        # 294 -> 64 with ReLU, then 64 -> 10 without activation
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
# Instantiate the deeper convolutional network and print its layers
cnn2 = CNN2()
print(cnn2)
# Train a fresh basic convolutional network
cnn = CNN()
batch_size = 512
epochs = 32
history = fit(cnn, x_train, y_train, x_test, y_test, EPOCHS=epochs, BATCH_SIZE=batch_size)

# Evaluate on the test set. No gradient is needed for inference, so autograd
# is switched off; plain tensors replace the deprecated Variable wrapper.
var_x_test = x_test.float()
var_y_test = y_test[:]
loss = nn.CrossEntropyLoss()
with torch.no_grad():
    y_pred = cnn(var_x_test)
    curr_loss = loss(y_pred, var_y_test)
val_loss = curr_loss.item()
# The predicted label of an observation is the index of its highest score
val_accuracy = float((torch.argmax(y_pred, dim=1) == var_y_test).float().mean())

print('Test loss :', val_loss)
print('Test accuracy :', val_accuracy)
# Train a fresh deeper convolutional network
cnn2 = CNN2()
batch_size = 512
epochs = 32
history = fit(cnn2, x_train, y_train, x_test, y_test, EPOCHS=epochs, BATCH_SIZE=batch_size)

# Evaluate on the test set. No gradient is needed for inference, so autograd
# is switched off; plain tensors replace the deprecated Variable wrapper.
var_x_test = x_test.float()
var_y_test = y_test[:]
loss = nn.CrossEntropyLoss()
with torch.no_grad():
    y_pred = cnn2(var_x_test)
    curr_loss = loss(y_pred, var_y_test)
val_loss = curr_loss.item()
# The predicted label of an observation is the index of its highest score
val_accuracy = float((torch.argmax(y_pred, dim=1) == var_y_test).float().mean())

print('Test loss :', val_loss)
print('Test accuracy :', val_accuracy)
La qualité des prédictions a largement été augmentée en utilisant une architecture de réseau adaptée à la classification d'images.