This exercise focuses on using a multilayer perceptron (MLP) to estimate gaze, using data from the week 6 exercise Filtering gaze data. In that exercise, a dictionary was generated dividing the frames into sections, one for each gaze target.
Task 1: Load the data
- Run the cell below to load a dictionary containing the frame intervals for each target.
import time
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import nn_util
frames = nn_util.load_frames("data/frames.csv")
Dictionary loaded from data/frames.csv
Load the pupil coordinates from the file cleaned_pupil_coordinates.csv and the screen coordinates from the file screen_coordinates.csv for the grid pattern. You may have to change the file paths. The function map_coordinates_to_targets returns two $N \times 2$ arrays containing inputs and labels.
file_name_pupil = '../W06/data/output/test_subject_3/grid/cleaned_pupil_coordinates.csv'
file_name_screen = '../W06/data/output/test_subject_3/grid/screen_coordinates.csv'
pupil_coor = np.asarray(nn_util.load_coordinates(file_name_pupil))
screen_coor = np.asarray(nn_util.load_coordinates(file_name_screen))
inputs, labels = nn_util.map_coordinates_to_targets(pupil_coor, frames, screen_coor)  # `inputs` avoids shadowing the built-in input()
The dataset is divided into training and test data using the train_test_split function from scikit-learn.
In the cell below:
- Use train_test_split to split the input and label data into $80\%/20\%$ train/test sets.
- Use train_test_split to split the training data into $75\%/25\%$ train/validation sets.
- Use plot_data_splits from the nn_util.py file to visualize the splits (a sketch follows the cell below).
# nn_util.plot_data_splits(X_train, X_val, X_test) # uncomment once the splits are made
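A minimal sketch of the requested splits, assuming the inputs and labels arrays from above (random_state is an arbitrary choice for reproducibility):
# 80%/20% train/test split of the full dataset.
X_train, X_test, Y_train, Y_test = train_test_split(inputs, labels, test_size=0.2, random_state=42)
# 75%/25% train/validation split of the training portion
# (0.25 * 0.8 = 0.2, giving final proportions of 60%/20%/20%).
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.25, random_state=42)
nn_util.plot_data_splits(X_train, X_val, X_test)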
# Write your reflection here...
The following tasks introduce an affine neural network but use non-linear optimization to find the model parameters. In Assignment 1 (Gaze Estimation) you used linear least squares to find the model parameters.
nn_util.plot_least_square_results(X_train, Y_train, X_test, Y_test)
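For reference, the least-squares baseline can be computed in closed form with numpy (a sketch; the implementation behind plot_least_square_results may differ):
# Append a bias column so the affine map Y ≈ XW + b becomes a single matrix W.
X_aug = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
W, _, _, _ = np.linalg.lstsq(X_aug, Y_train, rcond=None)  # minimizes ||X_aug W - Y_train||^2
Y_pred_ls = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) @ W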
The cell below contains the definition of an affine model, LinearModel, in PyTorch. The class MSELoss explicitly defines the mean squared error (MSE) loss function for pedagogical reasons. Note that the PyTorch library has its own MSE loss, torch.nn.MSELoss.
class LinearModel(nn.Module):
    """A single affine layer mapping input features to output features.

    Args:
        input_dim (int): Number of input features.
        output_dim (int): Number of output features.

    Methods:
        forward(x): Passes the input through the linear layer.
    """
    def __init__(self, input_dim, output_dim):
        super(LinearModel, self).__init__()
        self.linear1 = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Args:
            x (Tensor): Input tensor.
        Returns:
            Tensor: Output tensor after applying the linear transformation.
        """
        x = self.linear1(x)
        return x

class MSELoss(nn.Module):
    """Mean squared error loss, written out explicitly for pedagogical reasons."""
    def __init__(self, reduction='mean'):
        super(MSELoss, self).__init__()
        self.reduction = reduction

    def forward(self, input, target):
        squared_diff = (input - target) ** 2
        if self.reduction == 'mean':
            return squared_diff.mean()
        elif self.reduction == 'sum':
            return squared_diff.sum()
        else:
            raise ValueError("Invalid reduction type. Use 'mean' or 'sum'.")
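A quick sanity check (illustrative) that the handwritten loss agrees with PyTorch's built-in version:
# Both losses use 'mean' reduction by default, so the values should match.
a, b = torch.randn(4, 2), torch.randn(4, 2)
assert torch.allclose(MSELoss()(a, b), nn.MSELoss()(a, b))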
def train_model(model, criterion, optimizer, X_train, Y_train, X_val=None, Y_val=None, num_epochs=100):
    """
    Args:
        model (nn.Module): The neural network model to train.
        criterion (nn.Module): The loss function to minimize.
        optimizer (torch.optim.Optimizer): Optimizer for updating model parameters.
        X_train (Nx2 Tensor): Training input data.
        Y_train (Nx2 Tensor): Training target data.
        X_val (Nx2 Tensor, optional): Validation input data. Defaults to None.
        Y_val (Nx2 Tensor, optional): Validation target data. Defaults to None.
        num_epochs (int): Number of training epochs.
    Returns:
        list: Loss values for each epoch (training).
        list: Loss values for each epoch (validation).
        float: Training time in seconds.
    """
    start_time = time.time()
    train_losses = []
    val_losses = []
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, Y_train)
        loss.backward()
        # Clip the gradient norm to stabilize training at large learning rates.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        train_losses.append(loss.item())
        # Validation phase (if validation data is provided)
        if X_val is not None and Y_val is not None:
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val)
                val_loss = criterion(val_outputs, Y_val)
                val_losses.append(val_loss.item())
    training_time = time.time() - start_time
    return train_losses, val_losses, training_time
def test_model(model, X_test, Y_test):
"""
Evaluates a trained model on test data.
Args:
model (nn.Module): The trained neural network model.
X_test (Tensor): Test input data.
Y_test (Tensor): Test target data.
Returns:
float: Mean squared error (MSE) over the test set.
np.ndarray: Predicted values as a numpy array.
np.ndarray: True values as a numpy array.
np.ndarray: Absolute errors for x and y coordinates.
"""
model.eval()
with torch.no_grad():
test_output = model(X_test)
mse = mean_squared_error(Y_test.cpu().numpy(), test_output.cpu().numpy())
predictions = test_output.cpu().numpy()
true_values = Y_test.cpu().numpy()
errors = np.abs(true_values - predictions)
return mse, predictions, true_values, errors
# Set parameters
input_dim = 2
output_dim = 2
learning_rate = 0.1
epoch = 20000
# Convert the data splits to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32)
model = LinearModel(input_dim, output_dim)
criterion = MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
# Train the model
losses, val_losses, training_time = train_model(model, criterion, optimizer, X_train_tensor, Y_train_tensor, X_val_tensor, Y_val_tensor, num_epochs=epoch)
# Test the model
mse, Y_pred, true_values, errors_nn = test_model(model, X_test_tensor, Y_test_tensor)
print(f'Average MSE: {mse}')
# Visualize results
nn_util.plot_results(
X_train_tensor,
Y_train_tensor,
X_test_tensor,
Y_test_tensor,
Y_pred,
errors_nn,
losses,
val_losses,
model_name='NN',
training_time=training_time
)
Average MSE: 135701.0625
You will notice that the neural network has difficulty predicting gaze compared to the linear least-squares optimization; an MSE of about 135701 corresponds to a root-mean-square error of roughly 368 screen units.
# Write your reflections here...
The following steps investigate reasons for the poorer performance, including the impact of outliers, preprocessing (normalization) of the data, and the choice of learning rate and number of iterations.
The following tasks investigate the impact of outliers by analyzing a synthetic dataset with a small amount of noise. The function generate_data_grid returns a synthetic noisy dataset without outliers.
- Use train_test_split to split the synthetic data into $80\%/20\%$ train/test datasets.
- Train and test the model on the synthetic data, and visualize the results with plot_results (a sketch follows the cell below).
input_syn, target_syn, A, b = nn_util.generate_data_grid(noise_std=0.1)
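A sketch of these steps, reusing train_model and test_model from above (the variable names are illustrative):
# Split the synthetic data 80%/20% and convert to float32 tensors.
Xs_train, Xs_test, Ys_train, Ys_test = train_test_split(input_syn, target_syn, test_size=0.2, random_state=42)
Xs_train_t = torch.tensor(Xs_train, dtype=torch.float32)
Ys_train_t = torch.tensor(Ys_train, dtype=torch.float32)
Xs_test_t = torch.tensor(Xs_test, dtype=torch.float32)
Ys_test_t = torch.tensor(Ys_test, dtype=torch.float32)
# Train and evaluate a fresh affine model on the synthetic data.
model_syn = LinearModel(input_dim, output_dim)
optimizer_syn = optim.SGD(model_syn.parameters(), lr=learning_rate)
losses_syn, _, time_syn = train_model(model_syn, MSELoss(), optimizer_syn, Xs_train_t, Ys_train_t, num_epochs=epoch)
mse_syn, Y_pred_syn, _, errors_syn = test_model(model_syn, Xs_test_t, Ys_test_t)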
# Write your reflections here...
The following steps investigate the impact of preprocessing by normalizing the input and label data. They also investigate the impact of the learning rate and the number of iterations.
To compare the performance of the models, the function plot_results_collected from the file nn_util.py is used. This function takes six dictionaries as input.
The data needed to populate these data structures are provided gradually throughout the exercise. It is important to maintain the key names for the specific models. Define key names such as 'Synthetic lr: 0.01, epoch: 500' to indicate architecture and training parameters.
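For example, the results for one configuration might be stored as follows (lr and n_epochs are hypothetical loop variables):
key = f'Synthetic lr: {lr}, epoch: {n_epochs}'
losses_dict_syn[key] = losses
mse_norm_syn_dict[key] = mse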
- Complete the DataScaler class by implementing the normalize function and the denormalize function (a possible completion is sketched after the cell below).
- Use DataScaler to normalize the data in the cleaned_pupil.csv and screen_coordinates.csv files.
- Use DataScaler to normalize the synthetic data.
- Train the models using the train_model function.
- Test the models using the test_model function.
- Use the affine model (LinearModel).
- Use plot_results_collected from the nn_util.py file to visualize the results.
class DataScaler:
    def __init__(self):
        self.min = None
        self.max = None

    def normalize(self, data):
        # TODO: compute and store self.min / self.max, then scale the data.
        return normalized_data

    def denormalize(self, normalized_data):
        # TODO: invert the normalization using self.min / self.max.
        return data
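One possible completion, assuming min-max scaling to the range [0, 1] (suggested by the min and max attributes):
class DataScaler:
    """Min-max scaler that remembers the statistics of the first data it sees."""
    def __init__(self):
        self.min = None
        self.max = None

    def normalize(self, data):
        # Store per-column minima and maxima on the first call.
        if self.min is None:
            self.min = data.min(axis=0)
            self.max = data.max(axis=0)
        return (data - self.min) / (self.max - self.min)

    def denormalize(self, normalized_data):
        # Invert the affine scaling back to the original units.
        return normalized_data * (self.max - self.min) + self.min

Separate scalers can then be used for inputs and labels so each can be inverted independently:
input_scaler, label_scaler = DataScaler(), DataScaler()
inputs_norm = input_scaler.normalize(inputs)
labels_norm = label_scaler.normalize(labels)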
# Set hyperparameters
input_dim = 2
output_dim = 2
learning_rate = [0.0001, 0.1, 1.5]
epoch = [500, 2000, 10000]
criterion = MSELoss()
# Containers for the gaze data
models_dict = {}
losses_dict = {}
losses_val_dict = {}
training_time_dict = {}
pred_norm_dict = {}
errors_norm_dict = {}
mse_norm_dict = {}
# Containers for the synthetic gaze data
models_dict_syn = {}
losses_dict_syn = {}
losses_val_dict_syn = {}
training_time_dict_syn = {}
pred_norm_dict_syn = {}
errors_norm_dict_syn = {}
mse_norm_syn_dict = {}
# Train the models
for lr in learning_rate:
    for n_epochs in epoch:
        # TODO: train and test a LinearModel on both the normalized gaze data
        # and the normalized synthetic data, storing the results under keys
        # such as f'Gaze lr: {lr}, epoch: {n_epochs}' in the dictionaries above.
        pass
nn_util.plot_mse_bar(mse_norm_dict)
nn_util.plot_mse_bar(mse_norm_syn_dict)
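A sketch of what the loop body might look like for the gaze data, assuming normalized float32 tensors X_train_norm_t, Y_train_norm_t, X_val_norm_t, Y_val_norm_t, X_test_norm_t, and Y_test_norm_t have been prepared with DataScaler (all names are illustrative):
for lr in learning_rate:
    for n_epochs in epoch:
        key = f'Gaze lr: {lr}, epoch: {n_epochs}'
        model = LinearModel(input_dim, output_dim)
        optimizer = optim.SGD(model.parameters(), lr=lr)
        losses, val_losses, t = train_model(model, criterion, optimizer,
                                            X_train_norm_t, Y_train_norm_t,
                                            X_val_norm_t, Y_val_norm_t,
                                            num_epochs=n_epochs)
        mse, preds, _, errs = test_model(model, X_test_norm_t, Y_test_norm_t)
        # Collect everything under the same key for plot_results_collected.
        models_dict[key] = model
        losses_dict[key] = losses
        losses_val_dict[key] = val_losses
        training_time_dict[key] = t
        pred_norm_dict[key] = preds
        errors_norm_dict[key] = errs
        mse_norm_dict[key] = mse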