diff --git a/Code/autoencoder_model.py b/Code/autoencoder_model.py
index d590719..975d07e 100644
--- a/Code/autoencoder_model.py
+++ b/Code/autoencoder_model.py
@@ -2,6 +2,10 @@
 Module for AutoEncoder
 Generates 3-Chanel RGB Image from 1-Chanel Grayscale Image
 --------------------------------------------------------------------------------
+For each pair of consecutive values in the channels list, a Convolutional or Transposed Convolutional layer is created.
+The number of input channels is the first value, and the number of output channels is the second value.
+A Batch Normalization layer and a LeakyReLU activation function are added after each Convolutional or Transposed Convolutional layer.
+In the case of the decoder, the final layer uses a Sigmoid activation function instead of LeakyReLU.
 '''

 # Import Necessary Libraries
@@ -11,36 +15,23 @@ class Grey2RGBAutoEncoder(nn.Module):
     def __init__(self):
         super(Grey2RGBAutoEncoder, self).__init__()
-        '''
         # Define the Encoder
-        The Encoder consists of 4 Convolutional layers with ReLU activation function
-        Encoder takes 1-Chanel Grayscale image (1 channel) as input and outputs High-Dimentional-Representation
-        '''
-        self.encoder = self._make_layers([1, 64, 128, 256, 512])
-
-        '''
+        self.encoder = self._make_layers([1, 64, 128, 256])
         # Define the Decoder
-        The Decoder consists of 4 Transpose Convolutional layers with ReLU activation function
-        Decoder takes High-Dimentional-Representation as input and outputs 3-Chanel RGB image
-        The last layer uses a Sigmoid activation function instead of ReLU
-        '''
-        self.decoder = self._make_layers([512, 256, 128, 64, 3], decoder=True)
+        self.decoder = self._make_layers([256, 128, 64, 3], decoder=True)

     # Helper function to create the encoder or decoder layers.
     def _make_layers(self, channels, decoder=False):
         layers = []
         for i in range(len(channels) - 1):
-            '''
-            For each pair of consecutive values in the channels list, a Convolutional or Transposed Convolutional layer is created.
-            The number of input channels is the first value, and the number of output channels is the second value.
-            A ReLU activation function is added after each Convolutional layer.
-            '''
             if decoder:
                 layers += [nn.ConvTranspose2d(channels[i], channels[i+1], kernel_size=3, stride=1, padding=1),
-                           nn.ReLU(inplace=True)]
+                           nn.BatchNorm2d(channels[i+1]),
+                           nn.LeakyReLU(inplace=True)]
             else:
                 layers += [nn.Conv2d(channels[i], channels[i+1], kernel_size=3, stride=1, padding=1),
-                           nn.ReLU(inplace=True)]
+                           nn.BatchNorm2d(channels[i+1]),
+                           nn.LeakyReLU(inplace=True)]
         if decoder:
             layers[-1] = nn.Sigmoid()
         return nn.Sequential(*layers)
@@ -50,4 +41,3 @@ def forward(self, x):
         x = self.encoder(x)
         x = self.decoder(x)
         return x
-
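For reference, the patched _make_layers helper in autoencoder_model.py expands to the following stacks for the new channel lists; a minimal hand-written sketch assuming only what the diff above shows (the 64x64 input size is illustrative):

    # Hand-expanded sketch of the encoder/decoder this patch builds (illustrative only).
    import torch
    from torch import nn

    encoder = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1), nn.BatchNorm2d(64), nn.LeakyReLU(inplace=True),
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), nn.BatchNorm2d(128), nn.LeakyReLU(inplace=True),
        nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), nn.BatchNorm2d(256), nn.LeakyReLU(inplace=True),
    )
    decoder = nn.Sequential(
        nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, padding=1), nn.BatchNorm2d(128), nn.LeakyReLU(inplace=True),
        nn.ConvTranspose2d(128, 64, kernel_size=3, stride=1, padding=1), nn.BatchNorm2d(64), nn.LeakyReLU(inplace=True),
        nn.ConvTranspose2d(64, 3, kernel_size=3, stride=1, padding=1), nn.BatchNorm2d(3),
        nn.Sigmoid(),  # layers[-1] = nn.Sigmoid() replaces the decoder's final LeakyReLU
    )

    x = torch.randn(1, 1, 64, 64)        # dummy 1-channel grayscale input
    print(decoder(encoder(x)).shape)      # torch.Size([1, 3, 64, 64])

Because every layer uses a 3x3 kernel with stride 1 and padding 1, the spatial resolution is preserved end to end; only the channel count changes.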
diff --git a/Code/data.py b/Code/data.py
index 7217e30..857e416 100644
--- a/Code/data.py
+++ b/Code/data.py
@@ -11,6 +11,7 @@
 from torch.utils.data import DataLoader, Dataset, random_split
 import torchvision.transforms as transforms
 import torch
+import os

 # Allow loading of truncated images
 ImageFile.LOAD_TRUNCATED_IMAGES = True
@@ -26,6 +27,7 @@ def __init__(self, grayscale_dir, rgb_dir, image_size, batch_size, valid_exts=['
         self.valid_exts = valid_exts # Valid file extensions
         # Get list of valid image filenames
         self.filenames = [f for f in self.grayscale_dir.iterdir() if f.suffix in self.valid_exts]
+        self.length = len(self.filenames)
         # Define transformations: resize and convert to tensor
         self.transform = transforms.Compose([
             transforms.Resize(self.image_size),
@@ -33,7 +35,7 @@ def __init__(self, grayscale_dir, rgb_dir, image_size, batch_size, valid_exts=['

     # Return the total number of images
     def __len__(self):
-        return len(self.filenames)
+        return self.length

     # Get a single item or a slice from the dataset
     def __getitem__(self, idx):
@@ -43,8 +45,12 @@
         grayscale_path = self.filenames[idx]
         rgb_path = self.rgb_dir / grayscale_path.name
         # Open images
-        grayscale_img = Image.open(grayscale_path)
-        rgb_img = Image.open(rgb_path)
+        try:
+            grayscale_img = Image.open(grayscale_path)
+            rgb_img = Image.open(rgb_path)
+        except IOError:
+            print(f"Error opening images {grayscale_path} or {rgb_path}")
+            return None
         # Apply transformations
         grayscale_img = self.transform(grayscale_img)
         rgb_img = self.transform(rgb_img)
@@ -72,24 +78,19 @@ def transform_sequence(self, filenames):

     # Get batches for LSTM training
     def get_lstm_batches(self, val_split, sequence_length, sequence_stride=2):
         assert sequence_length % 2 == 0, "The sequence length must be even."
-        # Compute the total number of sequences that can be formed, given the stride and length
-        sequence_indices = range(0, len(self.filenames) - sequence_length + 1, sequence_stride)
+        sequence_indices = range(0, self.length - sequence_length + 1, sequence_stride)
         total_sequences = len(sequence_indices)
-        # Divide the sequences into training and validation
         train_size = int((1.0 - val_split) * total_sequences)
         train_indices = sequence_indices[:train_size]
         val_indices = sequence_indices[train_size:]
-        # Create dataset with valid sequences only
         train_dataset = self.create_sequence_pairs(train_indices, sequence_length)
         val_dataset = self.create_sequence_pairs(val_indices, sequence_length)
-        # Create the data loaders for training and validation datasets
         train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=False)
         val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
-
         return train_loader, val_loader

     def create_sequence_pairs(self, indices, sequence_length):
@@ -97,7 +98,7 @@
         for start in indices:
             end = start + sequence_length
             # Make sure we don't go out of bounds
-            if end < len(self.filenames):
+            if end < self.length:
                 sequence_input = self.transform_sequence(self.filenames[start:end])
                 sequence_target = self.transform_sequence(self.filenames[start + 1:end + 1])
                 sequence_pairs.append((sequence_input, sequence_target))
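As an aside on the index arithmetic that get_lstm_batches and create_sequence_pairs share, a minimal sketch with hypothetical numbers (10 frames, sequence_length=4, sequence_stride=2) showing how the strided starts and the one-frame-shifted targets line up:

    # Illustrative only: how sequence start indices and (input, target) frame indices pair up.
    num_frames, sequence_length, sequence_stride = 10, 4, 2      # hypothetical values
    sequence_indices = range(0, num_frames - sequence_length + 1, sequence_stride)
    print(list(sequence_indices))                                # [0, 2, 4, 6]

    for start in sequence_indices:
        end = start + sequence_length
        if end < num_frames:                                     # same strict bound as create_sequence_pairs
            input_frames = list(range(start, end))               # frames t .. t+3
            target_frames = list(range(start + 1, end + 1))      # frames t+1 .. t+4, shifted by one
            print(input_frames, '->', target_frames)

The strict end < num_frames comparison drops the final start index (6 here), because its shifted target sequence would need a frame beyond the last one available.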
diff --git a/Code/losses.py b/Code/losses.py
index e69a629..2e3ad73 100644
--- a/Code/losses.py
+++ b/Code/losses.py
@@ -24,9 +24,7 @@ def forward(self, output, target):
         mse_loss = F.mse_loss(output, target)
         # Assume output to be raw logits: calculate log_probs and use it to compute entropy
         log_probs = F.log_softmax(output, dim=1) # dim 1 is the channel dimension
-        probs = torch.exp(log_probs)
-        entropy_loss = -torch.sum(probs * log_probs, dim=1).mean()
-
+        entropy_loss = -torch.sum(torch.exp(log_probs) * log_probs, dim=1).mean()
         # Combine MSE with entropy loss scaled by alpha factor
         composite_loss = (1 - self.alpha) * mse_loss + self.alpha * entropy_loss
         return composite_loss
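The collapsed entropy term above is algebraically the same as the two-step version it replaces; a minimal sketch of the composite loss on dummy tensors (the alpha value and tensor shapes are assumptions, not values taken from the repository):

    # Illustrative only: the composite MSE + maximum-entropy loss from this patch on dummy data.
    import torch
    import torch.nn.functional as F

    alpha = 0.5                                   # assumed weighting factor
    output = torch.randn(2, 3, 8, 8)              # raw logits; dim 1 is the channel dimension
    target = torch.rand(2, 3, 8, 8)

    mse_loss = F.mse_loss(output, target)
    log_probs = F.log_softmax(output, dim=1)
    entropy_loss = -torch.sum(torch.exp(log_probs) * log_probs, dim=1).mean()
    composite_loss = (1 - alpha) * mse_loss + alpha * entropy_loss
    print(mse_loss.item(), entropy_loss.item(), composite_loss.item())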
diff --git a/Code/lstm_model.py b/Code/lstm_model.py
index 952921c..00ccc89 100644
--- a/Code/lstm_model.py
+++ b/Code/lstm_model.py
@@ -3,14 +3,16 @@
 Generate Intermediate Images and Return the Complete Image Sequence with Interpolated Images
 --------------------------------------------------------------------------------
 '''
-# Import Necessary Libraries
+# Importing Necessary Libraries
 import torch
 from torch import nn
 from torch.nn import functional as F

+# Define ConvLSTMCell class
 class ConvLSTMCell(nn.Module):
     def __init__(self, input_dim, hidden_dim, kernel_size, num_features):
         super(ConvLSTMCell, self).__init__()
+        # Define the convolutional layer
         self.hidden_dim = hidden_dim
         padding = kernel_size[0] // 2, kernel_size[1] // 2
         self.conv = nn.Conv2d(in_channels=input_dim + hidden_dim,
@@ -19,77 +21,89 @@ def __init__(self, input_dim, hidden_dim, kernel_size, num_features):
                               padding=padding)

     def forward(self, input_tensor, cur_state):
+        # Unpack the current state into hidden state (h_cur) and cell state (c_cur)
         h_cur, c_cur = cur_state
+        # Concatenate the input tensor and the current hidden state along the channel dimension
         combined = torch.cat([input_tensor, h_cur], dim=1)
+        # Apply the convolution to the combined tensor
         combined_conv = self.conv(combined)
+        # Split the convolution output into four parts for input gate, forget gate, output gate, and cell gate
         cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
+        # Apply sigmoid activation to the input, forget, and output gates
         i = torch.sigmoid(cc_i)
         f = torch.sigmoid(cc_f)
         o = torch.sigmoid(cc_o)
+        # Apply tanh activation to the cell gate
         g = torch.tanh(cc_g)
-
+        # Compute the next cell state as a combination of the forget gate, current cell state, input gate, and cell gate
         c_next = f * c_cur + i * g
+        # Compute the next hidden state as the output gate times the tanh of the next cell state
         h_next = o * torch.tanh(c_next)
-
+        # Return the next hidden state and cell state
         return h_next, c_next

+# Define the ConvLSTM class
 class ConvLSTM(nn.Module):
     def __init__(self, input_dim, hidden_dims, kernel_size, num_layers, alpha=0.5):
         super(ConvLSTM, self).__init__()
+        # Set the number of layers, alpha parameter, and hidden dimensions
         self.num_layers = num_layers
         self.alpha = alpha
         self.hidden_dims = hidden_dims
+        # Initialize a ModuleList to hold the ConvLSTM cells
         self.cells = nn.ModuleList()
-
+        # Loop over the number of layers and create a ConvLSTM cell for each layer
         for i in range(num_layers):
+            # The input dimension for the first layer is input_dim, for other layers it is the hidden dimension of the previous layer
             cur_input_dim = input_dim if i == 0 else hidden_dims[i - 1]
+            # Append a new ConvLSTM cell to the cells list
             self.cells.append(ConvLSTMCell(input_dim=cur_input_dim,
                                            hidden_dim=hidden_dims[i],
                                            kernel_size=kernel_size,
                                            num_features=4)) # LSTM has 4 gates (features)

     def init_hidden(self, batch_size, image_height, image_width):
+        # Initialize a list to hold the initial hidden and cell states
         init_states = []
+        # Loop over the number of layers
         for i in range(self.num_layers):
-            # Note the change from self.hidden_dim to self.hidden_dims
+            '''
+            For each layer, create a zero tensor for the hidden state and the cell state
+            The size of the tensor is (batch_size, hidden_dim, image_height, image_width)
+            The tensor is moved to the same device as the weights of the convolutional layer of the corresponding ConvLSTM cell
+            '''
             init_states.append([torch.zeros(batch_size, self.hidden_dims[i], image_height, image_width, device=self.cells[i].conv.weight.device),
                                 torch.zeros(batch_size, self.hidden_dims[i], image_height, image_width, device=self.cells[i].conv.weight.device)])
+        # Return the initial states
         return init_states
-
     def forward(self, input_tensor, cur_state=None):
+        # Extract the batch size, sequence length, height, and width from the input tensor
         b, seq_len, _, h, w = input_tensor.size()
-
+        # If no current state is provided, initialize it using the init_hidden method
         if cur_state is None:
             cur_state = self.init_hidden(b, h, w)
-
-        # Initialize output tensors for each sequence element
+        # Initialize the output sequence tensor with zeros
        output_sequence = torch.zeros((b, seq_len - 1, self.hidden_dims[-1], h, w), device=input_tensor.device)
-
+        # Loop over each ConvLSTM cell (layer) in the model
         for layer_idx, cell in enumerate(self.cells):
-
-            # Fix: Unpack hidden and cell states for the current layer
+            # Extract the hidden state and cell state for the current layer
             h, c = cur_state[layer_idx]
-
-            # For handling the sequence of images
+            # Loop over each time step in the input sequence
             for t in range(seq_len - 1):
-                # Perform forward pass through the cell
-                h, c = cell(input_tensor[:, t, :, :, :], (h, c)) # Updated to pass tuple `(h, c)`
-
-                if layer_idx == self.num_layers - 1: # Only store output from the last layer
+                # Pass the input and current state through the cell to get the next state
+                h, c = cell(input_tensor[:, t, :, :, :], (h, c))
+                # If this is the last layer, add the hidden state to the output sequence
+                if layer_idx == self.num_layers - 1:
                     output_sequence[:, t, :, :, :] = h
-
-                # Generate the next input from alpha-blending
+                # If this is not the last time step, generate the next input by alpha-blending the current and next input
                 if t != seq_len - 2:
                     next_input = (1 - self.alpha) * input_tensor[:, t, :, :, :] + self.alpha * input_tensor[:, t + 1, :, :, :]
-                    h, c = cell(next_input, (h, c)) # Updated to pass tuple `(h, c)`
-
+                    h, c = cell(next_input, (h, c))
+            # Update the current state for this layer
             cur_state[layer_idx] = (h, c)
-
-        # No need to stack since we're assigning the results in the output tensor
-
-        # Predict an extra frame beyond the last input frame
+        # After processing all time steps, predict an extra frame beyond the last input frame
         h, c = cell(input_tensor[:, -1, :, :, :], (h, c))
         output_sequence = torch.cat([output_sequence, h.unsqueeze(1)], dim=1)
-
-        return output_sequence, cur_state
\ No newline at end of file
+        # Return the output sequence and the final state
+        return output_sequence, cur_state
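Shape-wise, the reworked forward pass consumes a (batch, seq_len, channels, height, width) tensor, stores seq_len - 1 per-step outputs, and appends one extra predicted frame, so the returned sequence has the same length as the input. A minimal usage sketch with the hyper-parameters main.py passes in (the batch size, sequence length, and resolution here are illustrative only):

    # Illustrative only: calling the ConvLSTM as configured in main.py.
    import torch
    from lstm_model import ConvLSTM

    model = ConvLSTM(input_dim=1, hidden_dims=[1, 1, 1], kernel_size=(3, 3), num_layers=3, alpha=0.5)
    frames = torch.randn(2, 4, 1, 32, 32)      # (batch, seq_len, channels, height, width)
    output_sequence, cur_state = model(frames)
    print(output_sequence.shape)                # expected: torch.Size([2, 4, 1, 32, 32])

Inside the time loop, every step except the last also advances the state on an alpha-blend of the current and next frame, (1 - alpha) * x_t + alpha * x_{t+1}, which is how interpolated content enters the hidden state.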
diff --git a/Code/main.py b/Code/main.py
index 37a4b13..79e7ea7 100644
--- a/Code/main.py
+++ b/Code/main.py
@@ -30,10 +30,9 @@ def main():
     # Initialize Dataset Object (PyTorch Tensors)
     try:
         dataset = CustomDataset(grayscale_dir, rgb_dir, (image_height, image_width), batch_size)
-        print('Loading Dataset Completed.')
+        print('Importing Dataset Complete.')
     except Exception as e:
-        print(f"Loading Dataset In-Complete : \n{e}")
-
+        print(f"Importing Dataset In-Complete : \n{e}")
     # Import Loss Functions
     try:
         loss_mse = LossMSE() # Mean Squared Error Loss
@@ -42,19 +41,21 @@
         print('Importing Loss Functions Complete.')
     except Exception as e:
         print(f"Importing Loss Functions In-Complete : \n{e}")
+    print('-'*20) # Makes Output Readable

     # Initialize AutoEncoder Model and Import Dataloader (Training, Validation)
     data_autoencoder_train, data_autoencoder_val = dataset.get_autoencoder_batches(val_split=0.2)
-    print('AutoEncoder Model Data Initialized.')
+    print('AutoEncoder Model Data Imported.')
     model_autoencoder = Grey2RGBAutoEncoder()
     print('AutoEncoder Model Initialized.')
+    print('-'*20) # Makes Output Readable

     # Initialize LSTM Model and Import Dataloader (Training, Validation)
     data_lstm_train, data_lstm_val = dataset.get_lstm_batches(val_split=0.25, sequence_length=2)
-    print('LSTM Model Data Initialized.')
+    print('LSTM Model Data Imported.')
     model_lstm = ConvLSTM(input_dim=1, hidden_dims=[1,1,1], kernel_size=(3, 3), num_layers=3, alpha=0.5)
     print('LSTM Model Initialized.')
-
+    print('-'*20) # Makes Output Readable

     '''
     Initialize Trainer Objects
@@ -63,10 +64,11 @@
     os.makedirs('../Models/Method1', exist_ok=True) # Creating Directory for Model Saving
     model_save_path_ae = '../Models/Method1/model_autoencoder_m1.pth'
     trainer_autoencoder_baseline = Trainer(model_autoencoder, loss_mse, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae)
-    print('Baseline AutoEncoder Trainer Initialized.')
+    print('Method-1 AutoEncoder Trainer Initialized.')
     model_save_path_lstm = '../Models/Method1/model_lstm_m1.pth'
     trainer_lstm_baseline = Trainer(model_lstm, loss_mse, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm)
-    print('Baseline LSTM Trainer Initialized.')
+    print('Method-1 LSTM Trainer Initialized.')
+    print('-'*10) # Makes Output Readable

     # Method 2 : Composite Loss (MSE + MaxEnt) for AutoEncoder and Mean Squared Error Loss for LSTM
     os.makedirs('../Models/Method2', exist_ok=True) # Creating Directory for Model Saving
@@ -74,6 +76,7 @@
     trainer_autoencoder_m2 = Trainer(model=model_autoencoder, loss_function=loss_mep, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae)
     print('Method-2 AutoEncoder Trainer Initialized.')
     print('Method-2 LSTM == Method-1 LSTM')
+    print('-'*10) # Makes Output Readable

     # Method 3 : Mean Squared Error Loss for AutoEncoder and SSIM Loss for LSTM
     os.makedirs('../Models/Method3', exist_ok=True) # Creating Directory for Model Saving
@@ -81,11 +84,14 @@
     model_save_path_lstm = '../Models/Method3/model_lstm_m3.pth'
     trainer_lstm_m3 = Trainer(model_lstm, loss_ssim, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm)
     print('Method-3 LSTM Trainer Initialized.')
+    print('-'*10) # Makes Output Readable

     # Method 4 : Proposed Method : Composite Loss (MSE + MaxEnt) for AutoEncoder and SSIM Loss for LSTM
     print('Method-4 AutoEncoder == Method-2 AutoEncoder')
     print('Method-4 LSTM == Method-3 LSTM')
+    print('-'*20) # Makes Output Readable
+

     '''
     Train Models, Obtain Trained Model
@@ -93,42 +99,54 @@
     # Method-1
     try:
         epochs = 1
-        print('M1 AutoEncoder Training Start.')
+        print('Method-1 AutoEncoder Training Start')
         model_autoencoder_m1 = trainer_autoencoder_baseline.train_autoencoder(epochs, data_autoencoder_train, data_autoencoder_val)
-        print('M1 AutoEncoder Training Complete.')
+        print('Method-1 AutoEncoder Training Complete.')
     except Exception as e:
-        print(f"M1 AutoEncoder Training Error : \n{e}")
+        print(f"Method-1 AutoEncoder Training Error : \n{e}")
         traceback.print_exc()
+    print('-'*10) # Makes Output Readable
     try:
         epochs = 1
-        print('M1 LSTM Training Start.')
+        print('Method-1 LSTM Training Start')
         model_lstm_m1 = trainer_lstm_baseline.train_lstm(epochs, data_lstm_train, data_lstm_val)
-        print('M1 LSTM Training Complete.')
+        print('Method-1 LSTM Training Complete.')
     except Exception as e:
-        print(f"M1 LSTM Training Error : \n{e}")
+        print(f"Method-1 LSTM Training Error : \n{e}")
         traceback.print_exc()
+    print('-'*20) # Makes Output Readable

     # Method-2
     try:
         epochs = 1
-        print('M2 AutoEncoder Training Start.')
+        print('Method-2 AutoEncoder Training Start')
         model_autoencoder_m2 = trainer_autoencoder_m2.train_autoencoder(epochs, data_autoencoder_train, data_autoencoder_val)
-        print('M2 AutoEncoder Training Complete.')
+        print('Method-2 AutoEncoder Training Complete.')
     except Exception as e:
-        print(f"M2 AutoEncoder Training Error : \n{e}")
+        print(f"Method-2 AutoEncoder Training Error : \n{e}")
         traceback.print_exc()
-    # Method-2 LSTM == Method-1 LSTM, no need to train again
+    print('-'*10) # Makes Output Readable
+    print("Method-2 LSTM == Method-1 LSTM, No Need To Train Again.")
+    print('-'*20) # Makes Output Readable

     # Method-3
+    print("Method-3 AutoEncoder == Method-1 AutoEncoder, No Need To Train Again.")
+    print('-'*10) # Makes Output Readable
     try:
         epochs = 1
-        print('M3 LSTM Training Start.')
+        print('Method-3 LSTM Training Start.')
         model_lstm_m3 = trainer_lstm_m3.train_lstm(epochs, data_lstm_train, data_lstm_val)
-        print('M3 LSTM Training Complete.')
+        print('Method-3 LSTM Training Complete.')
     except Exception as e:
-        print(f"M3 LSTM Training Error : \n{e}")
+        print(f"Method-3 LSTM Training Error : \n{e}")
         traceback.print_exc()
-    # Method-3 AutoEncoder == Method-1 AutoEncoder, no need to train again
+    print('-'*20) # Makes Output Readable
+
+    # Method-4
+    print("Method-4 AutoEncoder == Method-2 AutoEncoder, No Need To Train Again.")
+    print('-'*10) # Makes Output Readable
+    print("Method-4 LSTM == Method-3 LSTM, No Need To Train Again.")
+    print('-'*20) # Makes Output Readable

 if __name__ == '__main__':
diff --git a/Code/training.py b/Code/training.py
index a4057af..0639c45 100644
--- a/Code/training.py
+++ b/Code/training.py
@@ -10,13 +10,15 @@
 # Import Necessary Libraries
 import torch
+import torch.nn as nn

 # Define Training Class
 class Trainer():
     def __init__(self, model, loss_function, optimizer=None, model_save_path=None):
-        # Define the device
+        # Use All Available CUDA GPUs for Training (if Available)
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        # Define the model and move it to the device
+        if torch.cuda.device_count() > 1:
+            model = nn.DataParallel(model)
         self.model = model.to(self.device)
         # Define the loss function
         self.loss_function = loss_function
@@ -30,6 +32,10 @@ def save_model(self):
         torch.save(self.model.state_dict(), self.model_save_path)

     def train_autoencoder(self, epochs, train_loader, val_loader):
+        # Print Names of All Available GPUs (if any) to Train the Model
+        if torch.cuda.device_count() > 0:
+            gpu_names = ', '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])
+            print("\tGPUs being used for Training : ",gpu_names)
         best_val_loss = float('inf')
         for epoch in range(epochs):
             self.model.train() # Set the Model to Training Mode
@@ -48,7 +54,7 @@ def train_autoencoder(self, epochs, train_loader, val_loader):
                 val_loss = sum(self.loss_function(self.model(input.to(self.device)), target.to(self.device)).item() for input, target in val_loader) # Compute Total Validation Loss
                 val_loss /= len(val_loader) # Compute Average Validation Loss
             # Print epochs and losses
-            print(f'AutoEncoder Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}')
+            print(f'\tAutoEncoder Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}')
             # If the current validation loss is lower than the best validation loss, save the model
             if val_loss < best_val_loss:
                 best_val_loss = val_loss # Update the best validation loss
@@ -57,6 +63,10 @@ def train_autoencoder(self, epochs, train_loader, val_loader):
         return self.model

     def train_lstm(self, epochs, train_loader, val_loader):
+        # Print Names of All Available GPUs (if any) to Train the Model
+        if torch.cuda.device_count() > 0:
+            gpu_names = ', '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())])
+            print("\tGPUs being used for Training : ",gpu_names)
         best_val_loss = float('inf')
         for epoch in range(epochs):
             self.model.train() # Set the model to training mode
@@ -78,10 +88,10 @@ def train_lstm(self, epochs, train_loader, val_loader):
                     val_loss += self.loss_function(output_sequence, target_sequence).item() # Accumulate loss
                 val_loss /= len(val_loader) # Average validation loss
             # Print epochs and losses
-            print(f'Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}')
+            print(f'\tLSTM Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}')
             # Model saving based on validation loss
             if val_loss < best_val_loss:
                 best_val_loss = val_loss
                 self.save_model()
         # Return the trained model
-        return self.model
\ No newline at end of file
+        return self.model
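One practical consequence of the new nn.DataParallel wrapping: when more than one GPU is visible, the checkpoints written by save_model carry a "module." prefix on every state_dict key. A minimal sketch of loading such a checkpoint back into a plain single-device model; the path is the Method-1 one from main.py, and the prefix handling is the standard PyTorch idiom rather than anything this patch adds:

    # Illustrative only: reloading a checkpoint that may have been saved from a DataParallel-wrapped model.
    import torch
    from autoencoder_model import Grey2RGBAutoEncoder

    state_dict = torch.load('../Models/Method1/model_autoencoder_m1.pth', map_location='cpu')
    # Strip the "module." prefix if present; a no-op for checkpoints saved from an unwrapped model.
    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v for k, v in state_dict.items()}

    model = Grey2RGBAutoEncoder()
    model.load_state_dict(state_dict)
    model.eval()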