diff --git a/data/train/__delete_me__ b/data/train/__delete_me__
new file mode 100644
index 0000000..86db4df
--- /dev/null
+++ b/data/train/__delete_me__
@@ -0,0 +1 @@
+Delete this file after cloning the repository
\ No newline at end of file
diff --git a/data/valid/__delete_me__ b/data/valid/__delete_me__
new file mode 100644
index 0000000..86db4df
--- /dev/null
+++ b/data/valid/__delete_me__
@@ -0,0 +1 @@
+Delete this file after cloning the repository
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/autoencoder.py b/models/autoencoder.py
new file mode 100644
index 0000000..d45b8d8
--- /dev/null
+++ b/models/autoencoder.py
@@ -0,0 +1,28 @@
+import torch.nn as nn
+
+
+class Autoencoder(nn.Module):
+    def __init__(self, input_dim, encoding_dim):
+        super(Autoencoder, self).__init__()
+        # Encoder: compress the flattened image down to the bottleneck
+        self.encoder = nn.Sequential(
+            nn.Linear(input_dim, 128),
+            nn.ReLU(),
+            nn.Linear(128, 64),
+            nn.ReLU(),
+            nn.Linear(64, encoding_dim)
+        )
+        # Decoder: map the bottleneck back to the input size; Sigmoid keeps outputs in [0, 1]
+        self.decoder = nn.Sequential(
+            nn.Linear(encoding_dim, 64),
+            nn.ReLU(),
+            nn.Linear(64, 128),
+            nn.ReLU(),
+            nn.Linear(128, input_dim),
+            nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        x = self.encoder(x)
+        x = self.decoder(x)
+        return x
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..3b00cab
--- /dev/null
+++ b/run.py
@@ -0,0 +1,40 @@
+import os
+import torch
+
+from models.autoencoder import Autoencoder
+from utils.dataloader import get_dataloader
+from utils.trainer import train_autoencoder, visualize_reconstructions, save_model, load_model, evaluate_autoencoder
+from settings import settings
+
+
+def main(load_trained_model):
+    BATCH_SIZE = 32
+    INPUT_DIM = 3 * 64 * 64
+    ENCODING_DIM = 12
+    NUM_EPOCHS = 1000
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    dataloader = get_dataloader(settings.DATA_PATH, BATCH_SIZE)
+    model = Autoencoder(INPUT_DIM, ENCODING_DIM).to(device)
+
+    if load_trained_model:
+        trained_model = load_model(model, settings.PATH_SAVED_MODEL, device=device)
+    else:
+        trained_model = train_autoencoder(model, dataloader, NUM_EPOCHS, device=device)
+
+    valid_dataloader = get_dataloader(settings.VALID_DATA_PATH, BATCH_SIZE)
+
+    save_path = os.path.join('./', settings.PATH_SAVED_MODEL)
+    save_model(trained_model, save_path)
+    print(f"Model saved to {save_path}")
+
+    avg_valid_loss = evaluate_autoencoder(trained_model, valid_dataloader, device)
+    print(f"Average validation loss: {avg_valid_loss:.4f}")
+
+    visualize_reconstructions(trained_model, valid_dataloader, num_samples=10, device=device)
+
+
+if __name__ == "__main__":
+    # Pass True to load a previously saved model instead of training from scratch
+    main(False)
diff --git a/settings/settings.py b/settings/settings.py
new file mode 100644
index 0000000..a9cdf6c
--- /dev/null
+++ b/settings/settings.py
@@ -0,0 +1,3 @@
+DATA_PATH = './data/train'
+VALID_DATA_PATH = './data/valid'
+PATH_SAVED_MODEL = './autoencoder_model.pth'
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/dataloader.py b/utils/dataloader.py
new file mode 100644
index 0000000..a59f513
--- /dev/null
+++ b/utils/dataloader.py
@@ -0,0 +1,40 @@
+import os
+from torchvision.transforms import ToTensor, Resize, Compose
+from torch.utils.data import DataLoader, Dataset
+from PIL import Image
+
+
+def get_dataloader(data_path, batch_size):
+    dataset = CustomDataset(data_path)
+
+    dataloader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=True
+    )
+
+    return dataloader
+
+
+class CustomDataset(Dataset):
+    def __init__(self, data_path):
+        self.data_path = data_path
+        # Keep only image files, so placeholders such as __delete_me__ are skipped
+        self.image_files = [
+            f for f in os.listdir(data_path)
+            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
+        ]
+
+        self.transforms = Compose([
+            Resize((64, 64)),
+            ToTensor()
+        ])
+
+    def __len__(self):
+        return len(self.image_files)
+
+    def __getitem__(self, idx):
+        image_path = os.path.join(self.data_path, self.image_files[idx])
+        image = Image.open(image_path).convert('RGB')
+        image = self.transforms(image)
+        return image
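Note on shapes: get_dataloader yields batches of shape (batch_size, 3, 64, 64), which run.py and utils/trainer.py flatten to 3 * 64 * 64 = 12288 features before feeding the fully connected Autoencoder. A minimal sanity-check sketch of that contract, assuming a few images already sit under ./data/train:

    from utils.dataloader import get_dataloader

    loader = get_dataloader('./data/train', batch_size=32)
    batch = next(iter(loader))              # (batch_size, 3, 64, 64) after Resize + ToTensor
    flat = batch.view(batch.size(0), -1)    # (batch_size, 12288), i.e. INPUT_DIM in run.py
    print(batch.shape, flat.shape)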
diff --git a/utils/trainer.py b/utils/trainer.py
new file mode 100644
index 0000000..a6cb81c
--- /dev/null
+++ b/utils/trainer.py
@@ -0,0 +1,76 @@
+import os
+import torch
+import torch.optim as optim
+import torch.nn as nn
+from torchvision.utils import save_image, make_grid
+import matplotlib.pyplot as plt
+
+
+def train_autoencoder(model, dataloader, num_epochs=5, learning_rate=0.001, device='cpu'):
+    criterion = nn.MSELoss()
+    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+    for epoch in range(num_epochs):
+        for data in dataloader:
+            img = data.to(device)
+            # Flatten each image to a vector to match the fully connected model
+            img = img.view(img.size(0), -1)
+            output = model(img)
+            loss = criterion(output, img)
+
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
+
+    return model
+
+
+def visualize_reconstructions(model, dataloader, num_samples=10, device='cpu', save_path="./samples"):
+    model.eval()
+    samples = next(iter(dataloader))
+    samples = samples[:num_samples].to(device)
+    samples = samples.view(samples.size(0), -1)
+    with torch.no_grad():
+        reconstructions = model(samples)
+
+    samples = samples.view(-1, 3, 64, 64)
+    reconstructions = reconstructions.view(-1, 3, 64, 64)
+
+    # Combine the samples and reconstructions into a single grid
+    combined = torch.cat([samples, reconstructions], dim=0)
+    grid_img = make_grid(combined, nrow=num_samples)
+
+    # Visualize with Matplotlib
+    plt.imshow(grid_img.permute(1, 2, 0).cpu().numpy())
+    plt.axis('off')
+    plt.show()
+
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+    save_image(grid_img, os.path.join(save_path, 'combined_samples.png'))
+
+
+def save_model(model, path):
+    torch.save(model.state_dict(), path)
+
+
+def load_model(model, path, device):
+    model.load_state_dict(torch.load(path, map_location=device))
+    model.eval()
+    return model
+
+
+def evaluate_autoencoder(model, dataloader, device):
+    model.eval()
+    total_loss = 0
+    criterion = nn.MSELoss()
+    with torch.no_grad():
+        for data in dataloader:
+            img = data.to(device)
+            img = img.view(img.size(0), -1)
+            output = model(img)
+            loss = criterion(output, img)
+            total_loss += loss.item()
+    return total_loss / len(dataloader)
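Usage: with images under ./data/train and ./data/valid, running python run.py trains the model, saves it to ./autoencoder_model.pth, reports the validation loss, and writes a reconstruction grid to ./samples/combined_samples.png. A minimal sketch of reloading the saved weights for evaluation only, composed from the functions this diff introduces (dimensions mirror run.py):

    import torch

    from models.autoencoder import Autoencoder
    from settings import settings
    from utils.dataloader import get_dataloader
    from utils.trainer import load_model, evaluate_autoencoder

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Autoencoder(input_dim=3 * 64 * 64, encoding_dim=12).to(device)
    model = load_model(model, settings.PATH_SAVED_MODEL, device=device)

    valid_loader = get_dataloader(settings.VALID_DATA_PATH, batch_size=32)
    avg_loss = evaluate_autoencoder(model, valid_loader, device)
    print(f"Average validation loss: {avg_loss:.4f}")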