Skip to content

Commit

Permalink
[ADD] stratify on the train / val split
Browse files Browse the repository at this point in the history
  • Loading branch information
BenCretois committed Mar 4, 2024
1 parent de13bca commit bdf162e
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 71 deletions.
2 changes: 2 additions & 0 deletions dcase_fine_tune/CONFIG.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ trainer:
num_workers: 4
patience: 20
min_sample_per_category: 10
test_size: 0.2

model:
lr: 1.0e-05
ft_entire_network: True
num_target_classes: 2
model_path: "/data/models/BEATs/BEATs_iter3_plus_AS2M.pt"
specaugment_params: null
Expand Down
6 changes: 4 additions & 2 deletions dcase_fine_tune/FTBeats.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ def _build_model(self):
self.beats.load_state_dict(self.checkpoint["model"])

# 2. Classifier
self.fc = nn.Linear(self.cfg.encoder_embed_dim, self.cfg.predictor_class)
print(f"Classifier has {self.num_target_classes} output neurons")
self.fc = nn.Linear(self.cfg.encoder_embed_dim, self.num_target_classes)

def extract_features(self, x, padding_mask=None):
if padding_mask != None:
Expand All @@ -81,7 +82,7 @@ def forward(self, x, padding_mask=None):
# Get the logits
x = self.fc(x)

# Mean pool the second layer
# Mean pool the second dimension (these are the tokens)
x = x.mean(dim=1)

return x
Expand All @@ -99,6 +100,7 @@ def training_step(self, batch, batch_idx):
train_loss = self.loss(y_probs, y_true)

# 3. Compute accuracy:
self.log("train_loss", train_loss, prog_bar=True)
self.log("train_acc", self.train_acc(y_probs, y_true), prog_bar=True)

return train_loss
Expand Down
120 changes: 55 additions & 65 deletions dcase_fine_tune/FTDataModule.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,14 @@
from torch.utils.data import Dataset, DataLoader
from pytorch_lightning import LightningDataModule
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import torch
import pandas as pd
import numpy as np

from torch.utils.data import WeightedRandomSampler


class AudioDatasetDCASE(Dataset):
    """Dataset over a DataFrame expected to hold 'feature' and 'category' columns.

    Categories are integer-encoded with a LabelEncoder. An existing
    ``label_dict`` (category name -> int) can be supplied so the encoding
    stays consistent with a previously built dataset; otherwise the encoder
    is fitted on this frame's categories and the mapping is derived from it.
    """

    def __init__(
        self,
        data_frame,
        label_dict=None,
    ):
        self.data_frame = data_frame
        self.label_encoder = LabelEncoder()
        if label_dict is not None:
            # Reuse the caller-provided mapping so label ids match a prior split.
            self.label_encoder.fit(list(label_dict.keys()))
            self.label_dict = label_dict
        else:
            self.label_encoder.fit(self.data_frame["category"])
            self.label_dict = dict(
                zip(
                    self.label_encoder.classes_,
                    self.label_encoder.transform(self.label_encoder.classes_),
                )
            )

    def __len__(self):
        return len(self.data_frame)

    def get_labels(self):
        """Return the integer-encoded label of every row, in row order.

        Single vectorized transform over the whole column instead of one
        sklearn call per row (the original looped with range(len(...))).
        """
        return list(self.label_encoder.transform(self.data_frame["category"]))

    def __getitem__(self, idx):
        input_feature = torch.Tensor(self.data_frame.iloc[idx]["feature"])
        label = self.data_frame.iloc[idx]["category"]

        # Encode label as integer
        label = self.label_encoder.transform([label])[0]

        return input_feature, label

    def get_label_dict(self):
        # Mapping of category name -> encoded integer, as built in __init__.
        return self.label_dict

class AudioDatasetDCASEV2(Dataset):
class TrainAudioDatasetDCASE(Dataset):
def __init__(
self,
data_frame,
Expand Down Expand Up @@ -98,28 +52,16 @@ def __init__(
self.test_size = test_size
self.min_sample_per_category = min_sample_per_category

self.label_encoder = LabelEncoder()
self.label_encoder.fit(self.data_frame["category"])
self.label_dict = dict(
zip(
self.label_encoder.classes_,
self.label_encoder.transform(self.label_encoder.classes_),
)
)

self.setup()
self.divide_train_val()

def setup(self, stage=None):
# load data
self.data_frame["category"] = self.label_encoder.fit_transform(self.data_frame["category"])
self.complete_dataset = AudioDatasetDCASEV2(data_frame=self.data_frame)
self.data_frame["category"] = LabelEncoder().fit_transform(self.data_frame["category"])
self.complete_dataset = TrainAudioDatasetDCASE(data_frame=self.data_frame)

def divide_train_val(self):

value_counts = self.data_frame["category"].value_counts()
self.num_target_classes = len(self.data_frame["category"].unique())

# Separate into training and validation set
train_indices, validation_indices, _, _ = train_test_split(
range(len(self.complete_dataset)),
Expand All @@ -132,22 +74,23 @@ def divide_train_val(self):
data_frame_train = self.data_frame.loc[train_indices]
data_frame_train.reset_index(drop=True, inplace=True)

# deal with class imbalance
# deal with class imbalance in the training set
value_counts = data_frame_train["category"].value_counts()
weight = 1. / value_counts
samples_weight = np.array([weight[t] for t in data_frame_train["category"]])
samples_weight = torch.from_numpy(samples_weight)
samples_weight = samples_weight.double()
self.sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

# Make the validation set
data_frame_validation = self.data_frame.loc[validation_indices]
data_frame_validation.reset_index(drop=True, inplace=True)

# generate subset based on indices
self.train_set = AudioDatasetDCASEV2(
self.train_set = TrainAudioDatasetDCASE(
data_frame=data_frame_train,
)
self.val_set = AudioDatasetDCASEV2(
self.val_set = TrainAudioDatasetDCASE(
data_frame=data_frame_validation,
)

Expand Down Expand Up @@ -195,6 +138,53 @@ def collate_fn(
return (all_images, all_labels)




class AudioDatasetDCASE(Dataset):
    """Dataset over a DataFrame expected to hold 'feature' and 'category' columns.

    Categories are integer-encoded with a LabelEncoder. An existing
    ``label_dict`` (category name -> int) can be supplied so the encoding
    stays consistent with a previously built dataset; otherwise the encoder
    is fitted on this frame's categories and the mapping is derived from it.
    """

    def __init__(
        self,
        data_frame,
        label_dict=None,
    ):
        self.data_frame = data_frame
        self.label_encoder = LabelEncoder()
        if label_dict is not None:
            # Reuse the caller-provided mapping so label ids match a prior split.
            self.label_encoder.fit(list(label_dict.keys()))
            self.label_dict = label_dict
        else:
            self.label_encoder.fit(self.data_frame["category"])
            self.label_dict = dict(
                zip(
                    self.label_encoder.classes_,
                    self.label_encoder.transform(self.label_encoder.classes_),
                )
            )

    def __len__(self):
        return len(self.data_frame)

    def get_labels(self):
        """Return the integer-encoded label of every row, in row order.

        Single vectorized transform over the whole column instead of one
        sklearn call per row (the original looped with range(len(...))).
        """
        return list(self.label_encoder.transform(self.data_frame["category"]))

    def __getitem__(self, idx):
        input_feature = torch.Tensor(self.data_frame.iloc[idx]["feature"])
        label = self.data_frame.iloc[idx]["category"]

        # Encode label as integer
        label = self.label_encoder.transform([label])[0]

        return input_feature, label

    def get_label_dict(self):
        # Mapping of category name -> encoded integer, as built in __init__.
        return self.label_dict

class predictLoader():
def __init__(
self,
Expand Down
8 changes: 4 additions & 4 deletions dcase_fine_tune/FTtrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def train_model(
auto_select_gpus=True,
callbacks=[
pl.callbacks.LearningRateMonitor(logging_interval="step"),
pl.callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=patience),
pl.callbacks.EarlyStopping(monitor="train_loss", mode="min", patience=patience),
],
default_root_dir=root_dir,
enable_checkpointing=True
Expand Down Expand Up @@ -81,16 +81,16 @@ def main(cfg: DictConfig):
batch_size=cfg["trainer"]["batch_size"],
num_workers=cfg["trainer"]["num_workers"],
tensor_length=cfg["data"]["tensor_length"],
test_size=0.2,
test_size=cfg["trainer"]["test_size"],
min_sample_per_category=cfg["trainer"]["min_sample_per_category"])

# create the model object
num_target_classes = len(df["category"].unique())
print(num_target_classes)

model = BEATsTransferLearningModel(model_path=cfg["model"]["model_path"],
num_target_classes=num_target_classes,
lr=cfg["model"]["lr"])
lr=cfg["model"]["lr"],
ft_entire_network=cfg["model"]["ft_entire_network"])

train_model(model,
Loader,
Expand Down

0 comments on commit bdf162e

Please sign in to comment.