Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fine Tuning the emotion2vec model #39

Open
buanide opened this issue Jul 24, 2024 · 4 comments
Open

Fine Tuning the emotion2vec model #39

buanide opened this issue Jul 24, 2024 · 4 comments

Comments

@buanide
Copy link

buanide commented Jul 24, 2024

How can I fine tune the emotion2vec+large model on another dataset without using the process that you have used for iemocap?

I have tried to use four features and your bash script train.sh but I got this error:

File "C:\Users\doki_engbu\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 122, in spawn_main
exitcode = _main(fd, parent_sentinel)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\doki_engbu\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 132, in _main
self = reduction.pickle.load(from_parent)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_pickle.UnpicklingError: pickle data was truncated

.

@ddlBoJack
Copy link
Owner

We plan to support fine-tuning but no specific ETA. If you mean training a downstream model with features from emotion2vec+ large, you can just extract the features and train the model.

@buanide
Copy link
Author

buanide commented Aug 4, 2024

Thank you for your answer. What class should I instantiate to train the emotion2vec+large model?

@buanide
Copy link
Author

buanide commented Aug 5, 2024

Hi, I have tried to use the same process as you have done with iemocap but with Emodb. I got very bad results : Average WA: 34.2156862745098%; UA: 28.42471764346764%; F1: 29.36723608865055% . I have extracted features like this:

python extract_features.py --data . --model /home/stage2024/app/codes/models/emotion2vec/upstream --split file_paths --checkpoint /home/stage2024/app/codes/models/emotion2vec_base/emotion2vec_base.pt --save-dir . --layer 11

Can you have a look at my code, please?

I have done a cross-validation with an 80%/10%/10% split, as recommended in the paper:


import torch
import torch.optim as optim
import torch.nn as nn
from sklearn.model_selection import KFold
import numpy as np
import os
from pathlib import Path
import logging
import hydra
from data import SpeechDataset
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, random_split
import sys
# Put the directory four levels above this file on sys.path so that the
# `codes.*` imports further down can be resolved when the script is run
# directly (presumably this is the project root -- confirm against the layout).
# `__file__` (not `file`) is the module-level name holding this script's path.
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# Assuming these imports exist based on your provided code

from data import load_ssl_features
from model import BaseModel
from utils import train_one_epoch, validate_and_test
from codes.process import labels_num_EMODB
from omegaconf import DictConfig
from codes.process import load_EmoDB

#logger = logging.getLogger(name)

def extract_features_and_labels(indices, feats, sizes, offsets, labels):
    """Gather the items selected by ``indices`` into a compact, re-indexed subset.

    Item ``i`` occupies rows ``offsets[i] : offsets[i] + sizes[i]`` of the 2-D
    array ``feats``. The selected row slices are concatenated in the order
    given by ``indices`` and fresh offsets are computed so that the returned
    arrays describe the concatenated feature matrix on their own.

    Args:
        indices: Iterable of item indices to keep (order is preserved).
        feats: 2-D array holding all items' rows stacked along axis 0.
        sizes: Per-item row counts, indexable by item index.
        offsets: Per-item start rows into ``feats``, indexable by item index.
        labels: Per-item labels, indexable by item index.

    Returns:
        A tuple ``(feats, sizes, offsets, labels)`` for the subset: the
        concatenated feature rows, an array of the selected sizes, an array of
        start rows into the new feature matrix, and a list of labels.
    """
    indices = list(indices)
    if not indices:
        # np.concatenate rejects an empty sequence; return an empty but
        # well-formed subset instead of raising.
        return feats[:0], np.array([], dtype=int), np.array([], dtype=int), []

    feats_list = [feats[offsets[idx]:offsets[idx] + sizes[idx], :] for idx in indices]
    subset_sizes = np.array([sizes[idx] for idx in indices])
    # Exclusive cumulative sum: each item starts where the previous one ended.
    subset_offsets = np.cumsum(subset_sizes) - subset_sizes
    subset_labels = [labels[idx] for idx in indices]

    return np.concatenate(feats_list, axis=0), subset_sizes, subset_offsets, subset_labels

def create_data_loaders(dataset, train_indices, test_indices, val_indices, batch_size):
    """Build train, validation and test DataLoaders from one feature dictionary.

    ``dataset`` must provide the keys ``"feats"``, ``"sizes"``, ``"offsets"``
    and ``"labels"``. Each index set is carved out with
    ``extract_features_and_labels`` and wrapped in a ``SpeechDataset``; the
    dataset's own ``collator`` is used to assemble batches.

    Args:
        dataset: Mapping with the four keys listed above.
        train_indices: Item indices for the training split.
        test_indices: Item indices for the test split.
        val_indices: Item indices for the validation split.
        batch_size: Batch size shared by all three loaders.

    Returns:
        ``(train_loader, val_loader, test_loader)``. Only the training loader
        shuffles.
    """
    source = (dataset["feats"], dataset["sizes"], dataset["offsets"], dataset["labels"])

    def build_subset(split_indices):
        # Slice the shared arrays down to one split and wrap the result.
        sub_feats, sub_sizes, sub_offsets, sub_labels = extract_features_and_labels(
            split_indices, *source
        )
        return SpeechDataset(
            feats=sub_feats, sizes=sub_sizes, offsets=sub_offsets, labels=sub_labels
        )

    def build_loader(subset, shuffle):
        return DataLoader(
            subset, batch_size=batch_size, collate_fn=subset.collator, shuffle=shuffle
        )

    train_set = build_subset(train_indices)
    val_set = build_subset(val_indices)
    test_set = build_subset(test_indices)

    return (
        build_loader(train_set, True),
        build_loader(val_set, False),
        build_loader(test_set, False),
    )

@hydra.main(config_path='config', config_name='default.yaml')
def train_data(cfg: DictConfig):
    """Run k-fold cross-validation of ``BaseModel`` on pre-extracted EmoDB features.

    For every fold the held-out part is the test set, and the remainder is
    split again into train and validation (``test_size=0.111``). The model
    with the best validation WA is checkpointed to ``model_<fold>.pth`` in the
    current working directory and then evaluated on the fold's test set.
    Per-fold and averaged WA/UA/F1 are printed.

    Args:
        cfg: Hydra configuration. Reads ``cfg.dataset.database_path``,
            ``cfg.dataset.feat_path``, ``cfg.dataset.feat_length``,
            ``cfg.dataset.batch_size``, ``cfg.common.seed``,
            ``cfg.optimization.lr`` and ``cfg.optimization.epoch``.
    """
    # NOTE: only the data splits are seeded (via cfg.common.seed below); torch
    # itself is not seeded here, so model initialisation differs between runs.
    label_dict = {
        "anger": 0,
        "happiness": 1,
        "sadness": 2,
        "neutral": 3,
    }
    num_classes = len(label_dict)
    n_splits = 10

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.cuda.empty_cache()
    data_EMODB = load_EmoDB(cfg.dataset.database_path)
    dataset = load_ssl_features(data_EMODB, cfg.dataset.feat_path, cfg.dataset.feat_length, label_dict)

    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=cfg.common.seed)

    test_wa_avg, test_ua_avg, test_f1_avg = 0., 0., 0.

    for fold, (train_indices, test_indices) in enumerate(kfold.split(np.arange(len(dataset["labels"])))):
        print(f"------Now it's {fold+1}th fold------")

        # Split the non-test indices once more into train and validation.
        train_indices_fold, val_indices = train_test_split(
            train_indices, test_size=0.111, random_state=cfg.common.seed
        )
        train_loader, val_loader, test_loader = create_data_loaders(
            dataset, train_indices_fold, test_indices, val_indices, cfg.dataset.batch_size
        )

        model = BaseModel(input_dim=768, output_dim=num_classes)
        model = model.to(device)

        optimizer = optim.RMSprop(model.parameters(), lr=cfg.optimization.lr, momentum=0.9)
        # NOTE(review): the scheduler is stepped once per epoch below, so one
        # ramp-up of the cycle spans 10 epochs -- confirm this is intended.
        scheduler = optim.lr_scheduler.CyclicLR(
            optimizer, base_lr=cfg.optimization.lr, max_lr=1e-3, step_size_up=10
        )
        criterion = nn.CrossEntropyLoss()

        # Start below any reachable score so the first epoch always writes a
        # checkpoint; otherwise a run whose validation WA stays at 0 would
        # reach torch.load() with no file (or a stale one from an earlier run).
        best_val_wa = float("-inf")
        best_val_wa_epoch = 0

        save_dir = os.path.join(str(Path.cwd()), f"model_{fold+1}.pth")
        for epoch in range(cfg.optimization.epoch):
            train_loss = train_one_epoch(model, optimizer, criterion, train_loader, device)
            scheduler.step()

            val_wa, val_ua, val_f1 = validate_and_test(model, val_loader, device, num_classes=num_classes)

            if val_wa > best_val_wa:
                best_val_wa = val_wa
                best_val_wa_epoch = epoch
                torch.save(model.state_dict(), save_dir)

            print(f"Epoch {epoch+1}, Training Loss: {train_loss/len(train_loader):.6f}, Validation WA: {val_wa:.2f}%; UA: {val_ua:.2f}%; F1: {val_f1:.2f}%")

        # Evaluate the best-on-validation weights, not the last epoch's.
        ckpt = torch.load(save_dir, map_location=device)
        model.load_state_dict(ckpt, strict=True)
        test_wa, test_ua, test_f1 = validate_and_test(model, test_loader, device, num_classes=num_classes)
        print(f"The {fold+1}th Fold at epoch {best_val_wa_epoch + 1}, test WA {test_wa}%; UA {test_ua}%; F1 {test_f1}%")

        test_wa_avg += test_wa
        test_ua_avg += test_ua
        test_f1_avg += test_f1

    print(f"Average WA: {test_wa_avg/n_splits}%; UA: {test_ua_avg/n_splits}%; F1: {test_f1_avg/n_splits}%")

# Script entry point: run training only when executed directly, not on import.
if __name__ == "__main__":
    train_data()


@ddlBoJack
Copy link
Owner

Maybe using emotion2vec_base is a better way for feature representation, rather than the emotion2vec_plus series.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants