-
Notifications
You must be signed in to change notification settings - Fork 2
train_family_classifier.py
michele edited this page Oct 13, 2021
·
2 revisions
-
import configparser
- implements a basic configuration language for Python programs - configparser documentation -
import json
- json encoder and decoder - json documentation -
import os
- provides a portable way of using operating system dependent functionality - os documentation -
import sys
- system-specific parameters and functions - sys documentation -
import time
- provides various time-related functions - time documentation -
from copy import deepcopy
- creates a new object and recursively copies the original object elements - copy documentation
-
import baker
- easy, powerful access to Python functions from the command line - baker documentation -
import mlflow
- open source platform for managing the end-to-end machine learning lifecycle - mlflow documentation -
import numpy as np
- the fundamental package for scientific computing with Python - numpy documentation -
import psutil
- used for retrieving information on running processes and system utilization - psutil documentation -
import torch
- tensor library like NumPy, with strong GPU support - pytorch documentation -
from logzero import logger
- robust and effective logging for Python - logzero documentation -
from torch.optim.lr_scheduler import MultiStepLR
- the PyTorch multi-step learning rate scheduler - torch.optim.lr_scheduler.MultiStepLR documentation
from nets.Family_Classifier_net import Net as Family_Net
from nets.generators.fresh_generators import get_generator
train_network(fresh_ds_path, checkpoint_path, training_run, epochs, train_split_proportion, valid_split_proportion, test_split_proportion, batch_size, random_seed, workers)
(function, baker command) - Train a family classifier model on the fresh dataset for the malware family classification task.
-
fresh_ds_path
(arg) - Path of the directory where to find the fresh dataset (containing .dat files) -
checkpoint_path
(arg) - Path to the model checkpoint to load (default: 'None') -
training_run
(arg) - Training run identifier (default: 0) -
epochs
(arg) - How many epochs to train for (default: 25) -
train_split_proportion
(arg) - Train subsplit proportion value (default: 7) -
valid_split_proportion
(arg) - Valid subsplit proportion value (default: 1) -
test_split_proportion
(arg) - Test subsplit proportion value (default: 2) -
batch_size
(arg) - How many samples per batch to load (default: 250) -
random_seed
(arg) - If provided, seed random number generation with this value (default: None -> no seeding) -
workers
(arg) - How many workers (threads) should the dataloader use (default: 0 -> use multiprocessing.cpu_count())
__main__
(main) - Start baker so that the script can be run with its function names and parameters as the command line interface, using optparse-style options
root/
|
├── src/
|   |
|   ├── FreshDatasetBuilder/
|   |   |
|   |   ├── emberFeatures/
|   |   |   |
|   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   ├── features.py - - - - - - - - - - - - - - - (features python code 📖Wiki)
|   |   |   └── vectorize_features.py - - - - - - - - - - (vectorize features python code 📖Wiki)
|   |   |
|   |   ├── utils/
|   |   |   |
|   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   ├── fresh_dataset_utils.py - - - - - - - - - - (fresh dataset utils python code 📖Wiki)
|   |   |   └── malware_bazaar_api.py - - - - - - - - - - (malware bazaar API python code 📖Wiki)
|   |   |
|   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   └── build_fresh_dataset.py - - - - - - - - - - (fresh dataset builder python code 📖Wiki)
|   |
|   ├── Model/
|   |   |
|   |   ├── nets/
|   |   |   |
|   |   |   ├── generators/
|   |   |   |   |
|   |   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   |   ├── dataset.py - - - - - - - - - - - - - - - - (dataset (base) code 📖Wiki)
|   |   |   |   ├── dataset_alt.py - - - - - - - - - - - - - - (dataset_alt code 📖Wiki)
|   |   |   |   ├── fresh_dataset.py - - - - - - - - - - - - - (fresh_dataset code 📖Wiki)
|   |   |   |   ├── fresh_generators.py - - - - - - - - - - - (fresh_generators code 📖Wiki)
|   |   |   |   ├── generators.py - - - - - - - - - - - - - - (generators (base) code 📖Wiki)
|   |   |   |   ├── generators_alt1.py - - - - - - - - - - - - (generators_alt1 code 📖Wiki)
|   |   |   |   ├── generators_alt2.py - - - - - - - - - - - - (generators_alt2 code 📖Wiki)
|   |   |   |   └── generators_alt3.py - - - - - - - - - - - - (generators_alt3 code 📖Wiki)
|   |   |   |
|   |   |   ├── utils/
|   |   |   |   |
|   |   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   |   └── Net.py - - - - - - - - - - - - - - - - - - (Net code 📖Wiki)
|   |   |   |
|   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   ├── ALOHA_net.py - - - - - - - - - - - - - - - (ALOHA_net code 📖Wiki)
|   |   |   ├── Contrastive_Model_net.py - - - - - - - - - (Contrastive_Model_net code 📖Wiki)
|   |   |   ├── Family_Classifier_net.py - - - - - - - - - (Family_Classifier_net code 📖Wiki)
|   |   |   ├── MTJE_net.py - - - - - - - - - - - - - - - (MTJE_net code 📖Wiki)
|   |   |   ├── MTJE_net_cosine.py - - - - - - - - - - - - (MTJE_net_cosine code 📖Wiki)
|   |   |   └── MTJE_net_pairwise_distance.py - - - - - - (MTJE_net_pairwise_distance code 📖Wiki)
|   |   |
|   |   ├── utils/
|   |   |   |
|   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   ├── contrastive_utils.py - - - - - - - - - - - (contrastive_utils code 📖Wiki)
|   |   |   ├── opt_utils.py - - - - - - - - - - - - - - - (opt_utils code 📖Wiki)
|   |   |   ├── plot_utils.py - - - - - - - - - - - - - - (plot_utils code 📖Wiki)
|   |   |   └── ranking_metrics.py - - - - - - - - - - - - (ranking_metrics code 📖Wiki)
|   |   |
|   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   ├── evaluate.py - - - - - - - - - - - - - - - (evaluate code 📖Wiki)
|   |   ├── evaluate_contrastive.py - - - - - - - - - (evaluate_contrastive code 📖Wiki)
|   |   ├── evaluate_family_classifier.py - - - - - - (evaluate_family_classifier code 📖Wiki)
|   |   ├── evaluate_fresh.py - - - - - - - - - - - - (evaluate_fresh code 📖Wiki)
|   |   ├── gen3_speed_evaluation.py - - - - - - - - - (gen3_speed_evaluation code 📖Wiki)
|   |   ├── plot.py - - - - - - - - - - - - - - - - - (plot code 📖Wiki)
|   |   ├── plot_contrastive.py - - - - - - - - - - - (plot_contrastive code 📖Wiki)
|   |   ├── plot_family_classifier.py - - - - - - - - (plot_family_classifier code 📖Wiki)
|   |   ├── plot_fresh.py - - - - - - - - - - - - - - (plot_fresh code 📖Wiki)
|   |   ├── train.py - - - - - - - - - - - - - - - - - (train code 📖Wiki)
|   |   ├── train_contrastive.py - - - - - - - - - - - (train_contrastive code 📖Wiki)
|   |   └── train_family_classifier.py - - - - - - - - (train_family_classifier code 📖Wiki)
|   |
|   ├── Sorel20mDataset/
|   |   |
|   |   ├── generators/
|   |   |   |
|   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   ├── sorel_dataset.py - - - - - - - - - - - - - (sorel_dataset code 📖Wiki)
|   |   |   └── sorel_generators.py - - - - - - - - - - - (sorel_generators code 📖Wiki)
|   |   |
|   |   ├── utils/
|   |   |   |
|   |   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   |   ├── download_utils.py - - - - - - - - - - - - (download_utils code 📖Wiki)
|   |   |   └── preproc_utils.py - - - - - - - - - - - - - (preproc_utils code 📖Wiki)
|   |   |
|   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   ├── preprocess_dataset.py - - - - - - - - - - (preprocess_dataset code 📖Wiki)
|   |   ├── preprocess_ds_multi.py - - - - - - - - - - (preprocess_ds_multi code 📖Wiki)
|   |   └── sorel20mDownloader.py - - - - - - - - - - (sorel20mDownloader code 📖Wiki)
|   |
|   ├── utils/
|   |   |
|   |   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   |   └── workflow_utils.py - - - - - - - - - - - - - - - - - (workflow_utils code 📖Wiki)
|   |
|   ├── __init__.py - - - - - - - - - - - - - - - (python module init)
|   ├── config.ini - - - - - - - - - - - - - - - - (configuration file 📖Wiki)
|   └── main.py - - - - - - - - - - - - - - - - - (main code 📖Wiki)
|
├── MLproject - - - - - - - - - - - - - - - - (MLproject file)
├── README.md - - - - - - - - - - - - - - - - (README)
└── conda.yaml - - - - - - - - - - - - - - - - (conda yaml environment)