-
Notifications
You must be signed in to change notification settings - Fork 77
/
btp_dataset.py
55 lines (46 loc) · 2.34 KB
/
btp_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
class BtpDataset(Dataset):
"""Btp time series dataset."""
def __init__(self, csv_file, normalize=True):
"""
Args:
csv_file (string): path to csv file
normalize (bool): whether to normalize the data in [-1,1]
"""
df = pd.read_csv(csv_file, sep=";")
df['Timestamp'] = pd.to_datetime(df["data_column"].map(str) + " " + df["orario_column"], dayfirst=True)
df = df.drop(['data_column', 'orario_column'], axis=1).set_index("Timestamp")
btp_price = df.BTP_Price
data = torch.from_numpy(np.expand_dims(np.array([group[1] for group in btp_price.groupby(df.index.date)]), -1)).float()
self.data = self.normalize(data) if normalize else data
self.seq_len = data.size(1)
#Estimates distribution parameters of deltas (Gaussian) from normalized data
original_deltas = data[:, -1] - data[:, 0]
self.original_deltas = original_deltas
self.or_delta_max, self.or_delta_min = original_deltas.max(), original_deltas.min()
deltas = self.data[:, -1] - self.data[:, 0]
self.deltas = deltas
self.delta_mean, self.delta_std = deltas.mean(), deltas.std()
self.delta_max, self.delta_min = deltas.max(), deltas.min()
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
return self.data[idx]
def normalize(self, x):
"""Normalize input in [-1,1] range, saving statics for denormalization"""
self.max = x.max()
self.min = x.min()
return (2 * (x - x.min())/(x.max() - x.min()) - 1)
def denormalize(self, x):
"""Revert [-1,1] normalization"""
if not hasattr(self, 'max') or not hasattr(self, 'min'):
raise Exception("You are calling denormalize, but the input was not normalized")
return 0.5 * (x*self.max - x*self.min + self.max + self.min)
def sample_deltas(self, number):
"""Sample a vector of (number) deltas from the fitted Gaussian"""
return (torch.randn(number, 1) + self.delta_mean) * self.delta_std
def normalize_deltas(self, x):
return ((self.delta_max - self.delta_min) * (x - self.or_delta_min)/(self.or_delta_max - self.or_delta_min) + self.delta_min)