-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_loading.py
103 lines (78 loc) · 2.58 KB
/
data_loading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
data_loading.py
(0) MinMaxScaler: Min Max normalizer
(1) sine_data_generation: Generate sine dataset
(2) real_data_loading: Load and preprocess real data
- stock_data: https://finance.yahoo.com/quote/GOOG/history?p=GOOG
- energy_data: http://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
"""
## Necessary Packages
import numpy as np
import pandas as pd
import pickle
def MinMaxScaler(data):
    """Min-max normalize data along axis 0.

    Each entry has the axis-0 minimum subtracted and is then divided by the
    axis-0 range (max - min) plus a small epsilon.

    NOTE: later in this module the name `MinMaxScaler` is rebound by
    `from sklearn.preprocessing import MinMaxScaler`, so code that runs after
    that import sees the scikit-learn class rather than this function.

    Args:
      - data: original data (array-like accepted by np.min / np.max)

    Returns:
      - norm_data: normalized data
    """
    lowest = np.min(data, 0)
    spread = np.max(data, 0) - lowest
    # The 1e-7 term keeps the division defined when the range is zero.
    return (data - lowest) / (spread + 1e-7)
def sine_data_generation(no, seq_len, dim):
    """Generate a synthetic dataset of sine-wave time series.

    For every sample and every feature, a frequency and a phase are drawn
    independently from Uniform(0, 0.1) via np.random, and the signal
    sin(freq * t + phase) is evaluated at t = 0, 1, ..., seq_len - 1.
    The result is rescaled from [-1, 1] to [0, 1].

    Args:
      - no: the number of samples
      - seq_len: sequence length of the time-series
      - dim: feature dimensions

    Returns:
      - data: list of `no` arrays, each of shape (seq_len, dim)
    """

    def _draw_feature():
        # Draw frequency first, then phase, so the random stream is consumed
        # in a fixed order for each feature.
        freq = np.random.uniform(0, 0.1)
        phase = np.random.uniform(0, 0.1)
        return [np.sin(freq * step + phase) for step in range(seq_len)]

    samples = []
    for _ in range(no):
        # Rows are features here: (dim, seq_len)
        per_feature = [_draw_feature() for _ in range(dim)]
        # Flip to time-major layout: (seq_len, dim)
        series = np.asarray(per_feature).T
        # Map sine output from [-1, 1] onto [0, 1]
        samples.append((series + 1) * 0.5)
    return samples
from sklearn.preprocessing import MinMaxScaler
def real_data_loading(data_name, seq_len):
    """Load a real-world dataset and cut it into shuffled windows.

    The CSV for the requested dataset is read (header row skipped), the row
    order is reversed, every column is scaled with the `MinMaxScaler` imported
    from sklearn.preprocessing just above this function, and the result is
    sliced into overlapping windows of `seq_len` consecutive rows. The windows
    are returned in a random order drawn from np.random.permutation.

    Args:
      - data_name: stock or energy
      - seq_len: sequence length

    Returns:
      - data: preprocessed data (list of arrays, one per window).
    """
    assert data_name in ['stock', 'energy']

    if data_name == 'stock':
        csv_path = 'data/stock_data.csv'
    elif data_name == 'energy':
        csv_path = 'data/energy_data.csv'
    raw = np.loadtxt(csv_path, delimiter=",", skiprows=1)

    # Reverse the rows (original comment: "Reverse for chronological order")
    chronological = raw[::-1]
    scaled = MinMaxScaler().fit_transform(chronological)

    # Window start indices run over range(len - seq_len), so the window that
    # would end exactly on the final row is not produced.
    windows = [
        scaled[start:start + seq_len]
        for start in range(0, len(scaled) - seq_len)
    ]

    # Shuffle the windows by indexing them through a random permutation.
    order = np.random.permutation(len(windows))
    return [windows[pos] for pos in order]