from yacs.config import CfgNode as CN
_C = CN()
# random seed
_C.SEED = 0
# number of GPUs per node ("per node" matters when training across multiple servers)
_C.NUM_GPUS = 1
_C.VISIBLE_DEVICES = 0
_C.LOG_TIME = True
_C.SHEET_NAME = 'debug' # use 'debug' for debugging #! check
_C.RUN_IDX = -1
_C.DATA_LOADER = CN()
# number of data-loading workers per GPU
_C.DATA_LOADER.NUM_WORKERS = 4
_C.DATA_LOADER.PIN_MEMORY = True
_C.DATA_LOADER.PREFETCH_FACTOR = 2
_C.DATA_LOADER.PERSISTENT_WORKERS = True
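# Illustrative sketch (an assumption about how these settings are consumed;
# the helper name is hypothetical): the fields above map directly onto
# torch.utils.data.DataLoader keyword arguments.
def _example_make_loader(dataset, cfg, batch_size, shuffle, drop_last):
    from torch.utils.data import DataLoader
    # prefetch_factor is only valid when num_workers > 0 (true here: 4)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                      drop_last=drop_last,
                      num_workers=cfg.DATA_LOADER.NUM_WORKERS,
                      pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
                      prefetch_factor=cfg.DATA_LOADER.PREFETCH_FACTOR,
                      persistent_workers=cfg.DATA_LOADER.PERSISTENT_WORKERS)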
_C.DATA = CN()
_C.DATA.BASE_DIR = './data/'
_C.DATA.NAME = 'weather' #! check traffic
_C.DATA.N_VAR = 21 #! check
_C.DATA.SEQ_LEN = 96 # encoder input window length (look-back window)
_C.DATA.LABEL_LEN = 48 # length of the window fed to the decoder embedding (half of SEQ_LEN here; not needed in iTransformer)
_C.DATA.PRED_LEN = 96 # prediction length (the setting changed most often)
_C.DATA.FEATURES = 'M' # 'M': multivariate input and prediction; 'S': multivariate input, univariate prediction
_C.DATA.TIMEENC = 0 # 0 or 1, depending on how the timestamp is encoded in the data
_C.DATA.FREQ = 'h' # 'h' or 't'; temporal embedding granularity: 't' adds minute features, 'h' uses hour/weekday/day/month features (not needed in iTransformer)
_C.DATA.SCALE = "standard" # normalization applied during preprocessing: standard, min-max, none
_C.DATA.TRAIN_RATIO = 0.7 # the data is split in the order train, val, test
_C.DATA.TEST_RATIO = 0.15 # val ratio is implied: 1 - TRAIN_RATIO - TEST_RATIO
_C.DATA.DATE_IDX = 0 # index of the date column to drop from the raw data #! check
_C.DATA.TARGET_START_IDX = 0 # column index where the prediction target starts after dropping columns (target variables should come last)
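# Illustrative sketch (an assumption about the dataset code, not used by the
# config itself; the function name is hypothetical): how the window lengths
# and split ratios above are typically consumed in Informer-style loaders.
def _example_windows_and_splits(series, seq_len=96, label_len=48, pred_len=96,
                                train_ratio=0.7, test_ratio=0.15):
    n = len(series)
    n_train = int(n * train_ratio)
    n_test = int(n * test_ratio)
    n_val = n - n_train - n_test  # val share is whatever remains
    s = 0  # start index of one sample
    x_enc = series[s : s + seq_len]  # encoder input (look-back window)
    # decoder input: the last label_len steps of the look-back window,
    # followed by the pred_len horizon to be filled in
    x_dec = series[s + seq_len - label_len : s + seq_len + pred_len]
    y = series[s + seq_len : s + seq_len + pred_len]  # forecast target
    return (n_train, n_val, n_test), (x_enc, x_dec, y)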
_C.TRAIN = CN()
_C.TRAIN.ENABLE = False # whether to train in main.py #! check
_C.TRAIN.SPLIT = 'train'
_C.TRAIN.BATCH_SIZE = 64 #32 #! check
_C.TRAIN.SHUFFLE = True
_C.TRAIN.DROP_LAST = True # whether to drop the last batch if the dataset length is not divisible by batch_size
# _C.TRAIN.RESUME = '' # path to checkpoint to resume training
# _C.TRAIN.CHECKPOINT_PERIOD = 200 # epoch period to save checkpoints
_C.TRAIN.EVAL_PERIOD = 1 # epoch period to evaluate on a validation set
_C.TRAIN.PRINT_FREQ = 1 # iteration frequency to print progress meter
_C.TRAIN.BEST_METRIC_INITIAL = float("inf") # initial value for best model tracking (MSE or MAE)
_C.TRAIN.BEST_LOWER = True # whether lower metric is better
_C.TRAIN.COMPONENTS_TO_DISABLE = ['momentum'] # components to disable #! check
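# Illustrative sketch (assumed semantics of BEST_METRIC_INITIAL / BEST_LOWER;
# the function name is hypothetical): with BEST_LOWER = True, a checkpoint is
# "best" when its metric drops below the running best, starting from inf.
def _example_is_new_best(metric, best, lower_is_better=True):
    return metric < best if lower_is_better else metric > best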
_C.VAL = CN()
_C.VAL.SPLIT = 'val'
_C.VAL.BATCH_SIZE = _C.TRAIN.BATCH_SIZE # value copied at import time; later overrides of TRAIN.BATCH_SIZE do not propagate here #! check
_C.VAL.SHUFFLE = False
_C.VAL.DROP_LAST = False
_C.VAL.VIS = False
_C.TEST = CN()
_C.TEST.ENABLE = True # whether to test in main.py
_C.TEST.SPLIT = 'test'
_C.TEST.BATCH_SIZE = 1 #! check
_C.TEST.SHUFFLE = False
_C.TEST.DROP_LAST = False
_C.TEST.VIS_ERROR = True # whether to visualize prediction error
_C.TEST.VIS_DATA = False # whether to visualize the best and worst predictions
_C.TEST.VIS_DATA_NUM = 5 # number of best/worst cases to visualize
_C.TEST.PREDICTION_ERROR_DIR = "" # directory to load precomputed prediction errors from; usually left as ""
_C.TEST.PREDICTION_ERROR_TYPE = "MAE" # MAE or MSE
_C.TEST.APPLY_MOVING_AVERAGE = False # if True, smooth the error curve with a moving average to observe changes over time (relevant when checking TTA)
_C.TEST.MOVING_AVERAGE_WINDOW = 100
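# Illustrative sketch (assumed behaviour of the smoothing above; the function
# name is hypothetical): each output value is the mean of MOVING_AVERAGE_WINDOW
# consecutive per-step test errors.
def _example_moving_average(errors, window=100):
    import numpy as np
    kernel = np.ones(window) / window
    return np.convolve(np.asarray(errors, dtype=float), kernel, mode="valid")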
_C.MODEL_NAME = 'DLinear_momentum' #! check 'iTransformer_momentum4'
_C.MODEL = CN()
_C.MODEL.task_name = 'long_term_forecast'
_C.MODEL.seq_len = _C.DATA.SEQ_LEN
_C.MODEL.label_len = _C.DATA.LABEL_LEN # not needed in iTransformer
_C.MODEL.pred_len = _C.DATA.PRED_LEN
_C.MODEL.e_layers = 2
_C.MODEL.d_layers = 1 # not needed in iTransformer
_C.MODEL.factor = 1 # was 3; attention factor for ProbSparse (probabilistic) attention in Informer
_C.MODEL.num_kernels = 6 # for Inception
_C.MODEL.enc_in = _C.DATA.N_VAR
_C.MODEL.dec_in = _C.DATA.N_VAR # not needed in iTransformer
_C.MODEL.c_out = _C.DATA.N_VAR # not needed in iTransformer
_C.MODEL.d_model = 512 # embedding dimension
_C.MODEL.d_ff = 2048 # feedforward dimension d_model -> d_ff -> d_model
_C.MODEL.moving_avg = 25 # window size of the moving average used for series decomposition (Autoformer)
_C.MODEL.output_attention = False # whether the attention module's forward method also returns attention weights
_C.MODEL.dropout = 0.1 #0.1
_C.MODEL.n_heads = 8
_C.MODEL.activation = 'gelu'
_C.MODEL.METRIC_NAMES = ('MAE',)
_C.MODEL.LOSS_NAMES = ('MSE',)
_C.MODEL.FT_LOSS_NAMES = ('REG',)
# positional embedding encodes the position within the window;
# temporal embedding encodes calendar-time information
_C.MODEL.embed = 'timeF' # not needed in iTransformer
_C.MODEL.freq = 'h' # not needed in iTransformer
_C.MODEL.TRAIN_COMPONENTS_TO_DISABLE = _C.TRAIN.COMPONENTS_TO_DISABLE
_C.MODEL.momentum_params = [0.9, 0.99, 0.999] # TODO: momentum parameters
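# Illustrative sketch (an assumption about how momentum_params are used; the
# actual model code may differ): exponential moving averages maintained at
# several decay rates, one per coefficient above.
def _example_momentum_update(emas, x, momentum_params=(0.9, 0.99, 0.999)):
    # standard EMA update, ema <- m * ema + (1 - m) * x, for each coefficient
    return [m * ema + (1.0 - m) * x for m, ema in zip(momentum_params, emas)]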
_C.SOLVER = CN()
_C.SOLVER.START_EPOCH = 0
_C.SOLVER.MAX_EPOCH = 10
_C.SOLVER.OPTIMIZING_METHOD = ['adamW']
_C.SOLVER.BASE_LR = [0.001] # warmup end learning rate
_C.SOLVER.WEIGHT_DECAY = [0.01] #[0.01]
_C.SOLVER.LR_POLICY = ['cosine'] # if unset, the LR stays at BASE_LR; set to 'cosine' to enable warmup + cosine decay # cosine / decay
_C.SOLVER.COSINE_END_LR = [0.0]
_C.SOLVER.WARMUP_EPOCHS = [0.2] # number of linear-warmup epochs (may be fractional)
_C.SOLVER.WARMUP_START_LR = [0] # warmup start learning rate
_C.SOLVER.LR_DECAY_STEP = [1]
_C.SOLVER.LR_DECAY_RATE = [0.9]
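# Illustrative sketch (a common warmup + cosine schedule consistent with the
# fields above; the project's actual scheduler may differ):
def _example_lr_at_epoch(epoch, max_epoch=10, base_lr=0.001,
                         warmup_epochs=0.2, warmup_start_lr=0.0, end_lr=0.0):
    import math
    if warmup_epochs > 0 and epoch < warmup_epochs:
        # linear ramp from WARMUP_START_LR up to BASE_LR
        return warmup_start_lr + (base_lr - warmup_start_lr) * epoch / warmup_epochs
    # cosine decay from BASE_LR down to COSINE_END_LR over the remaining epochs
    t = (epoch - warmup_epochs) / max(max_epoch - warmup_epochs, 1e-8)
    return end_lr + 0.5 * (base_lr - end_lr) * (1.0 + math.cos(math.pi * t))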
# directory to save result txt file
_C.RESULT_DIR = None
_C.TRAIN.CHECKPOINT_DIR = None # directory to save checkpoints
_C.TRAIN.FINETUNE = CN()
_C.TRAIN.FINETUNE.ENABLE = True # whether to finetune in main.py #! check
_C.TRAIN.FINETUNE.RESUME_DIR = [0, '00000-000000'] # [seed, timestamp] identifying the checkpoint to resume from #! check
_C.TRAIN.FINETUNE.SPLIT = 'train'
_C.TRAIN.FINETUNE.BATCH_SIZE = 1 #! check
_C.TRAIN.FINETUNE.ACCUM_SIZE = 64 #! loss/gradient accumulation over this many steps, emulating batched training (see the sketch below)
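# Illustrative sketch (assumed semantics of ACCUM_SIZE; names hypothetical):
# gradient accumulation with batch size 1, stepping every ACCUM_SIZE samples.
def _example_accumulation_loop(model, optimizer, loader, loss_fn, accum_size=64):
    optimizer.zero_grad()
    for i, (x, y) in enumerate(loader):
        # scale the loss so accumulated gradients match a batch-mean gradient
        loss = loss_fn(model(x), y) / accum_size
        loss.backward()  # gradients accumulate across iterations
        if (i + 1) % accum_size == 0:
            optimizer.step()
            optimizer.zero_grad()
    # a final partial accumulation is dropped here for brevity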
_C.TRAIN.FINETUNE.BPTT = True
_C.TRAIN.FINETUNE.LEARN_MOMENTUM = False
_C.TRAIN.FINETUNE.REGULARIZATION = None # 'ewc'
_C.TRAIN.FINETUNE.REG_COEFF = 1.
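# Illustrative sketch (the standard EWC penalty; whether the project follows
# this form exactly is an assumption, and the names are hypothetical): a
# quadratic penalty anchoring parameters to their pre-finetuning values,
# weighted by Fisher information and scaled by REG_COEFF.
def _example_ewc_penalty(params, anchor_params, fisher, reg_coeff=1.0):
    penalty = 0.0
    for p, p0, f in zip(params, anchor_params, fisher):
        penalty = penalty + (f * (p - p0) ** 2).sum()
    return reg_coeff * penalty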
_C.TRAIN.FINETUNE.SHUFFLE = False
_C.TRAIN.FINETUNE.DROP_LAST = False # whether to drop the last batch if the dataset length is not divisible by batch_size
_C.TRAIN.FINETUNE.CHECKPOINT_DIR = None # './results/' # directory to save checkpoints
_C.TRAIN.FINETUNE.EVAL_PERIOD = 1 # epoch period to evaluate on a validation set
_C.TRAIN.FINETUNE.PRINT_FREQ = 100 # iteration frequency to print progress meter
_C.TRAIN.FINETUNE.COMPONENTS_TO_UPDATE_A = ['learnable_matrix'] # components whose parameters are updated #! check
_C.TRAIN.FINETUNE.COMPONENTS_TO_UPDATE_B = ['momentum_params_learnable'] # components whose parameters are updated #! check
_C.TRAIN.FINETUNE.COMPONENTS_TO_DISABLE = [] # components to disable #! check
_C.MODEL.FINETUNE_COMPONENTS_TO_DISABLE = _C.TRAIN.FINETUNE.COMPONENTS_TO_DISABLE
_C.SOLVER_FT = CN()
_C.SOLVER_FT.START_EPOCH = 0
_C.SOLVER_FT.MAX_EPOCH = 1
_C.SOLVER_FT.OPTIMIZING_METHOD = ['adamW', 'adamW', 'adamW']
_C.SOLVER_FT.BASE_LR = [0.01 * 0.001, 0.01, 0.01 * 0.001] # warmup end learning rate (one entry per optimizer)
_C.SOLVER_FT.WEIGHT_DECAY = [0.01 * 0.01, 0.0, 0.0]
_C.SOLVER_FT.WARM_UP = 1000
# parameters for sgd only
# _C.SOLVER_FT.MOMENTUM = [0, 0]
# _C.SOLVER_FT.DAMPENING = [0.0, 0.0] # reduce the momentum's effect at the end of training. worth considering
# _C.SOLVER_FT.NESTEROV = [True, True]
_C.SOLVER_FT.LR_POLICY = None # if unset, the LR stays at BASE_LR; set to 'cosine' for warmup + cosine decay
_C.SOLVER_FT.COSINE_END_LR = [0.0, 0.0, 0.0]
_C.SOLVER_FT.WARMUP_EPOCHS = [0, 0, 0] # linear warmup epoch
_C.SOLVER_FT.WARMUP_START_LR = [0, 0, 0] # warmup start learning rate
_C.SOLVER_FT.LR_DECAY_STEP = [1, 1, 1]
_C.SOLVER_FT.LR_DECAY_RATE = [0.9, 0.9, 0.9]
_C.VAL.FINETUNE = CN()
_C.VAL.FINETUNE.SPLIT = 'val'
_C.VAL.FINETUNE.BATCH_SIZE = 1
_C.VAL.FINETUNE.SHUFFLE = False
_C.VAL.FINETUNE.DROP_LAST = False
_C.VAL.FINETUNE.CUT_FRONT = 2
#! Currently the momentum code uses multiple optimizers, but the wandb logging has not yet been updated to support them
_C.WANDB = CN()
_C.WANDB.ENABLE = False # wandb on/off #! check
_C.WANDB.PROJECT = 'Momentum'
_C.WANDB.NAME = '' #! check
_C.WANDB.JOB_TYPE = 'train' # train or eval
_C.WANDB.NOTES = '' # a description of this run
_C.WANDB.DIR = './'
_C.WANDB.VIS_TRAIN_SCORE = False
_C.WANDB.VIS_TEST_SCORE = False
_C.WANDB.VIS_TEST_DATA = False # visualize raw data
_C.WANDB.VIS_TRAIN_TEST_HISTOGRAM = False
def get_cfg_defaults():
    """Return a clone of the default config so callers can modify it safely."""
    return _C.clone()
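if __name__ == "__main__":
    # Illustrative usage (the override values are arbitrary examples): clone
    # the defaults, apply overrides (a YAML file could also be merged via
    # cfg.merge_from_file(path)), then freeze so later code cannot mutate cfg.
    cfg = get_cfg_defaults()
    cfg.merge_from_list(["DATA.PRED_LEN", 192, "TRAIN.ENABLE", True])
    cfg.freeze()
    print(cfg.DATA.PRED_LEN)  # -> 192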