-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcfg.py
60 lines (53 loc) · 1.81 KB
/
cfg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np
# Training room simulation parameters
# room dimensions in meters
# Room layout
room_dim_train = [5.0, 5.0, 2.0]
xyz_min_train = [0.5, 0.5, 0.25]
xyz_max_train = [4.5, 4.5, 1.75]
# currently, the code only supports training and testing in the same room
room_dim_test = room_dim_train
xyz_min_test = xyz_min_train
xyz_max_test = xyz_max_train
num_mics = 7 # 6 mics + 1 source
# accuracy threshold in meters
t = 0.3
# Training hyperparams
seed = 42
batch_size = 64
epochs = 30
warmup_epochs = 5
lr = 0.001 # learning rate
wd = 0.1 # weight decay
lam_audio = 0.1 # 1. / (4 + 1) # should be in the range [0, 1]
lam_locs = 1.0
lam_tdoas = 0.1
# Model parameters
use_ngcc = 'pre-trained'
ngcc_path = 'experiments/ngcc_anechoic/model.pth'
patch_size = 2048
embed_dim = 256
depth = 4
num_heads = 4
decoder_embed_dim = 256
decoder_depth = 4
decoder_num_heads = 4
norm_pix_loss = True
drop = 0.0
attn_drop = 0.0
all_patch_loss = False
n_mic_keep = [5, 6] # keep [min, max] many microphones when doing random masking
n_audio_keep = [6, 6] # keep [min, max] many audio patches when doing random masking
toa_prob = 0.5 # probability of including source audio (time-of-arrival problem)
# training environment
anechoic_prob = 1.0 # the probability of anechoic room in each simulation
t60 = [0.3, 0.4] # during training, t60 will be drawn uniformly from this interval [seconds]
snr_interval = [0, 30] # during training, random noise is added with snr sampled in this interval [dB]
in_fs = 16000 # sampling rate of input data
out_fs = 16000 # sampling rate used in model
max_tau = np.sqrt(np.sum(np.array(room_dim_train)**2)) / 343 * out_fs
sig_len = int(2048) # length of snippet used for tdoa estimation
random_shift = True
remove_silence = True # remove silent parts of audio before simulation
repeats = 10
random_masking = 'random'