-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathparams.py
63 lines (51 loc) · 1.21 KB
/
params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import tensorflow as tf
# PARAMETERS
# Audio / spectrogram settings.
# Millisecond figures below assume HOP_LENGTH and WIN_LENGTH are sample counts
# at SAMPLE_RATE (16 kHz -> 16 samples per millisecond).
SAMPLE_RATE = 16000  # Hz; passed as `sample_rate` to the mel filterbank below
N_FFT = 1024  # yields N_FFT // 2 + 1 = 513 spectrogram bins
HOP_LENGTH = 320  # 320 / 16000 s = 20 ms
WIN_LENGTH = 640  # 640 / 16000 s = 40 ms (two hops)
N_MEL_CHANNELS = 128
MEL_FMIN = 0.0
MEL_FMAX = SAMPLE_RATE // 2  # half the sample rate; `//` already returns an int
CLIP_VALUE_MIN = 1e-5
CLIP_VALUE_MAX = 1e8
N_IMG_CHANNELS = 1  # the original note lists 3 as the alternative value
# Weight matrix that maps linear-frequency spectrogram bins to mel bins.
# NOTE: this is evaluated once, when the module is imported.
MEL_BASIS = tf.signal.linear_to_mel_weight_matrix(
    num_mel_bins=N_MEL_CHANNELS,
    num_spectrogram_bins=N_FFT // 2 + 1,
    sample_rate=SAMPLE_RATE,
    lower_edge_hertz=MEL_FMIN,
    upper_edge_hertz=MEL_FMAX,
)
# Training / model hyperparameters.
# Each name is assigned exactly once, with the value that was in effect after
# the earlier chain of overriding assignments (batch_size 64 -> 64 -> 16,
# widths [32, 64, 96, 128] -> [64, 128, 256, 512], block_depth 2 -> 4).

# data
dataset_repetitions = 5
num_epochs = 1  # train for at least 50 epochs for good results
mel_spec_size = (128, 128)
duration_sample = 40960  # double this if the spectrogram size is 256
# NOTE(review): 40960 == 128 * 320, presumably frames * HOP_LENGTH — confirm.
duration_track = 480000

# KID = Kernel Inception Distance
kid_image_size = 75
kid_diffusion_steps = 10
plot_diffusion_steps = 20

# sampling
min_signal_rate = 0.02
max_signal_rate = 0.95

# architecture
embedding_dims = 32
embedding_max_frequency = 1000.0
widths = [64, 128, 256, 512]
# One flag per entry in `widths`; keep both lists the same length.
has_attention = [False, False, True, True]
block_depth = 4

# optimization
batch_size = 16
ema = 0.999
learning_rate = 2e-5
weight_decay = 1e-4