-
Notifications
You must be signed in to change notification settings - Fork 2
/
preprocessing.py
93 lines (64 loc) · 3.31 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from tensorflow.keras import layers as tfkl
from dsp_utils.core import resample, midi_to_hz, hz_to_midi
from ddsp.spectral_ops import F0_RANGE
from ddsp.spectral_ops import LD_RANGE
from utilities import at_least_3d
# TODO: downsampling methods for both inputs
class F0LoudnessPreprocessor(tfkl.Layer):
    """Resample and normalize the 'f0_hz' and 'loudness_db' features.

    Used in the Supervised Setting. The layer returns the original
    (un-resampled) features promoted to at least 3 dimensions, together with
    scaled versions computed from copies resampled to ``timesteps`` frames.

    Args:
        timesteps: Number of frames the features are resampled to before
            scaling.
        **kwargs: Forwarded to the Keras ``Layer`` constructor.
    """

    def __init__(self, timesteps=250, **kwargs):
        super().__init__(**kwargs)
        self.timesteps = timesteps

    def call(self, inputs):
        """Build the feature dict consumed downstream.

        Args:
            inputs: Mapping that provides the "f0_hz" and "loudness_db"
                entries.

        Returns:
            Dict with keys "f0_hz", "loudness_db" (inputs passed through
            ``at_least_3d``, not resampled) and "f0_scaled", "ld_scaled"
            (resampled and scaled).
        """
        raw_f0 = inputs["f0_hz"]
        raw_loudness = inputs["loudness_db"]

        # Only the scaled features are derived from the resampled copies;
        # the raw entries are returned at their original time resolution.
        f0_resampled = self.resample(raw_f0)
        ld_resampled = self.resample(raw_loudness)

        # Log-scale f0: Hz -> MIDI, then divide by F0_RANGE.
        # (Called f0_scaled in the original library.)
        f0_midi_scaled = hz_to_midi(f0_resampled) / F0_RANGE
        # Dividing by LD_RANGE and adding 1 maps [-LD_RANGE, 0] onto [0, 1].
        ld_scaled = (ld_resampled / LD_RANGE) + 1.0

        outputs = {
            "f0_hz": at_least_3d(raw_f0),
            "loudness_db": at_least_3d(raw_loudness),
        }
        # The decoder uses f0_scaled and ld_scaled in this form.
        outputs["f0_scaled"] = f0_midi_scaled
        outputs["ld_scaled"] = ld_scaled
        return outputs

    def resample(self, x):
        """Promote ``x`` to at least 3-D and linearly resample it to ``self.timesteps``."""
        return resample(at_least_3d(x), self.timesteps, method="linear")
# TODO: fix the Encoder_f
# Downsample the f and l using the F0LoudnessPreprocessor
# Remove this class
class LoudnessPreprocessor(tfkl.Layer):
    """Resample and scale the 'loudness_db' feature only.

    Args:
        timesteps: Number of frames the loudness is resampled to.
        **kwargs: Forwarded to the Keras ``Layer`` constructor.
    """

    def __init__(self, timesteps=250, **kwargs):
        super().__init__(**kwargs)
        self.timesteps = timesteps

    def call(self, inputs):
        """Return ``{"ld_scaled": ...}`` computed from ``inputs["loudness_db"]``.

        The loudness is first resampled to ``self.timesteps`` frames, then
        divided by LD_RANGE and shifted by +1 (maps [-LD_RANGE, 0] onto
        [0, 1]).
        """
        resampled_db = self.resample(inputs["loudness_db"])
        return {"ld_scaled": (resampled_db / LD_RANGE) + 1.0}

    def resample(self, x):
        """Promote ``x`` to at least 3-D and linearly resample it to ``self.timesteps``."""
        return resample(at_least_3d(x), self.timesteps, method="linear")
# TODO: delete??
class MidiF0LoudnessPreprocessor(tfkl.Layer):
    """Scale the loudness, convert scaled MIDI to Hz, and resample.

    Used in the Unsupervised setting.

    Args:
        timesteps: Number of frames "f0_scaled" and "loudness_db" are
            resampled to.
        **kwargs: Forwarded to the Keras ``Layer`` constructor.
    """

    def __init__(self, timesteps=1000, **kwargs):
        super().__init__(**kwargs)
        self.timesteps = timesteps

    def call(self, inputs):
        """Build the feature dict from "loudness_db" and "f0_scaled".

        Args:
            inputs: Mapping that provides the "loudness_db" and "f0_scaled"
                entries.

        Returns:
            Dict with keys "f0_hz", "loudness_db", "f0_scaled", "ld_scaled".
        """
        loudness_in = inputs["loudness_db"]
        f0_in = inputs["f0_scaled"]
        n_steps = self.timesteps

        # NOTE(review): these calls go to the module-level `resample`, not
        # `self.resample`, so no `at_least_3d` / explicit method="linear" is
        # applied here -- confirm whether that difference is intended.
        f0_scaled = resample(f0_in, n_steps)
        loudness_db = resample(loudness_in, n_steps)

        # Dividing by LD_RANGE and adding 1 maps [-LD_RANGE, 0] onto [0, 1].
        ld_scaled = (loudness_db / LD_RANGE) + 1.0

        # Undo the F0_RANGE scaling and convert MIDI to Hz for the synthesizer.
        f0_hz = midi_to_hz(f0_scaled * F0_RANGE)
        # NOTE(review): the target length is hard-coded to 1000 rather than
        # `self.timesteps`; they coincide only for the default -- confirm.
        f0_hz = resample(at_least_3d(f0_hz), 1000)

        return {
            "f0_hz": f0_hz,
            "loudness_db": loudness_db,
            "f0_scaled": f0_scaled,
            "ld_scaled": ld_scaled,
        }

    def resample(self, x):
        """Promote ``x`` to at least 3-D and linearly resample it to ``self.timesteps``.

        Not called by ``call`` in this class.
        """
        return resample(at_least_3d(x), self.timesteps, method="linear")