Skip to content

Commit

Permalink
Fixing bugs and adding features. lol.
Browse files Browse the repository at this point in the history
  • Loading branch information
brannondorsey committed May 16, 2017
1 parent 455e3e5 commit 10e9de3
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 44 deletions.
12 changes: 7 additions & 5 deletions midai/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ def defaults():
args = dict()

# MISC ---------------------------------------------------------------------
args['midai_root'] = os.path.join(os.path.dirname(__file__), '..')
args['mode'] = 'develop' # production
args['midai_root'] = os.path.join(os.path.dirname(__file__), '..')
args['mode'] = 'develop' # production
args['num_threads'] = 8

# MODEL --------------------------------------------------------------------
args['model'] = 'TimeSeqModel'
args['tasks'] = ['train', 'generate']
args['tasks'] = ['train', 'generate'] # ['train', 'generate']
args['load_search_path'] = os.path.join(args['midai_root'], 'trained_models', args['mode'], args['model'])
args['load'] = 'recent' # best, recent, path
args['load'] = None # None, 'best', 'recent', path
args['data_dir'] = os.path.join(args['midai_root'], 'data', 'collections', '2')

# TRAIN --------------------------------------------------------------------
Expand All @@ -25,7 +26,7 @@ def defaults():
args['note_representation'] = 'absolute' # relative
args['window_size'] = 20
args['batch_size'] = 32
args['num_epochs'] = 30
args['num_epochs'] = 10
args['use_generator'] = True
args['val_split'] = 0.2
args['glove_dimension'] = 25
Expand Down Expand Up @@ -55,6 +56,7 @@ def defaults():
# GENERATE -----------------------------------------------------------------
args['num_files_to_generate'] = 10
args['generated_file_length'] = 500
args['seed'] = os.path.join(args['midai_root'], 'data', 'seeds', '001.mid')

return args

Expand Down
50 changes: 34 additions & 16 deletions midai/data/input.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os, pdb
import os, pudb
import numpy as np
from midai.utils import clamp, map_range, log
from midai.data.utils import parse_midi, filter_monophonic
from midai.data.utils import parse_midi, filter_monophonic, split_data
from multiprocessing import Pool as ThreadPool

#TODO support model_class param w/ vals 'time-sequence' and 'event'
Expand All @@ -15,7 +15,24 @@ def from_midi(midi_paths=None,
shuffle=False,
num_threads=1,
glove_dimension=10):
pass
if num_threads > 1:
pool = ThreadPool(num_threads)
parsed = pool.map(parse_midi, midi_paths)
else:
parsed = list(map(parse_midi, midi_paths))

data = _windows_from_monophonic_instruments(parsed, window_size,
note_representation, encoding,
glove_dimension)

# convert data from (X0-n, y0-n) to ((X0, y0), (X1, y1), ...) format
data = list(zip(data[0], data[1]))

if shuffle:
data = np.random.permutation(data)

train, val = split_data(data, val_split)
return np.asarray(list(zip(*train))).tolist(), np.asarray(list(zip(*val))).tolist()

def from_midi_generator(midi_paths=None,
raw_midi=None,
Expand All @@ -28,17 +45,16 @@ def from_midi_generator(midi_paths=None,
num_threads=1,
glove_dimension=10):

val_split_index = int(float(len(midi_paths)) * val_split)
train_paths = midi_paths[0:val_split_index]
val_paths = midi_paths[val_split_index:]
train_paths, val_paths = split_data(midi_paths, val_split)
pudb.set_trace()

train_gen = _get_data_generator(midi_paths, raw_midi, note_representation,
train_gen = _get_data_generator(train_paths, note_representation,
encoding, window_size, batch_size,
val_split, shuffle, num_threads, glove_dimension)
shuffle, num_threads, glove_dimension)

val_gen = _get_data_generator(midi_paths, raw_midi, note_representation,
val_gen = _get_data_generator(val_paths, note_representation,
encoding, window_size, batch_size,
val_split, shuffle, num_threads, glove_dimension)
shuffle, num_threads, glove_dimension)
return train_gen, val_gen

def one_hot_2_glove_embedding(X):
Expand All @@ -60,7 +76,6 @@ def one_hot_2_glove_embedding(X):
buf.append(_glove_embeddings[index])
return buf

_glove_embeddings = None
def load_glove_embeddings(dim, glove_path):
# skip if done
if _glove_embeddings:
Expand Down Expand Up @@ -90,15 +105,13 @@ def load_glove_embeddings(dim, glove_path):
_glove_embeddings[i] = np.delete(_glove_embeddings[i], 0)

log('loaded GloVe vector embeddings with dimension: {}'.format(dim), 'VERBOSE')

_glove_embeddings = None

def _get_data_generator(midi_paths,
raw_midi,
note_representation,
encoding,
window_size,
batch_size,
val_split,
shuffle,
num_threads,
glove_dimension):
Expand Down Expand Up @@ -160,6 +173,8 @@ def _windows_from_monophonic_instruments(midi,
if m is not None:
melody_instruments = filter_monophonic(m.instruments, 1.0)
for instrument in melody_instruments:
# WARNING: This is an event model style check but it is also
# currently being applied to the time sequence model.
if len(instrument.notes) > window_size:
windows = _encode_windows(instrument,
window_size,
Expand All @@ -168,6 +183,9 @@ def _windows_from_monophonic_instruments(midi,
for w in windows:
X.append(w[0])
y.append(w[1])
else:
# log('Fewer notes than window_size permits, skipping instrument', 'WARNING')
pass
return [np.asarray(X), np.asarray(y)]

def _encode_windows(pm_instrument, window_size, note_representation, encoding, glove_dimension):
Expand Down Expand Up @@ -196,7 +214,7 @@ def _encode_windows(pm_instrument, window_size, note_representation, encoding, g
# expects pm_instrument to be monophonic.
def _encode_window_absolute_one_hot(pm_instrument, window_size):

roll = np.copy(pm_instrument.get_piano_roll(fs=4).T)
roll = np.copy(pm_instrument.get_piano_roll(fs=16).T)

# trim beginning silence
summed = np.sum(roll, axis=1)
Expand All @@ -223,7 +241,7 @@ def _encode_window_absolute_one_hot(pm_instrument, window_size):

def _encode_window_relative_one_hot(pm_instrument, window_size):

roll = np.copy(pm_instrument.get_piano_roll(fs=4).T)
roll = np.copy(pm_instrument.get_piano_roll(fs=16).T)

# trim beginning silence
summed = np.sum(roll, axis=1)
Expand Down
6 changes: 6 additions & 0 deletions midai/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,9 @@ def save_midi(pm_midis, folder):
file = os.path.join(folder, '{}.mid'.format(str(_max + i + 1).rjust(4, '0')))
midi.write(file)
log('saved {} to disk'.format(file), 'VERBOSE')

def split_data(data, split):
    """Partition a sequence positionally into (train, val) subsets.

    Args:
        data: any sliceable sequence (list, numpy array, ...).
        split: fraction of the data, taken from the END, reserved for
            validation — e.g. 0.2 keeps the last 20% as ``val``.

    Returns:
        ``(train, val)`` tuple of slices of ``data``. No shuffling is
        performed; the split is purely positional, so shuffle beforehand
        if ordering matters.
    """
    # len(data) is already an int, so the redundant float() cast from the
    # original is dropped; int() truncates the float product toward zero.
    split_index = int(len(data) * (1.0 - split))
    return data[:split_index], data[split_index:]
56 changes: 42 additions & 14 deletions midai/models/TimeSeqModel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os, pdb
import os, pudb
import midai
from midai.models.base import KerasRNNModel
from midai.utils import log
Expand All @@ -21,7 +21,9 @@ def get_model(args):
else:
model.load(args['load'], best=True, recent=False)
else: # create a new model
model.create_experiment_dir(midai_root=args['midai_root'])
model.create_experiment_dir(args['note_representation'],
args['data_encoding'],
midai_root=args['midai_root'])

if not model.ready:
model.architecture(args['architecture'])
Expand All @@ -33,23 +35,51 @@ def get_model(args):
def get_data(args):

midi_paths = midai.data.utils.get_midi_paths(args['data_dir'])

kwargs = {
"midi_paths": midi_paths,
"note_representation": args['note_representation'],
"encoding": args['data_encoding'],
"window_size": args['window_size'],
"val_split": args['val_split'],
"glove_dimension": args['glove_dimension'],
"num_threads": args['num_threads']
}

if args['use_generator']:
_train, _val = \
midai.data.input.from_midi_generator(midi_paths=midi_paths,
note_representation=args['note_representation'],
encoding=args['data_encoding'],
window_size=args['window_size'],
val_split=args['val_split'],
glove_dimension=args['glove_dimension'])
_train, _val = midai.data.input.from_midi_generator(**kwargs)
else:
_train, _val = midai.data.input.from_midi(**kwargs)
return (_train, _val), midi_paths


def train(args, model, data, num_midi_files):
model.train(num_midi_files=num_midi_files, train_gen=data[0], val_gen=data[1],
batch_size=args['batch_size'], num_epochs=args['num_epochs'])

kwargs = dict()
kwargs['num_midi_files'] = num_midi_files
kwargs['num_epochs'] = args['num_epochs']
kwargs['batch_size'] = args['batch_size']

if args['use_generator']:
kwargs['train_gen'] = data[0]
kwargs['val_gen'] = data[1]
else:
kwargs['train_data'] = data[0]
kwargs['val_data'] = data[1]
model.train(**kwargs)

def generate(args, model, data):
X, _ = next(data[1])
if args['seed']:
_, val = midai.data.input.from_midi(midi_paths=[args['seed']],
note_representation=args['note_representation'],
encoding=args['data_encoding'],
window_size=args['window_size'],
val_split=args['val_split'],
glove_dimension=args['glove_dimension'])
X = val[0] # use only the first window
else:
X, _ = next(data[1])

output = model.generate(X, args['window_size'],
args['generated_file_length'],
args['num_files_to_generate'],
Expand All @@ -59,8 +89,6 @@ def generate(args, model, data):

def run(args):

args['tasks'] = ['generate']

model = get_model(args)
data, paths = get_data(args)

Expand Down
9 changes: 5 additions & 4 deletions midai/models/base/KerasModel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os, glob, time, random, copy, pdb
import os, glob, time, random, copy, pudb
import numpy as np
import midai.data as data
from midai.models.base import Model
Expand Down Expand Up @@ -182,10 +182,11 @@ def _train_model(model, callbacks):
kwargs['validation_steps'] = num_midi_files * 0.2 * magic_number / batch_size
history = model.fit_generator(**kwargs)
else:
kwargs['x'] = train_data
kwargs['y'] = val_data
kwargs['x'] = train_data[0]
kwargs['y'] = train_data[1]
kwargs['validation_data'] = val_data
kwargs['batch_size'] = batch_size
kwargs['shuffle'] = False
pudb.set_trace()
history = model.fit(**kwargs)

log('Finished training model in {:.2f} seconds'.format(time.time() - start_time), 'NOTICE')
Expand Down
16 changes: 11 additions & 5 deletions midai/models/base/Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,21 @@ def init(self):

# creates an experiment directory structure and returns the name
# of the created directory
def create_experiment_dir(self, experiment_dir=None, midai_root=None, mode='develop'):
def create_experiment_dir(self,
note_representation,
encoding,
experiment_dir=None,
midai_root=None,
mode='develop'):
log('creating experiment directory', 'VERBOSE')
if not experiment_dir:

if not midai_root:
raise Exception('midai_root not set and experiment_dir'\
' not provided as an argument')

path = [midai_root, 'trained_models', mode, self.name]
path = [midai_root, 'trained_models', mode, self.name,
'{}_{}'.format(note_representation, encoding)]
parent_dir = os.path.join(*path)

if not os.path.exists(parent_dir):
Expand Down Expand Up @@ -77,17 +83,17 @@ def train(self, train_data=None, val_data=None, train_gen=None, val_gen=None):
if not self.ready:
raise Exception('train called before model ready is True')

if not train_data and not val_data:
if train_data is None and val_data is None:
if not train_gen or not val_gen:
raise Exception('If train_data and val_data are omitted '\
'then you must provide train_gen and val_gen')

if not train_gen and not val_gen:
if not train_data or not val_data:
if train_data is None or val_data is None:
raise Exception('If train_gen and val_gen are omitted '\
'then you must provide train_data and val_data')

if train_data and val_data and train_gen and val_gen:
if train_data is not None and val_data is not None and train_gen and val_gen:
raise Exception('You cannot use both data and generators for training')

def evaluate(self):
Expand Down
Empty file.
Empty file.
Empty file.
Empty file.

0 comments on commit 10e9de3

Please sign in to comment.