Multistem #509

Open · wants to merge 13 commits into main
3 changes: 1 addition & 2 deletions .gitignore
@@ -58,5 +58,4 @@ dataset/*
 */notes
 .vscode/
 /notebooks
-/local_scripts
-/notes
+/local_scripts
Binary file added assets/drum_loop.wav
Binary file not shown.
Binary file added assets/pop_song.wav
Binary file not shown.
6 changes: 6 additions & 0 deletions audiocraft/grids/musicgenstem/__init__.py
@@ -0,0 +1,6 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""MusicGenStem grids."""
93 changes: 93 additions & 0 deletions audiocraft/grids/musicgenstem/_explorers.py
@@ -0,0 +1,93 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import typing as tp

import treetable as tt

from .._base_explorers import BaseExplorer


class LMExplorer(BaseExplorer):
    eval_metrics: tp.List[str] = []

    def stages(self) -> tp.List[str]:
        return ['train', 'valid']

    def get_grid_metrics(self):
        """Return the metrics that should be displayed in the tracking table."""
        return [
            tt.group(
                'train',
                [
                    tt.leaf('epoch'),
                    tt.leaf('duration', '.1f'),  # duration in minutes
                    tt.leaf('ping'),
                    tt.leaf('ce', '.4f'),  # cross entropy
                    tt.leaf('ppl', '.3f'),  # perplexity
                ],
                align='>',
            ),
            tt.group(
                'valid',
                [
                    tt.leaf('ce', '.4f'),
                    tt.leaf('ppl', '.3f'),
                    tt.leaf('best_ppl', '.3f'),
                ],
                align='>',
            ),
        ]

    def process_sheep(self, sheep, history):
        parts = super().process_sheep(sheep, history)

        track_by = {'ppl': 'lower'}  # values should be in ['lower', 'higher']
        best_metrics = {k: (1 if v == 'lower' else -1) * float('inf') for k, v in track_by.items()}

        def comparator(mode, a, b):
            return a < b if mode == 'lower' else a > b

        for metrics in history:
            for key, sub in metrics.items():
                for metric in track_by:
                    # for the validation set, keep track of best metrics (ppl in this example)
                    # this is so we can conveniently compare metrics between runs in the grid
                    if key == 'valid' and metric in sub and comparator(
                        track_by[metric], sub[metric], best_metrics[metric]
                    ):
                        best_metrics[metric] = sub[metric]

        if 'valid' in parts:
            parts['valid'].update({f'best_{k}': v for k, v in best_metrics.items()})
        return parts


class GenerationEvalExplorer(BaseExplorer):
    eval_metrics: tp.List[str] = []

    def stages(self) -> tp.List[str]:
        return ['evaluate']

    def get_grid_metrics(self):
        """Return the metrics that should be displayed in the tracking table."""
        return [
            tt.group(
                'evaluate',
                [
                    tt.leaf('epoch', '.3f'),
                    tt.leaf('duration', '.1f'),
                    tt.leaf('ping'),
                    tt.leaf('ce', '.4f'),
                    tt.leaf('ppl', '.3f'),
                    tt.leaf('fad', '.3f'),
                    tt.leaf('kld', '.3f'),
                    tt.leaf('text_consistency', '.3f'),
                    tt.leaf('chroma_cosine', '.3f'),
                ],
                align='>',
            ),
        ]
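Aside: the best-metric bookkeeping in `process_sheep` above boils down to a scan for the lowest validation perplexity across the run's history. A standalone toy version of that logic (illustration only, not code from the PR):

history = [
    {'train': {'ppl': 12.0}, 'valid': {'ppl': 11.0}},
    {'train': {'ppl': 9.5}, 'valid': {'ppl': 10.2}},
    {'train': {'ppl': 8.8}, 'valid': {'ppl': 10.6}},
]
best_ppl = float('inf')
for metrics in history:
    valid = metrics.get('valid', {})
    if 'ppl' in valid and valid['ppl'] < best_ppl:
        best_ppl = valid['ppl']
print(best_ppl)  # 10.2, surfaced as `best_ppl` in the grid table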
37 changes: 37 additions & 0 deletions audiocraft/grids/musicgenstem/musicgenstem_base_32khz.py
@@ -0,0 +1,37 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from ._explorers import LMExplorer
from ...environment import AudioCraftEnvironment


@LMExplorer
def explorer(launcher):
    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=64, partition=partitions).bind_(label='64gpus')

    # replace this by the desired music dataset
    launcher.bind_(dset='internal/music_400k_32khz')

    # bind the solver configuration in place so that the sub-launcher
    # below inherits it
    launcher.bind_({'solver': 'musicgenstem/musicgenstem_base_32khz',
                    'autocast': False,
                    'fsdp.use': True,
                    'model/lm/model_scale': 'medium',
                    'optim.optimizer': 'adamw',
                    'optim.lr': 1e-4,
                    'generate.every': 25,
                    'dataset.generate.num_samples': 64,
                    })

    sub = launcher.bind()

    sub({'transformer_lm.n_q': 6,
         'codebooks_pattern.delay.delays': [0, 0, 0, 1, 2, 3]})

    sub({'transformer_lm.n_q': 7,
         'codebooks_pattern.delay.delays': [0, 1, 0, 0, 1, 2, 3],
         'multistem_compression_model_checkpoints.pretrained': 'facebook/encodec_32_khz_bass_2_drums_1_other_4'})
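Aside: the `codebooks_pattern.delay.delays` lists stagger the parallel codebook streams so that coarse tokens are generated a few steps ahead of the finer ones that depend on them. A rough standalone sketch of the staggering (illustration only; AudioCraft's actual delay pattern provider also handles special tokens and un-delaying at decode time):

import typing as tp

def apply_delays(tokens: tp.List[tp.List[int]],
                 delays: tp.List[int]) -> tp.List[tp.List[tp.Optional[int]]]:
    """Shift codebook k right by delays[k]: step t reads tokens[k][t - delays[k]]."""
    T = len(tokens[0])
    return [[None] * d + stream[:T - d] for stream, d in zip(tokens, delays)]

# with delays [0, 0, 0, 1, 2, 3], the last three codebooks lag by 1 to 3 steps
streams = [[10 * k + t for t in range(5)] for k in range(6)]
print(apply_delays(streams, [0, 0, 0, 1, 2, 3])[3])  # [None, 30, 31, 32, 33]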
85 changes: 85 additions & 0 deletions (new file under audiocraft/grids/musicgenstem/; filename not shown in the diff)
@@ -0,0 +1,85 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

"""
Evaluation with objective metrics for the pretrained MusicGen models.
This grid takes signatures from the training grid and runs an evaluation-only stage.

When running the grid for the first time, please use:
REGEN=1 dora grid musicgen.musicgen_pretrained_32khz_eval
and re-use the REGEN=1 option when the grid is changed to force regenerating it.

Note that you need the proper metrics external libraries setup to use all
the objective metrics activated in this grid. Refer to the README for more information.
"""

import os

from ._explorers import GenerationEvalExplorer
from ...environment import AudioCraftEnvironment
from ... import train


def eval(launcher, batch_size: int = 32, eval_melody: bool = False):
    opts = {
        'dset': 'audio/musiccaps_32khz',
        'solver/musicgen/evaluation': 'objective_eval',
        'execute_only': 'evaluate',
        '+dataset.evaluate.batch_size': batch_size,
        '+metrics.fad.tf.batch_size': 16,
    }
    # binary for FAD computation: replace this path with your own path
    metrics_opts = {
        'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
    }
    opt1 = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
    opt2 = {'transformer_lm.two_step_cfg': True}

    sub = launcher.bind(opts)
    sub.bind_(metrics_opts)

    # base objective metrics
    sub(opt1, opt2)


@GenerationEvalExplorer
def explorer(launcher):
    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=4, partition=partitions)

    if 'REGEN' not in os.environ:
        folder = train.main.dora.dir / 'grids' / __name__.split('.', 2)[-1]
        with launcher.job_array():
            for sig in folder.iterdir():
                if not sig.is_symlink():
                    continue
                xp = train.main.get_xp_from_sig(sig.name)
                launcher(xp.argv)
        return

    with launcher.job_array():
        musicgen_base = launcher.bind(solver="musicgen/musicgen_base_32khz")
        musicgen_base.bind_({'autocast': False, 'fsdp.use': True})

        # base musicgen models
        musicgen_base_small = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-small'})
        eval(musicgen_base_small, batch_size=128)

        musicgen_base_medium = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-medium'})
        musicgen_base_medium.bind_({'model/lm/model_scale': 'medium'})
        eval(musicgen_base_medium, batch_size=128)

        musicgen_base_large = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-large'})
        musicgen_base_large.bind_({'model/lm/model_scale': 'large'})
        eval(musicgen_base_large, batch_size=128)

        # melody musicgen model
        musicgen_melody = launcher.bind(solver="musicgen/musicgen_melody_32khz")
        musicgen_melody.bind_({'autocast': False, 'fsdp.use': True})

        musicgen_melody_medium = musicgen_melody.bind({'continue_from': '//pretrained/facebook/musicgen-melody'})
        musicgen_melody_medium.bind_({'model/lm/model_scale': 'medium'})
        eval(musicgen_melody_medium, batch_size=128, eval_melody=True)
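Aside on the Dora launcher idioms used throughout these grids: as we understand facebookresearch/dora, `bind` returns a new sub-launcher carrying extra overrides, the trailing-underscore variants (`bind_`, `slurm_`) mutate the launcher in place, and calling the launcher schedules one experiment. A simplified stand-in capturing just that convention (not Dora's real API):

class MockLauncher:
    def __init__(self, overrides=None):
        self.overrides = dict(overrides or {})

    def bind(self, extra=None, **kw):
        # returns a NEW sub-launcher with the merged overrides
        return MockLauncher({**self.overrides, **(extra or {}), **kw})

    def bind_(self, extra=None, **kw):
        # trailing underscore: mutate this launcher in place
        self.overrides.update({**(extra or {}), **kw})

    def __call__(self, extra=None):
        # calling the launcher schedules one experiment (XP)
        print('launch:', {**self.overrides, **(extra or {})})

launcher = MockLauncher()
sub = launcher.bind({'solver': 'musicgen/musicgen_base_32khz'})
sub.bind_({'autocast': False, 'fsdp.use': True})
sub({'continue_from': '//pretrained/facebook/musicgen-small'})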
3 changes: 2 additions & 1 deletion audiocraft/models/__init__.py
@@ -10,7 +10,7 @@
 from . import builders, loaders
 from .encodec import (
     CompressionModel, EncodecModel, DAC,
-    HFEncodecModel, HFEncodecCompressionModel)
+    HFEncodecModel, HFEncodecCompressionModel, MultiStemCompressionModel)
 from .audiogen import AudioGen
 from .lm import LMModel
 from .lm_magnet import MagnetLMModel
@@ -19,3 +19,4 @@
 from .magnet import MAGNeT
 from .unet import DiffusionUnet
 from .watermark import WMModel
+from .musicgenstem import MusicGenStem
22 changes: 17 additions & 5 deletions audiocraft/models/builders.py
@@ -26,11 +26,11 @@
 from ..modules.conditioners import (BaseConditioner, ChromaStemConditioner,
                                     CLAPEmbeddingConditioner, ConditionFuser,
                                     ConditioningProvider, LUTConditioner,
-                                    T5Conditioner, StyleConditioner)
+                                    T5Conditioner, StyleConditioner, MultiStemStyleConditioner)
 from ..modules.diffusion_schedule import MultiBandProcessor, SampleProcessor
 from ..utils.utils import dict_from_config
 from .encodec import (CompressionModel, EncodecModel,
-                      InterleaveStereoCompressionModel)
+                      InterleaveStereoCompressionModel, MultiStemCompressionModel)
 from .lm import LMModel
 from .lm_magnet import MagnetLMModel
 from .unet import DiffusionUnet
@@ -40,9 +40,11 @@
 def get_quantizer(
     quantizer: str, cfg: omegaconf.DictConfig, dimension: int
 ) -> qt.BaseQuantizer:
-    klass = {"no_quant": qt.DummyQuantizer, "rvq": qt.ResidualVectorQuantizer}[
-        quantizer
-    ]
+    klass = {
+        'no_quant': qt.DummyQuantizer,
+        'rvq': qt.ResidualVectorQuantizer,
+        'dac': qt.DACResidualVectorQuantizer,
+    }[quantizer]
     kwargs = dict_from_config(getattr(cfg, quantizer))
     if quantizer != "no_quant":
         kwargs["dimension"] = dimension
@@ -52,6 +54,9 @@
 def get_encodec_autoencoder(encoder_name: str, cfg: omegaconf.DictConfig):
     if encoder_name == "seanet":
         kwargs = dict_from_config(getattr(cfg, "seanet"))
+        # deprecated params
+        kwargs.pop("encoder_transformer", None)
+        kwargs.pop("decoder_transformer", None)
         encoder_override_kwargs = kwargs.pop("encoder")
         decoder_override_kwargs = kwargs.pop("decoder")
         encoder_kwargs = {**kwargs, **encoder_override_kwargs}
@@ -167,6 +172,13 @@ def get_conditioner_provider(
                 device=device,
                 **model_args
             )
+        elif model_type == 'multistem_style':
+            conditioners[str(cond)] = MultiStemStyleConditioner(
+                output_dim=output_dim,
+                device=device,
+                **model_args
+            )
+
         else:
             raise ValueError(f"Unrecognized conditioning model: {model_type}")
     conditioner = ConditioningProvider(
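Aside: the two dispatch styles above differ in failure mode: `get_quantizer` indexes a dict, so an unknown name fails with a bare KeyError, while `get_conditioner_provider` walks an if/elif chain and raises a descriptive ValueError. A toy contrast (conditioner names other than 'style' and 'multistem_style' are inferred from the imports above; illustration only):

QUANTIZERS = {'no_quant': 'DummyQuantizer', 'rvq': 'ResidualVectorQuantizer',
              'dac': 'DACResidualVectorQuantizer'}

def pick_quantizer(name: str) -> str:
    return QUANTIZERS[name]  # unknown name -> KeyError

def pick_conditioner(name: str) -> str:
    if name in ('t5', 'lut', 'chroma_stem', 'clap', 'style', 'multistem_style'):
        return name
    raise ValueError(f'Unrecognized conditioning model: {name}')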