Merge branch 'main' of github.com:facebookresearch/demucs

jlami · Sep 7, 2023 · ef66d25 · ef66d25
2 parents 88af672 + 404690c
commit ef66d25
Show file tree

Hide file tree

Showing 4 changed files with 19 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -159,7 +159,7 @@ but it will allow you to use Demucs without installing anything.
 
 ### Web Demo
 
-Integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/demucs)
+Integrated to [Hugging Face Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/demucs)
 
 ### Graphical Interface
 
@@ -170,8 +170,11 @@ Integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](
 ### Other providers
 
 Audiostrip is providing free online separation with Demucs on their website [https://audiostrip.co.uk/](https://audiostrip.co.uk/).
+
 [MVSep](https://mvsep.com/) also provides free online separation, select `Demucs3 model B` for the best quality.
 
+[Neutone](https://neutone.space/) provides a real-time Demucs model in their free VST/AU plugin, which can be used in your favorite DAW.
+
 
 ## Separating tracks
 
@@ -181,11 +184,12 @@ In order to try Demucs, you can just run from any folder (as long as you properl
 demucs PATH_TO_AUDIO_FILE_1 [PATH_TO_AUDIO_FILE_2 ...]   # for Demucs
 # If you used `pip install --user` you might need to replace demucs with python3 -m demucs
 python3 -m demucs --mp3 --mp3-bitrate BITRATE PATH_TO_AUDIO_FILE_1  # output files saved as MP3
+        # use --mp3-preset to change the encoder preset: 2 for best quality, 7 for fastest encoding
 # If your filename contains spaces, don't forget to quote it!
 demucs "my music/my favorite track.mp3"
 # You can select different models with `-n` mdx_q is the quantized model, smaller but maybe a bit less accurate.
 demucs -n mdx_q myfile.mp3
-# If you only want to separate vocals out of an audio, use `--two-stems=vocal` (You can also set to drums or bass)
+# If you only want to separate the vocals from a track, use `--two-stems=vocals` (you can also set it to drums or bass)
 demucs --two-stems=vocals myfile.mp3
 ```
 

diff --git a/demucs/audio.py b/demucs/audio.py
@@ -196,15 +196,15 @@ def as_dtype_pcm(wav, dtype):
         return i16_pcm(wav)
 
 
-def encode_mp3(wav, path, samplerate=44100, bitrate=320, verbose=False):
+def encode_mp3(wav, path, samplerate=44100, bitrate=320, quality=2, verbose=False):
     """Save given audio as mp3. This should work on all OSes."""
     C, T = wav.shape
     wav = i16_pcm(wav)
     encoder = lameenc.Encoder()
     encoder.set_bit_rate(bitrate)
     encoder.set_in_sample_rate(samplerate)
     encoder.set_channels(C)
-    encoder.set_quality(2)  # 2-highest, 7-fastest
+    encoder.set_quality(quality)  # 2-highest, 7-fastest
     if not verbose:
         encoder.silence()
     wav = wav.data.cpu()
@@ -239,16 +239,18 @@ def save_audio(wav: torch.Tensor,
                bitrate: int = 320,
                clip: tp.Literal["rescale", "clamp", "tanh", "none"] = 'rescale',
                bits_per_sample: tp.Literal[16, 24, 32] = 16,
-               as_float: bool = False):
+               as_float: bool = False,
+               preset: tp.Literal[2, 3, 4, 5, 6, 7] = 2):
     """Save audio file, automatically preventing clipping if necessary
     based on the given `clip` strategy. If the path ends in `.mp3`, this
-    will save as mp3 with the given `bitrate`.
+    will save as mp3 with the given `bitrate`. Use `preset` to set mp3 quality:
+    2 for highest quality, 7 for fastest speed
     """
     wav = prevent_clip(wav, mode=clip)
     path = Path(path)
     suffix = path.suffix.lower()
     if suffix == ".mp3":
-        encode_mp3(wav, path, samplerate, bitrate, verbose=True)
+        encode_mp3(wav, path, samplerate, bitrate, preset, verbose=True)
     elif suffix == ".wav":
         if as_float:
             bits_per_sample = 32

diff --git a/demucs/separate.py b/demucs/separate.py
@@ -113,6 +113,9 @@ def get_parser():
                         default=320,
                         type=int,
                         help="Bitrate of converted mp3.")
+    parser.add_argument("--mp3-preset", choices=range(2, 8), type=int, default=2,
+                        help="Encoder preset of MP3, 2 for highest quality, 7 for "
+                        "fastest speed. Default is 2")
     parser.add_argument("-j", "--jobs",
                         default=0,
                         type=int,
@@ -182,6 +185,7 @@ def main(opts=None):
         kwargs = {
             'samplerate': model.samplerate,
             'bitrate': args.mp3_bitrate,
+            'preset': args.mp3_preset,
             'clip': args.clip_mode,
             'as_float': args.float32,
             'bits_per_sample': 24 if args.int24 else 16,

diff --git a/docs/release.md b/docs/release.md
@@ -15,6 +15,8 @@ Will use CPU for complex numbers, when using MPS device (all other computations
 
 Optimize codes to save memory
 
+Allow changing the MP3 encoder preset (`--mp3-preset`)
+
 ## V4.0.0, 7th of December 2022
 
 Adding hybrid transformer Demucs model.