From f774e039e34d382f4b49911b4d4aea71a4f2dc49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?= Date: Thu, 16 Nov 2023 13:24:24 +0100 Subject: [PATCH 1/6] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a93e294d..78172dc2 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ # Demucs Music Source Separation -[![Support Ukraine](https://img.shields.io/badge/Support-Ukraine-FFD500?style=flat&labelColor=005BBB)](https://opensource.fb.com/support-ukraine) ![tests badge](https://github.com/facebookresearch/demucs/workflows/tests/badge.svg) ![linter badge](https://github.com/facebookresearch/demucs/workflows/linter/badge.svg) +**Important:** This project **is not actively maintained anymore** and only important bug fixes will be processed on the new repo. **Please do not open issues for feature request or if Demucs doesn't work perfectly for your use case :)** + + This is the 4th release of Demucs (v4), featuring Hybrid Transformer based source separation. **For the classic Hybrid Demucs (v3):** [Go this commit][demucs_v3]. If you are experiencing issues and want the old Demucs back, please fill an issue, and then you can get back to the v3 with From 6a5cefa75cc97e970c54116938510a3de3192794 Mon Sep 17 00:00:00 2001 From: Eli Dourado Date: Sun, 3 Dec 2023 13:42:44 -0500 Subject: [PATCH 2/6] Use mps by default if available Use Apple Silicon's Metal Performance Shaders if available. This can already be done using "-d mps" but changing this configuration makes it available by default. In my testing this makes source separation about 8x faster on an M2 MBP. 
--- demucs/separate.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/demucs/separate.py b/demucs/separate.py index d5102ede..7de5f114 100644 --- a/demucs/separate.py +++ b/demucs/separate.py @@ -41,7 +41,13 @@ def get_parser(): 'Default is "{track}/{stem}.{ext}".') parser.add_argument("-d", "--device", - default="cuda" if th.cuda.is_available() else "cpu", + default=( + "cuda" + if th.cuda.is_available() + else "mps" + if th.backends.mps.is_available() + else "cpu" + ), help="Device to use, default is cuda if available else cpu") parser.add_argument("--shifts", default=1, From 209f3a137cc224401fac4b0ef8a608b799701036 Mon Sep 17 00:00:00 2001 From: Weiqi Gao Date: Fri, 22 Dec 2023 00:42:05 +0800 Subject: [PATCH 3/6] Merge latest updates from 4.1.0a2 (#2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update README.md * Update README.md * Update README.md * Update README.md * minor fixes for 4.1.0a1 (#552) * minor fixes for 4.1.0a1 print out the exception when calling callback ensures all threads can be stopped when interrupting separation add release data for 4.0.1 * Fix model_idx_in_bag always zero * fix linter * Fix can't separate empty audio * Calls callback when skipping empty audio * Add description for aborting * Does not ignore callback exception * Fix linter * Does not ignore exception * Disable torchaudio 2.2+ * Uses epsilon to deal with empty audio * Reraises exception in callback * Ensure the pool stops when encountering exception * Update windows.md for latest instructions * Minor documentation updates (#565) * Minor documentation updates * Update readme * Update api.md * Fix segment defined in bag can't override model * merge from adefossez/demucs * Update README.md --------- Co-authored-by: Alexandre Défossez Co-authored-by: William Dye --- .gitignore | 2 +- README.md | 24 +++++++++--------- demucs/__init__.py | 2 +- demucs/api.py | 8 +++--- demucs/apply.py | 53 
++++++++++++++-------------------------- demucs/utils.py | 3 +++ docs/api.md | 6 ++--- docs/mac.md | 12 ++++----- docs/release.md | 2 +- docs/windows.md | 29 ++++++++++++++-------- requirements.txt | 2 +- requirements_minimal.txt | 2 +- 12 files changed, 71 insertions(+), 74 deletions(-) diff --git a/.gitignore b/.gitignore index 6f73669b..179cf0dd 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,4 @@ Session.vim /trash /misc /mdx -.mypy_cache \ No newline at end of file +.mypy_cache diff --git a/README.md b/README.md index 78172dc2..fdb906df 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ This is the 4th release of Demucs (v4), featuring Hybrid Transformer based source separation. **For the classic Hybrid Demucs (v3):** [Go this commit][demucs_v3]. -If you are experiencing issues and want the old Demucs back, please fill an issue, and then you can get back to the v3 with +If you are experiencing issues and want the old Demucs back, please file an issue, and then you can get back to Demucs v3 with `git checkout v3`. You can also go [Demucs v2][demucs_v2]. @@ -17,7 +17,7 @@ Demucs is a state-of-the-art music source separation model, currently capable of drums, bass, and vocals from the rest of the accompaniment. Demucs is based on a U-Net convolutional architecture inspired by [Wave-U-Net][waveunet]. The v4 version features [Hybrid Transformer Demucs][htdemucs], a hybrid spectrogram/waveform separation model using Transformers. -It is based on [Hybrid Demucs][hybrid_paper] (also provided in this repo) with the innermost layers are +It is based on [Hybrid Demucs][hybrid_paper] (also provided in this repo), with the innermost layers replaced by a cross-domain Transformer Encoder. This Transformer uses self-attention within each domain, and cross-attention across domains. The model achieves a SDR of 9.00 dB on the MUSDB HQ test set. 
Moreover, when using sparse attention @@ -125,7 +125,7 @@ python3 -m pip install -U git+https://github.com/facebookresearch/demucs#egg=dem Advanced OS support are provided on the following page, **you must read the page for your OS before posting an issues**: - **If you are using Windows:** [Windows support](docs/windows.md). -- **If you are using MAC OS X:** [Mac OS X support](docs/mac.md). +- **If you are using macOS:** [macOS support](docs/mac.md). - **If you are using Linux:** [Linux support](docs/linux.md). ### For machine learning scientists @@ -141,7 +141,7 @@ pip install -e . This will create a `demucs` environment with all the dependencies installed. -You will also need to install [soundstretch/soundtouch](https://www.surina.net/soundtouch/soundstretch.html): on Mac OSX you can do `brew install sound-touch`, +You will also need to install [soundstretch/soundtouch](https://www.surina.net/soundtouch/soundstretch.html): on macOS you can do `brew install sound-touch`, and on Ubuntu `sudo apt-get install soundstretch`. This is used for the pitch/tempo augmentation. @@ -196,16 +196,18 @@ demucs --two-stems=vocals myfile.mp3 ``` -If you have a GPU, but you run out of memory, please use `--segment SEGMENT` to reduce length of each split. `SEGMENT` should be changed to a integer. Personally recommend not less than 10 (the bigger the number is, the more memory is required, but quality may increase). Create an environment variable `PYTORCH_NO_CUDA_MEMORY_CACHING=1` is also helpful. If this still cannot help, please add `-d cpu` to the command line. See the section hereafter for more details on the memory requirements for GPU acceleration. +If you have a GPU, but you run out of memory, please use `--segment SEGMENT` to reduce length of each split. `SEGMENT` should be changed to a integer describing the length of each segment in seconds. +A segment length of at least 10 is recommended (the bigger the number is, the more memory is required, but quality may increase). 
Note that the Hybrid Transformer models only support a maximum segment length of 7.8 seconds. +Creating an environment variable `PYTORCH_NO_CUDA_MEMORY_CACHING=1` is also helpful. If this still does not help, please add `-d cpu` to the command line. See the section hereafter for more details on the memory requirements for GPU acceleration. Separated tracks are stored in the `separated/MODEL_NAME/TRACK_NAME` folder. There you will find four stereo wav files sampled at 44.1 kHz: `drums.wav`, `bass.wav`, `other.wav`, `vocals.wav` (or `.mp3` if you used the `--mp3` option). -All audio formats supported by `torchaudio` can be processed (i.e. wav, mp3, flac, ogg/vorbis on Linux/Mac OS X etc.). On Windows, `torchaudio` has limited support, so we rely on `ffmpeg`, which should support pretty much anything. +All audio formats supported by `torchaudio` can be processed (i.e. wav, mp3, flac, ogg/vorbis on Linux/macOS, etc.). On Windows, `torchaudio` has limited support, so we rely on `ffmpeg`, which should support pretty much anything. Audio is resampled on the fly if necessary. -The output will be a wave file encoded as int16. +The output will be a wav file encoded as int16. You can save as float32 wav files with `--float32`, or 24 bits integer wav with `--int24`. -You can pass `--mp3` to save as mp3 instead, and set the bitrate with `--mp3-bitrate` (default is 320kbps). +You can pass `--mp3` to save as mp3 instead, and set the bitrate (in kbps) with `--mp3-bitrate` (default is 320). It can happen that the output would need clipping, in particular due to some separation artifacts. Demucs will automatically rescale each output stem so as to avoid clipping. This can however break @@ -228,8 +230,8 @@ The list of pre-trained models is: but quality can be slightly worse. - `SIG`: where `SIG` is a single model from the [model zoo](docs/training.md#model-zoo). -The `--two-stems=vocals` option allows to separate vocals from the rest (e.g. karaoke mode). 
-`vocals` can be changed into any source in the selected model. +The `--two-stems=vocals` option allows separating vocals from the rest of the accompaniment (i.e., karaoke mode). +`vocals` can be changed to any source in the selected model. This will mix the files after separating the mix fully, so this won't be faster or use less memory. The `--shifts=SHIFTS` performs multiple predictions with random shifts (a.k.a the *shift trick*) of the input and average them. This makes prediction `SHIFTS` times @@ -250,7 +252,7 @@ If you do not have enough memory on your GPU, simply add `-d cpu` to the command ## Calling from another Python program -The main function provides a `opt` parameter as a simple API. You can just pass the parsed command line as this parameter: +The main function provides an `opt` parameter as a simple API. You can just pass the parsed command line as this parameter: ```python # Assume that your command is `demucs --mp3 --two-stems vocals -n mdx_extra "track with space.mp3"` # The following codes are same as the command above: diff --git a/demucs/__init__.py b/demucs/__init__.py index ef5cd6f3..e02c0ada 100644 --- a/demucs/__init__.py +++ b/demucs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. 
-__version__ = "4.1.0a1" +__version__ = "4.1.0a2" diff --git a/demucs/api.py b/demucs/api.py index fc254fb2..20079a6b 100644 --- a/demucs/api.py +++ b/demucs/api.py @@ -195,7 +195,7 @@ def update_parameter( self._jobs = jobs if not isinstance(progress, _NotProvided): self._progress = progress - if not isinstance(callback, _NotProvided) and (callback is None or callable(callback)): + if not isinstance(callback, _NotProvided): self._callback = callback if not isinstance(callback_arg, _NotProvided): self._callback_arg = callback_arg @@ -266,7 +266,7 @@ def separate_tensor( wav = convert_audio(wav, sr, self._samplerate, self._audio_channels) ref = wav.mean(0) wav -= ref.mean() - wav /= ref.std() + wav /= ref.std() + 1e-8 out = apply_model( self._model, wav[None], @@ -284,9 +284,9 @@ def separate_tensor( ) if out is None: raise KeyboardInterrupt - out *= ref.std() + out *= ref.std() + 1e-8 out += ref.mean() - wav *= ref.std() + wav *= ref.std() + 1e-8 wav += ref.mean() return (wav, dict(zip(self._model.sources, out[0]))) diff --git a/demucs/apply.py b/demucs/apply.py index 180db7fe..c84993de 100644 --- a/demucs/apply.py +++ b/demucs/apply.py @@ -51,7 +51,7 @@ def __init__(self, models: tp.List[Model], assert other.samplerate == first.samplerate assert other.audio_channels == first.audio_channels if segment is not None: - if not isinstance(other, HTDemucs) and segment > other.segment: + if not isinstance(other, HTDemucs) or segment <= other.segment: other.segment = segment self.audio_channels = first.audio_channels @@ -150,7 +150,7 @@ def apply_model(model: tp.Union[BagOfModels, Model], num_workers: int = 0, segment: tp.Optional[float] = None, pool=None, lock=None, callback: tp.Optional[tp.Callable[[dict], None]] = None, - callback_arg: tp.Optional[dict] = None) -> tp.Optional[th.Tensor]: + callback_arg: tp.Optional[dict] = None) -> th.Tensor: """ Apply model to a given mixture. 
@@ -197,7 +197,7 @@ def apply_model(model: tp.Union[BagOfModels, Model], 'lock': lock, } out: tp.Union[float, th.Tensor] - res: tp.Union[float, th.Tensor, None] + res: tp.Union[float, th.Tensor] if isinstance(model, BagOfModels): # Special treatment for bag of model. # We explicitely apply multiple times `apply_model` so that the random shifts @@ -205,22 +205,15 @@ def apply_model(model: tp.Union[BagOfModels, Model], estimates: tp.Union[float, th.Tensor] = 0. totals = [0.] * len(model.sources) callback_arg["models"] = len(model.models) - kwargs["callback"] = ( - ( - lambda d, i=callback_arg["model_idx_in_bag"]: callback( - _replace_dict(d, ("model_idx_in_bag", i)) - ) - ) - if callable(callback) - else None - ) for sub_model, model_weights in zip(model.models, model.weights): + kwargs["callback"] = (( + lambda d, i=callback_arg["model_idx_in_bag"]: callback( + _replace_dict(d, ("model_idx_in_bag", i))) if callback else None) + ) original_model_device = next(iter(sub_model.parameters())).device sub_model.to(device) res = apply_model(sub_model, mix, **kwargs, callback_arg=callback_arg) - if res is None: - return res out = res sub_model.to(original_model_device) for k, inst_weight in enumerate(model_weights): @@ -252,13 +245,10 @@ def apply_model(model: tp.Union[BagOfModels, Model], offset = random.randint(0, max_shift) shifted = TensorChunk(padded_mix, offset, length + max_shift - offset) kwargs["callback"] = ( - (lambda d, i=shift_idx: callback(_replace_dict(d, ("shift_idx", i)))) - if callable(callback) - else None + (lambda d, i=shift_idx: callback(_replace_dict(d, ("shift_idx", i))) + if callback else None) ) res = apply_model(model, shifted, **kwargs, callback_arg=callback_arg) - if res is None: - return res shifted_out = res out += shifted_out[..., max_shift - offset:] out /= shifts @@ -289,17 +279,18 @@ def apply_model(model: tp.Union[BagOfModels, Model], chunk = TensorChunk(mix, offset, segment_length) future = pool.submit(apply_model, model, chunk, **kwargs, 
callback_arg=callback_arg, callback=(lambda d, i=offset: - callback(_replace_dict(d, ("segment_offset", i)))) - if callable(callback) else None) + callback(_replace_dict(d, ("segment_offset", i))) + if callback else None)) futures.append((future, offset)) offset += segment_length if progress: futures = tqdm.tqdm(futures, unit_scale=scale, ncols=120, unit='seconds') for future, offset in futures: - chunk_out = future.result() # type: tp.Union[None, th.Tensor] - if chunk_out is None: - pool.shutdown(wait=False, cancel_futures=True) - return chunk_out + try: + chunk_out = future.result() # type: th.Tensor + except Exception: + pool.shutdown(wait=True, cancel_futures=True) + raise chunk_length = chunk_out.shape[-1] out[..., offset:offset + segment_length] += ( weight[:chunk_length] * chunk_out).to(mix.device) @@ -320,20 +311,12 @@ def apply_model(model: tp.Union[BagOfModels, Model], assert isinstance(mix, TensorChunk) padded_mix = mix.padded(valid_length).to(device) with lock: - try: + if callback is not None: callback(_replace_dict(callback_arg, ("state", "start"))) # type: ignore - except KeyboardInterrupt: - raise - except Exception: - pass with th.no_grad(): out = model(padded_mix) with lock: - try: + if callback is not None: callback(_replace_dict(callback_arg, ("state", "end"))) # type: ignore - except KeyboardInterrupt: - raise - except Exception: - pass assert isinstance(out, th.Tensor) return center_trim(out, length) diff --git a/demucs/utils.py b/demucs/utils.py index c80fc129..a3f5993e 100755 --- a/demucs/utils.py +++ b/demucs/utils.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. 
from collections import defaultdict +from concurrent.futures import CancelledError from contextlib import contextmanager import math import os @@ -129,6 +130,8 @@ def __init__(self, func, _dict, *args, **kwargs): def result(self): if self._dict["run"]: return self.func(*self.args, **self.kwargs) + else: + raise CancelledError() def __init__(self, workers=0): self._dict = {"run": True} diff --git a/docs/api.md b/docs/api.md index e6d9e873..dbd858a7 100644 --- a/docs/api.md +++ b/docs/api.md @@ -47,7 +47,7 @@ for file, sources in separated: ## API References -The types of each parameter and return value is not listed in this document. To know the exact type of them, please read the type hints in api.py (most modern code editors support infering types based on type hints). +The types of each parameter and return value are not listed in this document. To find their exact types, please read the type hints in api.py (most modern code editors support inferring types based on type hints). ### `class Separator` @@ -79,7 +79,7 @@ progress: If true, show a progress bar. ##### Notes for callback -The function will be called with only one positional parameter whose type is `dict`. The `callback_arg` will be combined with information of current separation progress. The progress information will override the values in `callback_arg` if same key has been used. To abort the separation, raise `KeyboardInterrupt`. +The function will be called with only one positional parameter whose type is `dict`. The `callback_arg` will be combined with information of current separation progress. The progress information will override the values in `callback_arg` if same key has been used. To abort the separation, raise an exception in `callback`; the exception is re-raised to the caller, so you must catch it yourself if you want your code to keep running. Progress information contains several keys (These keys will always exist): - `model_idx_in_bag`: The index of the submodel in `BagOfModels`. Starts from 0. 
@@ -127,7 +127,7 @@ progress: If true, show a progress bar. ##### Notes for callback -The function will be called with only one positional parameter whose type is `dict`. The `callback_arg` will be combined with information of current separation progress. The progress information will override the values in `callback_arg` if same key has been used. To abort the separation, raise `KeyboardInterrupt`. +The function will be called with only one positional parameter whose type is `dict`. The `callback_arg` will be combined with information of current separation progress. The progress information will override the values in `callback_arg` if same key has been used. To abort the separation, raise an exception in `callback`; the exception is re-raised to the caller, so you must catch it yourself if you want your code to keep running. Progress information contains several keys (These keys will always exist): - `model_idx_in_bag`: The index of the submodel in `BagOfModels`. Starts from 0. diff --git a/docs/mac.md b/docs/mac.md index 6e6c3d0c..62dd235e 100644 --- a/docs/mac.md +++ b/docs/mac.md @@ -1,6 +1,6 @@ -# Mac OS X support for Demucs +# macOS support for Demucs -If you have a sufficiently recent version of OS X, you can just run +If you have a sufficiently recent version of macOS, you can just run ```bash python3 -m pip install --user -U demucs @@ -10,10 +10,10 @@ python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1 demucs -d cpu PATH_TO_AUDIO_FILE_1 ``` -If you do not already have Anaconda installed or much experience with the terminal on Mac OS X here are some detailed instructions: +If you do not already have Anaconda installed or much experience with the terminal on macOS, here are some detailed instructions: -1. Download [Anaconda 3.8 (or more recent) 64 bits for MacOS][anaconda]: -2. Open [Anaconda Prompt in MacOSX][prompt] +1. Download [Anaconda 3.8 (or more recent) 64-bit for macOS][anaconda]: +2. Open [Anaconda Prompt in macOS][prompt] 3. 
Follow these commands: ```bash conda activate @@ -24,5 +24,5 @@ demucs -d cpu PATH_TO_AUDIO_FILE_1 **Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or with Homebrew for instance (`brew install ffmpeg`). -[anaconda]: https://www.anaconda.com/distribution/#download-section +[anaconda]: https://www.anaconda.com/download [prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-nav-mac diff --git a/docs/release.md b/docs/release.md index 1c8dd537..0aee2f71 100644 --- a/docs/release.md +++ b/docs/release.md @@ -14,7 +14,7 @@ Added `--other-method`: method to get `no_{STEM}`, add up all the other stems (a Added type `HTDemucs` to type alias `AnyModel`. -## V4.0.1a1, TBD +## V4.0.1, 8th of September 2023 **From this version, Python 3.7 is no longer supported. This is not a problem since the latest PyTorch 2.0.0 no longer support it either.** diff --git a/docs/windows.md b/docs/windows.md index a84e89bf..b259b765 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -2,25 +2,32 @@ ## Installation and usage -Parts of the code are untested on Windows (in particular, training a new model). If you don't have much experience with Anaconda, python or the shell, here are more detailed instructions. Note that **Demucs is not supported on 32bits systems** (as Pytorch is not available there). +If you don't have much experience with Anaconda, python or the shell, here are more detailed instructions. Note that **Demucs is not supported on 32bits systems** (as Pytorch is not available there). - First install Anaconda with **Python 3.8** or more recent, which you can find [here][install]. - Start the [Anaconda prompt][prompt]. Then, all commands that follow must be run from this prompt. +<details>
+ <summary>I have no coding experience and these are too difficult for me</summary> + +> Then a GUI is suitable for you. See [Demucs GUI](https://github.com/CarlGao4/Demucs-Gui) + +</details>
+ ### If you want to use your GPU -If you have graphic cards produced by nVidia with more than 6GiB of memory, you can separate tracks with GPU acceleration. To achieve this, you must install Pytorch with CUDA. If Pytorch was already installed (you already installed Demucs for instance), first run `python.exe -m pip uninstall torch torchaudio`. -Then visit [Pytorch Home Page](https://pytorch.org/get-started/locally/) and follow the guide on it to install with CUDA support. +If you have graphic cards produced by NVIDIA with more than 2GiB of memory, you can separate tracks with GPU acceleration. To achieve this, you must install Pytorch with CUDA. If Pytorch was already installed (you already installed Demucs for instance), first run `python.exe -m pip uninstall torch torchaudio`. +Then visit [Pytorch Home Page](https://pytorch.org/get-started/locally/) and follow the guide on it to install with CUDA support. Please make sure that the version of torchaudio is no greater than 2.1 (the latest version at the time of writing; 2.2.0 is known to be unsupported). ### Installation Start the Anaconda prompt, and run the following -bash -``` + +```cmd conda install -c conda-forge ffmpeg -python.exe -m pip install -U demucs PySoundFile +python.exe -m pip install -U demucs SoundFile ``` ### Upgrade @@ -33,9 +40,12 @@ Then to use Demucs, just start the **Anaconda prompt** and run: ``` demucs -d cpu "PATH_TO_AUDIO_FILE_1" ["PATH_TO_AUDIO_FILE_2" ...] ``` -The `"` around the filename are required if the path contains spaces. -The separated files will be under `C:\Users\YOUR_USERNAME\demucs\separated\demucs\`. +The `"` around the filename are required if the path contains spaces. A simple way to input these paths is to drag a file from a folder into the terminal. 
+To find the separated files, you can run this command and open the folders: +``` +explorer separated +``` ### Separating an entire folder @@ -45,7 +55,6 @@ cd FOLDER for %i in (*.mp3) do (demucs -d cpu "%i") ``` - ## Potential errors If you have an error saying that `mkl_intel_thread.dll` cannot be found, you can try to first run @@ -54,5 +63,5 @@ If you have an error saying that `mkl_intel_thread.dll` cannot be found, you can **If you get a permission error**, please try starting the Anaconda Prompt as administrator. -[install]: https://www.anaconda.com/distribution/#windows +[install]: https://www.anaconda.com/download [prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-prompt-win diff --git a/requirements.txt b/requirements.txt index 26342361..294290d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ openunmix pyyaml submitit torch>=1.8.1 -torchaudio>=0.8 +torchaudio>=0.8,<2.1 tqdm treetable soundfile>=0.10.3;sys_platform=="win32" diff --git a/requirements_minimal.txt b/requirements_minimal.txt index 8c6f1e57..1940bf01 100644 --- a/requirements_minimal.txt +++ b/requirements_minimal.txt @@ -6,5 +6,5 @@ lameenc>=1.2 openunmix pyyaml torch>=1.8.1 -torchaudio>=0.8 +torchaudio>=0.8,<2.1 tqdm From a4decbdc6c56f636b58688e5117647bbdd2d0d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?= Date: Thu, 21 Dec 2023 17:44:36 +0100 Subject: [PATCH 4/6] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fdb906df..1bc16ee6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,9 @@ ![linter badge](https://github.com/facebookresearch/demucs/workflows/linter/badge.svg) -**Important:** This project **is not actively maintained anymore** and only important bug fixes will be processed on the new repo. 
**Please do not open issues for feature request or if Demucs doesn't work perfectly for your use case :)** +**This is the officially maintained Demucs** now that I (Alexandre Défossez) have left Meta to join [Kyutai](https://twitter.com/kyutai_labs). +Note that I'm not actively working on Demucs anymore, so expect slow replies and no new feature for now. + This is the 4th release of Demucs (v4), featuring Hybrid Transformer based source separation. From b3398e47604e3d4620ed252a858fcbceff5f0be7 Mon Sep 17 00:00:00 2001 From: Weiqi Gao Date: Fri, 12 Jan 2024 18:57:08 +0800 Subject: [PATCH 5/6] Extend torchaudio support to 2.1.x (#3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update README.md * Update README.md * Update README.md * Update README.md * minor fixes for 4.1.0a1 (#552) * minor fixes for 4.1.0a1 print out the exception when calling callback ensures all threads can be stopped when interrupting separation add release data for 4.0.1 * Fix model_idx_in_bag always zero * fix linter * Fix can't separate empty audio * Calls callback when skipping empty audio * Add description for aborting * Does not ignore callback exception * Fix linter * Does not ignore exception * Disable torchaudio 2.2+ * Uses epsilon to deal with empty audio * Reraises exception in callback * Ensure the pool stops when encountering exception * Update windows.md for latest instructions * Minor documentation updates (#565) * Minor documentation updates * Update readme * Update api.md * Fix segment defined in bag can't override model * merge from adefossez/demucs * Update README.md * Extend torchaudio support to 2.1.x * Use correct import statement * Calculate FFT on CPU also when device is XPU (Intel GPU) --------- Co-authored-by: Alexandre Défossez Co-authored-by: William Dye --- demucs/api.py | 1 + demucs/audio.py | 1 + demucs/audio_legacy.py | 17 +++++++++++++++++ demucs/hdemucs.py | 10 ++++++---- demucs/htdemucs.py | 9 +++++---- 
demucs/repitch.py | 1 + demucs/spec.py | 8 ++++---- demucs/train.py | 1 + demucs/wav.py | 1 + requirements.txt | 2 +- requirements_minimal.txt | 2 +- 11 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 demucs/audio_legacy.py diff --git a/demucs/api.py b/demucs/api.py index 20079a6b..ee8a5126 100644 --- a/demucs/api.py +++ b/demucs/api.py @@ -22,6 +22,7 @@ import subprocess +from . import audio_legacy import torch as th import torchaudio as ta diff --git a/demucs/audio.py b/demucs/audio.py index 31b29b3c..600bd55b 100644 --- a/demucs/audio.py +++ b/demucs/audio.py @@ -10,6 +10,7 @@ import lameenc import julius import numpy as np +from . import audio_legacy import torch import torchaudio as ta import typing as tp diff --git a/demucs/audio_legacy.py b/demucs/audio_legacy.py new file mode 100644 index 00000000..ab6bdce4 --- /dev/null +++ b/demucs/audio_legacy.py @@ -0,0 +1,17 @@ +# This file is to extend support for torchaudio 2.1 + +import importlib +import os +import sys +import warnings + +if not "torchaudio" in sys.modules: + os.environ["TORCHAUDIO_USE_BACKEND_DISPATCHER"] = "0" +elif os.getenv("TORCHAUDIO_USE_BACKEND_DISPATCHER", default="1") == "1": + if sys.modules["torchaudio"].__version__ >= "2.1": + os.environ["TORCHAUDIO_USE_BACKEND_DISPATCHER"] = "0" + importlib.reload(sys.modules["torchaudio"]) + warnings.warn( + "TORCHAUDIO_USE_BACKEND_DISPATCHER is set to 0 and torchaudio is reloaded.", + ImportWarning, + ) diff --git a/demucs/hdemucs.py b/demucs/hdemucs.py index 711d4715..9992b60a 100644 --- a/demucs/hdemucs.py +++ b/demucs/hdemucs.py @@ -776,16 +776,18 @@ def forward(self, mix): # demucs issue #435 ##432 # NOTE: in this case z already is on cpu # TODO: remove this when mps supports complex numbers - x_is_mps = x.device.type == "mps" - if x_is_mps: + x_is_mps_xpu = x.device.type in ["mps", "xpu"] + x_device = x.device + if x_is_mps_xpu: x = x.cpu() zout = self._mask(z, x) x = self._ispec(zout, length) # back to mps device - if 
x_is_mps: - x = x.to('mps') + if x_is_mps_xpu: + x = x.to(x_device) + if self.hybrid: xt = xt.view(B, S, -1, length) diff --git a/demucs/htdemucs.py b/demucs/htdemucs.py index 5d2eaaa1..56568608 100644 --- a/demucs/htdemucs.py +++ b/demucs/htdemucs.py @@ -629,8 +629,9 @@ def forward(self, mix): # demucs issue #435 ##432 # NOTE: in this case z already is on cpu # TODO: remove this when mps supports complex numbers - x_is_mps = x.device.type == "mps" - if x_is_mps: + x_is_mps_xpu = x.device.type in ["mps", "xpu"] + x_device = x.device + if x_is_mps_xpu: x = x.cpu() zout = self._mask(z, x) @@ -643,8 +644,8 @@ def forward(self, mix): x = self._ispec(zout, length) # back to mps device - if x_is_mps: - x = x.to("mps") + if x_is_mps_xpu: + x = x.to(x_device) if self.use_train_segment: if self.training: diff --git a/demucs/repitch.py b/demucs/repitch.py index ebef7364..b69c0d25 100644 --- a/demucs/repitch.py +++ b/demucs/repitch.py @@ -9,6 +9,7 @@ import subprocess as sp import tempfile +from . import audio_legacy import torch import torchaudio as ta diff --git a/demucs/spec.py b/demucs/spec.py index 29250459..d8f6ee5e 100644 --- a/demucs/spec.py +++ b/demucs/spec.py @@ -11,8 +11,8 @@ def spectro(x, n_fft=512, hop_length=None, pad=0): *other, length = x.shape x = x.reshape(-1, length) - is_mps = x.device.type == 'mps' - if is_mps: + is_mps_xpu = x.device.type in ['mps', 'xpu'] + if is_mps_xpu: x = x.cpu() z = th.stft(x, n_fft * (1 + pad), @@ -32,8 +32,8 @@ def ispectro(z, hop_length=None, length=None, pad=0): n_fft = 2 * freqs - 2 z = z.view(-1, freqs, frames) win_length = n_fft // (1 + pad) - is_mps = z.device.type == 'mps' - if is_mps: + is_mps_xpu = z.device.type in ['mps', 'xpu'] + if is_mps_xpu: z = z.cpu() x = th.istft(z, n_fft, diff --git a/demucs/train.py b/demucs/train.py index 9aa7b64b..e045b83f 100644 --- a/demucs/train.py +++ b/demucs/train.py @@ -15,6 +15,7 @@ import hydra from hydra.core.global_hydra import GlobalHydra from omegaconf import OmegaConf +from . 
import audio_legacy import torch from torch import nn import torchaudio diff --git a/demucs/wav.py b/demucs/wav.py index 6acb9b5d..ca1e23a3 100644 --- a/demucs/wav.py +++ b/demucs/wav.py @@ -15,6 +15,7 @@ import musdb import julius +from . import audio_legacy import torch as th from torch import distributed import torchaudio as ta diff --git a/requirements.txt b/requirements.txt index 294290d3..d4832a2a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ openunmix pyyaml submitit torch>=1.8.1 -torchaudio>=0.8,<2.1 +torchaudio>=0.8,<2.2 tqdm treetable soundfile>=0.10.3;sys_platform=="win32" diff --git a/requirements_minimal.txt b/requirements_minimal.txt index 1940bf01..dcae84bc 100644 --- a/requirements_minimal.txt +++ b/requirements_minimal.txt @@ -6,5 +6,5 @@ lameenc>=1.2 openunmix pyyaml torch>=1.8.1 -torchaudio>=0.8,<2.1 +torchaudio>=0.8,<2.2 tqdm From 8174c5d2c259dabc69acd842fc4ba5111539d507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?= Date: Fri, 12 Jan 2024 13:46:36 +0100 Subject: [PATCH 6/6] update changelog --- demucs/__init__.py | 2 +- docs/release.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/demucs/__init__.py b/demucs/__init__.py index e02c0ada..3bf9f708 100644 --- a/demucs/__init__.py +++ b/demucs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. -__version__ = "4.1.0a2" +__version__ = "4.1.0a3" diff --git a/docs/release.md b/docs/release.md index 0aee2f71..df8f122f 100644 --- a/docs/release.md +++ b/docs/release.md @@ -1,6 +1,6 @@ # Release notes for Demucs -## V4.1.0a1, TBD +## V4.1.0a, TBD Get models list @@ -14,6 +14,8 @@ Added `--other-method`: method to get `no_{STEM}`, add up all the other stems (a Added type `HTDemucs` to type alias `AnyModel`. 
+Improved support for recent torchaudio versions (thanks @CarlGao4). + ## V4.0.1, 8th of September 2023 **From this version, Python 3.7 is no longer supported. This is not a problem since the latest PyTorch 2.0.0 no longer support it either.**