diff --git a/Untitled.ipynb b/Untitled.ipynb
deleted file mode 100644
index 0f8bb127..00000000
--- a/Untitled.ipynb
+++ /dev/null
@@ -1,1031 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "947f6aac-0c93-485c-9478-5cf617837214",
- "metadata": {},
- "outputs": [],
- "source": [
- "from audiocraft.models import MusicGenStem\n",
- "import torchaudio\n",
- "import gzip\n",
- "import json\n",
- "\n",
- "from audiocraft.utils.notebook import display_audio\n",
- "\n",
- "from audiocraft.data.audio_utils import convert_audio\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "c56cd9fe-89bb-4cd9-baf8-f3468f0a3bfe",
- "metadata": {},
- "outputs": [],
- "source": [
- "model = MusicGenStem.get_pretrained('facebook/musicgen-stem-6cb')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "d6502fd1-58f8-4145-a282-9e0f03b0dc8c",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Total number of lines across all files: 700\n"
- ]
- }
- ],
- "source": [
- "def count_lines_in_json_gz_files(file_paths):\n",
- " total_lines = 0\n",
- " all_paths = []\n",
- " for file_path in file_paths:\n",
- " with gzip.open(file_path, 'rt', encoding='utf-8') as file:\n",
- " for line in file:\n",
- " total_lines += 1\n",
- " dic = json.loads(line)\n",
- " all_paths.append(dic['path'])\n",
- " return total_lines, all_paths\n",
- "\n",
- "file_paths = ['/private/home/srouard/semencodec/egs/nv_32khz_mono_filtered/test/data.jsonl.gz']\n",
- "mappers = {\n",
- " \"/datasets01/datasets01\": \"/datasets01\",\n",
- " \"/datasets01/shutterstock-music-resampled/shutterstock_32khz_wav\": \"/large_experiments/audiocraft/datasets/datasets_32khz_mono/shutterstock\",\n",
- " \"/datasets01/shutterstock-music-resampled/p5_32khz_wav\": \"/large_experiments/audiocraft/datasets/datasets_32khz_mono/p5\",\n",
- " \"/checkpoint/jadecopet/datasets/mmi/mmi_11k_32khz\": \"/large_experiments/audiocraft/datasets/datasets_32khz_mono/mmi_11k\",\n",
- " \"/fsx-shutterstock-music-resampled/dataset/p5_32khz_wav\": \"/large_experiments/audiocraft/datasets/datasets_32khz_mono/p5\",\n",
- " \"/fsx-shutterstock-music-resampled/dataset/shutterstock_32khz_wav\": \"/large_experiments/audiocraft/datasets/datasets_32khz_mono/shutterstock\"\n",
- "}\n",
- "total_lines, all_paths = count_lines_in_json_gz_files(file_paths)\n",
- "print(f'Total number of lines across all files: {total_lines}')\n",
- "converted_all_paths = []\n",
- "for path in all_paths:\n",
- " for key in mappers.keys():\n",
- " if key in path:\n",
- " newpath = path.replace(key, mappers[key])\n",
- " converted_all_paths.append(newpath)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "bd2320d8-ef8a-415f-a225-0025d659baaa",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "/large_experiments/audiocraft/datasets/datasets_32khz_mono/p5/9e/a79/1b6/d0a6/ed3c/060747643.wav\n"
- ]
- }
- ],
- "source": [
- "path = converted_all_paths[7]\n",
- "print(path)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "13f90f5a-092c-41f8-960f-a570488486b2",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "melody_waveform, sr = torchaudio.load(path)\n",
- "\n",
- "t=15\n",
- "melody_waveform = melody_waveform[..., t*sr:(t+25)*sr]\n",
- "display_audio(melody_waveform, sample_rate=32000)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "62d82dd0-852a-4e6d-8f25-cd7e106ba509",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "torch.Size([1, 800000])"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "melody_waveform.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "a90b6483-b416-4403-abfd-138552da0acf",
- "metadata": {},
- "outputs": [],
- "source": [
- "model.set_generation_params(duration=17.)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "id": "80066d53-1db4-4198-ba92-97424b7ac374",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " 602 / 602\r"
- ]
- }
- ],
- "source": [
- "output, codes = model.regenerate_instruments_from_mixture(mixture=melody_waveform, mixture_sample_rate=sr, \n",
- " which_instruments_regenerate=['drums'],\n",
- " descriptions=['rock song with derbuka percussions', \n",
- " 'rock song with rim shot drums'], \n",
- " progress=True, return_tokens=True, return_non_compressed_stems=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "id": "b1da001d-8110-4172-9dd8-098f663e632d",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "bass\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "drums\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "other\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "mixture\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "output['mixture'] = sum(output.values())\n",
- "for k, v in output.items():\n",
- " print(k)\n",
- " display_audio(v, 32000)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "b6bbdf2a-e970-426e-ae4a-bf4e5fb804eb",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2048"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "model.lm.special_token_id"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "adc44161-115b-40e9-80be-35096bd58d04",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{0, 1, 2, 3, 4, 5, 6, 7}"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "set(range(8))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "127478a3-a0a2-42d9-9dd6-675bd2716d4f",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "50"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "model.compression_model.frame_rate"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "id": "19167b80-afe7-4e14-9cd5-86cd964e1b2c",
- "metadata": {},
- "outputs": [
- {
- "ename": "AttributeError",
- "evalue": "'MusicGenStem' object has no attribute 'sources'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[47], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msources\u001b[49m\n",
- "\u001b[0;31mAttributeError\u001b[0m: 'MusicGenStem' object has no attribute 'sources'"
- ]
- }
- ],
- "source": [
- "model.compression_model.sources"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 52,
- "id": "46e02574-9bb1-43c3-b46d-92ede0e41ff0",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['bass', 'drums', 'other']"
- ]
- },
- "execution_count": 52,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "model.compression_model.sources"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "id": "44d189f6-2f07-438b-b024-d2dbb62d1398",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "torch.Size([2, 1, 383360])"
- ]
- },
- "execution_count": 53,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "output['bass'].shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 58,
- "id": "21e3ccc7-185a-4d1c-8876-954968ee65c8",
- "metadata": {},
- "outputs": [],
- "source": [
- "stems = model._prepare_mixture_for_compression_model(melody_waveform[None], sr, )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 59,
- "id": "15e5b491-862b-4785-b265-8931fc898178",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "torch.Size([1, 3, 1, 800000])"
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "stems.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "fe6b5b13-e1de-47af-a13f-962e08aa6828",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "['a', 'b'].index('a')"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b234c283-f633-4c9d-84ab-8fca4c1cd633",
- "metadata": {},
- "source": [
- "# From true stems"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "1e5251dc-8e53-4ad6-a3f9-c79bcfa7eb0e",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "#path = '/private/home/srouard/audiocraft_xplo/.songs/get_lucky.mp3'\n",
- "path = '/checkpoint/robinsr/mmi_stems/mmi32_mono/test/Phillipe Bianco - Man Down/'\n",
- "\n",
- "drums, sr = torchaudio.load(path + 'drums.wav') \n",
- "bass, sr = torchaudio.load(path + 'bass.wav') \n",
- "other, sr = torchaudio.load(path + 'other.wav') \n",
- "\n",
- "t=80\n",
- "\n",
- "drums = drums[..., t*32000:(t+26)*32000]\n",
- "bass = bass[..., t*32000:(t+26)*32000]\n",
- "other = other[..., t*32000:(t+26)*32000]\n",
- "display_audio(drums, sample_rate=sr)\n",
- "display_audio(bass, sample_rate=sr)\n",
- "display_audio(other, sample_rate=sr)\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "78023c9b-715f-4dce-94c6-47d51a2eddf9",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "torch.Size([1, 3, 1, 832000])\n",
- "The model can only regenerate on sequences of maximum 25 seconds, we cropped to 25 seconds\n",
- " 502 / 502\r"
- ]
- }
- ],
- "source": [
- "output, codes = model.regenerate_instruments_from_stems(stems={'drums': drums}, stems_sample_rate=sr, \n",
- " which_instruments_regenerate=['bass'],\n",
- " descriptions=['Funky bassline', \n",
- " 'Funky bassline'], \n",
- " progress=True, return_tokens=True, return_non_compressed_stems=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "0a3c223a-95d7-4486-9a9d-c6790df7e9dc",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "bass\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "drums\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "other\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "mixture\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "output['mixture'] = sum(output.values())\n",
- "for k, v in output.items():\n",
- " print(k)\n",
- " display_audio(v, 32000)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a4c7b55d-8898-4972-9f22-c41fc68863f2",
- "metadata": {},
- "source": [
- "# MusicGen Style"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "4b895efc-4570-4f42-b770-db3444a79443",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "M\taudiocraft/models/lm.py\n",
- "M\taudiocraft/models/musicgen.py\n",
- "M\taudiocraft/modules/conditioners.py\n",
- "Already on 'style_conditioner'\n",
- "Your branch is up to date with 'origin/style_conditioner'.\n"
- ]
- }
- ],
- "source": [
- "!git checkout style_conditioner"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "38c1f150-f4ac-4d32-90cc-d177fb6c14f2",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/private/home/srouard/miniconda3/envs/audiocraft_allin1_env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n",
- "/private/home/srouard/miniconda3/envs/audiocraft_allin1_env/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
- " warnings.warn(\n"
- ]
- }
- ],
- "source": [
- "from audiocraft.models import MusicGen\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "0d5cb1ae-472e-4520-81f8-5a3a5736c555",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "WARNING: feature_extractor_cqt requires the libray 'nnAudio'\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/private/home/srouard/miniconda3/envs/audiocraft_allin1_env/lib/python3.9/site-packages/torch/_utils.py:776: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()\n",
- " return self.fget.__get__(instance, owner)()\n"
- ]
- }
- ],
- "source": [
- "model = MusicGen.get_pretrained('facebook/musicgen-style')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "9ad5a858-1fd7-4377-b47a-796907c4a376",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " 403 / 403\r"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "import torchaudio\n",
- "from audiocraft.utils.notebook import display_audio\n",
- "\n",
- "model.set_generation_params(\n",
- " duration=8, # generate 8 seconds, can go up to 30\n",
- " use_sampling=True, \n",
- " top_k=250,\n",
- " cfg_coef=3., # Classifier Free Guidance coefficient \n",
- " cfg_coef_beta=5., # double CFG is necessary for text-and-style conditioning\n",
- " # Beta in the double CFG formula. between 1 and 9. When set to 1 \n",
- " # it is equivalent to normal CFG. \n",
- ")\n",
- "\n",
- "model.set_style_conditioner_params(\n",
- " eval_q=1, # integer between 1 and 6\n",
- " # eval_q is the level of quantization that passes\n",
- " # through the conditioner. When low, the models adheres less to the \n",
- " # audio conditioning\n",
- " excerpt_length=3., # the length in seconds that is taken by the model in the provided excerpt\n",
- " )\n",
- "\n",
- "melody_waveform, sr = torchaudio.load(\"/private/home/srouard/audiocraft/dataset/example/electro_1.mp3\")\n",
- "melody_waveform = melody_waveform.unsqueeze(0).repeat(3, 1, 1)\n",
- "\n",
- "descriptions = [\"8-bit old video game music\", \"Chill lofi remix\", \"80s New wave with synthesizer\"]\n",
- "\n",
- "output = model.generate_with_chroma(\n",
- " descriptions=descriptions,\n",
- " melody_wavs=melody_waveform,\n",
- " melody_sample_rate=sr,\n",
- " progress=True, return_tokens=True\n",
- ")\n",
- "display_audio(output[0], sample_rate=32000)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "cc656dba-dfe3-4cbe-8ca9-f3b12ab1d88c",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.18"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}