Skip to content

Commit

Permalink
Restructured to output stems in subfolder and separate backing vocals…
Browse files Browse the repository at this point in the history
… properly
  • Loading branch information
beveradb committed Dec 5, 2024
1 parent 1b26fe9 commit ef3a2df
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 75 deletions.
135 changes: 85 additions & 50 deletions karaoke_prep/karaoke_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@ def __init__(
dry_run=False,
log_level=logging.DEBUG,
log_formatter=None,
model_names=["UVR_MDXNET_KARA_2.onnx", "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt"],
clean_instrumental_model="model_bs_roformer_ep_317_sdr_12.9755.ckpt",
backing_vocals_models=["mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt", "UVR-BVE-4B_SN-44100-1.pth"],
other_stems_models=["htdemucs_6s.yaml"],
model_file_dir=os.path.join(tempfile.gettempdir(), "audio-separator-models"),
output_dir=".",
lossless_output_format="FLAC",
lossy_output_format="MP3",
use_cuda=False,
use_coreml=False,
normalization_enabled=True,
Expand Down Expand Up @@ -89,11 +90,12 @@ def __init__(
self.artist = artist
self.title = title
self.filename_pattern = filename_pattern
self.model_names = model_names
self.clean_instrumental_model = clean_instrumental_model
self.backing_vocals_models = backing_vocals_models
self.other_stems_models = other_stems_models
self.model_file_dir = model_file_dir
self.output_dir = output_dir
self.lossless_output_format = lossless_output_format.lower()
self.lossy_output_format = lossy_output_format.lower()
self.use_cuda = use_cuda
self.use_coreml = use_coreml
self.normalization_enabled = normalization_enabled
Expand Down Expand Up @@ -717,6 +719,80 @@ def hex_to_rgb(self, hex_color):
hex_color = hex_color.lstrip("#")
return tuple(int(hex_color[i : i + 2], 16) for i in (0, 2, 4))

def process_audio_separation(self, audio_file, artist_title, track_output_dir):
    """Separate ``audio_file`` into stems in three passes and return the output paths.

    Pass 1 runs ``self.clean_instrumental_model`` on ``audio_file``; the
    instrumental is placed in ``track_output_dir`` and the vocals in a
    ``stems`` subfolder. Pass 2 runs every model in ``self.other_stems_models``
    on ``audio_file``. Pass 3 runs every model in ``self.backing_vocals_models``
    on the clean vocals produced by pass 1, splitting them into lead and
    backing vocals.

    Args:
        audio_file: Path of the audio file to separate.
        artist_title: Prefix used for every output file name.
        track_output_dir: Directory receiving the instrumental; the other
            stems go to ``<track_output_dir>/stems``.

    Returns:
        A dict with three keys:
        ``"clean_instrumental"`` -> ``{"vocals": path, "instrumental": path}``,
        ``"other_stems"`` -> ``{model: {stem_name: path}}``,
        ``"backing_vocals"`` -> ``{model: {"lead_vocals": path, "backing_vocals": path}}``.
        Entries are only present for stems the separator actually produced.

    Raises:
        Exception: If backing vocals models are configured but pass 1 did not
            produce a clean vocals file to feed them.
    """
    # Local import: shutil.move works across filesystems and overwrites an
    # existing destination file, which a bare os.rename does not guarantee.
    import shutil

    from audio_separator.separator import Separator

    self.logger.info(f"Starting audio separation process for {artist_title}")

    separator = Separator(
        log_level=self.log_level,
        log_formatter=self.log_formatter,
        model_file_dir=self.model_file_dir,
        output_format=self.lossless_output_format,
    )

    # Create a "stems" subfolder
    stems_dir = os.path.join(track_output_dir, "stems")
    os.makedirs(stems_dir, exist_ok=True)
    self.logger.info(f"Created stems directory: {stems_dir}")

    extension = self.lossless_output_format

    def build_path(directory, label, model):
        # All outputs share the "<artist_title> (<label> <model>).<ext>" naming scheme.
        return os.path.join(directory, f"{artist_title} ({label} {model}).{extension}")

    def parse_stem_name(file_name):
        # Presumably the separator names its outputs "<base>_(<Stem>)_<model>.<ext>";
        # verify against the audio-separator version in use. When the markers are
        # missing, fall back to the bare file name instead of slicing with -1 indices.
        start_marker = file_name.rfind("_(")
        end_marker = file_name.rfind(")_")
        if start_marker == -1 or end_marker == -1 or end_marker <= start_marker + 2:
            self.logger.warning(f"Could not determine stem name from separator output: {file_name}")
            return os.path.splitext(file_name)[0]
        return file_name[start_marker + 2 : end_marker]

    result = {"clean_instrumental": {}, "other_stems": {}, "backing_vocals": {}}

    # Step 1: Separate using clean_instrumental_model
    self.logger.info(f"Step 1: Separating using clean instrumental model: {self.clean_instrumental_model}")
    instrumental_path = build_path(track_output_dir, "Instrumental", self.clean_instrumental_model)
    vocals_path = build_path(stems_dir, "Vocals", self.clean_instrumental_model)

    separator.load_model(model_filename=self.clean_instrumental_model)
    for produced in separator.separate(audio_file):
        # Match on the file name only, so a "(Vocals)" in a parent directory
        # name cannot cause a misclassification.
        produced_name = os.path.basename(produced)
        if "(Vocals)" in produced_name:
            shutil.move(produced, vocals_path)
            result["clean_instrumental"]["vocals"] = vocals_path
        elif "(Instrumental)" in produced_name:
            shutil.move(produced, instrumental_path)
            result["clean_instrumental"]["instrumental"] = instrumental_path

    # Step 2: Separate using other_stems_models
    other_models = self.other_stems_models or []
    self.logger.info(f"Step 2: Separating using other stems models: {self.other_stems_models}")
    for model in other_models:
        self.logger.info(f"Processing with model: {model}")
        separator.load_model(model_filename=model)

        model_stems = {}
        result["other_stems"][model] = model_stems
        for produced in separator.separate(audio_file):
            stem_name = parse_stem_name(os.path.basename(produced))
            destination = build_path(stems_dir, stem_name, model)
            shutil.move(produced, destination)
            model_stems[stem_name] = destination

    # Step 3: Separate clean vocals using backing_vocals_models
    backing_models = self.backing_vocals_models or []
    self.logger.info(f"Step 3: Separating clean vocals using backing vocals models: {self.backing_vocals_models}")
    if backing_models and not os.path.isfile(vocals_path):
        # Fail with a clear message rather than an opaque error from inside the separator.
        raise Exception(f"Error: Clean vocals file not found for backing vocals separation: {vocals_path}")

    for model in backing_models:
        self.logger.info(f"Processing with model: {model}")
        separator.load_model(model_filename=model)

        model_stems = {}
        result["backing_vocals"][model] = model_stems
        for produced in separator.separate(vocals_path):
            produced_name = os.path.basename(produced)
            if "(Vocals)" in produced_name:
                destination = build_path(stems_dir, "Lead Vocals", model)
                shutil.move(produced, destination)
                model_stems["lead_vocals"] = destination
            elif "(Instrumental)" in produced_name:
                # The input here is the clean vocals, so the model's
                # "(Instrumental)" output is stored as the backing vocals.
                destination = build_path(stems_dir, "Backing Vocals", model)
                shutil.move(produced, destination)
                model_stems["backing_vocals"] = destination

    self.logger.info("Audio separation process completed")
    return result

def prep_single_track(self):
self.logger.info(f"Preparing single track: {self.artist} - {self.title}")

Expand Down Expand Up @@ -847,65 +923,24 @@ def prep_single_track(self):
existing_instrumental_extension = os.path.splitext(self.existing_instrumental)[1]

instrumental_path = os.path.join(track_output_dir, f"{artist_title} (Instrumental Custom){existing_instrumental_extension}")
instrumental_path_lossy = os.path.join(track_output_dir, f"{artist_title} (Instrumental Custom).{self.lossy_output_format}")

shutil.copy2(self.existing_instrumental, instrumental_path)
self.convert_to_lossy(instrumental_path, instrumental_path_lossy)

processed_track["separated_audio"]["Custom"] = {
"instrumental": instrumental_path,
"instrumental_lossy": instrumental_path_lossy,
"vocals": None,
"vocals_lossy": None,
}
else:
self.logger.info(f"Separating audio for track: {self.title} by {self.artist} using models: {', '.join(self.model_names)}")
for model_name in self.model_names:
processed_track[f"separated_audio"][model_name] = {}
instrumental_path = os.path.join(
track_output_dir, f"{artist_title} (Instrumental {model_name}).{self.lossless_output_format}"
)
vocals_path = os.path.join(track_output_dir, f"{artist_title} (Vocals {model_name}).{self.lossless_output_format}")
instrumental_path_lossy = os.path.join(
track_output_dir, f"{artist_title} (Instrumental {model_name}).{self.lossy_output_format}"
)
vocals_path_lossy = os.path.join(track_output_dir, f"{artist_title} (Vocals {model_name}).{self.lossy_output_format}")

if not (os.path.isfile(instrumental_path) and os.path.isfile(vocals_path)):
self.separate_audio(
audio_file=processed_track["input_audio_wav"],
model_name=model_name,
track_output_dir=track_output_dir,
artist_title=artist_title,
instrumental_path=instrumental_path,
vocals_path=vocals_path,
)
if os.path.isfile(instrumental_path):
self.convert_to_lossy(instrumental_path, instrumental_path_lossy)
if os.path.isfile(vocals_path):
self.convert_to_lossy(vocals_path, vocals_path_lossy)

processed_track[f"separated_audio"][model_name]["instrumental"] = instrumental_path
processed_track[f"separated_audio"][model_name]["vocals"] = vocals_path
processed_track[f"separated_audio"][model_name]["instrumental_lossy"] = instrumental_path_lossy
processed_track[f"separated_audio"][model_name]["vocals_lossy"] = vocals_path_lossy
self.logger.info(f"Separating audio for track: {self.title} by {self.artist}")
separation_results = self.process_audio_separation(
audio_file=processed_track["input_audio_wav"], artist_title=artist_title, track_output_dir=track_output_dir
)
processed_track["separated_audio"] = separation_results

self.logger.info("Script finished, audio downloaded, lyrics fetched and audio separated!")

return processed_track

def convert_to_lossy(self, input_filename, output_filename):
    """Convert ``input_filename`` to the configured lossy format with ffmpeg.

    The command is ``self.ffmpeg_base_command`` followed by ``-i <input>``,
    ``-q:a 0`` when ``self.lossy_output_format`` is ``"mp3"``, and the output
    path. ffmpeg is invoked with an argument list rather than a shell string,
    so file names containing quotes, spaces or shell metacharacters are passed
    through verbatim.

    Args:
        input_filename: Path of the existing audio file to convert.
        output_filename: Path of the lossy file to write.

    Returns:
        None. A failed or unlaunchable ffmpeg is logged as an error instead of
        raising, matching the previous best-effort behaviour.

    Raises:
        Exception: If ``input_filename`` is ``None`` or not an existing file.
    """
    import shlex
    import subprocess

    if input_filename is None or not os.path.isfile(input_filename):
        raise Exception(f"Error: Invalid input file provided for convert_to_lossy: {input_filename}")

    self.logger.info(f"Converting {self.lossless_output_format} audio to lossy {self.lossy_output_format} format")

    # NOTE(review): assumes ffmpeg_base_command is a plain "executable + flags"
    # string without shell features (pipes, redirects) - confirm where it is built.
    # Non-POSIX splitting on Windows keeps backslashes in executable paths intact.
    command = shlex.split(self.ffmpeg_base_command, posix=(os.name != "nt"))
    command += ["-i", input_filename]
    if self.lossy_output_format == "mp3":
        command += ["-q:a", "0"]
    command.append(output_filename)

    printable_command = " ".join(shlex.quote(part) for part in command)
    self.logger.debug(f"Running command: {printable_command}")

    try:
        completed = subprocess.run(command, check=False)
    except OSError as error:
        # e.g. the ffmpeg executable is missing; os.system never raised for this.
        self.logger.error(f"Failed to run command: {printable_command} ({error})")
        return

    if completed.returncode != 0:
        self.logger.error(f"Command exited with status {completed.returncode}: {printable_command}")

def process_playlist(self):
if self.artist is None or self.title is None:
raise Exception("Error: Artist and Title are required for processing a local file.")
Expand Down
66 changes: 42 additions & 24 deletions karaoke_prep/utils/prep_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,23 @@ def main():
)

parser.add_argument(
"--model_names",
"--clean_instrumental_model",
default="model_bs_roformer_ep_317_sdr_12.9755.ckpt",
help="Optional: Model for clean instrumental separation (default: %(default)s).",
)

parser.add_argument(
"--backing_vocals_models",
nargs="+",
default=["mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt", "UVR-BVE-4B_SN-44100-1.pth"],
help="Optional: List of models for backing vocals separation (default: %(default)s).",
)

parser.add_argument(
"--other_stems_models",
nargs="+",
default=[
"UVR_MDXNET_KARA_2.onnx",
"mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt",
"2_HP-UVR.pth",
"model_bs_roformer_ep_317_sdr_12.9755.yaml",
"MDX23C-8KFFT-InstVoc_HQ_2.ckpt",
"htdemucs_6s.yaml",
],
help="Optional: list of model names to be used for separation (default: %(default)s). Example: --model_names UVR_MDXNET_KARA_2.onnx UVR-MDX-NET-Inst_HQ_4.onnx",
default=["htdemucs_6s.yaml"],
help="Optional: List of models for other stems separation (default: %(default)s).",
)

default_model_dir_unix = "/tmp/audio-separator-models/"
Expand Down Expand Up @@ -94,12 +100,6 @@ def main():
help="Optional: lossless output format for separated audio (default: FLAC). Example: --lossless_output_format=WAV",
)

parser.add_argument(
"--lossy_output_format",
default="MP3",
help="Optional: lossy output format for separated audio (default: MP3). Example: --lossy_output_format=OGG",
)

parser.add_argument(
"--use_cuda",
action="store_true",
Expand Down Expand Up @@ -361,7 +361,9 @@ def main():
logger.setLevel(log_level)

if args.existing_instrumental:
args.model_names = ["Custom"]
args.clean_instrumental_model = None
args.backing_vocals_models = []
args.other_stems_models = []

logger.info(f"KaraokePrep beginning with input_media: {input_media} artist: {artist} and title: {title}")

Expand All @@ -373,11 +375,12 @@ def main():
dry_run=args.dry_run,
log_formatter=log_formatter,
log_level=log_level,
model_names=args.model_names,
clean_instrumental_model=args.clean_instrumental_model,
backing_vocals_models=args.backing_vocals_models,
other_stems_models=args.other_stems_models,
model_file_dir=args.model_file_dir,
output_dir=args.output_dir,
lossless_output_format=args.lossless_output_format,
lossy_output_format=args.lossy_output_format,
use_cuda=args.use_cuda,
use_coreml=args.use_coreml,
normalization_enabled=args.normalize,
Expand Down Expand Up @@ -429,11 +432,26 @@ def main():
logger.info(f" Lyrics: {track['lyrics']}")
logger.info(f" Processed Lyrics: {track['processed_lyrics']}")

for model_name in args.model_names:
logger.info(f" Instrumental: {track['separated_audio'][model_name]['instrumental']}")
logger.info(f" Instrumental (Lossy): {track['separated_audio'][model_name]['instrumental_lossy']}")
logger.info(f" Vocals: {track['separated_audio'][model_name]['vocals']}")
logger.info(f" Vocals (Lossy): {track['separated_audio'][model_name]['vocals_lossy']}")
logger.info(f" Separated Audio:")

# Clean Instrumental
logger.info(f" Clean Instrumental Model:")
for stem_type, file_path in track["separated_audio"]["clean_instrumental"].items():
logger.info(f" {stem_type.capitalize()}: {file_path}")

# Other Stems
logger.info(f" Other Stems Models:")
for model, stems in track["separated_audio"]["other_stems"].items():
logger.info(f" Model: {model}")
for stem_type, file_path in stems.items():
logger.info(f" {stem_type.capitalize()}: {file_path}")

# Backing Vocals
logger.info(f" Backing Vocals Models:")
for model, stems in track["separated_audio"]["backing_vocals"].items():
logger.info(f" Model: {model}")
for stem_type, file_path in stems.items():
logger.info(f" {stem_type.capitalize()}: {file_path}")


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "karaoke-prep"
version = "0.27.2"
version = "0.28.0"
description = "Prepare for karaoke video creation, by downloading audio and lyrics for a specified song or playlist from youtube and separating audio stems. After syncing, finalise the video with a title screen!"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit ef3a2df

Please sign in to comment.