Skip to content

Commit

Permalink
Upgraded dependencies to use latest audio-separator with denoising support, relaxing Torch version req from lyrics-transcriber. Improved youtube title extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
beveradb committed Aug 5, 2023
1 parent f23d4b6 commit cab6526
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 209 deletions.
35 changes: 27 additions & 8 deletions karaoke_generator/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,16 @@ def parse_input_source(self):
else:
raise Exception("Input path must be either a valid file path or URL")

self.input_source_slug = "-".join(filter(None, [slugify.slugify(self.artist), slugify.slugify(self.title), self.input_source_slug]))
self.input_source_slug = "-".join(
filter(
None,
[
slugify.slugify(self.artist) if self.artist else None,
slugify.slugify(self.title) if self.title else None,
self.input_source_slug,
],
)
)

def generate(self):
self.logger.info("KaraokeGenerator beginning generation")
Expand Down Expand Up @@ -236,15 +245,25 @@ def download_youtube_video(self):

if self.title is None:
self.logger.debug(f"Song title not specified, attempting to split from YouTube title: {youtube_info['title']}")
# Define the hyphen variations pattern
hyphen_pattern = regex.compile(r" [^[:ascii:]-_\p{Dash}] ")
# Split the string using the hyphen variations pattern
title_parts = hyphen_pattern.split(youtube_info["title"])

self.artist = title_parts[0]
self.title = title_parts[1]
# Define the pattern using regular expressions for possible hyphen-like characters
hyphen_pattern = regex.compile(r" [-\u2010-\u2015] ")

# Split the string using the hyphen pattern
title_parts = hyphen_pattern.split(youtube_info["title"], maxsplit=1)

if len(title_parts) < 2:
self.artist = None
self.title = None
self.logger.warning("Failed to extract artist and title from YouTube title.")
else:
self.artist = title_parts[0].strip()
self.title = title_parts[1].strip()

# Optional: further split to remove additional info, such as "(Painkiller Sessions 1990) [Audio]"
self.title = self.title.split(" (", 1)[0].strip()

print(f"Guessed metadata from title: Artist: {self.artist}, Title: {self.title}")
self.logger.debug(f"Guessed metadata from title: Artist: {self.artist}, Title: {self.title}")

# Extract audio to WAV file using ffmpeg
self.audio_file = os.path.join(self.cache_dir, self.output_filename_slug + ".wav")
Expand Down
Loading

0 comments on commit cab6526

Please sign in to comment.