Skip to content

Commit

Permalink
Updated to latest lyrics-transcriber, cleaned up output filepath to s…
Browse files Browse the repository at this point in the history
…upport non-youtube sources and contain artist and title
  • Loading branch information
beveradb committed Jul 9, 2023
1 parent 7ff4de6 commit 88172f3
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
/.cache/
/models/*.onnx
*.wav
karaoke-generator-output-*
/karaoke-*

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
53 changes: 36 additions & 17 deletions karaoke_generator/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import subprocess
import yt_dlp
import slugify
import tldextract
from audio_separator import Separator
from lyrics_transcriber import LyricsTranscriber

Expand Down Expand Up @@ -56,22 +57,15 @@ def __init__(
self.output_dir = output_dir

self.audio_file = None
self.youtube_url = None
self.source_url = None
self.source_site = None
self.source_video_id = None
self.input_source_slug = None

parsed_url = urllib.parse.urlparse(self.input_path)
if parsed_url.scheme and parsed_url.netloc:
self.youtube_url = self.input_path
self.input_source_slug = slugify.slugify(parsed_url.hostname + "-" + parsed_url.query, lowercase=False)
self.logger.debug(f"Input path was valid URL, set youtube_url and input_source_slug: {self.input_source_slug}")
elif os.path.exists(self.input_path):
self.audio_file = self.input_path
self.input_source_slug = slugify.slugify(os.path.basename(self.audio_file), lowercase=False)
self.logger.debug(f"Input path was valid file path, set audio_file and input_source_slug: {self.input_source_slug}")
else:
raise Exception("Input path must be either a valid file path or URL")
self.parse_input_source()

if self.output_dir is None:
self.output_dir = os.path.join(os.getcwd(), "karaoke-generator-output-" + self.input_source_slug)
self.output_dir = os.path.join(os.getcwd(), "karaoke-" + self.input_source_slug)

self.output_filename_slug = None
self.youtube_video_file = None
Expand All @@ -83,11 +77,36 @@ def __init__(
self.output_values = {}
self.create_folders()

def parse_input_source(self):
    """Classify ``self.input_path`` as a URL or a local file and build the slug.

    URL input (both scheme and netloc present):
        * ``self.source_url`` is set to the input path.
        * ``self.source_site`` is set to the registered domain reported by
          ``tldextract`` for the lower-cased netloc.
        * If the site contains "youtube" and the query string carries a
          non-empty ``v`` parameter, ``self.source_video_id`` is set and the
          slug becomes ``youtube-<video id>``.
        * Otherwise the slug is the site joined to the slugified URL path.

    Existing local path:
        * ``self.audio_file`` is set and the slug is the slugified basename.

    Finally, the slugified artist and title (when non-empty) are prepended to
    ``self.input_source_slug``, separated by dashes.

    Raises:
        Exception: if ``self.input_path`` is neither a URL nor an existing path.
    """
    parsed_url = urllib.parse.urlparse(self.input_path)
    if parsed_url.scheme and parsed_url.netloc:
        self.source_url = self.input_path

        ext = tldextract.extract(parsed_url.netloc.lower())
        self.source_site = ext.registered_domain

        # Not every YouTube URL has a "v" parameter (e.g. /shorts/, /embed/,
        # /playlist), so look it up defensively instead of indexing and
        # raising KeyError. parse_qs drops blank values by default.
        video_ids = []
        if "youtube" in self.source_site:
            video_ids = urllib.parse.parse_qs(parsed_url.query).get("v", [])

        if video_ids:
            self.source_video_id = video_ids[0]
            self.input_source_slug = "youtube-" + self.source_video_id
        else:
            # slugify strips leading/trailing separators, so a "-" prefixed to
            # the path is discarded; join explicitly so the site and the path
            # stay separated, and skip whichever part is empty.
            path_slug = slugify.slugify(parsed_url.path, lowercase=False)
            self.input_source_slug = "-".join(filter(None, [self.source_site, path_slug]))

        self.logger.debug(f"Input path was valid URL, set source_url and input_source_slug: {self.input_source_slug}")
    elif os.path.exists(self.input_path):
        self.audio_file = self.input_path
        self.input_source_slug = slugify.slugify(os.path.basename(self.audio_file), lowercase=False)
        self.logger.debug(f"Input path was valid file path, set audio_file and input_source_slug: {self.input_source_slug}")
    else:
        raise Exception("Input path must be either a valid file path or URL")

    # Only slugify artist/title when they are non-empty: an unset (None) value
    # cannot be slugified, and an empty one contributes nothing to the slug.
    # NOTE(review): assumes self.artist / self.title are assigned before this
    # method runs — confirm against __init__.
    name_parts = [slugify.slugify(part) for part in (self.artist, self.title) if part]
    self.input_source_slug = "-".join(filter(None, [*name_parts, self.input_source_slug]))

def generate(self):
self.logger.info("KaraokeGenerator beginning generation")

if self.audio_file is None and self.youtube_url is not None:
self.logger.debug(f"audio_file is none and youtube_url is {self.youtube_url}, fetching video from youtube")
if self.audio_file is None and self.source_url is not None:
self.logger.debug(f"audio_file is none and source_url is {self.source_url}, fetching video from youtube")
self.download_youtube_video()

self.separate_audio()
Expand Down Expand Up @@ -188,7 +207,7 @@ def download_youtube_video(self):

# Download the original highest quality file
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
youtube_info = ydl.extract_info(self.youtube_url, download=False)
youtube_info = ydl.extract_info(self.source_url, download=False)

temp_download_filepath = ydl.prepare_filename(youtube_info)
self.logger.debug(f"temp_download_filepath: {temp_download_filepath}")
Expand All @@ -210,7 +229,7 @@ def download_youtube_video(self):
with open(ydl_info_cache_file, "w") as cache_file:
json.dump(ydl.sanitize_info(youtube_info), cache_file, indent=4)

ydl.download([self.youtube_url])
ydl.download([self.source_url])
shutil.move(temp_download_filepath, youtube_info["download_filepath"])
self.youtube_video_file = youtube_info["download_filepath"]
self.logger.debug(f"successfully downloaded youtube video to path: {self.youtube_video_file}")
Expand Down
40 changes: 36 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "karaoke-generator"
version = "0.4.1"
version = "0.4.2"
description = "Fully automated creation of _acceptable_ karaoke music videos from any music on YouTube, using open source tools and AI (e.g. Whisper and MDX-Net)"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
Expand All @@ -12,9 +12,10 @@ python = ">=3.9,<3.11"
yt-dlp = "^2023.6.22"
pydub = "^0.25.1"
audio-separator = "^0.3"
lyrics-transcriber = "^0.6"
lyrics-transcriber = "^0.6.3"
python-slugify = "^8.0.1"
regex = "^2023.6.3"
tldextract = "^3.4"
# Note: after adding lyrics-transcriber with poetry lock, I then removed all traces of triton
# from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper.
# This was the only way I was able to get a working cross-platform build published to PyPI.
Expand Down

0 comments on commit 88172f3

Please sign in to comment.