diff --git a/README.en.md b/README.en.md
index 178a65b..94f39c0 100644
--- a/README.en.md
+++ b/README.en.md
@@ -74,6 +74,7 @@
Links or file paths: Pass the links or file paths of the materials to be transcribed directly after the Tafrigh tool name. For example: tafrigh "https://yout..." "https://yout..." "C:\Users\ieasybooks\leactue.wav"
Skip transcription if output exists: Use the --skip_if_output_exist
option to skip transcription if the required outputs already exist in the specified output folder
Number of download retries: If downloading a full playlist using the yt-dlp
library, some items may fail to download. The --download_retries
option can be used to specify the number of retry attempts if a download fails. The default value is 3
+ Additional options for yt-dlp
: You can pass additional options to the yt-dlp
library using the --yt_dlp_options
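option in valid JSON format. For example, to download only the first 10 items from a playlist, pass --yt_dlp_options '{"playlist_items": "1-10"}'. Options passed this way are applied last, so they override Tafrigh's own yt-dlp settings whenever the same key is set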
@@ -158,7 +159,7 @@
```
➜ tafrigh --help
-usage: tafrigh [-h] [--version] [--skip_if_output_exist | --no-skip_if_output_exist] [--download_retries DOWNLOAD_RETRIES] [--verbose | --no-verbose] [-m MODEL_NAME_OR_PATH] [-t {transcribe,translate}]
+usage: tafrigh [-h] [--version] [--skip_if_output_exist | --no-skip_if_output_exist] [--download_retries DOWNLOAD_RETRIES] [--yt_dlp_options YT_DLP_OPTIONS] [--verbose | --no-verbose] [-m MODEL_NAME_OR_PATH] [-t {transcribe,translate}]
[-l {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,zh}]
[--use_faster_whisper | --no-use_faster_whisper] [--beam_size BEAM_SIZE] [--ct2_compute_type {default,int8,int8_float16,int16,float16}] [-w WIT_CLIENT_ACCESS_TOKENS [WIT_CLIENT_ACCESS_TOKENS ...]] [--max_cutting_duration [1-17]]
[--min_words_per_segment MIN_WORDS_PER_SEGMENT] [--save_files_before_compact | --no-save_files_before_compact] [--save_yt_dlp_responses | --no-save_yt_dlp_responses] [--output_sample OUTPUT_SAMPLE]
@@ -175,6 +176,8 @@ Input:
Whether to skip generating the output if the output file already exists.
--download_retries DOWNLOAD_RETRIES
Number of retries for yt-dlp downloads that fail.
+ --yt_dlp_options YT_DLP_OPTIONS
+ Additional options to pass to yt-dlp in valid JSON format (e.g. `'{"playlist_items": "1-10"}'`).
--verbose, --no-verbose
Whether to print out the progress and debug messages.
diff --git a/README.md b/README.md
index c244393..b6d0cd3 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@
الروابط أو مسارات الملفات: يجب تمرير الروابط أو مسارات الملفات للمواد المُراد تفريغها بعد اسم أداة تفريغ بشكل مباشر. على سبيل المثال: tafrigh "https://yout..." "https://yout..." "C:\Users\ieasybooks\leactue.wav"
تخطي عملية التفريغ في حال وجود المخرجات مسبقًا: يمكن تمرير الاختيار --skip_if_output_exist
لتخطي عملية التفريغ إذا كانت المخرجات المطلوبة موجودة بالفعل في مجلد الإخراج المحدد
عدد مرات محاولة إعادة تحميل المواد: قد يفشل تحميل بعض المواد عند تحميل قائمة تشغيل كاملة باستخدام مكتبة yt-dlp
، يمكن من خلال الاختيار --download_retries
تحديد عدد مرات محاولة إعادة التحميل في حال فشل تحميل إحدى المواد. القيمة الافتراضية هي 3
+ إضافة الخيارات المخصصة لمكتبة yt-dlp
: يمكنك تمرير الاختيار --yt_dlp_options
لإضافة الخيارات المخصصة لمكتبة yt-dlp
في صيغة JSON. على سبيل المثال، لتحميل أول 10 مواد فقط من قائمة تشغيل YouTube، يمكنك تمرير --yt_dlp_options '{"playlist_items": "1-10"}'
@@ -157,7 +158,7 @@
```
➜ tafrigh --help
-usage: tafrigh [-h] [--version] [--skip_if_output_exist | --no-skip_if_output_exist] [--download_retries DOWNLOAD_RETRIES] [--verbose | --no-verbose] [-m MODEL_NAME_OR_PATH] [-t {transcribe,translate}]
+usage: tafrigh [-h] [--version] [--skip_if_output_exist | --no-skip_if_output_exist] [--download_retries DOWNLOAD_RETRIES] [--yt_dlp_options YT_DLP_OPTIONS] [--verbose | --no-verbose] [-m MODEL_NAME_OR_PATH] [-t {transcribe,translate}]
[-l {af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,zh}]
[--use_faster_whisper | --no-use_faster_whisper] [--beam_size BEAM_SIZE] [--ct2_compute_type {default,int8,int8_float16,int16,float16}] [-w WIT_CLIENT_ACCESS_TOKENS [WIT_CLIENT_ACCESS_TOKENS ...]] [--max_cutting_duration [1-17]]
[--min_words_per_segment MIN_WORDS_PER_SEGMENT] [--save_files_before_compact | --no-save_files_before_compact] [--save_yt_dlp_responses | --no-save_yt_dlp_responses] [--output_sample OUTPUT_SAMPLE]
@@ -174,6 +175,8 @@ Input:
Whether to skip generating the output if the output file already exists.
--download_retries DOWNLOAD_RETRIES
Number of retries for yt-dlp downloads that fail.
+ --yt_dlp_options YT_DLP_OPTIONS
+ Additional options to pass to yt-dlp in valid JSON format (e.g. `'{"playlist_items": "1-10"}'`).
--verbose, --no-verbose
Whether to print out the progress and debug messages.
diff --git a/pyproject.toml b/pyproject.toml
index 1dcb496..5110857 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tafrigh"
-version = "1.6.2"
+version = "1.7.0"
description = "تفريغ النصوص وإنشاء ملفات SRT و VTT باستخدام نماذج Whisper وتقنية wit.ai."
authors = ["EasyBooks "]
license = "MIT"
diff --git a/tafrigh/cli.py b/tafrigh/cli.py
index fb2668c..9ca3517 100644
--- a/tafrigh/cli.py
+++ b/tafrigh/cli.py
@@ -43,6 +43,7 @@ def main():
urls_or_paths=args.urls_or_paths,
skip_if_output_exist=args.skip_if_output_exist,
download_retries=args.download_retries,
+ yt_dlp_options=args.yt_dlp_options,
verbose=args.verbose,
),
whisper=Config.Whisper(
@@ -192,7 +193,7 @@ def process_url(
config: Config,
progress_info: dict,
) -> Generator[tuple[dict[str, Any], list[SegmentType]], None, None]:
- url_data = Downloader(output_dir=config.output.output_dir).download(
+ url_data = Downloader(yt_dlp_options=config.input.yt_dlp_options, output_dir=config.output.output_dir).download(
url,
retries=config.input.download_retries,
save_response=config.output.save_yt_dlp_responses,
diff --git a/tafrigh/config.py b/tafrigh/config.py
index 42cc9c4..6b4417d 100644
--- a/tafrigh/config.py
+++ b/tafrigh/config.py
@@ -21,11 +21,13 @@ def __init__(
urls_or_paths: list[str],
skip_if_output_exist: bool,
download_retries: int,
+ yt_dlp_options: str,
verbose: bool,
):
self.urls_or_paths = urls_or_paths
self.skip_if_output_exist = skip_if_output_exist
self.download_retries = download_retries
+ self.yt_dlp_options = yt_dlp_options
self.verbose = verbose
class Whisper:
diff --git a/tafrigh/downloader.py b/tafrigh/downloader.py
index 674ef55..3b7f12f 100644
--- a/tafrigh/downloader.py
+++ b/tafrigh/downloader.py
@@ -7,7 +7,8 @@
class Downloader:
- def __init__(self, output_dir: str):
+ def __init__(self, yt_dlp_options: str, output_dir: str):
+ self.yt_dlp_options = yt_dlp_options
self.output_dir = output_dir
self._initialize_youtube_dl_with_archive()
@@ -54,6 +55,7 @@ def _config(self, **kwargs: Any) -> dict[str, Any]:
}
config.update(kwargs)
+ config.update(json.loads(self.yt_dlp_options))
return config
diff --git a/tafrigh/utils/cli_utils.py b/tafrigh/utils/cli_utils.py
index d4bd996..0b1b80d 100644
--- a/tafrigh/utils/cli_utils.py
+++ b/tafrigh/utils/cli_utils.py
@@ -36,6 +36,13 @@ def parse_args(argv: list[str]) -> argparse.Namespace:
help="Number of retries for yt-dlp downloads that fail.",
)
+ input_group.add_argument(
+ '--yt_dlp_options',
+ type=str,
+ default='{}',
+ help="Additional options to pass to yt-dlp in valid JSON format (e.g. `'{\"playlist_items\": \"1-10\"}'`).",
+ )
+
input_group.add_argument(
'--verbose',
action=argparse.BooleanOptionalAction,