-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathyt_utils.py
42 lines (30 loc) · 998 Bytes
/
yt_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
'''
Transcription utilities that generate textual summaries of YouTube videos, given their URL(s)
- yt_get uses pytube to download the video URLs into a local file
- yt_transcribe uses the Whisper ASR model to convert the audio into text
'''
import whisper
import datetime
import subprocess
from pathlib import Path
import pandas as pd
import re
import time
import os
import numpy as np
from pytube import YouTube
import torch
import time
def load_model():
    """Load and return the Whisper "base" model."""
    base_model = whisper.load_model("base")
    return base_model
# Load the Whisper model once, at import time; yt_transcribe uses this
# module-level instance for every call.
model = load_model()
def yt_get(yt_url):
    """Download a YouTube video as a progressive MP4 and return its local path.

    Args:
        yt_url: Either a path relative to ``https://youtube.com`` (for
            example ``"/watch?v=..."``) or a full ``http(s)://`` URL.

    Returns:
        The path of the downloaded file, as returned by ``Stream.download()``.

    Raises:
        RuntimeError: If the video exposes no progressive MP4 stream.
    """
    # Only prefix the host for site-relative paths, so a caller passing a full
    # URL does not end up with "https://youtube.comhttps://...".
    if yt_url.startswith(("http://", "https://")):
        full_url = yt_url
    else:
        full_url = "https://youtube.com" + yt_url

    yt = YouTube(full_url, use_oauth=True, allow_oauth_cache=True)
    print(f"youtube to be downloadd - {yt}")

    # Progressive streams carry audio and video in a single file; pick the
    # highest-resolution one.
    progressive_mp4 = yt.streams.filter(progressive=True, file_extension='mp4')
    best_stream = progressive_mp4.order_by('resolution').desc().first()
    if best_stream is None:
        # first() yields None on an empty query; fail with a clear message
        # instead of an AttributeError on None.download().
        raise RuntimeError(f"No progressive mp4 stream available for {full_url}")

    vpath = best_stream.download()
    print(f"Downloaded video {vpath}")
    return vpath
def yt_transcribe(video_url):
    """Transcribe media with the module-level Whisper model and return the text.

    ``video_url`` is passed unchanged to ``model.transcribe``; presumably it is
    the local file path returned by ``yt_get`` rather than a web URL -- confirm
    against callers.

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    print(f"transcribing {video_url}")
    transcription = model.transcribe(video_url)
    text = transcription['text']
    return text