Skip to content

Commit

Permalink
Merge pull request #10 from mobiusml/youtube_video_input
Browse files Browse the repository at this point in the history
YouTube Video as an Input
  • Loading branch information
movchan74 authored Nov 15, 2023
2 parents 5d023ca + 8ec30f0 commit 3fe9184
Show file tree
Hide file tree
Showing 14 changed files with 272 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 ffmpeg
7 changes: 5 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ jobs:
- name: Update PATH
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Install dependencies
run: poetry install
run: |
poetry install
sudo apt-get update
sudo apt-get install ffmpeg
- name: Test with pytest
run: poetry run pytest
run: poetry run pytest
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ WORKDIR /app
ENV DEBIAN_FRONTEND=non-interactive

# Install required libraries, tools, and Python3
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 curl git python3.10 python3.10-dev python3-pip python3.10-venv
RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 ffmpeg curl git python3.10 python3.10-dev python3-pip python3.10-venv

# Install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -
Expand Down
2 changes: 1 addition & 1 deletion aana/configs/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
name="blip2_video_generate",
path="/video/generate_captions",
summary="Generate captions for videos using BLIP2 OPT-2.7B",
outputs=["video_captions_hf_blip2_opt_2_7b"],
outputs=["video_captions_hf_blip2_opt_2_7b", "timestamps"],
),
],
"video": [
Expand Down
33 changes: 30 additions & 3 deletions aana/configs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
# videos: list[Video]
# params: VideoParams
# class Video:
# video: VideoInput
# video_input: VideoInput
# video: VideoObject
# frames: Frame
# timestamps: Timestamps
# duration: float
Expand Down Expand Up @@ -198,11 +199,33 @@
{
"name": "videos",
"key": "videos",
"path": "video_batch.videos.[*].video",
"path": "video_batch.videos.[*].video_input",
"data_model": VideoInputList,
}
],
},
{
"name": "download_video",
"type": "ray_task",
"function": "aana.utils.video.download_video",
"batched": True,
"flatten_by": "video_batch.videos.[*]",
"dict_output": False,
"inputs": [
{
"name": "videos",
"key": "video_input",
"path": "video_batch.videos.[*].video_input",
},
],
"outputs": [
{
"name": "video_objects",
"key": "output",
"path": "video_batch.videos.[*].video",
},
],
},
{
"name": "video_params",
"type": "input",
Expand All @@ -223,7 +246,11 @@
"batched": True,
"flatten_by": "video_batch.videos.[*]",
"inputs": [
{"name": "videos", "key": "video", "path": "video_batch.videos.[*].video"},
{
"name": "video_objects",
"key": "video",
"path": "video_batch.videos.[*].video",
},
{"name": "video_params", "key": "params", "path": "video_batch.params"},
],
"outputs": [
Expand Down
2 changes: 2 additions & 0 deletions aana/configs/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class Settings(BaseSettings):
"""

tmp_data_dir: Path = Path("/tmp/aana_data")
youtube_video_dir = tmp_data_dir / "youtube_videos"
image_dir = tmp_data_dir / "images"


settings = Settings()
6 changes: 3 additions & 3 deletions aana/models/core/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,9 @@ def save(self):
if self.path:
return

file_dir = settings.tmp_data_dir / "images"
file_dir.mkdir(parents=True, exist_ok=True)
file_path = file_dir / (self.media_id + ".bmp")
image_dir = settings.image_dir
image_dir.mkdir(parents=True, exist_ok=True)
file_path = image_dir / (self.media_id + ".bmp")

if self.content:
self.save_from_content(file_path)
Expand Down
6 changes: 3 additions & 3 deletions aana/models/core/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ def save(self):
if self.path:
return

file_dir = settings.tmp_data_dir / "medias"
file_dir.mkdir(parents=True, exist_ok=True)
file_path = file_dir / (self.media_id + ".mp4")
image_dir = settings.image_dir
image_dir.mkdir(parents=True, exist_ok=True)
file_path = image_dir / (self.media_id + ".mp4")

if self.content:
self.save_from_content(file_path)
Expand Down
38 changes: 38 additions & 0 deletions aana/models/core/video_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from enum import Enum
import re


class VideoSource(str, Enum):
    """
    Video sources.

    Possible values are "auto" and "youtube".

    Attributes:
        AUTO (str): auto
        YOUTUBE (str): youtube
    """

    AUTO = "auto"
    YOUTUBE = "youtube"

    @classmethod
    def from_url(cls, url: str) -> "VideoSource":
        """
        Get the video source from a URL.

        A URL is classified as YouTube when it consists of an optional
        http(s) scheme, an optional "www." prefix, one of the hosts
        youtube.com, youtube.<tld> (optionally with a two-letter second
        level such as "co.uk") or youtu.be, and a video ID made only of
        letters, digits, "_" and "-". Anything else, including YouTube URLs
        carrying extra query parameters, is classified as AUTO.

        Args:
            url (str): the URL

        Returns:
            VideoSource: YOUTUBE if the URL matches the YouTube pattern,
                AUTO otherwise
        """
        # TODO: Check that the URL is valid

        youtube_pattern = r"^(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/watch\?v=|youtube\.[a-zA-Z]{2,3}(\.[a-zA-Z]{2})?\/watch\?v=|youtu\.be\/)([a-zA-Z0-9_-]+)$"

        # fullmatch instead of match: "$" also matches just before a trailing
        # newline, so match() would accept "https://youtu.be/<id>\n".
        if re.fullmatch(youtube_pattern, url):
            return cls.YOUTUBE
        return cls.AUTO
33 changes: 27 additions & 6 deletions aana/models/pydantic/video_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ class VideoInput(BaseModel):
Attributes:
media_id (str): the ID of the video. If not provided, it will be generated automatically.
path (str): the file path of the video
url (str): the URL of the video
url (str): the URL of the video (supports YouTube videos)
content (bytes): the content of the video in bytes
"""

path: Optional[str] = Field(None, description="The file path of the video.")
url: Optional[str] = Field(None, description="The URL of the video.")
url: Optional[str] = Field(
None, description="The URL of the video (supports YouTube videos)."
)
content: Optional[bytes] = Field(
None,
description=(
Expand All @@ -38,6 +40,25 @@ class VideoInput(BaseModel):
description="The ID of the video. If not provided, it will be generated automatically.",
)

@validator("url")
def check_url(cls, url: str) -> str:
"""
Validator hook for the ``url`` field.
Placeholder: the URL is currently returned unchanged and no validation
is performed yet (see the TODO below).
The intent is to accept normal URLs and youtube URLs.
Args:
url (str): the URL
Returns:
str: the URL, exactly as received
Raises:
ValueError: intended for invalid or unsupported URLs once validation
is implemented; the current code never raises it
"""
# TODO: implement the youtube URL validation
return url

@validator("media_id")
def media_id_must_not_be_empty(cls, media_id):
"""
Expand Down Expand Up @@ -200,14 +221,14 @@ def set_files(self, files: List[bytes]):
for video, file in zip(self.__root__, files):
video.set_file(file)

def convert_input_to_object(self) -> List[Video]:
def convert_input_to_object(self) -> List[VideoInput]:
"""
Convert the list of video inputs to a list of video objects.
Convert the VideoInputList to a list of video inputs.
Returns:
List[Video]: the list of video objects corresponding to the video inputs
List[VideoInput]: the list of video inputs
"""
return [video.convert_input_to_object() for video in self.__root__]
return self.__root__

class Config:
schema_extra = {
Expand Down
56 changes: 56 additions & 0 deletions aana/tests/test_video.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from importlib import resources
from pathlib import Path
import pytest
from aana.configs.settings import settings
from aana.exceptions.general import DownloadException
from aana.models.core.video import Video
from aana.models.pydantic.video_input import VideoInput
from aana.utils.video import download_video


@pytest.fixture
Expand Down Expand Up @@ -139,3 +143,55 @@ def test_at_least_one_input():

with pytest.raises(ValueError):
Video(save_on_disk=True)


def test_download_video(mock_download_file):
    """
    Test download_video for a local path, a plain URL and YouTube URLs.

    Each section that creates files resets ``video`` to None before its
    ``try`` block, so that a failure inside the block cannot make the
    ``finally`` clause clean up the object left over from a previous section.
    """
    # Test VideoInput
    path = resources.path("aana.tests.files.videos", "squirrel.mp4")
    video_input = VideoInput(path=str(path))
    video = download_video(video_input)
    assert isinstance(video, Video)
    assert video.path == path
    assert video.content is None
    assert video.url is None

    # Test a plain (non-YouTube) URL
    video = None
    try:
        url = "http://example.com/squirrel.mp4"
        video_input = VideoInput(url=url)
        video = download_video(video_input)
        assert isinstance(video, Video)
        assert video.path is not None
        assert video.content is None
        assert video.url == url
        assert video.path.exists()
    finally:
        # Only clean up what this section actually created.
        if video is not None:
            video.cleanup()

    # Test Youtube URL
    youtube_url = "https://www.youtube.com/watch?v=yModCU1OVHY"
    youtube_video_dir = settings.youtube_video_dir
    expected_path = youtube_video_dir / "yModCU1OVHY.mp4"
    # remove the file if it exists
    expected_path.unlink(missing_ok=True)

    video = None
    try:
        youtube_video_input = VideoInput(url=youtube_url)
        video = download_video(youtube_video_input)
        assert isinstance(video, Video)
        assert video.path == expected_path
        assert video.path is not None
        assert video.path.exists()
        assert video.content is None
        assert video.url is None
    finally:
        # Without the reset above, a failed download would leave ``video``
        # pointing at the previous section's object and unlink its path.
        if video is not None and video.path:
            video.path.unlink(missing_ok=True)

    # Test YoutubeVideoInput with invalid youtube_url
    youtube_url = "https://www.youtube.com/watch?v=invalid_url"
    youtube_video_input = VideoInput(url=youtube_url)
    with pytest.raises(DownloadException):
        download_video(youtube_video_input)
54 changes: 54 additions & 0 deletions aana/tests/test_video_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from operator import not_
import pytest
from aana.models.core.video_source import VideoSource


def test_video_source_from_url():
    """
    Check the VideoSource that VideoSource.from_url assigns to each URL.

    URLs in the first list must be recognised as YouTube; URLs in the second
    list must fall back to the AUTO source.
    """
    # URLs that must be classified as YouTube
    youtube_urls = [
        "https://youtube.com/watch?v=yModCU1OVHY",
        "http://youtube.com/watch?v=yModCU1OVHY",
        "https://www.youtube.com/watch?v=yModCU1OVHY",
        "http://www.youtube.com/watch?v=yModCU1OVHY",
        "www.youtube.com/watch?v=yModCU1OVHY",
        "youtube.com/watch?v=yModCU1OVHY",
        "https://youtube.de/watch?v=yModCU1OVHY",
        "http://youtube.de/watch?v=yModCU1OVHY",
        "https://www.youtube.de/watch?v=yModCU1OVHY",
        "http://www.youtube.de/watch?v=yModCU1OVHY",
        "www.youtube.de/watch?v=yModCU1OVHY",
        "youtube.de/watch?v=yModCU1OVHY",
        "https://youtu.be/yModCU1OVHY",
        "http://youtu.be/yModCU1OVHY",
        "https://www.youtu.be/yModCU1OVHY",
        "http://www.youtu.be/yModCU1OVHY",
        "www.youtu.be/yModCU1OVHY",
        "youtu.be/yModCU1OVHY",
        "https://www.youtube.co.uk/watch?v=yModCU1OVHY",
        "https://www.youtube.co.uk/watch?v=yModCU1O",
        "https://www.youtube.com/watch?v=18pCXD709TI",
        "https://www.youtube.com/watch?v=18pCXD7",
    ]

    # URLs that must NOT be classified as YouTube
    other_urls = [
        "https://example.com/video.mp4",
        "https://youtube/watch?v=",
        "https://www.youtubecom/watch?v=",
        "http://.youtube.com/watch?v=abc123",
        "https://youtube.co..uk/watch?v=abc123",
        "youtube/watch?v=abc123",
        "https:/youtube.com/watch?v=abc123",
        "http://youtube/watch?v=",
        "https://youtu.be/",
        "https://www.youtube.com/",
        "http://www.youtu.be/watch?v=",
    ]

    # One table of (url, expected source), checked in the original order.
    expectations = [(candidate, VideoSource.YOUTUBE) for candidate in youtube_urls]
    expectations += [(candidate, VideoSource.AUTO) for candidate in other_urls]

    for candidate, expected_source in expectations:
        assert VideoSource.from_url(candidate) == expected_source
Loading

0 comments on commit 3fe9184

Please sign in to comment.