AI4Bharat · kartikvirendrar · Jan 7, 2025 · Jan 3, 2025 · Jan 6, 2025 · Jan 6, 2025
diff --git a/backend/translation/utils.py b/backend/translation/utils.py
@@ -20,7 +20,13 @@
 from yt_dlp import YoutubeDL
 import pandas as pd
 from glossary.tmx.tmxservice import TMXService
-
+from PIL import Image
+from io import BytesIO
+from docx.shared import Cm
+from docx import Document
+from django.http import StreamingHttpResponse
+import math
+import subprocess
 
 def convert_to_scc(subtitles):
     scc_lines = ["Scenarist_SCC V1.0"]
@@ -146,26 +152,130 @@ def convert_to_paragraph(lines, video_name):
     return content
 
 
+def get_format_code(video_url, resolutions=("480p", "360p", "144p", "720p", "1080p")):
+    """
+    Get the format code for the first available resolution from the video URL using yt-dlp.
+
+    Resolutions are tried in the given order; returns None when none of them is listed.
+    """
+    # List all formats with -F; "--" stops a URL that begins with "-" being parsed as an option
+    result = subprocess.run(["yt-dlp", "-F", "--", video_url], capture_output=True, text=True)
+    lines = result.stdout.splitlines()
+    # Search through the available resolutions in order of preference
+    for resolution in resolutions:
+        for line in lines:
+            if resolution in line:
+                # The format code is typically the first value in the line
+                format_code = line.split()[0]
+                print(f"Resolution {resolution} found with format code {format_code}.")
+                return format_code
+    print("No preferred resolution found.")
+
+
+def extract_frames(video_url, timestamps, output_prefix="frame"):
+    """
+    Extract one JPEG frame per timestamp from a video using yt-dlp and ffmpeg.
+
+    Frame i (1-based) is written to "<output_prefix>_<i>.jpg"; nothing is returned.
+    """
+    # Get the format code
+    format_code = get_format_code(video_url)
+    if format_code is None:
+        print("No valid resolution found for the video.")
+        return
+
+    # Fetch the direct URL; "--" keeps a URL starting with "-" from being read as an option
+    result = subprocess.run(
+        ["yt-dlp", "-f", format_code, "--get-url", "--", video_url],
+        capture_output=True, text=True
+    )
+    direct_url = result.stdout.strip()
+    if not direct_url:
+        print("Failed to fetch the direct URL of the video.")
+        return
+
+    # Extract frames for each timestamp
+    for i, ts in enumerate(timestamps):
+        output_filename = f"{output_prefix}_{i+1}.jpg"
+        # -y overwrites a stale frame instead of prompting; -nostdin never waits on input
+        cmd = ["ffmpeg", "-nostdin", "-y", "-ss", str(ts), "-i", direct_url,
+               "-frames:v", "1", "-q:v", "2", output_filename]
+        done = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if done.returncode == 0:
+            print(f"Frame at {ts}s saved as {output_filename}")
+        else:
+            print(f"Failed to extract frame at {ts}s (ffmpeg exit code {done.returncode})")
+
+
 def convert_to_paragraph_monolingual(payload, video_name):
-    lines = []
+    """
+    Build a .docx download (StreamingHttpResponse) of the translated text with video frames.
+    """
+    def valid_xml_char_ordinal(c):
+        cp = ord(c)  # tab/LF/CR are valid XML 1.0 too; keeping them preserves the "\n\n" breaks
+        return cp in (0x9, 0xA, 0xD) or 0x20 <= cp <= 0xD7FF or 0xE000 <= cp <= 0xFFFD or 0x10000 <= cp <= 0x10FFFF
+
     content = ""
-    translated_content = video_name + "\n" + "\n"
+    translated_content = f"{video_name}\n\n"
     sentences_count = 0
     number_of_paragraphs = math.ceil(len(payload) / 5)
     count_paragraphs = 0
+    document = Document()
+    video_url = payload[0].get("video_url", "") if payload else ""  # no IndexError when empty
+    if not video_url:
+        return "Error: Video URL is missing."
+    # extract_frames numbers frames by position in `timestamps`, which skips segments without
+    # a start_time, so remember which frame number belongs to which payload index.
+    timed = [i for i, seg in enumerate(payload) if "start_time" in seg]
+    timestamps = [payload[i]["start_time"] for i in timed]
+    frame_number = {i: n for n, i in enumerate(timed, start=1)}
+    extract_frames(video_url, timestamps)
+
     for index, segment in enumerate(payload):
-        if "text" in segment.keys():
-            lines.append(segment["target_text"])
-            translated_content = translated_content + " " + segment["target_text"]
+        if "target_text" in segment.keys():
+            text = segment["target_text"]
+            translated_content += " " + text
             sentences_count += 1
+            # Filter here as well: lxml rejects control characters in any paragraph text.
+            document.add_paragraph("".join(c for c in text if valid_xml_char_ordinal(c)))
+            frame_id = frame_number.get(index)  # None when the segment has no start_time
+            frame_file = f"frame_{frame_id}.jpg"
+            if frame_id is not None and os.path.exists(frame_file):
+                with Image.open(frame_file) as img:
+                    image_stream = BytesIO()
+                    img.save(image_stream, format="JPEG")
+                    image_stream.seek(0)
+                    document.add_picture(image_stream, width=Cm(11), height=Cm(6.18))
+
             if sentences_count % 5 == 0:
                 count_paragraphs += 1
-                content = content + translated_content + "\n" + "\n"
+                content += translated_content + "\n\n"
                 translated_content = ""
 
     if count_paragraphs < number_of_paragraphs:
-        content = content + translated_content + "\n" + "\n"
-    return content
+        content += translated_content + "\n\n"
+
+    content = "".join(c for c in content if valid_xml_char_ordinal(c))
+    document.add_paragraph(content)
+    buffer = BytesIO()
+    document.save(buffer)
+    length = buffer.tell()
+    buffer.seek(0)
+
+    response = StreamingHttpResponse(
+        streaming_content=buffer,
+        content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    )
+    response["Content-Disposition"] = "attachment; filename=new_file_download.docx"
+    response["Content-Encoding"] = "UTF-8"
+    response["Content-Length"] = length
+
+    for frame_id in frame_number.values():
+        frame_file = f"frame_{frame_id}.jpg"
+        if os.path.exists(frame_file):
+            os.remove(frame_file)
+
+    return response
 
 
 def convert_to_paragraph_bilingual(payload, video_name):

diff --git a/backend/translation/views.py b/backend/translation/views.py
@@ -146,7 +146,7 @@ def export_translation(request):
             {"message": "Task not found."},
             status=status.HTTP_404_NOT_FOUND,
         )
-
+    video_url = task.video.url
     translation = get_translation_id(task)
     if translation is None:
         return Response(
@@ -180,6 +180,8 @@ def export_translation(request):
         translation.save()
 
     payload = translation.payload["payload"]
+    for segment in payload:
+        segment["video_url"] = video_url  # Add video_url to each segment in the payload
     if with_speaker_info:
         speaker_info = translation.payload.get("speaker_info", None)
         if speaker_info == None:
@@ -253,11 +255,13 @@ def export_translation(request):
     elif export_type == "docx":
         filename = "translation.docx"
         content = convert_to_paragraph_monolingual(payload, task.video.name)
-        return convert_to_docx(content)
+        # return convert_to_docx(content)
+        return content
     elif export_type == "docx-bilingual":
         filename = "translation.docx"
         content = convert_to_paragraph_bilingual(payload, task.video.name)
-        return convert_to_docx(content)
+        # return convert_to_docx(content)
+        return content
 
     elif export_type == "sbv":
         for index, segment in enumerate(payload):