diff --git a/ai-services/align-api/requirements.txt b/ai-services/align-api/requirements.txt index f347b29a..26440aad 100644 --- a/ai-services/align-api/requirements.txt +++ b/ai-services/align-api/requirements.txt @@ -4,6 +4,6 @@ tensorboardX rich==12.6.0 srt==3.5.2 Cython==0.29.32 -urduhack==1.1.1 -fastapi['all'] +urduhack +fastapi indic-nlp-library \ No newline at end of file diff --git a/ai-services/align-api/src/wav2vec2/utils.py b/ai-services/align-api/src/wav2vec2/utils.py index b016bd56..38e403fb 100644 --- a/ai-services/align-api/src/wav2vec2/utils.py +++ b/ai-services/align-api/src/wav2vec2/utils.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from rich.console import Console from rich.traceback import install - +import os install() console = Console() @@ -35,8 +35,13 @@ def length(self): class Wav2vec2: def __init__(self, model_path, language_code, mode, device): - self.asr_path = glob(model_path + "/" + language_code + "/*.pt")[0] - self.dict_path = glob(model_path + "/" + language_code + "/*.txt")[0] + current_dir = os.path.dirname(os.path.abspath(__file__)) + two_levels_up = os.path.abspath(os.path.join(current_dir, "../../")) + model_loc = os.path.join(two_levels_up, os.path.join(model_path, language_code)) + + self.asr_path = glob(os.path.join( model_loc ,"*.pt"))[0] + self.dict_path = glob(os.path.join(model_loc, "*.txt"))[0] + self.device = device self.encoder = self.load_model_encoder() self.labels = self.get_labels() diff --git a/backend/backend/settings.py b/backend/backend/settings.py index 3c2d6415..3f05d610 100644 --- a/backend/backend/settings.py +++ b/backend/backend/settings.py @@ -94,7 +94,7 @@ DOMAIN = os.getenv("DOMAIN") SITE_NAME = os.getenv("DOMAIN") -PROTOCOL = "https" +DEFAULT_HTTP_PROTOCOL = 'https' DJOSER = { "PASSWORD_RESET_CONFIRM_URL": "forget-password/confirm/{uid}/{token}", @@ -127,6 +127,7 @@ CSRF_TRUSTED_ORIGINS = [ "http://localhost:*", # for localhost (Developlemt) + "https://*.ai4bharat.org", ] CUSTOM_CSRF_TRUSTED_ORIGINS = os.getenv("CORS_TRUSTED_ORIGINS", "") if CUSTOM_CSRF_TRUSTED_ORIGINS: diff --git a/backend/backend/urls.py b/backend/backend/urls.py index 107481ed..7db6798b 100644 --- a/backend/backend/urls.py +++ b/backend/backend/urls.py @@ -74,7 +74,7 @@ def get_schema(self, request=None, public=False): path("voiceover/", include("voiceover.urls")), path("youtube/", include("youtube.urls")), path( - "api/generic/transliteration///", + "xlit-api/generic/transliteration//", TransliterationAPIView.as_view(), name="transliteration-api", ), diff --git a/backend/config.py b/backend/config.py index fda9127f..28567d08 100644 --- a/backend/config.py +++ b/backend/config.py @@ -6,6 +6,7 @@ english_asr_url = os.getenv("ENGLISH_ASR_API_URL") indic_asr_url = os.getenv("INDIC_ASR_API_URL") service_id_hindi = os.getenv("SERVICE_ID_HINDI") +service_id_nepali = os.getenv("SERVICE_ID_NEPALI") service_id_indo_aryan = os.getenv("SERVICE_ID_INDO_ARYAN") service_id_dravidian = os.getenv("SERVICE_ID_DRAVIDIAN") misc_tts_url = os.getenv("MISC_TTS_API_URL") diff --git a/backend/organization/views.py b/backend/organization/views.py index 0cab44a5..64ccb5d6 100644 --- a/backend/organization/views.py +++ b/backend/organization/views.py @@ -496,6 +496,9 @@ def list_org_tasks(self, request, pk=None, *args, **kwargs): task["updated_at"] ).replace(tzinfo=None): buttons["Reopen"] = False + if "TRANSLATION_VOICEOVER" in task["task_type"]: + if task["status"] in ["SELECTED_SOURCE", "FAILED"] and task["is_active"] is False: + buttons["Regenerate"] = True if task["status"] == "POST_PROCESS": buttons["Update"] = True if task["status"] == "FAILED": diff --git a/backend/project/admin.py b/backend/project/admin.py index 7c28b90b..f52d3639 100644 --- a/backend/project/admin.py +++ b/backend/project/admin.py @@ -9,6 +9,7 @@ class ProjectAdmin(admin.ModelAdmin): list_display = ( "id", + "title", "organization_id", "default_task_types", "default_target_languages", diff --git a/backend/project/views.py b/backend/project/views.py index 91dddee0..1c55a501 100644 --- a/backend/project/views.py +++ b/backend/project/views.py @@ -861,6 +861,9 @@ def list_project_tasks(self, request, pk=None, *args, **kwargs): data["updated_at"] ).replace(tzinfo=None): buttons["Reopen"] = False + if "TRANSLATION_VOICEOVER" in data["task_type"]: + if data["status"] in ["SELECTED_SOURCE", "FAILED"] and data["is_active"] is False: + buttons["Regenerate"] = True if data["status"] == "POST_PROCESS": buttons["Update"] = True if data["status"] == "FAILED": diff --git a/backend/task/tasks.py b/backend/task/tasks.py index 8004939b..4144272c 100644 --- a/backend/task/tasks.py +++ b/backend/task/tasks.py @@ -296,6 +296,8 @@ def celery_nmt_tts_call(task_id): task_obj.status = "FAILED" task_obj.is_active = False task_obj.save() + logging.info("Generating translation payload failed for %s", str(task_id)) + return else: if ( type(translation_obj.payload) == dict @@ -308,69 +310,66 @@ def celery_nmt_tts_call(task_id): task_obj.status = "SELECTED_SOURCE" # task_obj.is_active = True task_obj.save() - tts_payload = process_translation_payload( - translation_obj, task_obj.target_language - ) - if type(tts_payload) == dict and "message" in tts_payload.keys(): - message = tts_payload["message"] - logging.info("Error from TTS API") - voice_over_task.status = "FAILED" - voice_over_task.save() - # set_fail_for_translation_task(task) - return message - ( - tts_input, - target_language, - translation, - translation_id, - empty_sentences, - ) = tts_payload + tts_payload = process_translation_payload( + translation_obj, task_obj.target_language + ) + if type(tts_payload) == dict and "message" in tts_payload.keys(): + message = tts_payload["message"] + logging.info("Error from TTS API") + voice_over_task.status = "FAILED" + voice_over_task.save() + # set_fail_for_translation_task(task) + return message - generate_audio = task_obj.video.project_id.pre_generate_audio - tts_payload = generate_tts_output( - tts_input, - target_language, - translation, - translation_obj, - empty_sentences, - generate_audio, - ) - payloads = tts_payload + ( + tts_input, + target_language, + translation, + translation_id, + empty_sentences, + ) = tts_payload - existing_voiceover = VoiceOver.objects.filter(task=task_obj).first() + generate_audio = task_obj.video.project_id.pre_generate_audio + tts_payload = generate_tts_output( + tts_input, + target_language, + translation, + translation_obj, + empty_sentences, + generate_audio, + ) + payloads = tts_payload - print("Fetched voiceover", existing_voiceover) + existing_voiceover = VoiceOver.objects.filter(task=task_obj).first() - if existing_voiceover == None: - voiceover_obj = VoiceOver( - video=task_obj.video, - user=task_obj.user, - translation=translation_obj, - payload=tts_payload, - target_language=task_obj.target_language, - task=task_obj, - voice_over_type="MACHINE_GENERATED", - status="VOICEOVER_SELECT_SOURCE", - ) - voiceover_obj.save() - else: - existing_voiceover.payload = tts_payload - existing_voiceover.translation = translation_obj - existing_voiceover.save() - task_obj.is_active = True - task_obj.status = "SELECTED_SOURCE" - task_obj.save() - logging.info("Payload generated for TTS API for %s", str(task_id)) - if "message" in tts_payload: - task_obj.is_active = False - task_obj.status = "FAILED" - task_obj.save() - try: - send_mail_to_user(task_obj) - except: - logging.info("Error in sending mail") + print("Fetched voiceover", existing_voiceover) - # send_mail_to_user(task_obj) + if existing_voiceover == None: + voiceover_obj = VoiceOver( + video=task_obj.video, + user=task_obj.user, + translation=translation_obj, + payload=tts_payload, + target_language=task_obj.target_language, + task=task_obj, + voice_over_type="MACHINE_GENERATED", + status="VOICEOVER_SELECT_SOURCE", + ) + voiceover_obj.save() else: - logging.info("Translation already exists") + existing_voiceover.payload = tts_payload + existing_voiceover.translation = translation_obj + existing_voiceover.save() + task_obj.is_active = True + task_obj.status = "SELECTED_SOURCE" + task_obj.save() + logging.info("Payload generated for TTS API for %s", str(task_id)) + if "message" in tts_payload: + task_obj.is_active = False + task_obj.status = "FAILED" + task_obj.save() + try: + send_mail_to_user(task_obj) + except: + logging.info("Error in sending mail") \ No newline at end of file diff --git a/backend/task/views.py b/backend/task/views.py index f2ca340b..b98fc0e1 100644 --- a/backend/task/views.py +++ b/backend/task/views.py @@ -87,7 +87,7 @@ from rest_framework.decorators import parser_classes from rest_framework.parsers import MultiPartParser, FormParser import regex - +from translation.views import regenerate_translation_voiceover def get_export_translation(request, task_id, export_type): new_request = HttpRequest() @@ -3193,6 +3193,15 @@ def inspect_queue(self, request): ) elif elem["name"] == "task.tasks.celery_nmt_call": task_obj["task_id"] = eval(elem["kwargs"])["task_id"] + elif elem["name"] == "task.tasks.celery_nmt_tts_call": + try: + task_obj["task_id"] = eval(elem["kwargs"])["task_id"] + except: + task_obj["task_id"] = eval(elem["args"].split(",")[0].split("(")[1]) + elif elem["name"] == "voiceover.tasks.celery_integration": + task_obj["task_id"] = eval(elem["args"].split(",")[2]) + elif elem["name"] == "voiceover.tasks.export_voiceover_async": + task_obj["task_id"] = eval(elem["args"].split(",")[0].split("(")[1]) else: task_obj["task_id"] = "" @@ -3229,19 +3238,17 @@ def inspect_queue(self, request): status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) else: - if queue == "nmt": + if queue == "nmt" or queue == "nmt_tts": queue_type = "celery@nmt_worker" - elif queue == "tts": - queue_type = "celery@asr_tts_worker" else: queue_type = "celery@asr_tts_worker" try: task_list = [] + status_list = [] url = f"{flower_url}/api/tasks" params = { - "state": "STARTED", - "sort_by": "received", + "sort_by": "-received", "workername": queue_type, } if flower_username and flower_password: @@ -3255,42 +3262,27 @@ def inspect_queue(self, request): for elem in task_data: if queue == "asr" and elem["name"] == "task.tasks.celery_asr_call": task_list.append(eval(elem["kwargs"])["task_id"]) + status_list.append(elem["state"]) elif ( queue == "tts" and elem["name"] == "task.tasks.celery_tts_call" ): # task_list.append(eval(elem["kwargs"])["task_id"]) task_list.append(eval(elem["args"].split(",")[0].split("(")[1])) + status_list.append(elem["state"]) elif ( queue == "nmt" and elem["name"] == "task.tasks.celery_nmt_call" ): task_list.append(eval(elem["kwargs"])["task_id"]) - else: - pass - params = { - "state": "RECEIVED", - "sort_by": "received", - "workername": queue_type, - } - if flower_username and flower_password: - res = requests.get( - url, params=params, auth=(flower_username, flower_password) - ) - else: - res = requests.get(url, params=params) - data = res.json() - task_data = list(data.values()) - for elem in task_data: - if queue == "asr" and elem["name"] == "task.tasks.celery_asr_call": - task_list.append(eval(elem["kwargs"])["task_id"]) - elif ( - queue == "tts" and elem["name"] == "task.tasks.celery_tts_call" - ): - # task_list.append(eval(elem["kwargs"])["task_id"]) - task_list.append(eval(elem["args"].split(",")[0].split("(")[1])) + status_list.append(elem["state"]) elif ( - queue == "nmt" and elem["name"] == "task.tasks.celery_nmt_call" + queue == "nmt_tts" and elem["name"] == "task.tasks.celery_nmt_tts_call" ): - task_list.append(eval(elem["kwargs"])["task_id"]) + try: + task_list.append(eval(elem["kwargs"])["task_id"]) + status_list.append(elem["state"]) + except: + task_list.append(eval(elem["args"].split(",")[0].split("(")[1])) + status_list.append(elem["state"]) else: pass if task_list: @@ -3314,8 +3306,12 @@ def inspect_queue(self, request): "video_duration": str(elem["video__duration"]), } i = task_list.index(elem["id"]) + task_dict["status"] = status_list[i] task_list[i] = task_dict - + for i in task_list: + if type(i) == int: + j = task_list.index(i) + task_list[j] = {"task_id": i, "status": "Not Found"} return Response( {"message": "successful", "data": task_list}, status=status.HTTP_200_OK, @@ -3392,6 +3388,12 @@ def regenerate_response(self, request, pk, *args, **kwargs): elif task.task_type == "VOICEOVER_EDIT": celery_tts_call.delay(task_id=task.id) api = "TTS" + elif task.task_type == "TRANSLATION_VOICEOVER_EDIT": + if regenerate_translation_voiceover(task.id) is False: + return Response( + {"message": "Transcription task is not complete yet"}, status=status.HTTP_400_BAD_REQUEST + ) + api = "NMT-TTS" else: return Response( {"message": "Invalid task"}, status=status.HTTP_400_BAD_REQUEST diff --git a/backend/transcript/admin.py b/backend/transcript/admin.py index d35c6cb5..ee2282fd 100644 --- a/backend/transcript/admin.py +++ b/backend/transcript/admin.py @@ -5,7 +5,7 @@ # Show particular fields in the admin panel class TranscriptAdmin(admin.ModelAdmin): - list_display = ("task", "video", "language", "transcript_type", "updated_at", "id") + list_display = ("task", "video", "language", "transcript_type", "updated_at", "id", "status") list_filter = ("video", "language", "transcript_type") search_fields = ("video", "language", "transcript_type") ordering = ("-updated_at",) diff --git a/backend/transcript/metadata.py b/backend/transcript/metadata.py index 0c308f5d..b424458d 100644 --- a/backend/transcript/metadata.py +++ b/backend/transcript/metadata.py @@ -13,6 +13,7 @@ ("te", "Telugu"), ("sa", "Sanskrit"), ("ur", "Urdu"), + ("ne", "Nepali"), ] TRANSCRIPTION_SUPPORTED_LANGUAGES = { @@ -29,4 +30,5 @@ "Tamil": "ta", "Telugu": "te", "Urdu": "ur", + "Nepali": "ne", } diff --git a/backend/transcript/urls.py b/backend/transcript/urls.py index 88b25585..28f4c1c4 100644 --- a/backend/transcript/urls.py +++ b/backend/transcript/urls.py @@ -12,6 +12,9 @@ name="generate_original_transcript", ), path("save/", views.save_transcription, name="save_transcript"), + path("reopen_completed_transcription_task/", views.reopen_completed_transcription_task, name="reopen_completed_transcription_task"), + path("get_transcription_status/", views.fetch_transcript_status, name="get_transcription_status"), + path("set_transcription_status/", views.update_transcript_status, name="set_transcription_status"), path( "save_full_transcript/", views.save_full_transcription, diff --git a/backend/transcript/utils/asr.py b/backend/transcript/utils/asr.py index 2bb77d85..aa92bc95 100644 --- a/backend/transcript/utils/asr.py +++ b/backend/transcript/utils/asr.py @@ -8,6 +8,7 @@ indic_asr_url, dhruva_key, service_id_hindi, + service_id_nepali, service_id_indo_aryan, service_id_dravidian, ) @@ -51,6 +52,8 @@ def make_asr_api_call(url, lang, vad_level=3, chunk_size=10): service_id = service_id_indo_aryan elif lang in ["kn", "ml", "ta", "te"]: service_id = service_id_dravidian + elif lang in ["ne"]: + service_id = service_id_nepali else: return None diff --git a/backend/transcript/views.py b/backend/transcript/views.py index d40a3963..d1a3c489 100644 --- a/backend/transcript/views.py +++ b/backend/transcript/views.py @@ -79,7 +79,7 @@ from .utils.timestamp import * import openai from utils.llm_api import get_model_output - +from voiceover.models import VoiceOver @api_view(["GET"]) def get_transcript_export_types(request): @@ -163,10 +163,35 @@ def export_transcript(request): transcript = get_transcript_id(task) if transcript is None: - return Response( - {"message": "Transcript not found."}, - status=status.HTTP_404_NOT_FOUND, - ) + try: + if task.task_type == "TRANSLATION_VOICEOVER_EDIT" and task.status != "COMPLETE": + voice_over_obj = VoiceOver.objects.filter(task=task).first() + transcript = voice_over_obj.translation.transcript + updated_payload = [] + index = 0 + for segment in voice_over_obj.payload["payload"].values(): + start_time = datetime.datetime.strptime( + segment["start_time"], "%H:%M:%S.%f" + ) + end_time = datetime.datetime.strptime(segment["end_time"], "%H:%M:%S.%f") + unix_start_time = datetime.datetime.timestamp(start_time) + unix_end_time = datetime.datetime.timestamp(end_time) + + updated_segment = { + "start_time": segment["start_time"], + "end_time": segment["end_time"], + "text": segment["transcription_text"], + "speaker_id": "", + "unix_start_time": unix_start_time, + "unix_end_time": unix_end_time, + } + updated_payload.append(updated_segment) + transcript.payload["payload"] = updated_payload + except: + return Response( + {"message": "Transcript not found."}, + status=status.HTTP_404_NOT_FOUND, + ) if with_speaker_info: speaker_info = transcript.video.multiple_speaker @@ -642,6 +667,208 @@ def get_transcript_id(task): ) return transcript_id +@swagger_auto_schema( + method="get", + manual_parameters=[ + openapi.Parameter( + "task_id", + openapi.IN_QUERY, + description=("An integer to pass the task id"), + type=openapi.TYPE_INTEGER, + required=True, + ), + ], + responses={ + 200: "Status has been changed successfully", + 400: "Bad request", + 404: "No transcript found for given task", + }, +) +@api_view(["GET"]) +def reopen_completed_transcription_task(request): + if not request.user.is_authenticated: + return Response({"message":"You do not have enough permissions to access this view!"}, status=401) + try: + task_id = request.query_params.get("task_id") + except KeyError: + return Response( + { + "message": "Missing required parameter - task_id" + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + task = Task.objects.get(pk=task_id) + except Task.DoesNotExist: + return Response( + {"message": "Task doesn't exist."}, + status=status.HTTP_404_NOT_FOUND, + ) + + transcript = get_transcript_id(task) + if transcript is not None: + transcript_id = transcript.id + try: + transcript = Transcript.objects.get(pk=transcript_id) + transcript.delete() + task.status = "INPROGRESS" + task.save() + return Response( + { + "message": "Status has been changed successfully" + }, + status=status.HTTP_200_OK, + ) + except: + return Response( + {"message": "Transcript doesn't exist."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + +@swagger_auto_schema( + method="get", + manual_parameters=[ + openapi.Parameter( + "task_id", + openapi.IN_QUERY, + description=("An integer to pass the task id"), + type=openapi.TYPE_INTEGER, + required=True, + ), + ], + responses={ + 200: "Status has been fetched successfully", + 400: "Bad request", + 404: "No transcript found for given task", + }, +) +@api_view(["GET"]) +def fetch_transcript_status(request): + if not request.user.is_authenticated: + return Response({"message":"You do not have enough permissions to access this view!"}, status=401) + try: + task_id = request.query_params.get("task_id") + except KeyError: + return Response( + { + "message": "Missing required parameter - task_id" + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + task = Task.objects.get(pk=task_id) + except Task.DoesNotExist: + return Response( + {"message": "Task doesn't exist."}, + status=status.HTTP_404_NOT_FOUND, + ) + + if not task.is_active: + return Response( + {"message": "This task is not active yet."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + transcript = get_transcript_id(task) + if transcript is not None: + transcript_id = transcript.id + try: + transcript = Transcript.objects.get(pk=transcript_id) + return Response( + { + "message": "Status has been fetched successfully", + "task_id": task.id, + "transcript_id": transcript_id, + "status": transcript.status, + }, + status=status.HTTP_200_OK, + ) + except: + return Response( + {"message": "Transcript doesn't exist."}, + status=status.HTTP_400_BAD_REQUEST, + ) + +@swagger_auto_schema( + method="post", + request_body=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["task_id", "trs_status"], + properties={ + "task_id": openapi.Schema( + type=openapi.TYPE_INTEGER, + description="An integer identifying the transcript instance", + ), + "trs_status": openapi.Schema( + type=openapi.TYPE_STRING, + description="Transcript task status to be set", + ) + }, + description="Post request body", + ), + responses={ + 200: "Status has been updated successfully", + 400: "Bad request", + 404: "No transcript found for given task", + }, +) +@api_view(["POST"]) +def update_transcript_status(request): + if not request.user.is_authenticated: + return Response({"message":"You do not have enough permissions to access this view!"}, status=401) + try: + # Get the required data from the POST body + task_id = request.data["task_id"] + trs_status = request.data["trs_status"] + except KeyError: + return Response( + { + "message": "Missing required parameters - task_id or trs_status" + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + task = Task.objects.get(pk=task_id) + except Task.DoesNotExist: + return Response( + {"message": "Task doesn't exist."}, + status=status.HTTP_404_NOT_FOUND, + ) + + if not task.is_active: + return Response( + {"message": "This task is not active yet."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + transcript = get_transcript_id(task) + if transcript is not None: + transcript_id = transcript.id + try: + transcript = Transcript.objects.get(pk=transcript_id) + if trs_status in ["TRANSCRIPTION_SELECT_SOURCE", "TRANSCRIPTION_EDITOR_ASSIGNED", "TRANSCRIPTION_EDIT_INPROGRESS", "TRANSCRIPTION_EDIT_COMPLETE", "TRANSCRIPTION_REVIEWER_ASSIGNED", "TRANSCRIPTION_REVIEW_INPROGRESS", "TRANSCRIPTION_REVIEW_COMPLETE"]: + transcript.status = trs_status + transcript.save() + return Response( + { + "message": "Status has been updated successfully", + }, + status=status.HTTP_200_OK, + ) + else: + return Response( + {"message": "Invalid Status"}, + status=status.HTTP_400_BAD_REQUEST, + ) + except: + return Response( + {"message": "Transcript doesn't exist."}, + status=status.HTTP_400_BAD_REQUEST, + ) @swagger_auto_schema( method="get", @@ -1162,8 +1389,6 @@ def change_active_status_of_next_tasks(task, transcript_obj): translation.save() if source_type == None or source_type == "MACHINE_GENERATED": source_type = "MACHINE_GENERATED" - translation.transcript = transcript_obj - translation.save() celery_nmt_tts_call.delay(task_id=translation.task.id) else: payloads = generate_translation_payload( @@ -1210,7 +1435,10 @@ def change_active_status_of_next_tasks(task, transcript_obj): # Helper function to call the paraphrasing API def paraphrase_text(text): # Set API configuration - text = get_model_output(user_prompt=text) + try: + text = get_model_output(user_prompt=text) + except: + True return text @@ -1405,9 +1633,9 @@ def modify_payload(offset, limit, payload, start_offset, end_offset, transcript) ), # Generate paraphrased text if paraphrase=true }, ) - last_valid_end_time = transcript.payload["payload"][len(payload["payload"])][ - "end_time" - ] + # last_valid_end_time = transcript.payload["payload"][len(payload["payload"])][ + # "end_time" + # ] offset_to_check = start_offset + len(payload["payload"]) last_valid_start_time = transcript.payload["payload"][offset_to_check - 1][ "start_time" @@ -1741,9 +1969,7 @@ def save_transcription(request): transcript_id = transcript.id bookmarked_segment = request.data.get("bookmark", None) user = request.user - print(bookmarked_segment) - if bookmarked_segment: - print("Saving History") + if bookmarked_segment is not None: user.user_history = { "task_id": task_id, "offset": offset, @@ -1853,6 +2079,12 @@ def save_transcription(request): if task.status == "INPROGRESS": task.status = "COMPLETE" task.save() + if task.status == "SELECTED_SOURCE": + task.status = "COMPLETE" + task.save() + if task.status == "PARAPHRASE": + task.status = "COMPLETE" + task.save() return Response( { "message": "Final Edited Transcript already submitted." @@ -1880,8 +2112,8 @@ def save_transcription(request): transcript_obj, ) for item in transcript_obj.payload["payload"]: - item['verbatim_text'] = item.pop('text') - item['text'] = item['paraphrased_text'] + item['verbatim_text'] = item['text'] + item['text'] = item['paraphrased_text'] if 'paraphrased_text' in item and item['paraphrased_text'] is not None else item['verbatim_text'] transcript_obj.save() task.status = "COMPLETE" task.save() @@ -1994,7 +2226,7 @@ def save_transcription(request): transcript_obj, ) for item in transcript_obj.payload["payload"]: - item['verbatim_text'] = item.pop('text') + item['verbatim_text'] = item['text'] item['text'] = item['paraphrased_text'] transcript_obj.save() task.status = "COMPLETE" diff --git a/backend/translation/admin.py b/backend/translation/admin.py index 19114afa..a143bf06 100644 --- a/backend/translation/admin.py +++ b/backend/translation/admin.py @@ -17,8 +17,9 @@ class TranslationAdmin(admin.ModelAdmin): "translation_type", "updated_at", "id", + "status", ) - list_filter = ("task", "transcript", "target_language", "translation_type") + list_filter = ("task", "transcript", "target_language", "translation_type", "video") search_fields = ("task", "transcript", "target_language", "translation_type") ordering = ("-updated_at",) diff --git a/backend/translation/urls.py b/backend/translation/urls.py index e50e93d9..1c32c1fd 100644 --- a/backend/translation/urls.py +++ b/backend/translation/urls.py @@ -11,6 +11,8 @@ name="retrieve_all_translations", ), path("save/", views.save_translation, name="save_translation"), + path("get_translation_status/", views.fetch_translation_status, name="get_translation_status"), + path("set_translation_status/", views.update_translation_status, name="set_translation_status"), path( "get_translation_supported_languages", views.get_translation_supported_languages, diff --git a/backend/translation/views.py b/backend/translation/views.py index b7789659..a95bbb59 100644 --- a/backend/translation/views.py +++ b/backend/translation/views.py @@ -71,7 +71,8 @@ from transcript.utils.timestamp import * from django.core.mail import EmailMultiAlternatives from django.conf import settings - +from transcript.views import get_transcript_id +from task.tasks import celery_nmt_tts_call @api_view(["GET"]) def get_translation_export_types(request): @@ -164,8 +165,6 @@ def export_translation(request): end_time = datetime.datetime.strptime(segment["end_time"], "%H:%M:%S.%f") unix_start_time = datetime.datetime.timestamp(start_time) unix_end_time = datetime.datetime.timestamp(end_time) - target_text = segment["text"] - target_text = segment["transcription_text"] updated_segment = { "start_time": segment["start_time"], @@ -560,7 +559,149 @@ def get_translation_id(task): .first() ) return translation_id + +@swagger_auto_schema( + method="get", + manual_parameters=[ + openapi.Parameter( + "task_id", + openapi.IN_QUERY, + description=("An integer to pass the task id"), + type=openapi.TYPE_INTEGER, + required=True, + ), + ], + responses={ + 200: "Status has been fetched successfully", + 400: "Bad request", + 404: "No translation found for given task", + }, +) +@api_view(["GET"]) +def fetch_translation_status(request): + if not request.user.is_authenticated: + return Response({"message":"You do not have enough permissions to access this view!"}, status=401) + try: + task_id = request.query_params.get("task_id") + except KeyError: + return Response( + { + "message": "Missing required parameter - task_id" + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + task = Task.objects.get(pk=task_id) + except Task.DoesNotExist: + return Response( + {"message": "Task doesn't exist."}, + status=status.HTTP_404_NOT_FOUND, + ) + + if not task.is_active: + return Response( + {"message": "This task is not active yet."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + translation = get_translation_id(task) + if translation is not None: + translation_id = translation.id + try: + translation = Translation.objects.get(pk=translation_id) + return Response( + { + "message": "Status has been fetched successfully", + "task_id": task.id, + "translation_id": translation_id, + "status": translation.status, + }, + status=status.HTTP_200_OK, + ) + except: + return Response( + {"message": "Translation doesn't exist."}, + status=status.HTTP_400_BAD_REQUEST, + ) + +@swagger_auto_schema( + method="post", + request_body=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["task_id", "trl_status"], + properties={ + "task_id": openapi.Schema( + type=openapi.TYPE_INTEGER, + description="An integer identifying the translation instance", + ), + "trl_status": openapi.Schema( + type=openapi.TYPE_STRING, + description="Translation task status to be set", + ) + }, + description="Post request body", + ), + responses={ + 200: "Status has been updated successfully", + 400: "Bad request", + 404: "No translation found for given task", + }, +) +@api_view(["POST"]) +def update_translation_status(request): + if not request.user.is_authenticated: + return Response({"message":"You do not have enough permissions to access this view!"}, status=401) + try: + # Get the required data from the POST body + task_id = request.data["task_id"] + trl_status = request.data["trl_status"] + except KeyError: + return Response( + { + "message": "Missing required parameters - task_id or trl_status" + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + task = Task.objects.get(pk=task_id) + except Task.DoesNotExist: + return Response( + {"message": "Task doesn't exist."}, + status=status.HTTP_404_NOT_FOUND, + ) + if not task.is_active: + return Response( + {"message": "This task is not active yet."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + translation = get_translation_id(task) + if translation is not None: + translation_id = translation.id + try: + translation = Translation.objects.get(pk=translation_id) + if trl_status in ["TRANSLATION_SELECT_SOURCE", "TRANSLATION_EDITOR_ASSIGNED", "TRANSLATION_EDIT_INPROGRESS", "TRANSLATION_EDIT_COMPLETE", "TRANSLATION_REVIEWER_ASSIGNED", "TRANSLATION_REVIEW_INPROGRESS", "TRANSLATION_REVIEW_COMPLETE"]: + translation.status = trl_status + translation.save() + return Response( + { + "message": "Status has been updated successfully", + }, + status=status.HTTP_200_OK, + ) + else: + return Response( + {"message": "Invalid Status"}, + status=status.HTTP_400_BAD_REQUEST, + ) + except: + return Response( + {"message": "Translation doesn't exist."}, + status=status.HTTP_400_BAD_REQUEST, + ) @swagger_auto_schema( method="get", @@ -1533,7 +1674,7 @@ def save_translation(request): ) bookmarked_segment = request.data.get("bookmark", None) user = request.user - if bookmarked_segment: + if bookmarked_segment is not None: user.user_history = { "task_id": task_id, "offset": offset, @@ -2208,5 +2349,18 @@ def parse_date(date_str): lang_data.append(i) temp_data = {"org": org, "data": lang_data} res.append(temp_data) - - return Response(res, status=status.HTTP_200_OK) \ No newline at end of file + + return Response(res, status=status.HTTP_200_OK) + +def regenerate_translation_voiceover(task_id): + task_obj = Task.objects.get(pk=task_id) + transcription_task = Task.objects.filter(video=task_obj.video, task_type="TRANSCRIPTION_EDIT", status="COMPLETE").first() + if transcription_task is None: + return False + transcript = get_transcript_id(transcription_task) + transcript_obj = Transcript.objects.get(pk=transcript.id) + translation = Translation.objects.filter(task=task_obj).first() + translation.transcript = transcript_obj + translation.save() + celery_nmt_tts_call.delay(task_id) + return True diff --git a/backend/user_reports.py b/backend/user_reports.py index d715d966..bd7899ef 100644 --- a/backend/user_reports.py +++ b/backend/user_reports.py @@ -129,6 +129,77 @@ def get_completed_tasks(): else: html_table_df_tasks = "" +def get_active_tasks(): + users = User.objects.filter(id__in=[2248, 2252, 2253, 2254, 2255, 2256, 2257, 2259, 2263, 2264, 2266, 2268, 2273, 2278, 2281, 2282, 2283, 2286, 2289, 2291, 2293, 2296, 2299, 2300, 2320, 2322, 2326, 2328, 2329, 2336, 2337, 2338, 2339, 2340, 2343, 2344, 2345, 2351, 2353, 2360, 2361, 2365, 2374, 2376, 2379, 2390, 2395, 2402, 2405, 2459, 2461, 2471, 2472, 2480, 2485, 2486, 2487, 2550, 2559, 64]) + for member in list(users): + tasks_managed = [] + tasks = ( + Task.objects + .filter(status__in=["INPROGRESS", "SELECTED_SOURCE"]) + .filter(is_active=True) + .filter(user=member) + ) + for task in tasks: + if task.get_task_type_label.count("VoiceOver"): + type = "voiceover" + elif task.get_task_type_label.count("Translation"): + type = "translate" + else: + type = "transcript" + task_link = f"https://chitralekha.ai4bharat.org/#/task/{task.id}/{type}" + tasks_managed.append( + { + "Project Name": task.video.project_id.title, + "Project Id": task.video.project_id.id, + "Task ID": task.id, + "Task Type": task.get_task_type_label, + "Task Link": f'Open Task' + } + ) + if len(tasks_managed) > 0: + df = pd.DataFrame.from_records(tasks_managed) + blankIndex = [""] * len(df) + df.index = blankIndex + html_table_df_tasks = build_table( + df, + "orange_light", + font_size="medium", + text_align="left", + width="auto", + index=False, + escape=False, + ) + message = ( + "Hope you are doing great " + + str(member.first_name + " " + member.last_name) + + ",\n Following tasks are active now." + ) + + email_to_send = ( + "

" + + message + + "


Active Tasks Reports

" + + html_table_df_tasks + ) + logging.info("Sending Mail to %s", member.email) + + compiled_msg = send_email_template_with_attachment( + subject=f"{app_name} - Active Tasks Report", + username=[member.email], + message=email_to_send, + ) + msg = EmailMultiAlternatives( + f"{app_name} - Active Tasks Report", + compiled_msg, + settings.DEFAULT_FROM_EMAIL, + [member.email], + ) + email_content = compiled_msg + msg.attach_alternative(email_content, "text/html") + msg.send() + else: + html_table_df_tasks = "" + def get_new_tasks(): logging.info("Calculate Reports...") diff --git a/backend/users/managers.py b/backend/users/managers.py index 59031c5c..c4b3c6fc 100644 --- a/backend/users/managers.py +++ b/backend/users/managers.py @@ -9,7 +9,7 @@ def _create_user(self, email, password, **extra_fields): Create dummy users for invites. """ - email = self.normalize_email(email) + email = self.normalize_email(email.lower()) user = self.model(email=email, **extra_fields) user.set_password(password) @@ -20,7 +20,7 @@ def _create_user(self, email, password, **extra_fields): def create_user(self, email, password=None, **extra_fields): extra_fields.setdefault("is_staff", False) extra_fields.setdefault("is_superuser", False) - return self._create_user(email, password, **extra_fields) + return self._create_user(email.lower(), password, **extra_fields) def create_superuser(self, email, password, **extra_fields): extra_fields.setdefault("is_staff", True) @@ -31,4 +31,4 @@ def create_superuser(self, email, password, **extra_fields): if extra_fields.get("is_superuser") is not True: raise ValueError("Superuser must have is_superuser=True.") - return self._create_user(email, password, **extra_fields) + return self._create_user(email.lower(), password, **extra_fields) diff --git a/backend/users/tasks.py b/backend/users/tasks.py index 25953839..4632106e 100644 --- a/backend/users/tasks.py +++ b/backend/users/tasks.py @@ -12,6 +12,9 @@ def send_completed_tasks_mail(): get_completed_tasks() +@shared_task(name="send_active_tasks_mail") +def send_active_tasks_mail(): + get_active_tasks() @shared_task(name="send_new_tasks_mail") def send_new_tasks_mail(): diff --git a/backend/users/views.py b/backend/users/views.py index 19651c0a..a03e426d 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -186,6 +186,7 @@ def get( *args, **kwargs, ): + email_id = email_id.lower() interested_in = ", ".join(str(interested_in).title().split(" ")) onboarding_table_1 = onboarding_table.format( org_name=org_name, @@ -276,6 +277,7 @@ def invite_users(self, request): for email in emails: # Checking if the email is in valid format. + email = email.lower() if re.fullmatch(regex, email): try: user = User( @@ -415,7 +417,7 @@ def sign_up_user(self, request, pk=None): """ Users to sign up for the first time. """ - email = request.data.get("email") + email = request.data.get("email").lower() try: user = User.objects.get(email=email) except User.DoesNotExist: @@ -489,6 +491,7 @@ def re_invite(self, request): already_accepted_invite, ) = ([], [], [], []) for user_email in distinct_emails: + user_email = user_email.lower() if user_email in existing_emails_set: user = User.objects.get(email=user_email) if user.has_accepted_invite: @@ -744,7 +747,7 @@ def update_email(self, request): """ try: user = request.user - unverified_email = request.data.get("email") + unverified_email = request.data.get("email").lower() old_email_update_code = generate_random_string(10) new_email_verification_code = generate_random_string(10) diff --git a/backend/utils/llm_api.py b/backend/utils/llm_api.py index 754b8116..e597905d 100644 --- a/backend/utils/llm_api.py +++ b/backend/utils/llm_api.py @@ -88,7 +88,7 @@ def get_gpt3_output(system_prompt=None, user_prompt=None, history=None): openai.api_base = os.getenv("LLM_INTERACTIONS_OPENAI_API_BASE") openai.api_version = os.getenv("LLM_INTERACTIONS_OPENAI_API_VERSION") openai.api_key = os.getenv("OPENAI_API_KEY") - engine = "prompt-chat-gpt35" + engine = os.getenv("LLM_INTERACTIONS_OPENAI_ENGINE") messages = [] if system_prompt: diff --git a/backend/video/urls.py b/backend/video/urls.py index 08a7e687..8e95e938 100644 --- a/backend/video/urls.py +++ b/backend/video/urls.py @@ -7,7 +7,7 @@ path("", views.get_video, name="get_video"), path("get_listings",views.get_video_details,name="get_video_details"), path( - "api/generic/transliteration///", + "xlit-api/generic/transliteration//", TransliterationAPIView.as_view(), name="transliteration-api", ), diff --git a/backend/video/utils.py b/backend/video/utils.py index ffa6d299..c4d8f695 100644 --- a/backend/video/utils.py +++ b/backend/video/utils.py @@ -28,8 +28,10 @@ from config import youtube_api_key from googleapiclient.discovery import build import re +from moviepy.editor import VideoFileClip +from math import floor -ydl = YoutubeDL({"format": "best*[acodec!=none]"}) +ydl = YoutubeDL({"format": "best"}) # Declare a global variable to save the object for Google Drive ID extraction drive_info_extractor = get_info_extractor("GoogleDrive")() @@ -545,14 +547,22 @@ def get_video_func(request): ) try: + if "blob.core.windows.net" in url: + info = ydl.extract_info(url, download=False) + title = info["title"] + video = VideoFileClip(url) + duration = timedelta(seconds=floor(video.duration)) + direct_video_url = url + normalized_url = url + else: # Get the video info from the YouTube API - ( - direct_video_url, - normalized_url, - title, - duration, - direct_audio_url, - ) = get_data_from_google_video(url) + ( + direct_video_url, + normalized_url, + title, + duration, + direct_audio_url, + ) = get_data_from_google_video(url) except: direct_video_url = "" direct_audio_url = "" @@ -574,17 +584,18 @@ def get_video_func(request): title = video["snippet"]["title"] duration_iso8601 = video["contentDetails"]["duration"] duration = timedelta(seconds=iso8601_duration_to_seconds(duration_iso8601)) - except: + except Exception as e: + logging.error(e) return Response( {"message": "This is an invalid video URL."}, status=status.HTTP_400_BAD_REQUEST, ) - if title[-4:] == ".mp4" and "youtube.com" not in normalized_url: - return Response( - {"message": "Invalid file type. Mp4 is not supported"}, - status=status.HTTP_400_BAD_REQUEST, - ) + # if title[-4:] == ".mp4" and "youtube.com" not in normalized_url: + # return Response( + # {"message": "Invalid file type. Mp4 is not supported"}, + # status=status.HTTP_400_BAD_REQUEST, + # ) # Create a new DB entry if URL does not exist, else return the existing entry if create: video = Video.objects.create( diff --git a/backend/video/views.py b/backend/video/views.py index 609b8acb..abd02458 100644 --- a/backend/video/views.py +++ b/backend/video/views.py @@ -38,6 +38,7 @@ from collections import Counter from rest_framework.views import APIView import config +from rest_framework.permissions import IsAuthenticated accepted_languages = [ "as", @@ -97,25 +98,16 @@ class TransliterationAPIView(APIView): + permission_classes = [IsAuthenticated] + def get(self, request, target_language, data, *args, **kwargs): - json_data = { - "input": [{"source": data}], - "config": { - "language": { - "sourceLanguage": "en", - "targetLanguage": target_language, - }, - "isSentence": False, - "numSuggestions": 5, - }, - } - logging.info("Calling Transliteration API") - response_transliteration = requests.post( - config.transliteration_url, - headers={"authorization": config.dhruva_key}, - json=json_data, + response_transliteration = requests.get( + os.getenv("TRANSLITERATION_URL") + target_language + "/" + data, + headers={"Authorization": "Bearer " + os.getenv("TRANSLITERATION_KEY")}, ) + print(response_transliteration) + transliteration_output = response_transliteration.json() return Response(transliteration_output, status=status.HTTP_200_OK) @@ -1076,6 +1068,27 @@ def upload_csv_data(request): else: valid_row["assignee"] = User.objects.get(email=row["Assignee"].strip()).id + format = "%d-%m-%Y" + input_eta = datetime.datetime.strptime(row["ETA"], format) + curr_date = datetime.datetime.now().date() + if bool(input_eta) == False: + errors.append( + { + "row_no": f"Row {row_num}", + "message": f"Invalid ETA Format, expected format is dd-mm-yyyy: received{row['ETA']}", + } + ) + elif input_eta.date() < curr_date: + errors.append( + { + "row_no": f"Row {row_num}", + "message": f"ETA can't be less than current Date: received{row['ETA']}", + } + ) + + else: + valid_row["ETA"] = input_eta.strftime("%Y-%m-%dT18:29:00.000Z") + valid_row["video_description"] = row["Video Description"] valid_row["task_description"] = row["Task Description"] video = Video.objects.filter(url=row["Youtube URL"].strip()).first() diff --git a/backend/voiceover/admin.py b/backend/voiceover/admin.py index bbb4e052..944763ee 100644 --- a/backend/voiceover/admin.py +++ b/backend/voiceover/admin.py @@ -15,6 +15,7 @@ class VoiceOverAdmin(admin.ModelAdmin): "voice_over_type", "updated_at", "task", + "status", ) list_filter = ("video", "voice_over_type", "translation") search_fields = ("video", "voice_over_type", "translation") diff --git a/backend/voiceover/urls.py b/backend/voiceover/urls.py index e64ed6ee..7863af15 100644 --- a/backend/voiceover/urls.py +++ b/backend/voiceover/urls.py @@ -10,6 +10,8 @@ ), path("get_translated_text/", views.get_translated_text, name="get_translated_text"), path("save/", views.save_voice_over, name="save_voice_over"), + path("get_voiceover_status/", views.fetch_voice_over_status, name="get_voice_over_staus"), + path("set_voiceover_status/", views.update_voice_over_status, name="set_voice_over_staus"), path( "get_voiceover_supported_languages", views.get_voiceover_supported_languages, @@ -61,4 +63,9 @@ views.get_voiceover_report, name="get_voiceover_report", ), + path( + "csv_bulk_regenerate", + views.csv_bulk_regenerate, + name="csv_bulk_regenerate", + ), ] diff --git a/backend/voiceover/utils.py b/backend/voiceover/utils.py index 2de2e17c..df5dec0e 100644 --- a/backend/voiceover/utils.py +++ b/backend/voiceover/utils.py @@ -222,8 +222,8 @@ def upload_zip_to_azure(zip_file_path): return blob_client_zip.url -def get_tts_output(tts_input, target_language, multiple_speaker, gender): - logging.info("Calling TTS API") +def get_tts_output(tts_input, target_language, multiple_speaker, gender, id): + logging.info("Calling TTS API for %s task in %s language", str(id), str(target_language)) tts_url = get_tts_url(target_language) if tts_url is None: return { @@ -285,6 +285,7 @@ def generate_tts_output( target_language, translation_obj.video.multiple_speaker, gender.lower(), + translation_obj.id, ) logging.info("output generated") else: @@ -306,6 +307,7 @@ def generate_tts_output( target_language, translation_obj.video.multiple_speaker, speaker_info[speaker_id], + translation_obj.id, ) if ( type(speaker_tts_output) != dict @@ -1035,7 +1037,7 @@ def generate_voiceover_payload(translation_payload, target_language, task): else: gender = task.video.gender voiceover_machine_generated = get_tts_output( - tts_input, target_language, task.video.multiple_speaker, gender.lower() + tts_input, target_language, task.video.multiple_speaker, gender.lower(), task.id ) if ( type(voiceover_machine_generated) == dict @@ -1137,6 +1139,13 @@ def check_audio_completion(voice_over_obj): for index, payload in enumerate(voice_over_obj.translation.payload["payload"]): if str(index) in voice_over_obj.payload["payload"].keys(): + if (get_original_duration_neg(voice_over_obj.payload["payload"][str(index)]["start_time"], voice_over_obj.payload["payload"][str(index)]["end_time"]) < 0.1): + missing_cards.append( + { + "card_number": index + 1, + "message": "Duration is 0 for this card.", + } + ) if ( "audio" in voice_over_obj.payload["payload"][str(index)].keys() and type(voice_over_obj.payload["payload"][str(index)]["audio"]) == dict @@ -1157,6 +1166,7 @@ def check_audio_completion(voice_over_obj): "message": "There is no audio present in this card.", } ) + return missing_cards @@ -1219,6 +1229,8 @@ def adjust_audio(audio_file, original_time, audio_speed): elif audio_time_difference == 0: logging.info("No time difference") elif audio_time_difference < -0.001: + if original_time == 0: + raise ZeroDivisionError logging.info("Speed up the audio by %s", str(seconds / original_time)) speedup_factor = seconds / original_time if speedup_factor > 1.009: @@ -1282,6 +1294,14 @@ def get_original_duration(start_time, end_time): ) return t_d +def get_original_duration_neg(start_time, end_time): + start = datetime.strptime(start_time, "%H:%M:%S.%f") + end = datetime.strptime(end_time, "%H:%M:%S.%f") + + time_difference = (end - start).total_seconds() + + return time_difference + def integrate_all_audios(file_name, payload, video_duration): length_payload = len(payload["payload"]) @@ -1350,6 +1370,23 @@ def integrate_all_audios(file_name, payload, video_duration): final_audio.export( file_name + "_" + str(previous_index) + ".ogg", format="ogg" ) + if "time_difference" not in payload["payload"][str(index)]: + start_time = payload["payload"][str(index)]["start_time"] + end_time = payload["payload"][str(index)]["end_time"] + time_difference = ( + datetime.strptime(end_time, "%H:%M:%S.%f") + - timedelta( + hours=float(start_time.split(":")[0]), + minutes=float(start_time.split(":")[1]), + seconds=float(start_time.split(":")[-1]), + ) + ).strftime("%H:%M:%S.%f") + t_d = ( + int(time_difference.split(":")[0]) * 3600 + + int(time_difference.split(":")[1]) * 60 + + float(time_difference.split(":")[2]) + ) + payload["payload"][str(index)]["time_difference"] = t_d if index == length_payload - 1: original_time = payload["payload"][str(index)]["time_difference"] end_time = payload["payload"][str(index)]["end_time"] diff --git a/backend/voiceover/views.py b/backend/voiceover/views.py index 0aade47a..93a7917a 100644 --- a/backend/voiceover/views.py +++ b/backend/voiceover/views.py @@ -1,3 +1,5 @@ +import csv +import io from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status @@ -7,6 +9,9 @@ authentication_classes, ) from rest_framework.response import Response +from task.tasks import celery_nmt_tts_call +from transcript.models import Transcript +from transcript.views import get_transcript_id from task.models import Task, TRANSLATION_VOICEOVER_EDIT from translation.utils import get_batch_translations_using_indictrans_nmt_api from translation.models import ( @@ -45,7 +50,9 @@ import uuid import regex from glossary.tmx.tmxservice import TMXService - +from organization.decorators import is_admin +from organization.models import Organization +from video.models import Video @api_view(["GET"]) def get_voice_over_export_types(request): @@ -85,6 +92,12 @@ def get_voice_over_id(task): .filter(status="VOICEOVER_EDIT_INPROGRESS") .first() ) + if task.status == "FAILED": + voice_over_id = ( + voice_over.filter(video=task.video) + .filter(status="VOICEOVER_SELECT_SOURCE") + .first() + ) else: if task.status == "NEW": voice_over_id = ( @@ -372,6 +385,8 @@ def get_payload(request): ) if voice_over.voice_over_type == "MACHINE_GENERATED": input_sentences = [] + fast_audio_threshold = 20 if task.target_language != "sa" else 16 + moderate_audio_threshold = 16 if task.target_language != "sa" else 12 for text, index in translation_payload: audio_index = str(start_offset + index) if audio_index in voice_over.payload["payload"].keys(): @@ -401,6 +416,10 @@ def get_payload(request): + float(time_difference.split(":")[1]) * 60 + float(time_difference.split(":")[2]) ) + try: + text_length_per_second = len(transcription_text)/t_d + except: + text_length_per_second = 100 sentences_list.append( { "id": str(int(audio_index) + 1), @@ -413,6 +432,7 @@ def get_payload(request): "audio" ], "audio_speed": 1, + "fast_audio": 0 if text_length_per_second < moderate_audio_threshold else 1 if text_length_per_second < fast_audio_threshold else 2, } ) payload = {"payload": sentences_list} @@ -728,7 +748,149 @@ def get_translated_text(request): {"message": "Translation failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) + +@swagger_auto_schema( + method="get", + manual_parameters=[ + openapi.Parameter( + "task_id", + openapi.IN_QUERY, + description=("An integer to pass the task id"), + type=openapi.TYPE_INTEGER, + required=True, + ), + ], + responses={ + 200: "Status has been fetched successfully", + 400: "Bad request", + 404: "No voice_over found for given task", + }, +) +@api_view(["GET"]) +def fetch_voice_over_status(request): + if not request.user.is_authenticated: + return Response({"message":"You do not have enough permissions to access this view!"}, status=401) + try: + task_id = request.query_params.get("task_id") + except KeyError: + return Response( + { + "message": "Missing required parameter - task_id" + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + task = Task.objects.get(pk=task_id) + except Task.DoesNotExist: + return Response( + {"message": "Task doesn't exist."}, + status=status.HTTP_404_NOT_FOUND, + ) + if not task.is_active: + return Response( + {"message": "This task is not active yet."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + voice_over = get_voice_over_id(task) + if voice_over is not None: + voice_over_id = voice_over.id + try: + voice_over = VoiceOver.objects.get(pk=voice_over_id) + return Response( + { + "message": "Status has been fetched successfully", + "task_id": task.id, + "voiceover_id": voice_over_id, + "status": voice_over.status, + }, + status=status.HTTP_200_OK, + ) + except: + return Response( + {"message": "VoiceOver doesn't exist."}, + status=status.HTTP_400_BAD_REQUEST, + ) + +@swagger_auto_schema( + method="post", + request_body=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["task_id", "vo_status"], + properties={ + "task_id": openapi.Schema( + type=openapi.TYPE_INTEGER, + description="An integer identifying the voice_over instance", + ), + "vo_status": openapi.Schema( + type=openapi.TYPE_STRING, + description="Voiceover task status to be set", + ) + }, + description="Post request body", + ), + responses={ + 200: "Status has been updated successfully", + 400: "Bad request", + 404: "No voice_over found for given task", + }, +) +@api_view(["POST"]) +def update_voice_over_status(request): + if not request.user.is_authenticated: + return Response({"message":"You do not have enough permissions to access this view!"}, status=401) + try: + # Get the required data from the POST body + task_id = request.data["task_id"] + vo_status = request.data["vo_status"] + except KeyError: + return Response( + { + "message": "Missing required parameters - task_id or vo_status" + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + try: + task = Task.objects.get(pk=task_id) + except Task.DoesNotExist: + return Response( + {"message": "Task doesn't exist."}, + status=status.HTTP_404_NOT_FOUND, + ) + + if not task.is_active: + return Response( + {"message": "This task is not active yet."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + voice_over = get_voice_over_id(task) + if voice_over is not None: + voice_over_id = voice_over.id + try: + voice_over = VoiceOver.objects.get(pk=voice_over_id) + if vo_status in ["VOICEOVER_SELECT_SOURCE", "VOICEOVER_EDITOR_ASSIGNED", "VOICEOVER_EDIT_INPROGRESS", "VOICEOVER_EDIT_COMPLETE", "VOICEOVER_REVIEWER_ASSIGNED", "VOICEOVER_REVIEW_INPROGRESS", "VOICEOVER_REVIEW_COMPLETE"]: + voice_over.status = vo_status + voice_over.save() + return Response( + { + "message": "Status has been updated successfully", + }, + status=status.HTTP_200_OK, + ) + else: + return Response( + {"message": "Invalid Status"}, + status=status.HTTP_400_BAD_REQUEST, + ) + except: + return Response( + {"message": "VoiceOver doesn't exist."}, + status=status.HTTP_400_BAD_REQUEST, + ) @swagger_auto_schema( method="post", @@ -807,7 +969,7 @@ def save_voice_over(request): bookmarked_segment = request.data.get("bookmark", None) user = request.user - if bookmarked_segment: + if bookmarked_segment is not None: user.user_history = { "task_id": task_id, "offset": offset, @@ -995,7 +1157,7 @@ def save_voice_over(request): text = voice_over_payload["text"] if text == "" or len(text) == 0: return Response( - {"message": "Text can't be empty."}, + {"message": "Text can't be empty for segment "+str(index+1)}, status=status.HTTP_400_BAD_REQUEST, ) @@ -1030,9 +1192,15 @@ def save_voice_over(request): if voice_over.voice_over_type == "MANUALLY_CREATED": voiceover_adjusted = adjust_voiceover(translation_payload) else: - voiceover_machine_generated = generate_voiceover_payload( - translation_payload, task.target_language, task - ) + try: + voiceover_machine_generated = generate_voiceover_payload( + translation_payload, task.target_language, task + ) + except ZeroDivisionError: + return Response( + {"message": "Cannot generate voiceover due to 0 duration for a segment"}, + status=status.HTTP_400_BAD_REQUEST, + ) if request.data.get("final"): if ( VoiceOver.objects.filter(status=VOICEOVER_EDIT_COMPLETE) @@ -1253,6 +1421,8 @@ def save_voice_over(request): if voice_over_obj is not None and int( payload["payload"][0]["id"] ) == int(offset): + fast_audio_threshold = 20 if task.target_language != "sa" else 16 + moderate_audio_threshold = 16 if task.target_language != "sa" else 12 for i in range(len(payload["payload"])): start_time = payload["payload"][i]["start_time"] end_time = payload["payload"][i]["end_time"] @@ -1340,6 +1510,10 @@ def save_voice_over(request): } ) else: + try: + text_length_per_second = len(transcription_text)/t_d + except: + text_length_per_second = 100 voice_over_obj.payload["payload"][ str(start_offset + i) ] = { @@ -1379,6 +1553,7 @@ def save_voice_over(request): "transcription_text": payload["payload"][i][ "transcription_text" ], + "fast_audio": 0 if text_length_per_second < moderate_audio_threshold else 1 if text_length_per_second < fast_audio_threshold else 2, } ) voice_over_obj.save() @@ -2078,3 +2253,111 @@ def reopen_translation_voiceover_task(request): {"message": "Can not reopen this task."}, status=status.HTTP_400_BAD_REQUEST, ) + +@api_view(["POST"]) +def csv_bulk_regenerate(request): + """ + API Endpoint to upload a csv file to regenerate failed VOTR tasks + Endpoint: /voiceover/csv_bulk_regenerate/ + Method: POST + """ + + org_id = request.data.get("org_id") + csv_content = request.data.get("csv") + + try: + org = Organization.objects.get(pk=org_id) + except Organization.DoesNotExist: + return Response( + {"message": "Organization not found"}, status=status.HTTP_404_NOT_FOUND + ) + + if not org.organization_owners.filter(id=request.user.id).exists(): + return Response( + {"message": "You are not allowed to upload CSV."}, + status=status.HTTP_403_FORBIDDEN, + ) + + decrypted = base64.b64decode(csv_content).decode("utf-8") + task_ids = [] + with io.StringIO(decrypted) as fp: + reader = csv.reader(fp, delimiter=",", quotechar='"') + for row in reader: + if row and row[0].strip(): + task_ids.append(int(row[0])) + + if len(task_ids) > 30: + return Response( + {"message": "Number of task id's is greater than 30."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + errors = [] + + for task_id in task_ids: + try: + task_obj = Task.objects.get(pk=task_id) + if task_obj.video.project_id.organization_id.id != org_id: + errors.append( + { + "row_no": f"Task {task_id}", + "message": f"Task Id does not belong to your organization", + } + ) + continue + # add flower queue check + except Task.DoesNotExist: + errors.append( + { + "row_no": f"Task {task_id}", + "message": f"Task Id does not exists", + } + ) + continue + + voiceover_obj = get_voice_over_id(task_obj) + + if voiceover_obj is None: + errors.append( + { + "row_no": f"Task {task_id}", + "message": f"Voiceover object does not exists", + } + ) + continue + + if voiceover_obj.status != "VOICEOVER_SELECT_SOURCE": + voiceover_obj.status = "VOICEOVER_SELECT_SOURCE" + voiceover_obj.save() + if task_obj.status != "SELECTED_SOURCE": + task_obj.status = "SELECTED_SOURCE" + task_obj.is_active = False + task_obj.save() + + transcription_task = Task.objects.filter(video=task_obj.video, task_type="TRANSCRIPTION_EDIT", status="COMPLETE").first() + if transcription_task is None: + errors.append( + { + "row_no": f"Task {task_id}", + "message": f"Transcription not completed yet for this VOTR task", + } + ) + continue + + transcript = get_transcript_id(transcription_task) + transcript_obj = Transcript.objects.get(pk=transcript.id) + translation = Translation.objects.filter(task=task_obj).first() + translation.transcript = transcript_obj + translation.save() + + if len(errors) > 0: + return Response( + {"message": "Invalid CSV", "response": errors}, + status=status.HTTP_400_BAD_REQUEST, + ) + else: + for task_id in task_ids: + celery_nmt_tts_call.delay(task_id) + return Response( + {"message": "CSV uploaded successfully"}, status=status.HTTP_200_OK + ) \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 0341a4d0..09b6e24a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,7 +53,7 @@ services: condition: service_started backend: condition: service_started - command: python3 -m celery -A backend worker -Q asr_tts -n asr_tts_worker --concurrency=1 -l DEBUG + command: python3 -m celery -A backend worker -Q asr_tts -n asr_tts_worker --concurrency=4 -l DEBUG restart: unless-stopped celery_nmt: