Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTTPError 502 Bad Gateway for big number of strings #118

Open
CojocaruLiviuGabriel opened this issue Sep 12, 2024 · 0 comments
Open

HTTPError 502 Bad Gateway for big number of strings #118

CojocaruLiviuGabriel opened this issue Sep 12, 2024 · 0 comments

Comments

@CojocaruLiviuGabriel
Copy link

CojocaruLiviuGabriel commented Sep 12, 2024

When I use the API to translate a large array of strings (322, to be more specific), after a while I receive an HTTPError 502 Bad Gateway error. I tried to split the array into smaller pieces, but I still get the same error.

image


def parse_xml_file(file_path):
    """Load an XML file and collect the text of its <source>/<translation> tags.

    Only <message> elements nested as root/<context>/<message> are visited, and
    a message is recorded only when it has both a <source> and a <translation>
    child. The recorded text may be None when an element is empty.

    Returns:
        A tuple ``(pairs, tree, root)`` where ``pairs`` is a list of
        ``(source_text, translation_text)`` tuples in document order, ``tree``
        is the parsed ElementTree and ``root`` is its root element.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    pairs = []
    # 'context/message' walks every <message> child of every <context> child
    # of the root, in document order.
    for message_node in root.iterfind('context/message'):
        source_node = message_node.find('source')
        translation_node = message_node.find('translation')

        # Skip messages that are missing either of the two tags.
        if source_node is None or translation_node is None:
            continue

        pairs.append((source_node.text, translation_node.text))

    return pairs, tree, root

def translate_batch(texts, target_lang='ES', *, timeout=30, delay=2):
    """Translate a batch of texts to the specified target language using the DeepL API.

    One POST request is issued per text. A failed request never aborts the
    batch: the error is printed and an empty string is stored in that text's
    slot, so the returned list always lines up with ``texts``.

    Args:
        texts: Iterable of strings to translate.
        target_lang: Target language code passed to the API as ``target_lang``.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without it a stalled gateway/proxy would block the run forever.
        delay: Seconds to sleep after each request to throttle the call rate.

    Returns:
        A list with one translated string per input text ("" on failure).
    """
    # NOTE(review): the DeepL translate endpoint is documented to accept
    # several `text` parameters per request; sending a whole batch in one call
    # would cut request volume considerably - confirm limits in the API docs.
    base_payload = {
        'auth_key': API_KEY,
        'target_lang': target_lang,
        'formality': 'default',  # Adjust formality if needed
    }
    translations = []

    for text in texts:
        # Build a fresh payload per request instead of mutating a shared dict.
        payload = dict(base_payload, text=text)

        try:
            response = requests.post(
                DEEPL_API_URL, data=payload, proxies=proxies, timeout=timeout
            )
            response.raise_for_status()  # Check for HTTP request errors

            # Extract translated text
            translations.append(response.json()['translations'][0]['text'])
        except requests.exceptions.RequestException as e:
            print(f"Error during translation: {e}")
            translations.append("")  # Append empty string in case of failure
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # A 2xx answer whose body is not the expected JSON shape must not
            # crash the run and throw away every translation gathered so far.
            print(f"Unexpected translation response: {e!r}")
            translations.append("")

        time.sleep(delay)  # Add a delay to avoid overloading the API

    return translations

def translate_texts(texts, target_lang='ES'):
    """Translate ``texts`` chunk by chunk and return all results in one list.

    The input is sliced into consecutive chunks of at most BATCH_SIZE items;
    each chunk is handed to ``translate_batch`` and the per-chunk results are
    concatenated in order.
    """
    chunk_starts = range(0, len(texts), BATCH_SIZE)

    collected = []
    for start in chunk_starts:
        chunk = texts[start:start + BATCH_SIZE]
        collected += translate_batch(chunk, target_lang)

    return collected

def update_spanish_file(english_file, spanish_file):
    """Translate English text and update Spanish XML file.

    Reads the (source, translation) pairs from ``english_file``, sends the
    non-empty English translations through ``translate_texts`` and stores each
    result in the <translation> tag of every <message> in ``spanish_file``
    whose <source> text matches. The Spanish file is rewritten in place and
    the elapsed time is printed.

    Args:
        english_file: Path to the XML file holding the English texts.
        spanish_file: Path to the XML file to update (overwritten).
    """
    # Start timing
    start_time = time.perf_counter()

    # Step 1: Parse the English XML file and extract translations
    english_source_translation_pairs, _, _ = parse_xml_file(english_file)

    # Keep only pairs that actually have text to translate. The SAME filtered
    # list is used later for pairing; zipping the unfiltered pairs against the
    # filtered results would shift every translation after the first empty
    # entry onto the wrong source string.
    translatable_pairs = [pair for pair in english_source_translation_pairs if pair[1]]
    english_translations = [pair[1] for pair in translatable_pairs]

    # Step 2: Translate the English translations into Spanish
    translated_texts = translate_texts(english_translations, target_lang='ES')

    # Step 3: Parse the Spanish XML file
    _, spanish_tree, spanish_root = parse_xml_file(spanish_file)

    # Step 4: Map each English source string to its Spanish text. An empty
    # result is the failure placeholder of the translation step, so it is
    # skipped rather than allowed to wipe an existing Spanish translation.
    # For duplicate sources the last successful translation wins.
    translation_by_source = {
        english_source: translated_text
        for (english_source, _), translated_text in zip(translatable_pairs, translated_texts)
        if translated_text
    }

    # Single pass over the Spanish messages instead of rescanning the whole
    # tree once per translated string.
    for message in spanish_root.iterfind('context/message'):
        source = message.find('source')
        translation = message.find('translation')

        if source is None or translation is None:
            continue
        if source.text in translation_by_source:
            translation.text = translation_by_source[source.text]

    # Step 5: Write the updated Spanish XML back to the file
    spanish_tree.write(spanish_file, encoding='utf-8', xml_declaration=True)
    write_xml_with_doctype(spanish_file, spanish_tree)

    # Calculate and print the execution time
    execution_time = time.perf_counter() - start_time
    print(f"Execution time: {execution_time:.2f} seconds")

# Example usage:
if __name__ == "__main__":
    # Input (English) and output (Spanish) translation files for the demo run.
    english_file, spanish_file = 'english_file.ts', 'spanish_file.ts'
    update_spanish_file(english_file, spanish_file)

Above is my code

Edit: I am using the free version for the moment, but I plan on upgrading to PRO. If I raise the sleep to 2 seconds, I receive error code 504.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant