Skip to content

Commit

Permalink
feat: add watsonx translation backend (#598)
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelroudnitski authored Sep 28, 2024
1 parent 7b4eb29 commit f56a528
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 2 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ Available backends:
- `deepl` - [DeepL Pro](#deepl-translation-config)
- `yandex` - [Yandex Translate](#yandex-translation-config)
- `openai` - [OpenAI](#openai-translation-config)
- `watsonx` - [watsonx](#watsonx-translation-config)

### Find usages

Expand Down Expand Up @@ -483,6 +484,28 @@ OPENAI_API_KEY=<OpenAI API key>
OPENAI_MODEL=<optional>
```

<a name="watsonx-translation-config"></a>
### watsonx Translate

`i18n-tasks translate-missing` requires a watsonx project ID and API key; get them at [IBM watsonx](https://www.ibm.com/watsonx/).

```yaml
# config/i18n-tasks.yml
translation:
backend: watsonx
watsonx_api_key: <watsonx API key>
watsonx_project_id: <watsonx project id>
watsonx_model: <optional>
```

or via environment variable:

```bash
WATSONX_API_KEY=<watsonx API key>
WATSONX_PROJECT_ID=<watsonx project id>
WATSONX_MODEL=<optional>
```

### Contextual Rails Parser

There is an experimental feature to parse Rails with more context. `i18n-tasks` will support:
Expand Down
9 changes: 9 additions & 0 deletions config/locales/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ en:
none: Every translation is in use.
usages:
none: No key usages found.
watsonx_translate:
errors:
no_api_key: >-
Set watsonx API key via WATSONX_API_KEY environment variable or translation.watsonx_api_key
in config/i18n-tasks.yml. Get the key at https://www.ibm.com/products/watsonx-ai.
no_project_id: >-
Set watsonx Project ID via WATSONX_PROJECT_ID environment variable or translation.watsonx_project_id
in config/i18n-tasks.yml. Get the project ID at https://www.ibm.com/products/watsonx-ai.
no_results: watsonx returned no results.
yandex_translate:
errors:
no_api_key: >-
Expand Down
9 changes: 9 additions & 0 deletions config/locales/ru.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,15 @@ ru:
none: Все переводы используются.
usages:
none: Не найдено использований.
watsonx_translate:
errors:
no_api_key: >-
Установите ключ API watsonx через переменную среды WATSONX_API_KEY или translation.watsonx_api_key
в config/i18n-tasks.yml. Получите ключ на https://www.ibm.com/products/watsonx-ai.
no_project_id: >-
Установите идентификатор проекта watsonx через переменную среды WATSONX_PROJECT_ID или translation.watsonx_project_id
в config/i18n-tasks.yml. Получите идентификатор проекта на https://www.ibm.com/products/watsonx-ai.
no_results: watsonx не вернул результатов.
yandex_translate:
errors:
no_api_key: |-
Expand Down
2 changes: 1 addition & 1 deletion lib/i18n/tasks/command/options/locales.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ module Locales
parser: OptionParsers::Locale::Parser,
default: 'base'

TRANSLATION_BACKENDS = %w[google deepl yandex openai].freeze
TRANSLATION_BACKENDS = %w[google deepl yandex openai watsonx].freeze
arg :translation_backend,
'-b',
'--backend BACKEND',
Expand Down
5 changes: 4 additions & 1 deletion lib/i18n/tasks/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def data_config

# translation config
# @return [Hash{String => String,Hash,Array}]
def translation_config # rubocop:disable Metrics/AbcSize
def translation_config # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@config_sections[:translation] ||= begin
conf = (config[:translation] || {}).with_indifferent_access
conf[:backend] ||= DEFAULTS[:translation_backend]
Expand All @@ -70,6 +70,9 @@ def translation_config # rubocop:disable Metrics/AbcSize
conf[:deepl_version] = ENV['DEEPL_VERSION'] if ENV.key?('DEEPL_VERSION')
conf[:openai_api_key] = ENV['OPENAI_API_KEY'] if ENV.key?('OPENAI_API_KEY')
conf[:openai_model] = ENV['OPENAI_MODEL'] if ENV.key?('OPENAI_MODEL')
conf[:watsonx_api_key] = ENV['WATSONX_API_KEY'] if ENV.key?('WATSONX_API_KEY')
conf[:watsonx_project_id] = ENV['WATSONX_PROJECT_ID'] if ENV.key?('WATSONX_PROJECT_ID')
conf[:watsonx_model] = ENV['WATSONX_MODEL'] if ENV.key?('WATSONX_MODEL')
conf[:yandex_api_key] = ENV['YANDEX_API_KEY'] if ENV.key?('YANDEX_API_KEY')
conf
end
Expand Down
3 changes: 3 additions & 0 deletions lib/i18n/tasks/translation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
require 'i18n/tasks/translators/deepl_translator'
require 'i18n/tasks/translators/google_translator'
require 'i18n/tasks/translators/openai_translator'
require 'i18n/tasks/translators/watsonx_translator'
require 'i18n/tasks/translators/yandex_translator'

module I18n::Tasks
Expand All @@ -19,6 +20,8 @@ def translate_forest(forest, from:, backend:)
Translators::GoogleTranslator.new(self).translate_forest(forest, from)
when :openai
Translators::OpenAiTranslator.new(self).translate_forest(forest, from)
when :watsonx
Translators::WatsonxTranslator.new(self).translate_forest(forest, from)
when :yandex
Translators::YandexTranslator.new(self).translate_forest(forest, from)
else
Expand Down
153 changes: 153 additions & 0 deletions lib/i18n/tasks/translators/watsonx_translator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# frozen_string_literal: true

require 'i18n/tasks/translators/base_translator'
require 'active_support/core_ext/string/filters'

module I18n::Tasks::Translators
  # Translation backend that asks an IBM watsonx text-generation model to
  # translate batches of strings.
  #
  # Each batch is serialized as a JSON array, embedded in a chat-style prompt,
  # and the generated text is parsed back as JSON.
  #
  # Settings read from +translation_config+:
  # - +watsonx_api_key+ (required)
  # - +watsonx_project_id+ (required)
  # - +watsonx_model+ (optional, see #model for the default)
  # - +watsonx_system_prompt+ (optional, defaults to DEFAULT_SYSTEM_PROMPT)
  class WatsonxTranslator < BaseTranslator
    # max allowed texts per request
    BATCH_SIZE = 50
    # Template for the system message. +%{from}+ and +%{to}+ are filled in by
    # Kernel#format in #translate; +%%{+ is the escaped form of a literal +%{+.
    DEFAULT_SYSTEM_PROMPT = <<~PROMPT.squish
      You are a helpful assistant that translates content from the %{from} locale
      to the %{to} locale in an i18n locale array.
      You always preserve the structure and formatting exactly as it is.
      The array has a structured format and contains multiple strings. Your task is to translate
      each of these strings and create a new array with the translated strings.
      Reminder:
      - Translate only the text, preserving the structure and formatting.
      - Do not translate any URLs.
      - Do not translate HTML tags like `<details>` and `<summary>`.
      - HTML markups (enclosed in < and > characters) must not be changed under any circumstance.
      - Variables (starting with %%{ and ending with }) must not be changed under any circumstance.
      - Output only the result, without any additional information or comments.
    PROMPT

    # @param from [String, Symbol] source locale
    # @param to [String, Symbol] target locale
    # @param options [Hash] any extra options, passed through unchanged
    # @return [Hash] +options+ with +:from+ and +:to+ merged in
    def options_for_translate_values(from:, to:, **options)
      options.merge(
        from: from,
        to: to
      )
    end

    # @return [Hash] no extra options are used for HTML values
    def options_for_html
      {}
    end

    # @return [Hash] no extra options are used for plain-text values
    def options_for_plain
      {}
    end

    # @return [String] localized message for an empty backend response
    def no_results_error_message
      I18n.t('i18n_tasks.watsonx_translate.errors.no_results')
    end

    private

    # @return [WatsonxClient] memoized API client built from the configured key
    def translator
      @translator ||= WatsonxClient.new(key: api_key)
    end

    # @return [String] the configured watsonx API key
    # @raise [I18n::Tasks::CommandError] when the key is missing or blank
    def api_key
      @api_key ||= begin
        key = @i18n_tasks.translation_config[:watsonx_api_key]
        fail ::I18n::Tasks::CommandError, I18n.t('i18n_tasks.watsonx_translate.errors.no_api_key') if key.blank?

        key
      end
    end

    # @return [String] the configured watsonx project id
    # @raise [I18n::Tasks::CommandError] when the project id is missing or blank
    def project_id
      @project_id ||= begin
        project_id = @i18n_tasks.translation_config[:watsonx_project_id]
        if project_id.blank?
          fail ::I18n::Tasks::CommandError,
               I18n.t('i18n_tasks.watsonx_translate.errors.no_project_id')
        end

        project_id
      end
    end

    # @return [String] configured model id, or the built-in default when unset
    def model
      @model ||= @i18n_tasks.translation_config[:watsonx_model].presence || 'meta-llama/llama-3-2-90b-vision-instruct'
    end

    # @return [String] configured system prompt template, or DEFAULT_SYSTEM_PROMPT
    def system_prompt
      @system_prompt ||= @i18n_tasks.translation_config[:watsonx_system_prompt].presence || DEFAULT_SYSTEM_PROMPT
    end

    # Translates +list+ in slices of BATCH_SIZE and concatenates the results.
    #
    # @param list [Array<String>] values to translate
    # @param from [String, Symbol] source locale
    # @param to [String, Symbol] target locale
    # @return [Array] flattened parsed JSON output of every batch
    # @raise [I18n::Tasks::CommandError] when a batch yields no generated text
    # @raise [JSON::ParserError] when the generated text is not valid JSON
    def translate_values(list, from:, to:)
      per_batch = list.each_slice(BATCH_SIZE).map do |batch|
        generated = translate(batch, from, to)
        # A response without generated text would otherwise reach JSON.parse as
        # nil and surface as a bare TypeError; report the localized error instead.
        fail ::I18n::Tasks::CommandError, no_results_error_message if generated.blank?

        JSON.parse(generated)
      end

      per_batch.flatten
    end

    # Sends one batch to the model and returns the raw generated text.
    #
    # @param values [Array<String>] one batch of values
    # @param from [String, Symbol] source locale
    # @param to [String, Symbol] target locale
    # @return [String, nil] generated text of the first result, nil if absent
    def translate(values, from, to)
      # NOTE(review): the header/eot markers look like a Llama-style chat
      # template matching the default model — confirm before switching models.
      prompt = [
        '<|eot_id|><|start_header_id|>system<|end_header_id|>',
        format(system_prompt, from: from, to: to),
        '<|eot_id|><|start_header_id|>user<|end_header_id|>Translate this array:',
        "<|eot_id|><|start_header_id|>user<|end_header_id|>#{values.to_json}",
        '<|eot_id|><|start_header_id|>assistant<|end_header_id|>'
      ].join

      response = translator.generate_text(
        model_id: model,
        project_id: project_id,
        input: prompt,
        parameters: {
          decoding_method: :greedy,
          max_new_tokens: 2048,
          repetition_penalty: 1
        }
      )
      response.dig('results', 0, 'generated_text')
    end
  end
end

# Small HTTP client for the watsonx text-generation endpoint.
#
# Constructing an instance immediately exchanges the API key for an IBM Cloud
# IAM access token (a network request) and configures a Faraday connection that
# sends that token as a Bearer authorization header.
class WatsonxClient
  WATSONX_BASE_URL = 'https://us-south.ml.cloud.ibm.com/ml/'
  IBM_CLOUD_IAM_URL = 'https://iam.cloud.ibm.com/identity/token'

  # @param key [String] IBM Cloud API key
  # @raise [I18n::Tasks::CommandError] when the faraday gem is not available
  def initialize(key:)
    # faraday is loaded lazily so it is only needed when this backend is used
    begin
      require 'faraday'
    rescue LoadError
      raise ::I18n::Tasks::CommandError, "Add gem 'faraday' to your Gemfile to use this command"
    end

    @http = Faraday.new(url: WATSONX_BASE_URL) do |conn|
      conn.use Faraday::Response::RaiseError
      conn.request :json
      conn.response :json
      conn.options.timeout = 600
      conn.request :authorization, :Bearer, token(key)
    end
  end

  # Posts a text-generation request.
  #
  # @param opts [Hash] request payload (sent as the JSON request body)
  # @return [Object] the parsed response body
  def generate_text(**opts)
    # Faraday's #post takes no keyword arguments, so **opts arrives as the
    # positional request body hash.
    @http.post('v1/text/generation?version=2024-05-20', **opts).body
  end

  private

  # Exchanges an API key for an IAM access token.
  #
  # The credentials are sent as a form-encoded request body rather than as URL
  # query parameters, so the API key does not end up in request URLs (which are
  # commonly written to logs and included in HTTP error details).
  #
  # @param key [String] IBM Cloud API key
  # @return [String, nil] the +access_token+ field of the IAM response
  def token(key)
    iam = Faraday.new(url: IBM_CLOUD_IAM_URL) do |conn|
      conn.use Faraday::Response::RaiseError
      conn.request :url_encoded
      conn.response :json
    end

    credentials = {
      grant_type: 'urn:ibm:params:oauth:grant-type:apikey',
      apikey: key
    }
    iam.post(nil, credentials).body['access_token']
  end
end

0 comments on commit f56a528

Please sign in to comment.