diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..48b2c01 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Changed + +- `vi::telex` & `vi::vni` are deprecated & will be removed in the next release. Users are recommended to use `vi::methods` instead. +- `vi::telex::transform_buffer` & `vi::vni::transform_buffer` are deprecated. Users are recommended to use `vi::transform_buffer` instead. + +### Added + +- `vi::methods` module containing method definition & transforming functions. \ No newline at end of file diff --git a/benches/transform_benchmark.rs b/benches/transform_benchmark.rs index 4c91af5..60f6b84 100644 --- a/benches/transform_benchmark.rs +++ b/benches/transform_benchmark.rs @@ -4,42 +4,42 @@ pub fn telex_benchmark(c: &mut Criterion) { c.bench_function("telex vieejt", |b| { b.iter(|| { let mut output = String::new(); - vi::telex::transform_buffer(black_box("vieejt".chars()), &mut output) + vi::transform_buffer(&vi::TELEX, black_box("vieejt".chars()), &mut output) }) }); c.bench_function("telex ddaay", |b| { b.iter(|| { let mut output = String::new(); - vi::telex::transform_buffer(black_box("ddaay".chars()), &mut output) + vi::transform_buffer(&vi::TELEX, black_box("ddaay".chars()), &mut output) }) }); c.bench_function("telex jjjjjjjjjjjjjj", |b| { b.iter(|| { let mut output = String::new(); - vi::telex::transform_buffer(black_box("jjjjjjjjjjjjjj".chars()), &mut output) + vi::transform_buffer(&vi::TELEX, black_box("jjjjjjjjjjjjjj".chars()), &mut output) }) }); c.bench_function("telex jj", |b| { b.iter(|| { let mut output = String::new(); - vi::telex::transform_buffer(black_box("jj".chars()), &mut output) + vi::transform_buffer(&vi::TELEX, 
black_box("jj".chars()), &mut output) }) }); c.bench_function("telex nghienge", |b| { b.iter(|| { let mut output = String::new(); - vi::telex::transform_buffer(black_box("nghienge".chars()), &mut output) + vi::transform_buffer(&vi::TELEX, black_box("nghienge".chars()), &mut output) }) }); c.bench_function("telex ddaaysf", |b| { b.iter(|| { let mut output = String::new(); - vi::telex::transform_buffer(black_box("ddaaysf".chars()), &mut output) + vi::transform_buffer(&vi::TELEX, black_box("ddaaysf".chars()), &mut output) }) }); } @@ -48,42 +48,42 @@ pub fn vni_benchmark(c: &mut Criterion) { c.bench_function("vni viet65", |b| { b.iter(|| { let mut output = String::new(); - vi::vni::transform_buffer(black_box("viet65".chars()), &mut output) + vi::transform_buffer(&vi::VNI, black_box("viet65".chars()), &mut output) }) }); c.bench_function("vni day96", |b| { b.iter(|| { let mut output = String::new(); - vi::vni::transform_buffer(black_box("ddaay".chars()), &mut output) + vi::transform_buffer(&vi::VNI, black_box("day96".chars()), &mut output) }) }); c.bench_function("vni 1111111111111111", |b| { b.iter(|| { let mut output = String::new(); - vi::vni::transform_buffer(black_box("1111111111111111".chars()), &mut output) + vi::transform_buffer(&vi::VNI, black_box("1111111111111111".chars()), &mut output) }) }); c.bench_function("vni 11", |b| { b.iter(|| { let mut output = String::new(); - vi::vni::transform_buffer(black_box("11".chars()), &mut output) + vi::transform_buffer(&vi::VNI, black_box("11".chars()), &mut output) }) }); c.bench_function("vni nghieng6", |b| { b.iter(|| { let mut output = String::new(); - vi::vni::transform_buffer(black_box("nghieng6".chars()), &mut output) + vi::transform_buffer(&vi::VNI, black_box("nghieng6".chars()), &mut output) }) }); c.bench_function("vni day9612", |b| { b.iter(|| { let mut output = String::new(); - vi::vni::transform_buffer(black_box("day9612".chars()), &mut output) + vi::transform_buffer(&vi::VNI, black_box("day9612".chars()), 
&mut output) }) }); } diff --git a/examples/custom_definition.rs b/examples/custom_definition.rs new file mode 100644 index 0000000..26d9ee3 --- /dev/null +++ b/examples/custom_definition.rs @@ -0,0 +1,34 @@ +use phf::phf_map; +use vi::{ + processor::{LetterModification, ToneMark}, + Action, Definition, +}; + +// Custom vni method with ư short hand using the w character +const MY_VNI: Definition = phf_map! { + '1' => &[Action::AddTonemark(ToneMark::Acute)], + '2' => &[Action::AddTonemark(ToneMark::Grave)], + '3' => &[Action::AddTonemark(ToneMark::HookAbove)], + '4' => &[Action::AddTonemark(ToneMark::Tilde)], + '5' => &[Action::AddTonemark(ToneMark::Underdot)], + '6' => &[Action::ModifyLetter(LetterModification::Circumflex)], + '7' => &[Action::ModifyLetter(LetterModification::Horn)], + '8' => &[Action::ModifyLetter(LetterModification::Breve)], + '9' => &[Action::ModifyLetter(LetterModification::Dyet)], + 'z' => &[Action::ResetInsertedƯ, Action::InsertƯ], + '0' => &[Action::RemoveToneMark], +}; + +fn main() { + let inputs = "Xin hay4 mo73 toang het61 nhzng4 canh1 cza3 cua3 qua1 khz1 de963 thuyen62 toi6 nzong gio1 lang4 quen6 ra khoi7"; + + let words = inputs.split(' '); + + let mut result = String::new(); + for word in words { + vi::transform_buffer(&MY_VNI, word.chars(), &mut result); + result.push(' '); + } + + println!("{}", result); // prints "Xin hãy mở toang hết những cánh cửa của quá khứ để thuyền tôi nương gió lãng quên ra khơi" +} diff --git a/examples/long.rs b/examples/long.rs index 67ab297..3826912 100644 --- a/examples/long.rs +++ b/examples/long.rs @@ -1,6 +1,6 @@ extern crate vi; -use vi::vni; +use vi::{transform_buffer, VNI}; fn main() { let inputs = "xin chao2 toi6 la2 Hung7, toi6 den961 tu72 Viet65 Nam"; @@ -9,7 +9,7 @@ fn main() { let mut result = String::new(); for word in words { - vni::transform_buffer(word.chars(), &mut result); + transform_buffer(&VNI, word.chars(), &mut result); result.push(' '); } diff --git a/examples/repl.rs 
b/examples/repl.rs index 3e5d7f4..1e60aef 100644 --- a/examples/repl.rs +++ b/examples/repl.rs @@ -1,7 +1,6 @@ extern crate vi; use rustyline::DefaultEditor; -use vi::{telex, vni}; // A REPL for testing transformation result. fn main() { @@ -16,11 +15,13 @@ fn main() { let mut result = String::new(); for word in input.split_whitespace() { - if method == "telex" { - telex::transform_buffer(word.chars(), &mut result) + let definition = if method == "telex" { + &vi::TELEX } else { - vni::transform_buffer(word.chars(), &mut result) + &vi::VNI }; + + vi::transform_buffer(definition, word.chars(), &mut result); result.push(' '); } diff --git a/examples/simple.rs b/examples/simple.rs index 6c7dcbd..0ae55b9 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -1,13 +1,11 @@ extern crate vi; -use vi::vni; - fn main() { let inputs = vec![vec!['v', 'i', 'e', 't', '5', '6'], vec!['n', 'a', 'm']]; let mut result = String::new(); for input in inputs { - vni::transform_buffer(input.iter().cloned(), &mut result); + vi::transform_buffer(&vi::VNI, input.iter().cloned(), &mut result); result.push(' '); } diff --git a/examples/telex.rs b/examples/telex.rs index 977bc4c..80abbb5 100644 --- a/examples/telex.rs +++ b/examples/telex.rs @@ -1,7 +1,5 @@ extern crate vi; -use vi::telex; - fn main() { let inputs = "hoiwx anh tifnh yeue gioosng nhuw cais cheets nuotos trooi taats car"; @@ -9,7 +7,7 @@ fn main() { let mut result = String::new(); for word in words { - telex::transform_buffer(word.chars(), &mut result); + vi::transform_buffer(&vi::TELEX, word.chars(), &mut result); result.push(' '); } diff --git a/examples/vni.rs b/examples/vni.rs index eeb9808..dc69794 100644 --- a/examples/vni.rs +++ b/examples/vni.rs @@ -1,7 +1,5 @@ extern crate vi; -use vi::vni; - fn main() { let inputs = "anh se4 lam2, lam2 ta6t1 ca3 de963 d9uo75c che6t1 thay em"; @@ -9,7 +7,7 @@ fn main() { let mut result = String::new(); for word in words { - vni::transform_buffer(word.chars(), &mut result); + 
vi::transform_buffer(&vi::VNI, word.chars(), &mut result); result.push(' '); } diff --git a/src/editing.rs b/src/editing.rs index 82015c0..1b33860 100644 --- a/src/editing.rs +++ b/src/editing.rs @@ -1,3 +1,6 @@ +//! Functions used for character editing. +//! +//! These functions work directly with character & string instead of the abstract word struct. use crate::{ maps::{ ACCUTE_MAP, BREVE_MAP, CIRCUMFLEX_MAP, DOT_MAP, DYET_MAP, GRAVE_MAP, HOOK_ABOVE_MAP, diff --git a/src/lib.rs b/src/lib.rs index 8f1106a..7c22303 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,11 +14,10 @@ //! # Example //! //! ``` -//! use vi::vni; //! let inputs = vec![vec!['v', 'i', 'e', 't', '5', '6'], vec!['n', 'a', 'm']]; //! let mut result = String::new(); //! for input in inputs { -//! vni::transform_buffer(input.iter().cloned(), &mut result); +//! vi::transform_buffer(&vi::VNI, input.iter().cloned(), &mut result); //! result.push(' '); //! } //! println!("{}", result); // prints "việt nam " @@ -29,24 +28,21 @@ //! VI aims to be as lean as possible, focusing on only the useful features and main use-cases. Therefore, the engine //! implemented these rules by default with no way of configuring them: //! -//! - **Tone mark are placed in the new accent:** hoà instead of hòa +//! - **Tone mark are placed in the new accent:** `hoà` instead of `hòa` //! - **`w` in telex will insert `ư`:** so `chuw` or `chw` will produce `chư` +//! +//! Although, should you need to customise any behaviour, you can create your custom typing methods. See: [`methods`]. pub mod editing; pub mod maps; +pub mod methods; pub mod parsing; pub mod processor; +#[deprecated(since = "0.7.0")] pub mod telex; pub mod util; pub mod validation; +#[deprecated(since = "0.7.0")] pub mod vni; pub mod word; -/// A result of a buffer transformation. -#[derive(Debug, Clone)] -#[allow(dead_code)] -pub struct TransformResult { - /// Indicates whether a tone mark has been removed after the transformation. 
- pub tone_mark_removed: bool, - /// Indicates whether a letter modification has been removed after the transformation. - pub letter_modification_removed: bool, -} +pub use methods::*; diff --git a/src/methods.rs b/src/methods.rs new file mode 100644 index 0000000..a0117a2 --- /dev/null +++ b/src/methods.rs @@ -0,0 +1,288 @@ +//! The definitions of different typing methods. +//! +//! Normally, for IME developers, you only need these things from this module: +//! - [`transform_buffer`] function to transform your sequence of characters into a word using a typing definition. +//! - [`TELEX`] typing definition that you can pass to [`transform_buffer`] to transform a character sequence using the telex method. +//! - [`VNI`] typing definition that you can pass to [`transform_buffer`] to transform a character sequence using the vni method. +//! +//! ## Example +//! +//! To transform a character sequence using the VNI definition: +//! ``` +//! use vi::methods::transform_buffer; +//! +//! let mut result = String::new(); +//! transform_buffer(&vi::VNI, "viet65".chars(), &mut result); +//! assert_eq!(result, "việt".to_owned()); +//! ``` +//! +//! ## Define your own typing definition +//! +//! `vi-rs` supports some typing methods out of the box, such as `telex` and `vni`. However, should users ever need to define their +//! own typing methods, they can use the existing APIs in this module. +//! +//! To define a new typing definition, you need to declare a definition map, which is a [`phf::Map`]: +//! +//! ``` +//! use phf::phf_map; +//! use vi::{ +//! processor::{LetterModification, ToneMark}, +//! Action, Definition, +//! }; +//! use vi::methods::transform_buffer; +//! +//! pub static MY_VNI: Definition = phf_map! { +//! '1' => &[Action::AddTonemark(ToneMark::Acute)], +//! '2' => &[Action::AddTonemark(ToneMark::Grave)], +//! '3' => &[Action::AddTonemark(ToneMark::HookAbove)], +//! '4' => &[Action::AddTonemark(ToneMark::Tilde)], +//! 
'5' => &[Action::AddTonemark(ToneMark::Underdot)], +//! '6' => &[Action::ModifyLetter(LetterModification::Circumflex)], +//! '7' => &[Action::ModifyLetter(LetterModification::Horn)], +//! '8' => &[Action::ModifyLetter(LetterModification::Breve)], +//! '9' => &[Action::ModifyLetter(LetterModification::Dyet)], +//! 'z' => &[Action::ResetInsertedƯ, Action::InsertƯ], +//! '0' => &[Action::RemoveToneMark], +//! }; +//! +//! // Then you can pass it to `transform_buffer` as usual: +//! let mut result = String::new(); +//! transform_buffer(&MY_VNI, "chza".chars(), &mut result); +//! assert_eq!(result, "chưa".to_owned()); +//! ``` +use phf::{phf_map, Map}; + +use crate::{ + processor::{ + add_tone, modify_letter, remove_tone, LetterModification, ToneMark, Transformation, + }, + validation::is_valid_word, + word::Word, +}; + +/// An action to be listed as part of a typing definition. +#[derive(Clone, Debug, PartialEq)] +pub enum Action { + /// Add a tone mark to the word. + AddTonemark(ToneMark), + /// Apply a letter modification where possible. + ModifyLetter(LetterModification), + /// Apply a letter modification only if the character family exists in the word. For example, + /// `ModifyLetterOnCharacterFamily(Circumflex, 'a')` will only apply the circumflex + /// modification if `a` or any character in the `a` family (`â`, `ă`) is present in the word. + ModifyLetterOnCharacterFamily(LetterModification, char), + /// Insert an ư character at the end of the word. This only applies when the word has no vowel yet or is exactly `gi`. + InsertƯ, + /// Remove the last ư character inserted at the end of the word, replacing it with the typed character. **Note:** this only triggers if the last executed action is `InsertƯ`. + ResetInsertedƯ, + /// Remove the tone mark from the word. + RemoveToneMark, +} + +/// A definition of a typing method. +/// +/// The definition is a [`phf::Map`] with the key as the character that triggers an action and the value, +/// a list of actions that can be triggered by that character. +/// +/// If a character can trigger different actions depending on what is possible, its value will contain multiple actions. 
For example, +/// +/// ``` +/// use phf::phf_map; +/// use vi::{ +/// processor::{LetterModification, ToneMark}, +/// Action, Definition, +/// }; +/// pub static TELEX: Definition = phf_map! { +/// 'w' => &[Action::ResetInsertedƯ, Action::ModifyLetter(LetterModification::Horn), Action::ModifyLetter(LetterModification::Breve), Action::InsertƯ], +/// }; +/// ``` +/// +/// The definition above specifies that `w` can trigger a `ResetInsertedƯ` action, or if that doesn't work, a `ModifyLetter(LetterModification::Horn)` action +/// will be tried instead, and so on down the list. Note that as soon as one action in the list is applied, the rest of the actions +/// in the list will be ignored. +pub type Definition = Map<char, &'static [Action]>; + +/// A result of a buffer transformation. +#[derive(Debug, Clone)] +pub struct TransformResult { + /// Indicates whether a tone mark has been removed after the transformation. + pub tone_mark_removed: bool, + /// Indicates whether a letter modification has been removed after the transformation. + pub letter_modification_removed: bool, +} + +/// A definition for the VNI typing method with this configuration: +/// +/// - `1` -> Acute (thêm dấu sắc) +/// - `2` -> Grave (thêm dấu huyền) +/// - `3` -> HookAbove (thêm dấu hỏi) +/// - `4` -> Tilde (thêm dấu ngã) +/// - `5` -> Underdot (thêm dấu nặng) +/// - `6` -> Circumflex (thêm dấu ^) +/// - `7` -> Horn (thêm dấu móc cho ư hoặc ơ) +/// - `8` -> Breve (thêm dấu cho a thành ă) +/// - `9` -> Dyet (thêm dấu gạch cho d thành đ) +/// - `0` -> RemoveToneMark bỏ dấu thanh (sắc, hỏi, ngã, huyền) +pub static VNI: Definition = phf_map! 
{ + '1' => &[Action::AddTonemark(ToneMark::Acute)], + '2' => &[Action::AddTonemark(ToneMark::Grave)], + '3' => &[Action::AddTonemark(ToneMark::HookAbove)], + '4' => &[Action::AddTonemark(ToneMark::Tilde)], + '5' => &[Action::AddTonemark(ToneMark::Underdot)], + '6' => &[Action::ModifyLetter(LetterModification::Circumflex)], + '7' => &[Action::ModifyLetter(LetterModification::Horn)], + '8' => &[Action::ModifyLetter(LetterModification::Breve)], + '9' => &[Action::ModifyLetter(LetterModification::Dyet)], + '0' => &[Action::RemoveToneMark], +}; + +/// A definition for the Telex typing method with this configuration: +/// +/// - `s` -> Acute (thêm dấu sắc) +/// - `f` -> Grave (thêm dấu huyền) +/// - `r` -> HookAbove (thêm dấu hỏi) +/// - `x` -> Tilde (thêm dấu ngã) +/// - `j` -> Underdot (thêm dấu nặng) +/// - `a` -> Circumflex for a (thêm dấu ^ cho chữ a) +/// - `e` -> Circumflex for e (thêm dấu ^ cho chữ e) +/// - `o` -> Circumflex for o (thêm dấu ^ cho chữ o) +/// - `w` -> Horn for ư/ơ or Breve for a (thêm dấu móc cho ư hoặc ơ hoặc thêm dấu cho a thành ă) +/// - `d` -> Dyet (thêm dấu gạch cho d thành đ) +/// - `z` -> RemoveToneMark bỏ dấu thanh (sắc, hỏi, ngã, huyền) +/// +/// **Note:** +/// - By default, a `w` typed by itself will be inserted as `ư` in the word. +/// - A `u` followed by a `w` will produce `ư`, and if you add another `w`, it will result in `uw`. +/// - A `w` will produce `ư`, and if it's followed by a `w`, it will not produce `uw` but will replace `ư` with `w`. +pub static TELEX: Definition = phf_map! 
{ + 's' => &[Action::AddTonemark(ToneMark::Acute)], + 'f' => &[Action::AddTonemark(ToneMark::Grave)], + 'r' => &[Action::AddTonemark(ToneMark::HookAbove)], + 'x' => &[Action::AddTonemark(ToneMark::Tilde)], + 'j' => &[Action::AddTonemark(ToneMark::Underdot)], + 'a' => &[Action::ModifyLetterOnCharacterFamily(LetterModification::Circumflex, 'a')], + 'e' => &[Action::ModifyLetterOnCharacterFamily(LetterModification::Circumflex, 'e')], + 'o' => &[Action::ModifyLetterOnCharacterFamily(LetterModification::Circumflex, 'o')], + 'w' => &[Action::ResetInsertedƯ, Action::ModifyLetter(LetterModification::Horn), Action::ModifyLetter(LetterModification::Breve), Action::InsertƯ], + 'd' => &[Action::ModifyLetter(LetterModification::Dyet)], + 'z' => &[Action::RemoveToneMark], +}; + +/// Transform a buffer of characters (a single word) using a typing method definition, appending the resulting word to `output`. +/// +/// # Example +/// +/// ``` +/// use vi::methods::transform_buffer; +/// +/// let mut result = String::new(); +/// transform_buffer(&vi::VNI, "viet65".chars(), &mut result); +/// assert_eq!(result, "việt".to_owned()); +/// ``` +pub fn transform_buffer<I>( + definition: &Definition, + buffer: I, + output: &mut String, +) -> TransformResult +where + I: IntoIterator<Item = char>, +{ + let mut word = Word::empty(); + let mut tone_mark_removed = false; + let mut letter_modification_removed = false; + + let mut last_executed_action = None; + + for ch in buffer { + let lowercase_ch = ch.to_ascii_lowercase(); + + // A character that triggers no action (not in the definition, or mapped to an empty action list) is appended as-is. 
+ let actions = definition.get(&lowercase_ch).copied().unwrap_or_default(); + let mut action_iter = actions.iter(); + let mut action = match action_iter.next() { + Some(first_action) => first_action, + None => { + word.push(ch); + continue; + } + }; + let fallback = format!("{}{}", word, ch); + + loop { + let transformation = match action { + Action::AddTonemark(tonemark) => add_tone(&mut word, tonemark), + Action::ModifyLetter(modification) => modify_letter(&mut word, modification), + Action::ModifyLetterOnCharacterFamily(modification, family_char) + if word.vowel.to_ascii_lowercase().contains(*family_char) => + { + modify_letter(&mut word, modification) + } + Action::RemoveToneMark => remove_tone(&mut word), + Action::InsertƯ => { + if word.vowel.is_empty() || word.to_string() == "gi" { + word.push(if ch.is_lowercase() { 'u' } else { 'U' }); + let last_index = word.len() - 1; + word.letter_modifications + .push((last_index, LetterModification::Horn)); + Transformation::LetterModificationAdded + } else { + Transformation::Ignored + } + } + Action::ResetInsertedƯ if matches!(last_executed_action, Some(Action::InsertƯ)) => + { + word.replace_last_char(ch); + Transformation::LetterModificationRemoved + } + _ => Transformation::Ignored, + }; + + // If the transformation cannot be applied, try the next action if there's one. + if transformation == Transformation::Ignored { + if let Some(next_action) = action_iter.next() { + action = next_action; + continue; + } + } + + if transformation == Transformation::ToneMarkRemoved { + tone_mark_removed = true; + } + + if transformation == Transformation::LetterModificationRemoved { + letter_modification_removed = true; + } + + let action_performed = match transformation { + Transformation::Ignored | Transformation::LetterModificationRemoved => false, + // A tone mark removal only counts as an action when it was requested by a `RemoveToneMark` action. 
+ Transformation::ToneMarkRemoved => *action == Action::RemoveToneMark, + _ => true, + }; + + // If the inserted ư was actually reset, the typed character already replaced it, so no further processing is needed. + if *action == Action::ResetInsertedƯ && transformation != Transformation::Ignored { + last_executed_action = Some(action.clone()); + break; + } + + if !action_performed { + word.push(ch); + last_executed_action = None; + } else if !is_valid_word(&word.to_string()) { + word.set(fallback); + last_executed_action = None; + } else { + last_executed_action = Some(action.clone()); + } + break; + } + } + + output.push_str(&word.to_string()); + + TransformResult { + tone_mark_removed, + letter_modification_removed, + } +} diff --git a/src/telex.rs b/src/telex.rs index 07feaa8..1bd44f7 100644 --- a/src/telex.rs +++ b/src/telex.rs @@ -1,11 +1,5 @@ -//! The telex method transformation -use crate::processor::{add_tone, modify_letter, remove_tone, Transformation}; -use crate::validation::is_valid_word; -use crate::word::Word; use crate::TransformResult; -use super::processor::{LetterModification, ToneMark}; - /// Transform input buffer containing a single word to vietnamese string output using telex mode. 
/// /// # Example @@ -16,97 +10,10 @@ use super::processor::{LetterModification, ToneMark}; /// transform_buffer("vieetj".chars(), &mut result); /// assert_eq!(result, "việt".to_owned()); /// ``` +#[deprecated(since = "0.7.0", note = "please use `vi::transform_buffer` instead")] pub fn transform_buffer<I>(buffer: I, output: &mut String) -> TransformResult where I: IntoIterator<Item = char>, { - let mut word = Word::empty(); - let mut ư_inserted_previously = false; - let mut tone_mark_removed = false; - let mut letter_modification_removed = false; - - for ch in buffer { - let fallback = format!("{}{}", word, ch); - let ch_lowercase = ch.to_ascii_lowercase(); - - if ch_lowercase != 'w' { - ư_inserted_previously = false; - } - - let transformation = match ch_lowercase { - 's' => add_tone(&mut word, &ToneMark::Acute), - 'f' => add_tone(&mut word, &ToneMark::Grave), - 'r' => add_tone(&mut word, &ToneMark::HookAbove), - 'x' => add_tone(&mut word, &ToneMark::Tilde), - 'j' => add_tone(&mut word, &ToneMark::Underdot), - 'z' => remove_tone(&mut word), - 'a' | 'e' | 'o' if word.vowel.to_ascii_lowercase().contains(ch_lowercase) => { - modify_letter(&mut word, &LetterModification::Circumflex) - } - 'w' if ư_inserted_previously => { - word.replace_last_char(ch); - Transformation::LetterModificationRemoved - } - 'w' => match modify_letter(&mut word, &LetterModification::Horn) { - Transformation::Ignored | Transformation::LetterModificationRemoved => { - match modify_letter(&mut word, &LetterModification::Breve) { - Transformation::Ignored | Transformation::LetterModificationRemoved => { - let transformation = - if word.vowel.is_empty() || word.to_string() == "gi" { - word.push(if ch.is_lowercase() { 'u' } else { 'U' }); - let last_index = word.len() - 1; - word.letter_modifications - .push((last_index, LetterModification::Horn)); - Transformation::LetterModificationAdded - } else { - Transformation::Ignored - }; - ư_inserted_previously = transformation != Transformation::Ignored; - 
transformation - } - transformation => transformation, - } - } - transformation => transformation, - }, - 'd' => modify_letter(&mut word, &LetterModification::Dyet), - _ => Transformation::Ignored, - }; - - if transformation == Transformation::ToneMarkRemoved { - tone_mark_removed = true; - } - - if transformation == Transformation::LetterModificationRemoved { - letter_modification_removed = true; - } - - let initial_ư_removed = Transformation::LetterModificationRemoved == transformation - && ư_inserted_previously - && word.len() == 1; - - let action_performed = match transformation { - Transformation::LetterModificationRemoved if initial_ư_removed => true, - Transformation::Ignored | Transformation::LetterModificationRemoved => false, - // If tone mark was intentionally removed with z character then it's count as an action. - Transformation::ToneMarkRemoved => ch_lowercase == 'z', - _ => true, - }; - - if !action_performed { - word.push(ch); - } else if !initial_ư_removed && !is_valid_word(&word.to_string()) { - word.set(fallback); - } - - if initial_ư_removed { - ư_inserted_previously = false; - } - } - output.push_str(&word.to_string()); - - TransformResult { - tone_mark_removed, - letter_modification_removed, - } + crate::transform_buffer(&crate::TELEX, buffer, output) } diff --git a/src/util.rs b/src/util.rs index 93c29c4..a13e6b6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,3 +1,4 @@ +//! Useful utility functions that might be helpful for developing a Vietnamese IME. use crate::maps::VOWELS; /// Strip off tone mark & modifications from an input char. diff --git a/src/vni.rs b/src/vni.rs index a187535..fcad959 100644 --- a/src/vni.rs +++ b/src/vni.rs @@ -1,12 +1,4 @@ -//! 
The vni method transformation -use crate::{ - processor::{add_tone, modify_letter, remove_tone, Transformation}, - validation::is_valid_word, - word::Word, - TransformResult, -}; - -use super::processor::{LetterModification, ToneMark}; +use crate::TransformResult; /// Transform input buffer containing a single word to vietnamese string output using vni mode. /// @@ -18,57 +10,10 @@ use super::processor::{LetterModification, ToneMark}; /// transform_buffer("viet65".chars(), &mut result); /// assert_eq!(result, "việt".to_owned()); /// ``` +#[deprecated(since = "0.7.0", note = "please use `vi::transform_buffer` instead")] pub fn transform_buffer<I>(buffer: I, output: &mut String) -> TransformResult where I: IntoIterator<Item = char>, { - let mut word = Word::empty(); - let mut tone_mark_removed = false; - let mut letter_modification_removed = false; - - for ch in buffer { - let fallback = format!("{}{}", word, ch); - - let transformation = match ch { - '1' => add_tone(&mut word, &ToneMark::Acute), - '2' => add_tone(&mut word, &ToneMark::Grave), - '3' => add_tone(&mut word, &ToneMark::HookAbove), - '4' => add_tone(&mut word, &ToneMark::Tilde), - '5' => add_tone(&mut word, &ToneMark::Underdot), - '6' => modify_letter(&mut word, &LetterModification::Circumflex), - '7' => modify_letter(&mut word, &LetterModification::Horn), - '8' => modify_letter(&mut word, &LetterModification::Breve), - '9' => modify_letter(&mut word, &LetterModification::Dyet), - '0' => remove_tone(&mut word), - _ => Transformation::Ignored, - }; - - if transformation == Transformation::ToneMarkRemoved { - tone_mark_removed = true; - } - - if transformation == Transformation::LetterModificationRemoved { - letter_modification_removed = true; - } - - let action_performed = match transformation { - Transformation::Ignored | Transformation::LetterModificationRemoved => false, - // If tone mark was intentionally removed with 0 character then it's count as an action. 
- Transformation::ToneMarkRemoved => ch == '0', - _ => true, - }; - - if !action_performed { - word.push(ch); - } else if !is_valid_word(&word.to_string()) { - word.set(fallback); - } - } - - output.push_str(&word.to_string()); - - TransformResult { - tone_mark_removed, - letter_modification_removed, - } + crate::transform_buffer(&crate::VNI, buffer, output) } diff --git a/testdata/output/telex__simple_telex.snap b/testdata/output/telex__simple_telex.snap index c6af1e8..b39e2df 100644 --- a/testdata/output/telex__simple_telex.snap +++ b/testdata/output/telex__simple_telex.snap @@ -38,7 +38,7 @@ daasd aa dd chuw -chww +chw ựo chuyện quăng @@ -68,7 +68,7 @@ trường cười hường chửa -chuă +chuảwz ừa w ư diff --git a/tests/telex.rs b/tests/telex.rs index 52a3ad3..a04fc6d 100644 --- a/tests/telex.rs +++ b/tests/telex.rs @@ -3,7 +3,7 @@ mod shared; fn snapshot_transform(lines: &str) -> String { shared::transform_lines(lines, |word| { let mut trasformed_word = String::new(); - vi::telex::transform_buffer(word.chars(), &mut trasformed_word); + vi::transform_buffer(&vi::TELEX, word.chars(), &mut trasformed_word); trasformed_word }) } diff --git a/tests/vni.rs b/tests/vni.rs index eb7ff7e..bb88c23 100644 --- a/tests/vni.rs +++ b/tests/vni.rs @@ -3,7 +3,7 @@ mod shared; fn snapshot_transform(lines: &str) -> String { shared::transform_lines(lines, |word| { let mut trasformed_word = String::new(); - vi::vni::transform_buffer(word.chars(), &mut trasformed_word); + vi::transform_buffer(&vi::VNI, word.chars(), &mut trasformed_word); trasformed_word }) }