From c8537a5c260e422d1f7e6461146154902a76d0f8 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 26 Jul 2022 13:21:53 +0200 Subject: [PATCH] updated dependencies; most notably upgrade to rustfst 0.11.5 required some type adaptations --- Cargo.toml | 6 +++--- src/lib.rs | 13 +++++++------ src/search.rs | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b204ab1..e5b9720 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,10 +22,10 @@ bitflags = "1.3.2" clap = "2.34.0" ibig = "0.3.5" num-traits = "0.2.15" -rayon = "1.5.2" -rustfst = "0.8.1" +rayon = "1.5.3" +rustfst = "0.11.5" sesdiff = "0.3.0" simple-error = "0.2.3" [dev-dependencies] -criterion = "0.3.5" +criterion = "0.3.6" diff --git a/src/lib.rs b/src/lib.rs index 4cbee54..dab8a2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use std::cmp::Ordering; use std::str::FromStr; use std::error::Error; +use std::convert::TryInto; use rayon::prelude::*; use rustfst::prelude::*; @@ -1682,7 +1683,7 @@ impl VariantModel { //adds states for all boundaries let mut final_found = false; - let states: Vec = boundaries.iter().map(|boundary| { + let states: Vec = boundaries.iter().map(|boundary| { let state = fst.add_state(); if boundary.offset.begin == end_offset || boundary.offset.end == end_offset { final_found = true; @@ -1713,7 +1714,7 @@ impl VariantModel { let mut prevboundary: Option = None; let mut nextboundary: Option = None; - let input_symbol = match_index + 1; + let input_symbol = (match_index + 1) as u32; for (i, boundary) in boundaries.iter().enumerate() { if m.offset.begin == boundary.offset.end { @@ -1735,7 +1736,7 @@ impl VariantModel { if m.variants.is_some() && !m.variants.as_ref().unwrap().is_empty() { for (variant_index, variantresult) in m.variants.as_ref().unwrap().iter().enumerate() { - let output_symbol = output_symbols.len(); + let output_symbol = output_symbols.len() as u32; output_symbols.push( OutputSymbol { vocab_id: variantresult.vocab_id, symbol: output_symbol, @@ -1750,7 +1751,7 @@ impl VariantModel { variant_text += format!(" ({})", output_symbol).as_str(); //we encode the output symbol in the text otherwise the symbol table returns the old match eprintln!(" (transition state {}->{}: {} ({}) -> {} and variant score {})", prevstate, nextstate, m.text, input_symbol, variant_text, -1.0 * variantresult.score(params.freq_weight).ln() as f32); let osym = symtab_out.add_symbol(variant_text); - assert!(osym == output_symbol); + assert!(osym == output_symbol.try_into().unwrap()); } //each transition gets a base cost of n (the number of input tokens it covers) @@ -1760,7 +1761,7 @@ impl VariantModel { fst.add_tr(prevstate, Tr::new(input_symbol, output_symbol, cost, nextstate)).expect("adding transition"); } } else if n == 1 { //only for unigrams - let output_symbol = output_symbols.len(); + let output_symbol = output_symbols.len() as u32; output_symbols.push( OutputSymbol { vocab_id: 0, //0 vocab_id means we have an Out-of-Vocabulary word to copy from input symbol: output_symbol, @@ -1838,7 +1839,7 @@ impl VariantModel { eprintln!(" (#{}, path: {:?})", i+1, path); } for output_symbol in path.olabels.iter() { - let output_symbol = output_symbols.get(*output_symbol).expect("expected valid output symbol"); + let output_symbol = output_symbols.get(*output_symbol as usize).expect("expected valid output symbol"); sequence.output_symbols.push(output_symbol.clone()); } if self.have_lm && params.lm_weight > 0.0 { diff --git a/src/search.rs b/src/search.rs index 5febff0..a17b116 100644 --- a/src/search.rs +++ b/src/search.rs @@ -129,7 +129,7 @@ pub struct OutputSymbol { pub boundary_index: usize, /// ID of this symbol (each symbol is unlike, but multiple symbols can refers to the same vocab_id). /// The 0 symbol is reserved for epsilon in the underlying FST implementation - pub symbol: usize, + pub symbol: u32, }