Skip to content

Commit

Permalink
updated dependencies; most notably upgrade to rustfst 0.11.5 required…
Browse files Browse the repository at this point in the history
… some type adaptations
  • Loading branch information
proycon committed Jul 26, 2022
1 parent 25c0a95 commit c8537a5
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ bitflags = "1.3.2"
clap = "2.34.0"
ibig = "0.3.5"
num-traits = "0.2.15"
rayon = "1.5.2"
rustfst = "0.8.1"
rayon = "1.5.3"
rustfst = "0.11.5"
sesdiff = "0.3.0"
simple-error = "0.2.3"

[dev-dependencies]
criterion = "0.3.5"
criterion = "0.3.6"
13 changes: 7 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use std::sync::Arc;
use std::cmp::Ordering;
use std::str::FromStr;
use std::error::Error;
use std::convert::TryInto;
use rayon::prelude::*;
use rustfst::prelude::*;

Expand Down Expand Up @@ -1682,7 +1683,7 @@ impl VariantModel {

//adds states for all boundaries
let mut final_found = false;
let states: Vec<usize> = boundaries.iter().map(|boundary| {
let states: Vec<u32> = boundaries.iter().map(|boundary| {
let state = fst.add_state();
if boundary.offset.begin == end_offset || boundary.offset.end == end_offset {
final_found = true;
Expand Down Expand Up @@ -1713,7 +1714,7 @@ impl VariantModel {
let mut prevboundary: Option<usize> = None;
let mut nextboundary: Option<usize> = None;

let input_symbol = match_index + 1;
let input_symbol = (match_index + 1) as u32;

for (i, boundary) in boundaries.iter().enumerate() {
if m.offset.begin == boundary.offset.end {
Expand All @@ -1735,7 +1736,7 @@ impl VariantModel {

if m.variants.is_some() && !m.variants.as_ref().unwrap().is_empty() {
for (variant_index, variantresult) in m.variants.as_ref().unwrap().iter().enumerate() {
let output_symbol = output_symbols.len();
let output_symbol = output_symbols.len() as u32;
output_symbols.push( OutputSymbol {
vocab_id: variantresult.vocab_id,
symbol: output_symbol,
Expand All @@ -1750,7 +1751,7 @@ impl VariantModel {
variant_text += format!(" ({})", output_symbol).as_str(); //we encode the output symbol in the text otherwise the symbol table returns the old match
eprintln!(" (transition state {}->{}: {} ({}) -> {} and variant score {})", prevstate, nextstate, m.text, input_symbol, variant_text, -1.0 * variantresult.score(params.freq_weight).ln() as f32);
let osym = symtab_out.add_symbol(variant_text);
assert!(osym == output_symbol);
assert!(osym == output_symbol.try_into().unwrap());
}

//each transition gets a base cost of n (the number of input tokens it covers)
Expand All @@ -1760,7 +1761,7 @@ impl VariantModel {
fst.add_tr(prevstate, Tr::new(input_symbol, output_symbol, cost, nextstate)).expect("adding transition");
}
} else if n == 1 { //only for unigrams
let output_symbol = output_symbols.len();
let output_symbol = output_symbols.len() as u32;
output_symbols.push( OutputSymbol {
vocab_id: 0, //0 vocab_id means we have an Out-of-Vocabulary word to copy from input
symbol: output_symbol,
Expand Down Expand Up @@ -1838,7 +1839,7 @@ impl VariantModel {
eprintln!(" (#{}, path: {:?})", i+1, path);
}
for output_symbol in path.olabels.iter() {
let output_symbol = output_symbols.get(*output_symbol).expect("expected valid output symbol");
let output_symbol = output_symbols.get(*output_symbol as usize).expect("expected valid output symbol");
sequence.output_symbols.push(output_symbol.clone());
}
if self.have_lm && params.lm_weight > 0.0 {
Expand Down
2 changes: 1 addition & 1 deletion src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ pub struct OutputSymbol {
pub boundary_index: usize,
/// ID of this symbol (each symbol is unique, but multiple symbols can refer to the same vocab_id).
/// The 0 symbol is reserved for epsilon in the underlying FST implementation
pub symbol: usize,
pub symbol: u32,
}


Expand Down

0 comments on commit c8537a5

Please sign in to comment.