@@ -548,7 +548,8 @@ mod tests {
548
548
( "abcd" . to_string( ) , 10.0 ) ,
549
549
] ;
550
550
551
- let model = Unigram :: from ( sentencepieces, Some ( 0 ) , false ) . unwrap ( ) ;
551
+ let model =
552
+ Unigram :: from ( sentencepieces, Some ( 0 ) , false , & AddedVocabulary :: default ( ) ) . unwrap ( ) ;
552
553
let result = model. encode ( "abcd" ) . unwrap ( ) ;
553
554
assert_eq ! ( result, vec![ "abcd" ] ) ;
554
555
}
@@ -570,7 +571,8 @@ mod tests {
570
571
( "qr" . to_string( ) , -0.5 ) ,
571
572
] ;
572
573
573
- let mut model = Unigram :: from ( sentencepieces, Some ( 0 ) , false ) . unwrap ( ) ;
574
+ let mut model =
575
+ Unigram :: from ( sentencepieces, Some ( 0 ) , false , & AddedVocabulary :: default ( ) ) . unwrap ( ) ;
574
576
575
577
for is_optimized in & [ true , false ] {
576
578
model. set_optimized ( * is_optimized) ;
@@ -617,7 +619,8 @@ mod tests {
617
619
( "<0xC3>" . to_string( ) , -0.01 ) ,
618
620
( "<0xA9>" . to_string( ) , -0.03 ) ,
619
621
] ;
620
- let unigram = Unigram :: from ( sentencepieces, Some ( 0 ) , true ) . unwrap ( ) ;
622
+ let unigram =
623
+ Unigram :: from ( sentencepieces, Some ( 0 ) , true , & AddedVocabulary :: default ( ) ) . unwrap ( ) ;
621
624
let tokens: Vec < Token > = unigram. tokenize ( "é" ) . unwrap ( ) ;
622
625
assert_eq ! (
623
626
tokens,
0 commit comments