@@ -9,25 +9,6 @@ use crate::user_history::UserHistory;
99use super :: cost:: { conn_cost, script_cost} ;
1010use super :: viterbi:: { RichSegment , ScoredPath } ;
1111
12- /// Non-independent kanji penalty for a segment.
13- /// Returns penalty (> 0) if the segment is non-independent (形式名詞/補助動詞) with kanji surface.
14- pub ( super ) fn non_independent_kanji_penalty ( seg : & RichSegment , conn : & ConnectionMatrix ) -> i64 {
15- if conn. is_non_independent ( seg. left_id ) && seg. surface . chars ( ) . any ( is_kanji) {
16- settings ( ) . reranker . non_independent_kanji_penalty
17- } else {
18- 0
19- }
20- }
21-
22- /// Pronoun cost bonus for a segment (positive value = cost reduction).
23- pub ( super ) fn pronoun_bonus ( seg : & RichSegment , conn : & ConnectionMatrix ) -> i64 {
24- if conn. is_pronoun ( seg. left_id ) {
25- settings ( ) . reranker . pronoun_cost_bonus
26- } else {
27- 0
28- }
29- }
30-
3112/// Te-form kanji penalty for a segment that follows て/で.
3213/// `prev` is the preceding segment (None for the first segment).
3314pub ( super ) fn te_form_kanji_penalty (
@@ -46,16 +27,6 @@ pub(super) fn te_form_kanji_penalty(
4627 0
4728}
4829
49- /// Person name penalty for a segment.
50- /// Returns penalty (> 0) if the segment is a person name (人名: 一般/姓/名; role == 6).
51- pub ( super ) fn person_name_penalty ( seg : & RichSegment , conn : & ConnectionMatrix ) -> i64 {
52- if conn. is_person_name ( seg. left_id ) {
53- settings ( ) . reranker . person_name_penalty
54- } else {
55- 0
56- }
57- }
58-
5930/// Single-char kanji content-word penalty with dictionary compound exemption.
6031pub ( super ) fn single_char_kanji_penalty (
6132 seg : & RichSegment ,
@@ -222,20 +193,16 @@ pub fn rerank(
222193 . sum ( ) ;
223194 path. viterbi_cost += total_script;
224195
225- // Per-segment penalties: non-independent kanji, pronoun bonus,
226- // te-form kanji, single-char kanji content-word.
196+ // Per-segment penalties: te-form kanji, single-char kanji content-word.
227197 if let Some ( conn) = conn {
228198 for ( i, seg) in path. segments . iter ( ) . enumerate ( ) {
229199 let prev = if i > 0 {
230200 Some ( & path. segments [ i - 1 ] )
231201 } else {
232202 None
233203 } ;
234- path. viterbi_cost += non_independent_kanji_penalty ( seg, conn) ;
235- path. viterbi_cost -= pronoun_bonus ( seg, conn) ;
236204 path. viterbi_cost += te_form_kanji_penalty ( prev, seg, conn) ;
237205 path. viterbi_cost += single_char_kanji_penalty ( seg, i, & path. segments , conn, dict) ;
238- path. viterbi_cost += person_name_penalty ( seg, conn) ;
239206 }
240207 }
241208 }
@@ -315,27 +282,6 @@ mod tests {
315282 }
316283 }
317284
318- #[ test]
319- fn non_independent_kanji_penalty_applied ( ) {
320- // ID 2 = non-independent (role 4), ID 1 = content word (role 0)
321- let roles = vec ! [ 0u8 , 0 , 4 ] ;
322- let conn = conn_with_roles ( roles) ;
323-
324- // Path A: こと (hiragana, non-independent) — no penalty
325- // Path B: 事 (kanji, non-independent) — penalty applied
326- let mut paths = vec ! [
327- path( vec![ seg( "こと" , "事" , 2 ) ] , 100 ) ,
328- path( vec![ seg( "こと" , "こと" , 2 ) ] , 100 ) ,
329- ] ;
330-
331- rerank ( & mut paths, Some ( & conn) , None ) ;
332-
333- // The hiragana path should rank higher (lower cost)
334- assert_eq ! ( paths[ 0 ] . segments[ 0 ] . surface, "こと" ) ;
335- assert_eq ! ( paths[ 1 ] . segments[ 0 ] . surface, "事" ) ;
336- assert ! ( paths[ 0 ] . viterbi_cost < paths[ 1 ] . viterbi_cost) ;
337- }
338-
339285 /// Build a minimal ConnectionMatrix with the given roles vector and
340286 /// function-word ID range.
341287 fn conn_with_roles_and_fw ( roles : Vec < u8 > , fw_min : u16 , fw_max : u16 ) -> ConnectionMatrix {
@@ -344,30 +290,6 @@ mod tests {
344290 ConnectionMatrix :: new_owned ( num_ids, fw_min, fw_max, roles, costs)
345291 }
346292
347- #[ test]
348- fn non_independent_kanji_penalty_not_applied_to_content_words ( ) {
349- // ID 1 = content word (role 0)
350- let roles = vec ! [ 0u8 , 0 ] ;
351- let conn = conn_with_roles ( roles) ;
352-
353- // Both paths use content word IDs — no non-independent penalty
354- let mut paths = vec ! [
355- path( vec![ seg( "こと" , "事" , 1 ) ] , 100 ) ,
356- path( vec![ seg( "こと" , "こと" , 1 ) ] , 100 ) ,
357- ] ;
358-
359- rerank ( & mut paths, Some ( & conn) , None ) ;
360-
361- // Costs should differ only by script cost, not by non-independent penalty
362- let penalty = settings ( ) . reranker . non_independent_kanji_penalty ;
363- let cost_diff = ( paths[ 1 ] . viterbi_cost - paths[ 0 ] . viterbi_cost ) . abs ( ) ;
364- assert ! (
365- cost_diff < penalty,
366- "no non-independent penalty should be applied: diff = {}" ,
367- cost_diff
368- ) ;
369- }
370-
371293 #[ test]
372294 fn te_form_kanji_penalty_applied ( ) {
373295 // ID 2 = function word (fw_min=2, fw_max=2), ID 1 = content word
@@ -472,35 +394,6 @@ mod tests {
472394 assert ! ( paths[ 0 ] . viterbi_cost < paths[ 1 ] . viterbi_cost) ;
473395 }
474396
475- #[ test]
476- fn pronoun_bonus_applied ( ) {
477- // ID 2 = pronoun (role 5), ID 1 = content word (role 0)
478- let roles = vec ! [ 0u8 , 0 , 5 ] ;
479- let conn = conn_with_roles ( roles) ;
480-
481- // Both paths have the same surface (hiragana) to isolate pronoun bonus.
482- // Path A: pronoun POS (id=2) — bonus applied
483- // Path B: content word POS (id=1) — no bonus
484- let mut paths = vec ! [
485- path( vec![ seg( "どれ" , "どれ" , 2 ) ] , 1000 ) ,
486- path( vec![ seg( "どれ" , "どれ" , 1 ) ] , 1000 ) ,
487- ] ;
488-
489- rerank ( & mut paths, Some ( & conn) , None ) ;
490-
491- // The pronoun path should rank higher (lower cost) after bonus
492- assert_eq ! (
493- paths[ 0 ] . segments[ 0 ] . left_id, 2 ,
494- "pronoun path should rank first"
495- ) ;
496- let bonus = settings ( ) . reranker . pronoun_cost_bonus ;
497- let diff = paths[ 1 ] . viterbi_cost - paths[ 0 ] . viterbi_cost ;
498- assert_eq ! (
499- diff, bonus,
500- "cost difference should equal pronoun_cost_bonus"
501- ) ;
502- }
503-
504397 /// A minimal dictionary for testing compound exemption.
505398 struct MockDict {
506399 entries : Vec < ( String , Vec < DictEntry > ) > ,
@@ -720,35 +613,6 @@ mod tests {
720613 ) ;
721614 }
722615
723- #[ test]
724- fn person_name_penalty_applied ( ) {
725- // ID 2 = person name (role 6), ID 1 = content word (role 0)
726- let roles = vec ! [ 0u8 , 0 , 6 ] ;
727- let conn = conn_with_roles ( roles) ;
728-
729- // Both paths have the same hiragana surface to isolate person name penalty.
730- // Path A: person name POS (id=2) — penalty applied
731- // Path B: content word POS (id=1) — no penalty
732- let mut paths = vec ! [
733- path( vec![ seg( "にしま" , "にしま" , 2 ) ] , 1000 ) ,
734- path( vec![ seg( "にしま" , "にしま" , 1 ) ] , 1000 ) ,
735- ] ;
736-
737- rerank ( & mut paths, Some ( & conn) , None ) ;
738-
739- // The content word path should rank higher (lower cost)
740- assert_eq ! (
741- paths[ 0 ] . segments[ 0 ] . left_id, 1 ,
742- "content word path should rank first"
743- ) ;
744- let penalty = settings ( ) . reranker . person_name_penalty ;
745- let diff = paths[ 1 ] . viterbi_cost - paths[ 0 ] . viterbi_cost ;
746- assert_eq ! (
747- diff, penalty,
748- "cost difference should equal person_name_penalty"
749- ) ;
750- }
751-
752616 #[ test]
753617 fn te_form_kanji_penalty_not_applied_to_non_te_function_word ( ) {
754618 // ID 2 = function word (fw_min=2, fw_max=2), ID 1 = content word
0 commit comments