View Javadoc
1   package com.acumenvelocity.ath.mt.confidence;
2   
3   import java.util.List;
4   import java.util.stream.Collectors;
5   
6   import com.acumenvelocity.ath.mt.confidence.CommonDataStructures.LanguagePairSupport;
7   
8   import net.sf.okapi.common.resource.TextFragment;
9   
10  /**
11   * Data structures for the HybridQualityEstimator.
12   */
13  public class HybridDataStructures {
14  
15    public enum EvaluationStrategy {
16      VERTEX_ONLY("Vertex AI only (MetricX/COMET)"),
17      VERTEX_WITH_HEURISTIC_VALIDATION("Vertex AI primary with Heuristic Validation"),
18      VERTEX_WITH_HEURISTIC_SUPPLEMENT("Vertex AI (COMET) with Heuristic Supplement"),
19      HEURISTIC_ONLY("Heuristic-only evaluation");
20  
21      private final String description;
22  
23      EvaluationStrategy(String description) {
24        this.description = description;
25      }
26  
27      @Override
28      public String toString() {
29        return description;
30      }
31    }
32  
33    /**
34     * Represents a single translation with combined scores from all estimators.
35     */
36    public static class HybridTranslationScore {
37      private final String modelId;
38      private final String translation;
39      private final double confidence; // Final, adjusted score
40      private final Double metricXScore;
41      private final Double cometScore;
42      private final Double heuristicScore;
43      private final String method; // e.g., "vertex_validated", "heuristic_only"
44      private boolean anomalyFlagged;
45      private String anomalyReason;
46  
47      public HybridTranslationScore(String modelId, String translation, double confidence,
48          Double metricXScore, Double cometScore, Double heuristicScore, String method) {
49        
50        this.modelId = modelId;
51        this.translation = translation;
52        this.confidence = confidence;
53        this.metricXScore = metricXScore;
54        this.cometScore = cometScore;
55        this.heuristicScore = heuristicScore;
56        this.method = method;
57        this.anomalyFlagged = false;
58      }
59  
60      public void flagAnomaly(String reason) {
61        this.anomalyFlagged = true;
62        this.anomalyReason = reason;
63      }
64  
65      public String getModelId() {
66        return modelId;
67      }
68  
69      public String getTranslation() {
70        return translation;
71      }
72  
73      public double getConfidence() {
74        return confidence;
75      }
76  
77      public Double getMetricXScore() {
78        return metricXScore;
79      }
80  
81      public Double getCometScore() {
82        return cometScore;
83      }
84  
85      public Double getHeuristicScore() {
86        return heuristicScore;
87      }
88  
89      public String getMethod() {
90        return method;
91      }
92  
93      public boolean isAnomalyFlagged() {
94        return anomalyFlagged;
95      }
96  
97      public String getAnomalyReason() {
98        return anomalyReason;
99      }
100 
101     @Override
102     public String toString() {
103       String anomaly = anomalyFlagged ? String.format(" [ANOMALY: %s]", anomalyReason) : "";
104       
105       return String.format("[%s, %s] %s | Confidence: %.3f | COMET: %.3f | Heuristic: %.3f%s",
106           modelId,
107           method,
108           translation.substring(0, Math.min(translation.length(), 60))
109               + (translation.length() > 60 ? "..." : ""),
110           confidence,
111           cometScore != null ? cometScore : Double.NaN,
112           heuristicScore != null ? heuristicScore : Double.NaN,
113           anomaly);
114     }
115   }
116 
117   /**
118    * Represents all hybrid-scored translations for a single source segment.
119    */
120   public static class HybridScoredSegment {
121     private final int segmentId;
122     private final String sourceText;
123     private final TextFragment sourceTf;
124     private final List<HybridTranslationScore> scores;
125 
126     public HybridScoredSegment(int segmentId, String sourceText, TextFragment sourceTf,
127         List<HybridTranslationScore> scores) {
128       
129       this.segmentId = segmentId;
130       this.sourceText = sourceText;
131       this.sourceTf = sourceTf;
132       this.scores = scores;
133     }
134 
135     public int getSegmentId() {
136       return segmentId;
137     }
138 
139     public String getSourceText() {
140       return sourceText;
141     }
142 
143     public List<HybridTranslationScore> getScores() {
144       return scores;
145     }
146 
147     public HybridTranslationScore getBestTranslation() {
148       return scores.isEmpty() ? null : scores.get(0);
149     }
150 
151     public void printReport() {
152       System.out.printf("\n=== Segment %d ===%n", segmentId);
153       System.out.printf("Source: %s%n", sourceText);
154       System.out.println("\nRanked Translations:");
155       
156       for (int i = 0; i < scores.size(); i++) {
157         System.out.printf("%d. %s%n", i + 1, scores.get(i));
158       }
159     }
160 
161     public TextFragment getSourceTf() {
162       return sourceTf;
163     }
164   }
165 
166   /**
167    * The final result object for the hybrid evaluation.
168    */
169   public static class HybridEvaluationResult {
170     private final List<HybridScoredSegment> segments;
171     private final EvaluationStrategy strategy;
172     private final LanguagePairSupport support;
173 
174     public HybridEvaluationResult(List<HybridScoredSegment> segments, EvaluationStrategy strategy,
175         LanguagePairSupport support) {
176       
177       this.segments = segments;
178       this.strategy = strategy;
179       this.support = support;
180     }
181 
182     public List<HybridScoredSegment> getSegments() {
183       return segments;
184     }
185 
186     public EvaluationStrategy getStrategy() {
187       return strategy;
188     }
189 
190     public LanguagePairSupport getSupport() {
191       return support;
192     }
193 
194     public void printFullReport() {
195       System.out.println("\n========================================");
196       System.out.println("====== HYBRID EVALUATION REPORT =======");
197       System.out.println("========================================");
198       System.out.println(support);
199       System.out.printf("Evaluation Strategy: %s%n", strategy);
200 
201       for (HybridScoredSegment segment : segments) {
202         segment.printReport();
203       }
204 
205       // Summarize
206       System.out.println("\n========================================");
207       System.out.println("Average Confidence by Model:");
208       
209       segments.stream()
210           .flatMap(s -> s.getScores().stream())
211           .collect(Collectors.groupingBy(HybridTranslationScore::getModelId,
212               Collectors.averagingDouble(HybridTranslationScore::getConfidence)))
213           .forEach((model, avg) -> System.out.printf(" %s: %.3f%n", model, avg));
214       
215       System.out.println("========================================");
216     }
217   }
218 }