View Javadoc
1   package com.acumenvelocity.ath.steps;
2   
3   import java.util.ArrayList;
4   import java.util.List;
5   
6   import com.acumenvelocity.ath.common.Const;
7   import com.acumenvelocity.ath.common.ControllerUtil;
8   import com.acumenvelocity.ath.common.Log;
9   import com.acumenvelocity.ath.common.OkapiUtil;
10  import com.acumenvelocity.ath.gct.v3.AthTranslation;
11  
12  import net.sf.okapi.common.Event;
13  import net.sf.okapi.common.IParameters;
14  import net.sf.okapi.common.IResource;
15  import net.sf.okapi.common.MimeTypeMapper;
16  import net.sf.okapi.common.Util;
17  import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
18  import net.sf.okapi.common.query.MatchType;
19  import net.sf.okapi.common.resource.ITextUnit;
20  import net.sf.okapi.common.resource.Segment;
21  import net.sf.okapi.common.resource.TextContainer;
22  import net.sf.okapi.common.resource.TextFragment;
23  import net.sf.okapi.common.resource.TextFragmentUtil;
24  import net.sf.okapi.lib.translation.QueryUtil;
25  
26  public class BatchMtStep extends BaseTuBatchProcessingStep {
27  
28    private BatchMtParameters params = new BatchMtParameters();
29    private final List<String> sourceSegments = new ArrayList<>();
30    private final List<SegmentInfo> segmentInfos = new ArrayList<>();
31    private final QueryUtil qutil = new QueryUtil();
32  
33    @Override
34    public String getName() {
35      return "Batch Google Cloud Translation v3";
36    }
37  
38    @Override
39    public String getDescription() {
40      return "Translates a batch of collected TU segments with the "
41          + "Google Cloud Translate v3 service";
42    }
43  
44    @Override
45    public BatchMtParameters getParameters() {
46      return params;
47    }
48  
49    @Override
50    public void setParameters(IParameters params) {
51      this.params = (BatchMtParameters) params;
52    }
53  
54    @Override
55    protected void clear() {
56      sourceSegments.clear();
57      segmentInfos.clear();
58    }
59  
60    /**
61     * Pre-process text units: collect source segments
62     */
63    private void preProcessTextUnit(ITextUnit tu) {
64      TextContainer source = tu.getSource();
65  
66      if (source == null) {
67        Log.error(getClass(), "Source of TU '{}' is null", tu.getId());
68        return;
69      }
70  
71      // Process each segment in the text unit
72      for (Segment segment : source.getSegments()) {
73        TextFragment content = segment.getContent();
74  
75        // Skip empty segments
76        if (content == null || content.isEmpty()) {
77          Log.trace(getClass(), "Skipping empty segment in TU '{}'", tu.getId());
78          continue;
79        }
80  
81        String sourceText = null;
82  
83        if (params.isMtSendPlainText()) {
84          sourceText = content.getText();
85          params.setMimeType(MimeTypeMapper.PLAIN_TEXT_MIME_TYPE);
86  
87        } else {
88          sourceText = qutil.toCodedHTML(content);
89          params.setMimeType(MimeTypeMapper.HTML_MIME_TYPE);
90        }
91  
92        sourceSegments.add(sourceText);
93  
94        // Store segment info for later mapping
95        segmentInfos.add(new SegmentInfo(tu, segment.getId()));
96  
97        Log.trace(getClass(), "Collected segment [{}]: '{}'",
98            sourceSegments.size() - 1, sourceText);
99      }
100   }
101 
102   /**
103    * Post-process text units: set target segments
104    */
105   private void postProcessTextUnits(List<String> targetSegments) {
106     if (Util.isEmpty(targetSegments)) {
107       Log.warn(getClass(), "No evaluation results available");
108       return;
109     }
110 
111     if (targetSegments.size() != segmentInfos.size()) {
112       Log.error(getClass(), "Mismatch: {} target segments, but {} segment infos",
113           targetSegments.size(), segmentInfos.size());
114 
115       return;
116     }
117 
118     // Process each target segment
119     for (int i = 0; i < targetSegments.size(); i++) {
120       String targetText = targetSegments.get(i);
121       SegmentInfo segInfo = segmentInfos.get(i);
122 
123       ITextUnit tu = segInfo.textUnit;
124       TextContainer source = tu.getSource();
125       String segmentId = segInfo.segmentId;
126 
127       // Get or create target container
128       TextContainer target = tu.getTarget(getTargetLocale());
129 
130       if (target == null) {
131         target = tu.createTarget(getTargetLocale(), false, IResource.COPY_SEGMENTATION);
132         Log.trace(getClass(), "Created target container for TU '{}'", tu.getId());
133       }
134 
135       // Get or create target segment
136       Segment sseg = source.getSegments().get(segmentId);
137       Segment tseg = target.getSegments().get(segmentId);
138 
139       if (tseg == null) {
140         tseg = new Segment(segmentId);
141         target.append(tseg);
142 
143         Log.trace(getClass(), "Created target segment '{}' in TU '{}'", segmentId, tu.getId());
144       }
145 
146       TextFragment targetTf;
147 
148       if (params.isMtSendPlainText()) {
149         targetTf = new TextFragment(targetText);
150 
151       } else {
152         targetTf = qutil.fromCodedHTMLToFragment(targetText, null);
153         TextFragment segSource = sseg.getContent();
154 
155         OkapiUtil.removeExtraCodes(segSource.getCodes(), targetTf);
156 
157         // Align codes and copy metadata from source to target
158         TextFragmentUtil.alignAndCopyCodeMetadata(segSource, targetTf, true, true);
159 
160         // Rearrange opening and closing codes
161         OkapiUtil.rearrangeCodes(segSource.getCodes(), targetTf);
162       }
163 
164       tseg.setContent(targetTf);
165 
166       // Mark as MT
167       AltTranslationsAnnotation ata = new AltTranslationsAnnotation();
168 
169       ata.add(getSourceLocale(), getTargetLocale(), sseg.getContent(), sseg.getContent(), targetTf,
170           MatchType.MT, 95, Const.MT_PROVIDER_GOOGLE_MT_V3, 95, 95);
171 
172       tseg.setAnnotation(ata);
173     }
174   }
175 
176   @Override
177   protected void processTuEvents(List<Event> tuEvents) {
178     for (Event tue : tuEvents) {
179       ITextUnit tu = tue.getTextUnit();
180       preProcessTextUnit(tu);
181     }
182 
183     Log.info(getClass(), "Collected {} source segments from {} text units",
184         sourceSegments.size(), tuEvents.size());
185 
186     List<String> targetSegments = AthTranslation.translateBatch(
187         sourceSegments,
188         getSourceLocale().toString(),
189         getTargetLocale().toString(),
190         MimeTypeMapper.PLAIN_TEXT_MIME_TYPE,
191         ControllerUtil.getProjectId(),
192         params.getProjectLocation(),        
193         params.getModelProjectId(),
194         params.getModelProjectLocation(),
195         params.getModelId(),        
196         params.getGlossaryProjectId(),
197         params.getGlossaryProjectLocation(),
198         params.getGlossaryId());
199 
200     // translateBatchWithModel(
201     // AthTranslation.getClient(),
202     // sourceSegments,
203     // getSourceLocale().toString(),
204     // getTargetLocale().toString());
205 
206     postProcessTextUnits(targetSegments);
207   }
208 
209 }