1 package com.acumenvelocity.ath.steps;
2
3 import java.util.ArrayList;
4 import java.util.List;
5
6 import com.acumenvelocity.ath.common.Const;
7 import com.acumenvelocity.ath.common.ControllerUtil;
8 import com.acumenvelocity.ath.common.Log;
9 import com.acumenvelocity.ath.common.OkapiUtil;
10 import com.acumenvelocity.ath.gct.v3.AthTranslation;
11
12 import net.sf.okapi.common.Event;
13 import net.sf.okapi.common.IParameters;
14 import net.sf.okapi.common.IResource;
15 import net.sf.okapi.common.MimeTypeMapper;
16 import net.sf.okapi.common.Util;
17 import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
18 import net.sf.okapi.common.query.MatchType;
19 import net.sf.okapi.common.resource.ITextUnit;
20 import net.sf.okapi.common.resource.Segment;
21 import net.sf.okapi.common.resource.TextContainer;
22 import net.sf.okapi.common.resource.TextFragment;
23 import net.sf.okapi.common.resource.TextFragmentUtil;
24 import net.sf.okapi.lib.translation.QueryUtil;
25
26 public class BatchMtStep extends BaseTuBatchProcessingStep {
27
28 private BatchMtParameters params = new BatchMtParameters();
29 private final List<String> sourceSegments = new ArrayList<>();
30 private final List<SegmentInfo> segmentInfos = new ArrayList<>();
31 private final QueryUtil qutil = new QueryUtil();
32
33 @Override
34 public String getName() {
35 return "Batch Google Cloud Translation v3";
36 }
37
38 @Override
39 public String getDescription() {
40 return "Translates a batch of collected TU segments with the "
41 + "Google Cloud Translate v3 service";
42 }
43
44 @Override
45 public BatchMtParameters getParameters() {
46 return params;
47 }
48
49 @Override
50 public void setParameters(IParameters params) {
51 this.params = (BatchMtParameters) params;
52 }
53
54 @Override
55 protected void clear() {
56 sourceSegments.clear();
57 segmentInfos.clear();
58 }
59
60
61
62
63 private void preProcessTextUnit(ITextUnit tu) {
64 TextContainer source = tu.getSource();
65
66 if (source == null) {
67 Log.error(getClass(), "Source of TU '{}' is null", tu.getId());
68 return;
69 }
70
71
72 for (Segment segment : source.getSegments()) {
73 TextFragment content = segment.getContent();
74
75
76 if (content == null || content.isEmpty()) {
77 Log.trace(getClass(), "Skipping empty segment in TU '{}'", tu.getId());
78 continue;
79 }
80
81 String sourceText = null;
82
83 if (params.isMtSendPlainText()) {
84 sourceText = content.getText();
85 params.setMimeType(MimeTypeMapper.PLAIN_TEXT_MIME_TYPE);
86
87 } else {
88 sourceText = qutil.toCodedHTML(content);
89 params.setMimeType(MimeTypeMapper.HTML_MIME_TYPE);
90 }
91
92 sourceSegments.add(sourceText);
93
94
95 segmentInfos.add(new SegmentInfo(tu, segment.getId()));
96
97 Log.trace(getClass(), "Collected segment [{}]: '{}'",
98 sourceSegments.size() - 1, sourceText);
99 }
100 }
101
102
103
104
105 private void postProcessTextUnits(List<String> targetSegments) {
106 if (Util.isEmpty(targetSegments)) {
107 Log.warn(getClass(), "No evaluation results available");
108 return;
109 }
110
111 if (targetSegments.size() != segmentInfos.size()) {
112 Log.error(getClass(), "Mismatch: {} target segments, but {} segment infos",
113 targetSegments.size(), segmentInfos.size());
114
115 return;
116 }
117
118
119 for (int i = 0; i < targetSegments.size(); i++) {
120 String targetText = targetSegments.get(i);
121 SegmentInfo segInfo = segmentInfos.get(i);
122
123 ITextUnit tu = segInfo.textUnit;
124 TextContainer source = tu.getSource();
125 String segmentId = segInfo.segmentId;
126
127
128 TextContainer target = tu.getTarget(getTargetLocale());
129
130 if (target == null) {
131 target = tu.createTarget(getTargetLocale(), false, IResource.COPY_SEGMENTATION);
132 Log.trace(getClass(), "Created target container for TU '{}'", tu.getId());
133 }
134
135
136 Segment sseg = source.getSegments().get(segmentId);
137 Segment tseg = target.getSegments().get(segmentId);
138
139 if (tseg == null) {
140 tseg = new Segment(segmentId);
141 target.append(tseg);
142
143 Log.trace(getClass(), "Created target segment '{}' in TU '{}'", segmentId, tu.getId());
144 }
145
146 TextFragment targetTf;
147
148 if (params.isMtSendPlainText()) {
149 targetTf = new TextFragment(targetText);
150
151 } else {
152 targetTf = qutil.fromCodedHTMLToFragment(targetText, null);
153 TextFragment segSource = sseg.getContent();
154
155 OkapiUtil.removeExtraCodes(segSource.getCodes(), targetTf);
156
157
158 TextFragmentUtil.alignAndCopyCodeMetadata(segSource, targetTf, true, true);
159
160
161 OkapiUtil.rearrangeCodes(segSource.getCodes(), targetTf);
162 }
163
164 tseg.setContent(targetTf);
165
166
167 AltTranslationsAnnotation ata = new AltTranslationsAnnotation();
168
169 ata.add(getSourceLocale(), getTargetLocale(), sseg.getContent(), sseg.getContent(), targetTf,
170 MatchType.MT, 95, Const.MT_PROVIDER_GOOGLE_MT_V3, 95, 95);
171
172 tseg.setAnnotation(ata);
173 }
174 }
175
176 @Override
177 protected void processTuEvents(List<Event> tuEvents) {
178 for (Event tue : tuEvents) {
179 ITextUnit tu = tue.getTextUnit();
180 preProcessTextUnit(tu);
181 }
182
183 Log.info(getClass(), "Collected {} source segments from {} text units",
184 sourceSegments.size(), tuEvents.size());
185
186 List<String> targetSegments = AthTranslation.translateBatch(
187 sourceSegments,
188 getSourceLocale().toString(),
189 getTargetLocale().toString(),
190 MimeTypeMapper.PLAIN_TEXT_MIME_TYPE,
191 ControllerUtil.getProjectId(),
192 params.getProjectLocation(),
193 params.getModelProjectId(),
194 params.getModelProjectLocation(),
195 params.getModelId(),
196 params.getGlossaryProjectId(),
197 params.getGlossaryProjectLocation(),
198 params.getGlossaryId());
199
200
201
202
203
204
205
206 postProcessTextUnits(targetSegments);
207 }
208
209 }