1 package com.acumenvelocity.ath.common;
2
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.List;
6 import java.util.Map;
7 import java.util.Objects;
8
9 import com.acumenvelocity.ath.model.InlineCode;
10 import com.acumenvelocity.ath.model.InlineCodeRef;
11 import com.acumenvelocity.ath.model.LayeredSegment;
12 import com.acumenvelocity.ath.model.MtTargetInfo;
13 import com.acumenvelocity.ath.model.x.LayeredTextX;
14 import com.acumenvelocity.ath.mt.confidence.ConfidenceScoredTranslation;
15
16 import net.sf.okapi.common.LocaleId;
17 import net.sf.okapi.common.annotation.AltTranslation;
18 import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
19 import net.sf.okapi.common.resource.Code;
20 import net.sf.okapi.common.resource.TextContainer;
21 import net.sf.okapi.common.resource.TextFragment;
22 import net.sf.okapi.common.resource.TextFragment.TagType;
23
24 public class ConversionUtil {
25
26
27
28
29
30
31
32 public static TextFragment toTextFragment(LayeredTextX fs) {
33 if (fs == null) {
34 return null;
35 }
36
37
38 String text = fs.getText();
39 if (text == null) {
40 text = "";
41 }
42
43 TextFragment tf = new TextFragment();
44 List<InlineCode> inlineCodes = fs.getCodes();
45
46 if (inlineCodes == null || inlineCodes.isEmpty()) {
47
48 tf.append(text);
49 return tf;
50 }
51
52
53 List<InlineCode> sortedCodes = new ArrayList<>(inlineCodes);
54 sortedCodes.sort((a, b) -> Integer.compare(a.getPosition(), b.getPosition()));
55
56 int currentPos = 0;
57 int textLength = text.length();
58
59 for (InlineCode ic : sortedCodes) {
60 int codePos = ic.getPosition();
61
62
63 if (codePos < 0) {
64 Log.warn(ConversionUtil.class,
65 "Invalid code position {} (negative) for code id={}, skipping",
66 codePos, ic.getId());
67
68 continue;
69 }
70
71 if (codePos > textLength) {
72 Log.warn(ConversionUtil.class,
73 "Invalid code position {} (exceeds text length {}) for code id={}, clamping to end",
74 codePos, textLength, ic.getId());
75
76 codePos = textLength;
77 }
78
79
80 if (codePos > currentPos) {
81
82
83 if (currentPos < textLength) {
84 int endPos = Math.min(codePos, textLength);
85 tf.append(text.substring(currentPos, endPos));
86 }
87 }
88
89
90 TagType tagType = ic.getTagType();
91
92
93 Code code = tf.append(
94 tagType,
95 ic.getType() != null ? ic.getType() : "",
96 ic.getData() != null ? ic.getData() : "");
97
98
99 if (ic.getId() != null) {
100 code.setId(ic.getId());
101 }
102
103 if (ic.getOuterData() != null && !ic.getOuterData().isEmpty()) {
104 code.setOuterData(ic.getOuterData());
105 }
106
107 if (ic.getDisplayText() != null) {
108 code.setDisplayText(ic.getDisplayText());
109 }
110
111 if (ic.getOriginalId() != null) {
112 code.setOriginalId(ic.getOriginalId());
113 }
114
115
116 if (ic.getFlag() != null) {
117 int flag = ic.getFlag();
118
119 if ((flag & 0x01) != 0) {
120 code.setReferenceFlag(true);
121 }
122
123 if ((flag & 0x02) != 0) {
124 code.setCloneable(true);
125 }
126
127 if ((flag & 0x04) != 0) {
128 code.setDeleteable(true);
129 }
130 }
131
132 currentPos = codePos;
133 }
134
135
136 if (currentPos < textLength) {
137 tf.append(text.substring(currentPos));
138 }
139
140 return tf;
141 }
142
143
144
145
146
147
148
149 public static LayeredTextX toLayeredText(TextFragment tf, LocaleId loc) {
150 if (tf == null) {
151 return null;
152 }
153
154 LayeredTextX fs = new LayeredTextX();
155
156 if (loc != null) {
157 fs.setLanguage(loc.toString());
158 }
159
160
161 fs.setText(tf.getText());
162
163
164 fs.setTextWithCodes(tf.toText());
165
166 List<InlineCode> inlineCodes = new ArrayList<>();
167
168 if (tf.hasCode()) {
169 String codedText = tf.getCodedText();
170
171 int textPosition = 0;
172
173 for (int i = 0; i < codedText.length(); i++) {
174 char ch = codedText.charAt(i);
175
176 if (TextFragment.isMarker(ch)) {
177
178 char indexChar = codedText.charAt(++i);
179 Code code = tf.getCode(indexChar);
180
181 InlineCode ic = new InlineCode();
182 ic.setPosition(textPosition);
183 ic.setId(code.getId());
184 ic.setType(code.getType());
185 ic.setData(code.getData());
186
187
188 ic.setTagType(code.getTagType());
189
190
191 if (code.getOuterData() != null && !code.getOuterData().isEmpty()) {
192 ic.setOuterData(code.getOuterData());
193 }
194
195 if (code.getDisplayText() != null) {
196 ic.setDisplayText(code.getDisplayText());
197 }
198
199 if (code.getOriginalId() != null) {
200 ic.setOriginalId(code.getOriginalId());
201 }
202
203
204 int flag = 0;
205 if (code.hasReference())
206 flag |= 0x01;
207
208 if (code.isCloneable())
209 flag |= 0x02;
210
211 if (code.isDeleteable())
212 flag |= 0x04;
213
214 ic.setFlag(flag);
215
216 inlineCodes.add(ic);
217
218 } else {
219
220 textPosition++;
221 }
222 }
223 }
224
225 fs.setCodes(inlineCodes);
226
227 return fs;
228 }
229
230 public static LayeredSegment toLayeredSegment(LayeredTextX slt, LayeredTextX tlt) {
231 LayeredSegment lseg = new LayeredSegment();
232
233 lseg.setSrcLang(slt.getLanguage());
234 lseg.setSrcText(slt.getText());
235
236 lseg.setTrgLang(tlt.getLanguage());
237 lseg.setTrgText(tlt.getText());
238
239
240 List<InlineCodeRef> codeRefs = new ArrayList<>();
241
242 for (InlineCode c : slt.getCodes()) {
243 InlineCodeRef cr = new InlineCodeRef();
244
245 cr.setId(c.getId());
246 cr.setTagType(c.getTagType());
247 cr.setPosition(c.getPosition());
248
249 codeRefs.add(cr);
250 }
251
252 lseg.setSrcCodes(codeRefs);
253
254
255 codeRefs = new ArrayList<>();
256
257 for (InlineCode c : tlt.getCodes()) {
258 InlineCodeRef cr = new InlineCodeRef();
259
260 cr.setId(c.getId());
261 cr.setTagType(c.getTagType());
262 cr.setPosition(c.getPosition());
263
264 codeRefs.add(cr);
265 }
266
267 lseg.setTrgCodes(codeRefs);
268
269 return lseg;
270 }
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295 public static LayeredTextX tltFromLayeredSegment(LayeredSegment lseg, LayeredTextX slt) {
296 if (lseg == null || slt == null) {
297 return null;
298 }
299
300 LayeredTextX result = new LayeredTextX();
301
302
303 result.setLanguage(lseg.getTrgLang());
304 result.setText(lseg.getTrgText());
305
306 int maxPos = lseg.getTrgText().length() == 0 ? 0 : lseg.getTrgText().length() - 1;
307
308
309 List<InlineCodeRef> codeRefs = lseg.getTrgCodes();
310
311
312 List<InlineCode> sourceCodes = slt.getCodes();
313
314
315 class CodeKey {
316 final Integer id;
317 final TagType tagType;
318
319 CodeKey(Integer id, TagType tagType) {
320 this.id = id;
321 this.tagType = tagType;
322 }
323
324 @Override
325 public boolean equals(Object o) {
326 if (this == o)
327 return true;
328
329 if (o == null || getClass() != o.getClass())
330 return false;
331
332 CodeKey codeKey = (CodeKey) o;
333 return Objects.equals(id, codeKey.id) && tagType == codeKey.tagType;
334 }
335
336 @Override
337 public int hashCode() {
338 return Objects.hash(id, tagType);
339 }
340 }
341
342
343 Map<CodeKey, InlineCode> codeMap = new HashMap<>();
344 if (sourceCodes != null) {
345 for (InlineCode code : sourceCodes) {
346 codeMap.put(new CodeKey(code.getId(), code.getTagType()), code);
347 }
348 }
349
350
351 List<InlineCode> resultCodes = new ArrayList<>();
352
353 if (codeRefs != null) {
354 for (InlineCodeRef ref : codeRefs) {
355 InlineCode fullCode = codeMap.get(new CodeKey(ref.getId(), ref.getTagType()));
356
357 if (fullCode != null) {
358
359 InlineCode newCode = new InlineCode();
360
361 newCode.setId(fullCode.getId());
362
363 if (ref.getPosition() > maxPos) {
364 Log.warn(ConversionUtil.class,
365 "Code position from LLM exceeds the length of '{}', normalizing",
366 lseg.getTrgText());
367
368 newCode.setPosition(maxPos);
369
370 } else {
371 newCode.setPosition(ref.getPosition());
372 }
373
374 newCode.setType(fullCode.getType());
375 newCode.setTagType(ref.getTagType());
376 newCode.setData(fullCode.getData());
377 newCode.setOuterData(fullCode.getOuterData());
378 newCode.setFlag(fullCode.getFlag());
379 newCode.setDisplayText(fullCode.getDisplayText());
380 newCode.setOriginalId(fullCode.getOriginalId());
381
382 resultCodes.add(newCode);
383 }
384 }
385 }
386
387 result.setCodes(resultCodes);
388
389 return result;
390 }
391
392 public static List<MtTargetInfo> toMtTargets(AltTranslationsAnnotation ata) {
393 List<MtTargetInfo> altTrans = new ArrayList<>();
394
395 for (AltTranslation at : ata) {
396 if (at instanceof ConfidenceScoredTranslation) {
397 ConfidenceScoredTranslation cst = (ConfidenceScoredTranslation) at;
398
399 MtTargetInfo mti = new MtTargetInfo();
400
401 TextContainer target = cst.getTarget();
402 TextFragment ttf = target.getFirstContent();
403 LayeredTextX tlt = ConversionUtil.toLayeredText(ttf, cst.getTargetLocale());
404
405 mti.setMtTarget(tlt);
406 mti.setMtConfidenceScore(cst.getConfidenceScore());
407 mti.setMtModelId(cst.getOrigin());
408
409 altTrans.add(mti);
410 }
411 }
412
413 return altTrans;
414 }
415 }