1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package net.sf.okapi.connectors.google.v3;
20
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.List;
24
25 import org.slf4j.Logger;
26 import org.slf4j.LoggerFactory;
27
28 import com.acumenvelocity.ath.common.ControllerUtil;
29
30 import net.sf.okapi.common.IParameters;
31 import net.sf.okapi.common.LocaleId;
32 import net.sf.okapi.common.Util;
33 import net.sf.okapi.common.exceptions.OkapiException;
34 import net.sf.okapi.common.query.QueryResult;
35 import net.sf.okapi.common.resource.ITextUnit;
36 import net.sf.okapi.common.resource.TextFragment;
37 import net.sf.okapi.lib.translation.BaseConnector;
38 import net.sf.okapi.lib.translation.QueryUtil;
39
40
41
42
43 public class GoogleMTv3Connector extends BaseConnector {
44
45 private final Logger LOG = LoggerFactory.getLogger(getClass());
46 private GoogleMTv3Parameters params;
47 private QueryUtil util;
48 private GoogleMTv3API api;
49 private int failureCount;
50
/**
 * Creates a connector with default parameters and no API client. A client is created by
 * {@link #open()} when none has been set.
 */
public GoogleMTv3Connector() {
  this(null);
}
56
/**
 * Creates a connector with default parameters that uses the supplied API client.
 *
 * @param api the translation API client this connector should call
 */
public GoogleMTv3Connector(GoogleMTv3API api) {
  this.api = api;
  this.util = new QueryUtil();
  this.params = new GoogleMTv3Parameters();
}
62
63 @Override
64 public void setParameters(IParameters params) {
65 this.params = (GoogleMTv3Parameters) params;
66 }
67
68 @Override
69 public GoogleMTv3Parameters getParameters() {
70 return params;
71 }
72
73 @Override
74 public void close() {
75 if (api != null) {
76 api.close();
77 }
78 }
79
80 @Override
81 public String getName() {
82 return "Google-MTv3";
83 }
84
85 @Override
86 public String getSettingsDisplay() {
87 StringBuilder sb = new StringBuilder();
88 sb.append("Google Cloud Translation v3");
89
90 if (!Util.isEmpty(ControllerUtil.getProjectId())) {
91 sb.append("\nProject: ").append(ControllerUtil.getProjectId());
92 }
93
94 if (!Util.isEmpty(params.getProjectLocation())) {
95 sb.append("\nLocation: ").append(params.getProjectLocation());
96 }
97
98 if (!Util.isEmpty(params.getGlossaryId())) {
99 sb.append("\nGlossary: ").append(params.getGlossaryId());
100 }
101
102 return sb.toString();
103 }
104
105 @Override
106 public void open() {
107 failureCount = 0;
108
109
110 if (Util.isEmpty(ControllerUtil.getProjectId())) {
111 throw new OkapiException("Project ID is required for Google Cloud Translation API v3.");
112 }
113
114 if (Util.isEmpty(params.getApiKey()) && Util.isEmpty(params.getCredentialsPath())) {
115 throw new OkapiException("Either API Key or Service Account credentials must be provided.");
116 }
117
118
119 if (api == null || (api instanceof GoogleMTv3APIImpl)) {
120
121 if (api != null) {
122 api.close();
123 }
124
125 api = new GoogleMTv3APIImpl(params);
126 }
127 }
128
129 @Override
130 public int query(String plainText) {
131 return _query(plainText, plainText, new TextQueryResultBuilder(getName(), getWeight()));
132 }
133
134 @Override
135 public int query(TextFragment frag) {
136 return _query(util.toCodedHTML(frag), frag,
137 new FragmentQueryResultBuilder(getName(), getWeight()));
138 }
139
140 private void retryInterval(int retryCount, String operation) {
141 LOG.info("{} - retry {} (waiting {} ms)", operation, retryCount, params.getRetryIntervalMs());
142
143 try {
144 Thread.sleep(params.getRetryIntervalMs());
145
146 } catch (InterruptedException e) {
147 Thread.currentThread().interrupt();
148 throw new OkapiException("Interrupted while trying to contact Google Cloud Translation API");
149 }
150 }
151
152 protected <T> int _query(String queryText, T originalText, QueryResultBuilder<T> qrBuilder) {
153 current = -1;
154
155 if (queryText.isEmpty()) {
156 return 0;
157 }
158
159 List<QueryResult> queryResults = new ArrayList<>();
160 GoogleQueryBuilder<T> qb = new GoogleQueryBuilder<>(srcCode, trgCode);
161 qb.addQuery(queryText, originalText);
162
163 List<TranslationResponse> responses = executeQuery(qb, qrBuilder);
164
165 if (responses != null && !responses.isEmpty()) {
166 queryResults.addAll(qrBuilder.convertResponses(responses, originalText));
167
168 } else {
169
170 LOG.error("Received no results for query");
171
172 queryResults.add(qrBuilder.createDummyResponse(originalText));
173 }
174
175 if (!queryResults.isEmpty()) {
176 current = 0;
177 result = queryResults.iterator().next();
178 return 1;
179 }
180
181 throw new OkapiException("Could not retrieve results from Google Cloud Translation API after " +
182 params.getRetryCount() + " attempts.");
183 }
184
185 @Override
186 public List<List<QueryResult>> batchQueryText(List<String> plainTexts) {
187 return _batchQuery(plainTexts, plainTexts, new TextQueryResultBuilder(getName(), getWeight()));
188 }
189
190 @Override
191 public List<List<QueryResult>> batchQuery(List<TextFragment> fragments) {
192 return _batchQuery(util.toCodedHTML(fragments), fragments,
193 new FragmentQueryResultBuilder(getName(), getWeight()));
194 }
195
196 protected <T> List<List<QueryResult>> _batchQuery(List<String> texts, List<T> originalTexts,
197 QueryResultBuilder<T> qrBuilder) {
198
199 GoogleQueryBuilder<T> qb = new GoogleQueryBuilder<>(srcCode, trgCode);
200 current = -1;
201 List<List<QueryResult>> queryResults = new ArrayList<>(texts.size());
202
203 for (int i = 0; i < texts.size(); i++) {
204 String sourceText = texts.get(i);
205 T originalText = originalTexts.get(i);
206
207 if (qb.hasCapacity(sourceText)) {
208 qb.addQuery(sourceText, originalText);
209
210 } else {
211 queryResults.addAll(flushQuery(qb, qrBuilder));
212
213 if (qb.hasCapacity(sourceText)) {
214 qb.addQuery(sourceText, originalText);
215
216 } else {
217
218
219 TranslationResponse response = executeSingleSegmentQuery(qb, sourceText);
220
221 if (response != null) {
222 queryResults
223 .add(qrBuilder.convertResponses(Collections.singletonList(response), originalText));
224
225 } else {
226
227 LOG.error("Received no results for oversized query");
228
229
230
231 queryResults
232 .add(Collections.singletonList(qrBuilder.createDummyResponse(originalText)));
233 }
234 }
235 }
236 }
237
238 queryResults.addAll(flushQuery(qb, qrBuilder));
239 return queryResults;
240 }
241
242 protected <T> List<List<QueryResult>> flushQuery(GoogleQueryBuilder<T> qb,
243 QueryResultBuilder<T> qrBuilder) {
244
245 List<List<QueryResult>> queryResults = new ArrayList<>();
246
247 if (qb.getSourceCount() > 0) {
248 LOG.debug("Flushing batch query with {} segments", qb.getSourceCount());
249 List<TranslationResponse> batchResponses = executeQuery(qb, qrBuilder);
250
251 if (batchResponses != null) {
252 for (int j = 0; j < batchResponses.size(); j++) {
253 queryResults.add(qrBuilder.convertResponses(
254 Collections.singletonList(batchResponses.get(j)), qb.getSources().get(j)));
255 }
256
257 } else {
258
259 LOG.error("Received no results for batch query");
260
261
262
263 for (T source : qb.getSources()) {
264 queryResults.add(Collections.singletonList(qrBuilder.createDummyResponse(source)));
265 }
266 }
267
268 qb.reset();
269 }
270
271 return queryResults;
272 }
273
274 protected <T> TranslationResponse executeSingleSegmentQuery(GoogleQueryBuilder<T> qb,
275 String sourceText) {
276
277 for (int tries = 0; tries < params.getRetryCount(); tries++) {
278 try {
279 return api.translateSingleSegment(qb, sourceText);
280
281 } catch (GoogleMTv3ErrorException e) {
282 LOG.error("Error {} - {} for single segment query", e.getCode(), e.getMessage());
283
284 if (!isRetryableError(e.getCode())) {
285 throw new OkapiException("Non-retryable error from Google Cloud Translation API: " +
286 e.getMessage(), e);
287 }
288
289 } catch (Throwable e) {
290 throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
291 }
292
293 retryInterval(tries + 1, "executeSingleSegmentQuery");
294 }
295
296
297 if ((params.getFailuresBeforeAbort() > -1)
298 && (++failureCount > params.getFailuresBeforeAbort())) {
299 throw new OkapiException("Too many retry failures while querying the MT server.");
300 }
301
302 return null;
303 }
304
305 protected <T> List<TranslationResponse> executeQuery(GoogleQueryBuilder<T> qb,
306 QueryResultBuilder<T> qrBuilder) {
307
308 for (int tries = 0; tries < params.getRetryCount(); tries++) {
309 try {
310 LOG.info("Translating '{}'", qb.getQuery());
311
312 List<TranslationResponse> res = api.translate(qb);
313
314 LOG.info("Translated '{}' to ['{}', ...]", qb.getQuery(),
315 res != null && res.size() > 0 ? res.get(0) : null);
316
317 return res;
318
319 } catch (GoogleMTv3ErrorException e) {
320 LOG.error("Error {} - {} for batch query", e.getCode(), e.getMessage());
321
322 if (!isRetryableError(e.getCode())) {
323 throw new OkapiException("Non-retryable error from Google Cloud Translation API: " +
324 e.getMessage(), e);
325 }
326
327 } catch (Throwable e) {
328 throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
329 }
330
331 retryInterval(tries + 1, "executeQuery");
332 }
333
334
335 if ((params.getFailuresBeforeAbort() > -1)
336 && (++failureCount > params.getFailuresBeforeAbort())) {
337 throw new OkapiException("Too many retry failures while querying the MT server.");
338 }
339
340 return null;
341 }
342
343 private boolean isRetryableError(int code) {
344
345
346
347
348
349
350 return code == 429 || code == 500 || code == 503 || code == 504;
351 }
352
353 public List<LocaleId> getSupportedLanguages() {
354 try {
355 for (int tries = 0; tries < params.getRetryCount(); tries++) {
356 List<String> codes = api.getLanguages();
357
358 if (codes != null) {
359 List<LocaleId> locales = new ArrayList<>();
360
361 for (String code : codes) {
362 locales.add(convertGoogleLanguageCode(code));
363 }
364
365 return locales;
366 }
367
368 retryInterval(tries + 1, "getSupportedLanguages");
369 }
370
371 } catch (Throwable e) {
372 throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
373 }
374
375 throw new OkapiException(
376 "Could not retrieve language list from Google Cloud Translation API after " +
377 params.getRetryCount() + " attempts.");
378 }
379
380 protected LocaleId convertGoogleLanguageCode(String lang) {
381 return LocaleId.fromBCP47(lang);
382 }
383
384 @Override
385 public void leverage(ITextUnit tu) {
386 leverageUsingBatchQuery(tu);
387 }
388
389 @Override
390 public void batchLeverage(List<ITextUnit> tuList) {
391 batchLeverageUsingBatchQuery(tuList);
392 }
393
394 @Override
395 protected String toInternalCode(LocaleId locale) {
396
397 if (locale == null || locale == LocaleId.EMPTY || locale.toString().isEmpty()
398 || locale.toString().equalsIgnoreCase("und")) {
399 return "";
400 }
401
402 String code = locale.toBCP47();
403 String codelc = code.toLowerCase();
404
405 if (codelc.startsWith("sr-latn")) {
406 throw new OkapiException("Provided language: " + code + " is not supported by MT Engine.");
407
408 } else if (codelc.startsWith("sr-cyrl")) {
409 code = "sr";
410
411 } else {
412 switch (codelc) {
413 case "zh-hans":
414 code = "zh-CN";
415 break;
416
417 case "zh-hant":
418 code = "zh-TW";
419 break;
420 }
421
422 if (!code.startsWith("zh") && (code.length() > 3)) {
423 int p = code.indexOf('-');
424
425 if (p > -1) {
426 code = code.substring(0, p);
427 }
428 }
429 }
430
431 return code;
432 }
433 }