View Javadoc
1   /*
2    * ===========================================================================
3    * Copyright (C) 2011-2025 by the Okapi Framework contributors
4    * -----------------------------------------------------------------------------
5    * Licensed under the Apache License, Version 2.0 (the "License");
6    * you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    * 
9    * http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   * ===========================================================================
17   */
18  
19  package net.sf.okapi.connectors.google.v3;
20  
21  import java.util.ArrayList;
22  import java.util.Collections;
23  import java.util.List;
24  
25  import org.slf4j.Logger;
26  import org.slf4j.LoggerFactory;
27  
28  import com.acumenvelocity.ath.common.ControllerUtil;
29  
30  import net.sf.okapi.common.IParameters;
31  import net.sf.okapi.common.LocaleId;
32  import net.sf.okapi.common.Util;
33  import net.sf.okapi.common.exceptions.OkapiException;
34  import net.sf.okapi.common.query.QueryResult;
35  import net.sf.okapi.common.resource.ITextUnit;
36  import net.sf.okapi.common.resource.TextFragment;
37  import net.sf.okapi.lib.translation.BaseConnector;
38  import net.sf.okapi.lib.translation.QueryUtil;
39  
40  /**
41   * Connector for Google Cloud Translation API v3 with glossary support.
42   */
43  public class GoogleMTv3Connector extends BaseConnector {
44  
45    private final Logger LOG = LoggerFactory.getLogger(getClass());
46    private GoogleMTv3Parameters params;
47    private QueryUtil util;
48    private GoogleMTv3API api;
49    private int failureCount;
50  
51    public GoogleMTv3Connector() {
52      params = new GoogleMTv3Parameters();
53      util = new QueryUtil();
54      // API is initialized in open() or when needed, to ensure params are set
55    }
56  
57    public GoogleMTv3Connector(GoogleMTv3API api) {
58      params = new GoogleMTv3Parameters();
59      util = new QueryUtil();
60      this.api = api;
61    }
62  
63    @Override
64    public void setParameters(IParameters params) {
65      this.params = (GoogleMTv3Parameters) params;
66    }
67  
68    @Override
69    public GoogleMTv3Parameters getParameters() {
70      return params;
71    }
72  
73    @Override
74    public void close() {
75      if (api != null) {
76        api.close();
77      }
78    }
79  
80    @Override
81    public String getName() {
82      return "Google-MTv3";
83    }
84  
85    @Override
86    public String getSettingsDisplay() {
87      StringBuilder sb = new StringBuilder();
88      sb.append("Google Cloud Translation v3");
89      
90      if (!Util.isEmpty(ControllerUtil.getProjectId())) {
91        sb.append("\nProject: ").append(ControllerUtil.getProjectId());
92      }
93      
94      if (!Util.isEmpty(params.getProjectLocation())) {
95        sb.append("\nLocation: ").append(params.getProjectLocation());
96      }
97      
98      if (!Util.isEmpty(params.getGlossaryId())) {
99        sb.append("\nGlossary: ").append(params.getGlossaryId());
100     }
101     
102     return sb.toString();
103   }
104 
105   @Override
106   public void open() {
107     failureCount = 0;
108 
109     // Validate required parameters
110     if (Util.isEmpty(ControllerUtil.getProjectId())) {
111       throw new OkapiException("Project ID is required for Google Cloud Translation API v3.");
112     }
113 
114     if (Util.isEmpty(params.getApiKey()) && Util.isEmpty(params.getCredentialsPath())) {
115       throw new OkapiException("Either API Key or Service Account credentials must be provided.");
116     }
117 
118     // Initialize API impl if not mocked
119     if (api == null || (api instanceof GoogleMTv3APIImpl)) {
120       // Close existing if we are re-opening
121       if (api != null) {
122         api.close();
123       }
124 
125       api = new GoogleMTv3APIImpl(params);
126     }
127   }
128 
129   @Override
130   public int query(String plainText) {
131     return _query(plainText, plainText, new TextQueryResultBuilder(getName(), getWeight()));
132   }
133 
134   @Override
135   public int query(TextFragment frag) {
136     return _query(util.toCodedHTML(frag), frag,
137         new FragmentQueryResultBuilder(getName(), getWeight()));
138   }
139 
140   private void retryInterval(int retryCount, String operation) {
141     LOG.info("{} - retry {} (waiting {} ms)", operation, retryCount, params.getRetryIntervalMs());
142 
143     try {
144       Thread.sleep(params.getRetryIntervalMs());
145 
146     } catch (InterruptedException e) {
147       Thread.currentThread().interrupt();
148       throw new OkapiException("Interrupted while trying to contact Google Cloud Translation API");
149     }
150   }
151 
152   protected <T> int _query(String queryText, T originalText, QueryResultBuilder<T> qrBuilder) {
153     current = -1;
154 
155     if (queryText.isEmpty()) {
156       return 0;
157     }
158 
159     List<QueryResult> queryResults = new ArrayList<>();
160     GoogleQueryBuilder<T> qb = new GoogleQueryBuilder<>(srcCode, trgCode);
161     qb.addQuery(queryText, originalText);
162 
163     List<TranslationResponse> responses = executeQuery(qb, qrBuilder);
164 
165     if (responses != null && !responses.isEmpty()) {
166       queryResults.addAll(qrBuilder.convertResponses(responses, originalText));
167 
168     } else {
169       // Underlying call failed for some reason, probably a timeout
170       LOG.error("Received no results for query");
171       // Return the source text as a dummy translation so that we can maintain the correct indexing
172       queryResults.add(qrBuilder.createDummyResponse(originalText));
173     }
174 
175     if (!queryResults.isEmpty()) {
176       current = 0;
177       result = queryResults.iterator().next();
178       return 1;
179     }
180 
181     throw new OkapiException("Could not retrieve results from Google Cloud Translation API after " +
182         params.getRetryCount() + " attempts.");
183   }
184 
185   @Override
186   public List<List<QueryResult>> batchQueryText(List<String> plainTexts) {
187     return _batchQuery(plainTexts, plainTexts, new TextQueryResultBuilder(getName(), getWeight()));
188   }
189 
190   @Override
191   public List<List<QueryResult>> batchQuery(List<TextFragment> fragments) {
192     return _batchQuery(util.toCodedHTML(fragments), fragments,
193         new FragmentQueryResultBuilder(getName(), getWeight()));
194   }
195 
196   protected <T> List<List<QueryResult>> _batchQuery(List<String> texts, List<T> originalTexts,
197       QueryResultBuilder<T> qrBuilder) {
198 
199     GoogleQueryBuilder<T> qb = new GoogleQueryBuilder<>(srcCode, trgCode);
200     current = -1;
201     List<List<QueryResult>> queryResults = new ArrayList<>(texts.size());
202 
203     for (int i = 0; i < texts.size(); i++) {
204       String sourceText = texts.get(i);
205       T originalText = originalTexts.get(i);
206 
207       if (qb.hasCapacity(sourceText)) {
208         qb.addQuery(sourceText, originalText);
209 
210       } else {
211         queryResults.addAll(flushQuery(qb, qrBuilder));
212 
213         if (qb.hasCapacity(sourceText)) {
214           qb.addQuery(sourceText, originalText);
215 
216         } else {
217           // If we still don't have capacity, it's an oversized segment that needs to be POSTed by
218           // itself.
219           TranslationResponse response = executeSingleSegmentQuery(qb, sourceText);
220 
221           if (response != null) {
222             queryResults
223                 .add(qrBuilder.convertResponses(Collections.singletonList(response), originalText));
224 
225           } else {
226             // Underlying call failed for some reason, probably a timeout
227             LOG.error("Received no results for oversized query");
228 
229             // Return the source text as a dummy translation so that we can maintain the correct
230             // indexing
231             queryResults
232                 .add(Collections.singletonList(qrBuilder.createDummyResponse(originalText)));
233           }
234         }
235       }
236     }
237 
238     queryResults.addAll(flushQuery(qb, qrBuilder));
239     return queryResults;
240   }
241 
242   protected <T> List<List<QueryResult>> flushQuery(GoogleQueryBuilder<T> qb,
243       QueryResultBuilder<T> qrBuilder) {
244 
245     List<List<QueryResult>> queryResults = new ArrayList<>();
246 
247     if (qb.getSourceCount() > 0) {
248       LOG.debug("Flushing batch query with {} segments", qb.getSourceCount());
249       List<TranslationResponse> batchResponses = executeQuery(qb, qrBuilder);
250 
251       if (batchResponses != null) {
252         for (int j = 0; j < batchResponses.size(); j++) {
253           queryResults.add(qrBuilder.convertResponses(
254               Collections.singletonList(batchResponses.get(j)), qb.getSources().get(j)));
255         }
256 
257       } else {
258         // Underlying call failed for some reason, probably a timeout
259         LOG.error("Received no results for batch query");
260 
261         // Return the source text as a dummy translation so that we can maintain the correct
262         // indexing
263         for (T source : qb.getSources()) {
264           queryResults.add(Collections.singletonList(qrBuilder.createDummyResponse(source)));
265         }
266       }
267 
268       qb.reset();
269     }
270 
271     return queryResults;
272   }
273 
274   protected <T> TranslationResponse executeSingleSegmentQuery(GoogleQueryBuilder<T> qb,
275       String sourceText) {
276 
277     for (int tries = 0; tries < params.getRetryCount(); tries++) {
278       try {
279         return api.translateSingleSegment(qb, sourceText);
280 
281       } catch (GoogleMTv3ErrorException e) {
282         LOG.error("Error {} - {} for single segment query", e.getCode(), e.getMessage());
283 
284         if (!isRetryableError(e.getCode())) {
285           throw new OkapiException("Non-retryable error from Google Cloud Translation API: " +
286               e.getMessage(), e);
287         }
288 
289       } catch (Throwable e) {
290         throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
291       }
292 
293       retryInterval(tries + 1, "executeSingleSegmentQuery");
294     }
295 
296     // All retries have failed
297     if ((params.getFailuresBeforeAbort() > -1)
298         && (++failureCount > params.getFailuresBeforeAbort())) {
299       throw new OkapiException("Too many retry failures while querying the MT server.");
300     }
301 
302     return null;
303   }
304 
305   protected <T> List<TranslationResponse> executeQuery(GoogleQueryBuilder<T> qb,
306       QueryResultBuilder<T> qrBuilder) {
307 
308     for (int tries = 0; tries < params.getRetryCount(); tries++) {
309       try {
310         LOG.info("Translating '{}'", qb.getQuery());
311         
312         List<TranslationResponse> res = api.translate(qb);
313         
314         LOG.info("Translated '{}' to ['{}', ...]", qb.getQuery(),
315             res != null && res.size() > 0 ? res.get(0) : null);
316 
317         return res;
318 
319       } catch (GoogleMTv3ErrorException e) {
320         LOG.error("Error {} - {} for batch query", e.getCode(), e.getMessage());
321 
322         if (!isRetryableError(e.getCode())) {
323           throw new OkapiException("Non-retryable error from Google Cloud Translation API: " +
324               e.getMessage(), e);
325         }
326 
327       } catch (Throwable e) {
328         throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
329       }
330 
331       retryInterval(tries + 1, "executeQuery");
332     }
333 
334     // All retries have failed
335     if ((params.getFailuresBeforeAbort() > -1)
336         && (++failureCount > params.getFailuresBeforeAbort())) {
337       throw new OkapiException("Too many retry failures while querying the MT server.");
338     }
339 
340     return null;
341   }
342 
343   private boolean isRetryableError(int code) {
344     // Retry on rate limit (429), service unavailable (503), and internal server error (500)
345     // Also handling standard Google GRPC error codes mapping to HTTP
346     // 429 = RESOURCE_EXHAUSTED
347     // 503 = UNAVAILABLE
348     // 500 = INTERNAL
349     // 504 = DEADLINE_EXCEEDED
350     return code == 429 || code == 500 || code == 503 || code == 504;
351   }
352 
353   public List<LocaleId> getSupportedLanguages() {
354     try {
355       for (int tries = 0; tries < params.getRetryCount(); tries++) {
356         List<String> codes = api.getLanguages();
357 
358         if (codes != null) {
359           List<LocaleId> locales = new ArrayList<>();
360 
361           for (String code : codes) {
362             locales.add(convertGoogleLanguageCode(code));
363           }
364 
365           return locales;
366         }
367 
368         retryInterval(tries + 1, "getSupportedLanguages");
369       }
370 
371     } catch (Throwable e) {
372       throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
373     }
374 
375     throw new OkapiException(
376         "Could not retrieve language list from Google Cloud Translation API after " +
377             params.getRetryCount() + " attempts.");
378   }
379 
380   protected LocaleId convertGoogleLanguageCode(String lang) {
381     return LocaleId.fromBCP47(lang);
382   }
383 
384   @Override
385   public void leverage(ITextUnit tu) {
386     leverageUsingBatchQuery(tu);
387   }
388 
389   @Override
390   public void batchLeverage(List<ITextUnit> tuList) {
391     batchLeverageUsingBatchQuery(tuList);
392   }
393 
394   @Override
395   protected String toInternalCode(LocaleId locale) {
396     // Handle empty/undefined locale for language detection
397     if (locale == null || locale == LocaleId.EMPTY || locale.toString().isEmpty()
398         || locale.toString().equalsIgnoreCase("und")) {
399       return "";
400     }
401 
402     String code = locale.toBCP47();
403     String codelc = code.toLowerCase();
404 
405     if (codelc.startsWith("sr-latn")) {
406       throw new OkapiException("Provided language: " + code + " is not supported by MT Engine.");
407 
408     } else if (codelc.startsWith("sr-cyrl")) {
409       code = "sr";
410 
411     } else {
412       switch (codelc) {
413       case "zh-hans":
414         code = "zh-CN";
415         break;
416 
417       case "zh-hant":
418         code = "zh-TW";
419         break;
420       }
421 
422       if (!code.startsWith("zh") && (code.length() > 3)) {
423         int p = code.indexOf('-');
424 
425         if (p > -1) {
426           code = code.substring(0, p);
427         }
428       }
429     }
430 
431     return code;
432   }
433 }