1 package com.acumenvelocity.ath.controller;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.net.URI;
7 import java.nio.file.Files;
8 import java.util.ArrayList;
9 import java.util.Date;
10 import java.util.List;
11 import java.util.Objects;
12 import java.util.UUID;
13 import java.util.concurrent.ExecutorService;
14 import java.util.concurrent.Executors;
15 import java.util.concurrent.TimeUnit;
16 import java.util.stream.Collectors;
17
18 import javax.ws.rs.core.Response.Status;
19
20 import org.apache.commons.io.FileUtils;
21 import org.apache.http.client.methods.HttpGet;
22 import org.apache.http.impl.client.CloseableHttpClient;
23 import org.apache.http.impl.client.HttpClients;
24 import org.apache.solr.client.solrj.SolrClient;
25 import org.apache.solr.client.solrj.SolrQuery;
26 import org.apache.solr.client.solrj.response.QueryResponse;
27 import org.apache.solr.common.SolrDocument;
28 import org.apache.solr.common.SolrDocumentList;
29 import org.apache.solr.common.SolrInputDocument;
30
31 import com.acumenvelocity.ath.common.AthUtil;
32 import com.acumenvelocity.ath.common.Const;
33 import com.acumenvelocity.ath.common.ControllerUtil;
34 import com.acumenvelocity.ath.common.JacksonUtil;
35 import com.acumenvelocity.ath.common.Log;
36 import com.acumenvelocity.ath.common.OkapiUtil;
37 import com.acumenvelocity.ath.common.Response;
38 import com.acumenvelocity.ath.common.SolrUtil;
39 import com.acumenvelocity.ath.common.exception.AthException;
40 import com.acumenvelocity.ath.gcs.AthStorage;
41 import com.acumenvelocity.ath.model.AlignDocumentRequest;
42 import com.acumenvelocity.ath.model.ConflictDownloadResponse;
43 import com.acumenvelocity.ath.model.ConflictResponse;
44 import com.acumenvelocity.ath.model.CreateDocumentSegmentRequest;
45 import com.acumenvelocity.ath.model.DocumentInfo;
46 import com.acumenvelocity.ath.model.DocumentInfosWrapper;
47 import com.acumenvelocity.ath.model.DocumentSegment;
48 import com.acumenvelocity.ath.model.DocumentSegmentWrapper;
49 import com.acumenvelocity.ath.model.DocumentSegmentsWrapper;
50 import com.acumenvelocity.ath.model.DocumentStatus;
51 import com.acumenvelocity.ath.model.ExportCompletedStatusResponse;
52 import com.acumenvelocity.ath.model.ExportDocumentRequest;
53 import com.acumenvelocity.ath.model.FailedStatusResponse;
54 import com.acumenvelocity.ath.model.ImportCompletedStatusResponse;
55 import com.acumenvelocity.ath.model.ImportDocumentRequest;
56 import com.acumenvelocity.ath.model.ModifiedSegmentsWrapper;
57 import com.acumenvelocity.ath.model.MtResources;
58 import com.acumenvelocity.ath.model.MtTargetInfo;
59 import com.acumenvelocity.ath.model.Origin;
60 import com.acumenvelocity.ath.model.PaginationInfo;
61 import com.acumenvelocity.ath.model.ProcessingResponse;
62 import com.acumenvelocity.ath.model.ProcessingStatusResponse;
63 import com.acumenvelocity.ath.model.UpdateDocumentSegmentRequest;
64 import com.acumenvelocity.ath.model.x.LayeredTextX;
65 import com.acumenvelocity.ath.solr.AthIndex;
66 import com.fasterxml.jackson.core.type.TypeReference;
67 import com.fasterxml.jackson.databind.JsonNode;
68
69 import io.swagger.oas.inflector.models.RequestContext;
70 import io.swagger.oas.inflector.models.ResponseContext;
71 import net.sf.okapi.common.Util;
72
73 public class DocumentController {
/** Fixed pool of 10 worker threads shared by all controller instances for background jobs. */
private static final ExecutorService EXECUTOR = Executors.newFixedThreadPool(10);

// Per-instance import state; where these are read/written is not visible in this section.
private File fileToImport;
private boolean isTempFile;

// Make sure shutdownExecutor() runs when the JVM goes down.
static {
  Runtime.getRuntime().addShutdownHook(new Thread(DocumentController::shutdownExecutor));
}
84
85
86 public static void shutdownExecutor() {
87 if (EXECUTOR != null && !EXECUTOR.isShutdown()) {
88 EXECUTOR.shutdown();
89
90 try {
91 if (!EXECUTOR.awaitTermination(60, TimeUnit.SECONDS)) {
92 EXECUTOR.shutdownNow();
93 }
94
95 } catch (InterruptedException e) {
96 EXECUTOR.shutdownNow();
97 Thread.currentThread().interrupt();
98 }
99 }
100 }
101
102 public ResponseContext getDocuments(RequestContext request, Integer page, Integer pageSize) {
103 try {
104 SolrClient solrClient = AthIndex.getSolr().getClient();
105
106 SolrQuery q = new SolrQuery("*:*")
107 .setFields(Const.ATH_PROP_DOC_ID)
108 .addSort("_docid_", SolrQuery.ORDER.asc)
109 .setRows(Integer.MAX_VALUE);
110
111 QueryResponse response = solrClient.query(Const.SOLR_CORE_ATH_DOCS, q);
112
113 List<String> allDocIds = response.getResults().stream()
114 .map(doc -> doc.getFieldValue(Const.ATH_PROP_DOC_ID))
115 .filter(Objects::nonNull)
116 .map(Object::toString)
117 .collect(Collectors.toList());
118
119 DocumentInfosWrapper wrapper = new DocumentInfosWrapper();
120
121
122 if (allDocIds.isEmpty()) {
123 wrapper.documents(new ArrayList<>())
124 .pagination(new PaginationInfo()
125 .page(1)
126 .pageSize(0)
127 .totalItems(0L)
128 .totalPages(0)
129 .hasNext(false)
130 .hasPrevious(false));
131
132 return Response.success(200, wrapper);
133 }
134
135 long totalItems = allDocIds.size();
136 int size = (pageSize != null) ? Math.max(1, Math.min(100, pageSize)) : (int) totalItems;
137 int totalPages = (int) Math.ceil(totalItems / (double) size);
138
139 int pageNum;
140 List<String> docIdsToProcess;
141
142 if (page == null && pageSize == null) {
143
144 pageNum = 1;
145 int end = Math.min(size, allDocIds.size());
146 docIdsToProcess = allDocIds.subList(0, end);
147 totalPages = (int) Math.ceil(totalItems / (double) size);
148
149 } else {
150 pageNum = (page != null) ? Math.max(1, Math.min(page, Math.max(1, totalPages))) : 1;
151 int start = (pageNum - 1) * size;
152 int end = Math.min(start + size, allDocIds.size());
153 docIdsToProcess = allDocIds.subList(start, end);
154 }
155
156 List<DocumentInfo> resultDocs = docIdsToProcess.stream()
157 .map(this::getDocumentInfoInternal)
158 .filter(Objects::nonNull)
159 .collect(Collectors.toList());
160
161 PaginationInfo pagination = new PaginationInfo()
162 .page(pageNum)
163 .pageSize(size)
164 .totalItems(totalItems)
165 .totalPages(totalPages)
166 .hasNext(pageNum < totalPages)
167 .hasPrevious(pageNum > 1);
168
169 wrapper.documents(resultDocs)
170 .pagination(pagination);
171
172 return Response.success(200, wrapper);
173
174 } catch (Exception e) {
175 return Response.error(500, e, "Error fetching documents");
176 }
177 }
178
179 private DocumentInfo getDocumentInfoInternal(String docId) {
180 try {
181 String docQuery = Log.format("docId:\"{}\"", docId);
182
183 SolrClient solrClient = AthIndex.getSolr().getClient();
184
185
186 SolrQuery q = new SolrQuery(docQuery);
187 q.setRows(1);
188 QueryResponse docResponse = solrClient.query(Const.SOLR_CORE_ATH_DOCS, q);
189
190 if (docResponse.getResults().isEmpty()) {
191 return null;
192 }
193
194 SolrDocument doc = docResponse.getResults().get(0);
195
196
197 String segQuery = Log.format("docId:\"{}\"", docId);
198 long segmentCount = SolrUtil.getNumDocuments(Const.SOLR_CORE_ATH_DOC_SEGMENTS, segQuery);
199
200 DocumentInfo docInfo = new DocumentInfo();
201 docInfo.setDocId(UUID.fromString(docId));
202
203 docInfo.setDocFileName(SolrUtil.safeGetField(doc, Const.ATH_PROP_DOC_FILE_NAME, null));
204
205 docInfo.setDocGcsUrl(
206 AthUtil.toURI(SolrUtil.safeGetField(doc, Const.ATH_PROP_DOC_STORAGE_NAME, "")));
207
208 docInfo.setDocTrlGcsUrl(
209 AthUtil.toURI(SolrUtil.safeGetField(doc, Const.ATH_PROP_DOC_TRL_STORAGE_NAME, "")));
210
211 docInfo.setSrcLang(SolrUtil.safeGetField(doc, Const.ATH_PROP_SRC_LANG, "en"));
212 docInfo.setTrgLang(SolrUtil.safeGetField(doc, Const.ATH_PROP_TRG_LANG, "fr"));
213
214 docInfo.setSegmentsCount(segmentCount);
215
216 docInfo.setCreatedBy(AthUtil.safeToUuid(
217 SolrUtil.safeGetField(doc, Const.ATH_PROP_CREATED_BY, null), null));
218
219 docInfo.setUpdatedBy(AthUtil.safeToUuid(
220 SolrUtil.safeGetField(doc, Const.ATH_PROP_UPDATED_BY, null), null));
221
222 docInfo.setCreatedAt(AthUtil.safeToDate(doc.get(Const.ATH_PROP_CREATED_AT), null));
223 docInfo.setUpdatedAt(AthUtil.safeToDate(doc.get(Const.ATH_PROP_UPDATED_AT), null));
224
225 return docInfo;
226
227 } catch (Exception e) {
228 Log.error(this.getClass(), e, "Error getting document info for docId: {}", docId);
229 return null;
230 }
231 }
232
233 public ResponseContext importDocument(RequestContext request, UUID docId, JsonNode bodyNode) {
234
235 if (!ControllerUtil.checkParam(docId)) {
236 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
237 }
238
239 if (!ControllerUtil.checkParam(bodyNode)) {
240 return Response.error(400, "Invalid request parameter, bodyNode is null");
241 }
242
243 ImportDocumentRequest body = AthUtil.safeFromJsonNode(bodyNode,
244 ImportDocumentRequest.class, null);
245
246 if (body == null) {
247 return Response.error(400, "Invalid request body");
248 }
249
250 String srcLang = body.getSrcLang();
251 String trgLang = body.getTrgLang();
252 String filterId = body.getFilterId();
253 String filterParams = body.getFilterParams();
254 String srcSrx = body.getSrcSrx();
255 UUID tmId = body.getTmId();
256 Integer tmThreshold = body.getTmThreshold();
257 String mtEngineId = body.getMtEngineId();
258 String mtEngineParams = body.getMtEngineParams();
259
260 List<MtResources> mtCustomResources = body.getMtCustomResources();
261
262 Boolean mtProvideConfidenceScores = body.getMtProvideConfidenceScores();
263 Boolean mtUseTranslateLlm = body.getMtUseTranslateLlm();
264 Boolean mtSendPlainText = body.getMtSendPlainText();
265 Boolean useCodesReinsertionModel = body.getUseCodesReinsertionModel();
266 String codesReinsertionModelName = body.getCodesReinsertionModelName();
267 UUID userId = body.getUserId();
268
269 if (!ControllerUtil.checkParam(srcLang)) {
270 return Response.error(400, "Invalid request, srcLang is not specified");
271 }
272
273 if (!ControllerUtil.checkParam(trgLang)) {
274 return Response.error(400, "Invalid request, trgLang is not specified");
275 }
276
277 if (!ControllerUtil.checkParam(userId)) {
278 return Response.error(400, "Invalid request parameter User Id: " + userId);
279 }
280
281 String query = Log.format("docId:\"{}\"", docId);
282
283 try {
284 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
285 QueryResponse.class);
286
287 if (response.getResults().isEmpty()) {
288 return Response.error(404, "Document not found, docId: " + docId);
289 }
290
291 SolrDocument existingDoc = response.getResults().get(0);
292 String status = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_STATUS, null);
293
294 if (DocumentStatus.IMPORTING.toString().equals(status)
295 || DocumentStatus.EXPORTING.toString().equals(status)) {
296
297 ConflictResponse conflict = new ConflictResponse();
298 conflict.setError("Processing already in progress");
299 conflict.setStatus(AthUtil.safeToEnum(status, ConflictResponse.StatusEnum.class, null));
300 conflict.setDocId(docId);
301 conflict.setStatusUrl(Log.format("/document/{}/status", docId));
302
303 return Response.builder()
304 .status(Status.CONFLICT)
305 .header("Location", Log.format("/document/{}/status", docId))
306 .entity(conflict)
307 .build();
308 }
309
310 DocumentInfo docInfo = getDocumentInfoInternal(docId.toString());
311
312
313 boolean newDoc = docInfo.getSegmentsCount() == 0;
314
315
316 String docFileName = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_DOC_FILE_NAME, null);
317 String docGcsUrl = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_DOC_STORAGE_NAME, null);
318
319 String docEncoding = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_DOC_FILE_ENCODING,
320 null);
321
322
323 SolrInputDocument doc = SolrUtil.toInputDocument(existingDoc);
324
325 doc.setField(Const.ATH_PROP_SRC_LANG, srcLang);
326 doc.setField(Const.ATH_PROP_TRG_LANG, trgLang);
327 doc.setField(Const.ATH_PROP_FILTER_ID, filterId);
328
329 SolrUtil.safeSetField(doc, Const.ATH_PROP_FILTER_PARAMS, filterParams);
330 SolrUtil.safeSetField(doc, Const.ATH_PROP_SRC_SRX, srcSrx);
331 SolrUtil.safeSetField(doc, Const.ATH_PROP_TM_ID, tmId);
332 doc.setField(Const.ATH_PROP_TM_THRESHOLD, tmThreshold != null ? tmThreshold : 75);
333
334 SolrUtil.safeSetField(doc, Const.ATH_PROP_MT_ENGINE_ID, mtEngineId);
335 SolrUtil.safeSetField(doc, Const.ATH_PROP_MT_ENGINE_PARAMS, mtEngineParams);
336
337
338
339
340
341
342
343
344
345
346 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.IMPORTING.toString());
347 doc.setField(Const.ATH_PROP_PROCESSED_BY, userId.toString());
348 doc.setField(Const.ATH_PROP_STARTED_AT, new Date());
349 doc.setField(Const.ATH_PROP_FINISHED_AT, null);
350 doc.setField(Const.ATH_PROP_UPDATED_BY, userId.toString());
351 doc.setField(Const.ATH_PROP_UPDATED_AT, new Date());
352
353 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, doc);
354 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
355
356
357 EXECUTOR.submit(() -> {
358 ResponseContext res = Response.success(200);
359
360 try {
361 res = ControllerUtil.importFile(
362 doc,
363 docId,
364 docFileName,
365 AthUtil.toURI(docGcsUrl),
366 docEncoding,
367 srcLang,
368 trgLang,
369 filterId,
370 filterParams,
371 srcSrx,
372 tmId,
373 tmThreshold,
374 mtEngineId,
375 mtEngineParams,
376 mtCustomResources,
377 mtProvideConfidenceScores,
378 mtUseTranslateLlm != null ? mtUseTranslateLlm : false,
379 mtSendPlainText != null ? mtSendPlainText : false,
380 useCodesReinsertionModel != null ? useCodesReinsertionModel : false,
381 codesReinsertionModelName,
382 userId,
383 newDoc);
384
385 if (res.getStatus() == 200) {
386 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.IMPORT_COMPLETED.toString());
387
388 } else {
389 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.FAILED.toString());
390 doc.setField(Const.ATH_PROP_ERROR_MESSAGE, Response.getMessage(res));
391 }
392
393 } catch (Exception e) {
394 Log.error(getClass(), e, "Import failed");
395 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.FAILED.toString());
396 doc.setField(Const.ATH_PROP_ERROR_MESSAGE, e.getMessage());
397 res = Response.error(500, e, "Import error");
398
399 } finally {
400 doc.setField(Const.ATH_PROP_FINISHED_AT, new Date());
401
402 try {
403 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, doc);
404 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
405 } catch (Exception e) {
406 Log.error(getClass(), e, "Failed to update Solr after import");
407 }
408 }
409
410 return res.getStatus() == 200;
411 });
412
413 ProcessingResponse processingResponse = new ProcessingResponse();
414 processingResponse.setStatus(ProcessingResponse.StatusEnum.IMPORTING);
415 processingResponse.setDocId(docId);
416 processingResponse.setStatusUrl(Log.format("/document/{}/status", docId));
417 processingResponse.setSubmittedAt(new Date());
418
419 return Response.builder()
420 .status(Status.ACCEPTED)
421 .header("Location", Log.format("/document/{}/status", docId))
422 .entity(processingResponse)
423 .build();
424
425 } catch (Exception e) {
426 return Response.error(500, "Error importing document -- " + e.getMessage());
427 }
428 }
429
430 public ResponseContext exportDocument(RequestContext request, UUID docId, JsonNode bodyNode) {
431
432 ExportDocumentRequest body = AthUtil.safeFromJsonNode(bodyNode, ExportDocumentRequest.class,
433 null);
434
435 if (body == null) {
436 return Response.error(400, "Invalid request body");
437 }
438
439 URI docOutGcsUrl = body.getDocOutGcsUrl();
440 String docOutEncoding = body.getDocOutEncoding();
441 UUID tmId = body.getTmId();
442 UUID userId = body.getUserId();
443
444 if (!ControllerUtil.checkParam(docId)) {
445 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
446 }
447
448 if (!ControllerUtil.checkParam(docOutGcsUrl)) {
449 return Response.error(400, "Invalid request, docOutStorageName is not specified");
450 }
451
452 if (!ControllerUtil.checkParam(userId)) {
453 return Response.error(400, "Invalid request parameter User Id: " + userId);
454 }
455
456 String query = Log.format("docId:\"{}\"", docId);
457
458 try {
459 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
460 QueryResponse.class);
461
462 if (response.getResults().isEmpty()) {
463 return Response.error(404, "Document not found, docId: " + docId);
464 }
465
466 SolrDocument doc = response.getResults().get(0);
467
468 String status = SolrUtil.safeGetField(doc, Const.ATH_PROP_STATUS, null);
469
470
471 if (DocumentStatus.IMPORTING.toString().equals(status)) {
472 ConflictResponse conflict = new ConflictResponse();
473 conflict.setError("Cannot start export");
474 conflict.setStatus(AthUtil.safeToEnum(status, ConflictResponse.StatusEnum.class, null));
475 conflict.setDocId(docId);
476 conflict.setMessage("Import must be completed before export can be triggered");
477 conflict.setStatusUrl(Log.format("/document/{}/status", docId));
478
479 return Response.builder()
480 .status(Status.CONFLICT)
481 .entity(conflict)
482 .build();
483 }
484
485 if (DocumentStatus.EXPORTING.toString().equals(status)) {
486 ConflictResponse conflict = new ConflictResponse();
487 conflict.setError("Export already in progress");
488 conflict.setStatus(AthUtil.safeToEnum(status, ConflictResponse.StatusEnum.class, null));
489 conflict.setDocId(docId);
490 conflict.setStatusUrl(Log.format("/document/{}/status", docId));
491
492 return Response.builder()
493 .status(Status.CONFLICT)
494 .entity(conflict)
495 .build();
496 }
497
498 SolrDocument tmDoc = null;
499
500 if (tmId != null) {
501 tmDoc = SolrUtil.getTmByTmId(tmId);
502
503 if (tmDoc == null) {
504 return Response.error(404,
505 "TM not found, we can import to an existing TM only (tmId = {})", tmId);
506 }
507 }
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522 SolrInputDocument updateDoc = SolrUtil.toInputDocument(doc);
523
524 String docFileName = doc.getFieldValue(Const.ATH_PROP_DOC_FILE_NAME).toString();
525
526 updateDoc.setField(Const.ATH_PROP_STATUS, DocumentStatus.EXPORTING.toString());
527 updateDoc.setField(Const.ATH_PROP_PROCESSED_BY, userId.toString());
528 updateDoc.setField(Const.ATH_PROP_STARTED_AT, new Date());
529 updateDoc.setField(Const.ATH_PROP_FINISHED_AT, null);
530 updateDoc.setField(Const.ATH_PROP_ERROR_MESSAGE, null);
531 updateDoc.setField(Const.ATH_PROP_ERROR_TYPE, null);
532
533 updateDoc.setField(Const.ATH_PROP_DOC_OUT_STORAGE_NAME, docOutGcsUrl.toString());
534
535 if (docOutEncoding == null) {
536 docOutEncoding = doc.getFieldValue(Const.ATH_PROP_DOC_OUT_FILE_ENCODING) != null
537 ? doc.getFieldValue(Const.ATH_PROP_DOC_OUT_FILE_ENCODING).toString()
538 : null;
539
540 if (docOutEncoding == null) {
541
542 docOutEncoding = doc.getFieldValue(Const.ATH_PROP_DOC_FILE_ENCODING) != null
543 ? doc.getFieldValue(Const.ATH_PROP_DOC_FILE_ENCODING).toString()
544 : null;
545 }
546 }
547
548 updateDoc.setField(Const.ATH_PROP_DOC_OUT_FILE_ENCODING, docOutEncoding);
549
550 if (tmId != null) {
551 updateDoc.setField(Const.ATH_PROP_EXPORT_TM_ID, tmId.toString());
552 }
553
554 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, updateDoc);
555 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
556
557
558
559
560
561
562
563
564
565 final SolrDocument tmDocRef = tmDoc;
566 final String outEnc = docOutEncoding;
567
568 EXECUTOR.submit(() -> {
569 ResponseContext res = Response.success(200);
570
571 try {
572 res = ControllerUtil.exportFile(doc, tmDocRef, updateDoc, docOutGcsUrl, outEnc,
573 tmId, userId);
574
575
576 if (res.getStatus() == 200) {
577 Log.info(getClass(), "Export of '{}' succeeded", docFileName);
578 updateDoc.setField(Const.ATH_PROP_STATUS, DocumentStatus.EXPORT_COMPLETED.toString());
579
580 } else {
581 Log.warn(getClass(), Response.getMessage(res));
582 updateDoc.setField(Const.ATH_PROP_STATUS, DocumentStatus.FAILED.toString());
583 updateDoc.setField(Const.ATH_PROP_ERROR_MESSAGE, Response.getMessage(res));
584 }
585
586 } catch (Exception e) {
587 Log.error(DocumentController.class, e, "Export error");
588 updateDoc.setField(Const.ATH_PROP_STATUS, DocumentStatus.FAILED.toString());
589 updateDoc.setField(Const.ATH_PROP_ERROR_MESSAGE, e.getMessage());
590 res = Response.error(500, e, "Export error");
591 }
592
593 updateDoc.setField(Const.ATH_PROP_FINISHED_AT, new Date());
594
595 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, updateDoc);
596 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
597
598 return res.getStatus() == 200;
599 });
600
601 ProcessingResponse processingResponse = new ProcessingResponse();
602 processingResponse.setStatus(ProcessingResponse.StatusEnum.EXPORTING);
603 processingResponse.setDocId(docId);
604 processingResponse.setStatusUrl(Log.format("/document/{}/status", docId));
605 processingResponse.setSubmittedAt(new Date());
606
607 return Response.builder()
608 .status(Status.ACCEPTED)
609 .header("Location", Log.format("/document/{}/status", docId))
610 .entity(processingResponse)
611 .build();
612
613 } catch (Exception e) {
614 return Response.error(500, "Error starting document export -- " + e.getMessage());
615 }
616 }
617
618
619
620 public ResponseContext getDocumentStatus(RequestContext request, UUID docId) {
621 if (!ControllerUtil.checkParam(docId)) {
622 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
623 }
624
625 String query = Log.format("docId:\"{}\"", docId);
626
627 try {
628 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
629 QueryResponse.class);
630
631 if (response.getResults().isEmpty()) {
632 return Response.error(404, "Document not found, docId: " + docId);
633 }
634
635 SolrDocument doc = response.getResults().get(0);
636 String status = SolrUtil.safeGetField(doc, Const.ATH_PROP_STATUS, null);
637
638 if (DocumentStatus.IMPORTING.toString().equals(status)
639 || DocumentStatus.EXPORTING.toString().equals(status)) {
640
641 ProcessingStatusResponse processingResponse = new ProcessingStatusResponse();
642 processingResponse.setStatus(AthUtil.safeToEnum(status,
643 ProcessingStatusResponse.StatusEnum.class, null));
644 processingResponse.setDocId(docId);
645
646 Date startedAt = AthUtil.safeToDate(doc.get(Const.ATH_PROP_STARTED_AT), null);
647 processingResponse.setStartedAt(startedAt);
648
649 return Response.builder()
650 .status(Status.ACCEPTED)
651 .header("Retry-After", "30")
652 .entity(processingResponse)
653 .build();
654
655 } else if (DocumentStatus.IMPORT_COMPLETED.toString().equals(status)) {
656
657 ImportCompletedStatusResponse completedResponse = new ImportCompletedStatusResponse();
658 completedResponse.setStatus(AthUtil.safeToEnum(status,
659 ImportCompletedStatusResponse.StatusEnum.class,
660 ImportCompletedStatusResponse.StatusEnum.IMPORT_COMPLETED));
661 completedResponse.setDocId(docId);
662
663 Date finishedAt = AthUtil.safeToDate(doc.get(Const.ATH_PROP_FINISHED_AT), null);
664 completedResponse.setCompletedAt(finishedAt);
665 completedResponse.setExportUrl(Log.format("/document/{}/export", docId));
666
667 return Response.success(200, completedResponse);
668
669 } else if (DocumentStatus.EXPORT_COMPLETED.toString().equals(status)) {
670
671 ExportCompletedStatusResponse exportResponse = new ExportCompletedStatusResponse();
672 exportResponse.setStatus(AthUtil.safeToEnum(status,
673 ExportCompletedStatusResponse.StatusEnum.class,
674 ExportCompletedStatusResponse.StatusEnum.EXPORT_COMPLETED));
675 exportResponse.setDocId(docId);
676
677 Date finishedAt = AthUtil.safeToDate(doc.get(Const.ATH_PROP_FINISHED_AT), null);
678 exportResponse.setCompletedAt(finishedAt);
679 exportResponse.setDownloadUrl(Log.format("/document/{}", docId));
680
681 return Response.success(200, exportResponse);
682
683 } else if ("FAILED".equals(status)) {
684
685 FailedStatusResponse failedResponse = new FailedStatusResponse();
686 failedResponse.setStatus(FailedStatusResponse.StatusEnum.FAILED);
687 failedResponse.setDocId(docId);
688
689 String errorMessage = doc.getFieldValue(Const.ATH_PROP_ERROR_MESSAGE) != null
690 ? doc.getFieldValue(Const.ATH_PROP_ERROR_MESSAGE).toString()
691 : "Unknown error";
692 failedResponse.setErrorMessage(errorMessage);
693
694 String errorType = SolrUtil.safeGetField(doc, Const.ATH_PROP_ERROR_TYPE,
695 "UNKNOWN_ERROR");
696
697 failedResponse.setErrorType(AthUtil.safeToEnum(errorType,
698 FailedStatusResponse.ErrorTypeEnum.class,
699 FailedStatusResponse.ErrorTypeEnum.UNKNOWN_ERROR));
700
701 Date finishedAt = AthUtil.safeToDate(doc.get(Const.ATH_PROP_FINISHED_AT), null);
702 failedResponse.setFailedAt(finishedAt);
703
704 return Response.success(200, failedResponse);
705
706 } else {
707
708 return Response.error(500, "Unknown document status: " + status);
709 }
710
711 } catch (Exception e) {
712 return Response.error(500, "Error getting document status -- " + e.getMessage());
713 }
714 }
715
716
717
718 public ResponseContext downloadDocument(RequestContext request, UUID docId) {
719 if (!ControllerUtil.checkParam(docId)) {
720 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
721 }
722
723 String query = Log.format("docId:\"{}\"", docId);
724
725 try {
726 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
727 QueryResponse.class);
728
729 if (response.getResults().isEmpty()) {
730 return Response.error(404, "Document not found, docId: " + docId);
731 }
732
733 SolrDocument doc = response.getResults().get(0);
734
735 String status = SolrUtil.safeGetField(doc, Const.ATH_PROP_STATUS, null);
736
737 if (DocumentStatus.EXPORT_COMPLETED.toString().equals(status)) {
738
739 String docFileName = SolrUtil.safeGetField(doc, Const.ATH_PROP_DOC_FILE_NAME,
740 "download");
741
742 String docOutStorageName = SolrUtil.safeGetField(doc,
743 Const.ATH_PROP_DOC_OUT_STORAGE_NAME, null);
744
745 if (docOutStorageName == null) {
746 return Response.error(500,
747 "Cannot create the export file -- GCS storage name (doc_out_storage_name) is not "
748 + "specified");
749 }
750
751 File exportedFile = null;
752
753 try {
754 exportedFile = AthUtil.createTempFile();
755
756
757 AthStorage.loadFile(AthUtil.toURI(docOutStorageName), exportedFile);
758
759 } catch (IOException e) {
760 return Response.error(500, "Cannot create temp file -- " + e.getMessage());
761 }
762
763 return Response.builder()
764 .status(Status.OK)
765 .header("Content-Disposition",
766 Log.format("attachment; filename=\"{}\"", docFileName))
767 .entity(exportedFile)
768 .build();
769
770 } else if (DocumentStatus.IMPORTING.toString().equals(status)) {
771 ConflictDownloadResponse conflict = new ConflictDownloadResponse();
772 conflict.setError("Document not ready for download");
773 conflict.setStatus(AthUtil.safeToEnum(status,
774 ConflictDownloadResponse.StatusEnum.class, null));
775 conflict.setMessage("Document import is still in progress. Check status endpoint.");
776 conflict.setStatusUrl(Log.format("/document/{}/status", docId));
777
778 return Response.builder()
779 .status(Status.CONFLICT)
780 .entity(conflict)
781 .build();
782
783 } else if (DocumentStatus.IMPORT_COMPLETED.toString().equals(status)) {
784 ConflictDownloadResponse conflict = new ConflictDownloadResponse();
785 conflict.setError("Document not ready for download");
786 conflict.setStatus(AthUtil.safeToEnum(status,
787 ConflictDownloadResponse.StatusEnum.class, null));
788 conflict.setMessage("Call POST /document/{doc_id}/export to generate the translation");
789 conflict.setStatusUrl(Log.format("/document/{}/status", docId));
790 conflict.setExportUrl(Log.format("/document/{}/export", docId));
791
792 return Response.builder()
793 .status(Status.CONFLICT)
794 .entity(conflict)
795 .build();
796
797 } else if (DocumentStatus.EXPORTING.toString().equals(status)) {
798 ConflictDownloadResponse conflict = new ConflictDownloadResponse();
799 conflict.setError("Document not ready for download");
800 conflict.setStatus(AthUtil.safeToEnum(status,
801 ConflictDownloadResponse.StatusEnum.class, null));
802 conflict.setMessage("Document export is still in progress. Check status endpoint.");
803 conflict.setStatusUrl(Log.format("/document/{}/status", docId));
804
805 return Response.builder()
806 .status(Status.CONFLICT)
807 .entity(conflict)
808 .build();
809
810 } else {
811 return Response.error(500, "Unexpected document status: " + status);
812 }
813
814 } catch (Exception e) {
815 return Response.error(500, "Error downloading document -- " + e.getMessage());
816 }
817 }
818
819 public ResponseContext deleteDocument(RequestContext request, UUID docId) {
820 if (!ControllerUtil.checkParam(docId)) {
821 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
822 }
823
824 String query = Log.format("docId:\"{}\"", docId);
825
826 try {
827
828 if (SolrUtil.getNumDocuments(Const.SOLR_CORE_ATH_DOCS, query) <= 0) {
829 return Response.error(404, "Document not found, docId: " + docId);
830 }
831
832
833 AthIndex.deleteByQuery(Const.SOLR_CORE_ATH_DOCS, query);
834 AthIndex.deleteByQuery(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query);
835
836
837
838 return Response.success(204, "Document (id={}) was deleted successfully", docId);
839
840 } catch (Exception e) {
841 String st = Log.format("Error deleting Document (id={}) -- {}", docId, e.getMessage());
842 Log.error(this.getClass(), e, st);
843 return Response.error(500, st);
844 }
845 }
846
847
848
849 public ResponseContext getDocumentSegments(RequestContext request, UUID docId,
850 Integer page, Integer pageSize) {
851
852 if (!ControllerUtil.checkParam(docId)) {
853 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
854 }
855
856 String query = Log.format("docId:\"{}\"", docId);
857
858 try {
859 long totalItems = SolrUtil.getNumDocuments(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query);
860
861 if (totalItems <= 0) {
862 DocumentSegmentsWrapper emptyWrapper = new DocumentSegmentsWrapper()
863 .documentSegments(new ArrayList<>())
864 .pagination(new PaginationInfo()
865 .page(1)
866 .pageSize(0)
867 .totalItems(0L)
868 .totalPages(0)
869 .hasNext(false)
870 .hasPrevious(false));
871
872 return Response.success(200, emptyWrapper);
873 }
874
875
876 int size = (pageSize != null) ? Math.max(1, Math.min(100, pageSize)) : (int) totalItems;
877 int totalPages = (int) Math.ceil(totalItems / (double) size);
878 int pageNum = (page != null) ? Math.max(1, Math.min(page, Math.max(1, totalPages))) : 1;
879
880 SolrClient solrClient = AthIndex.getSolr().getClient();
881 SolrQuery solrQuery = new SolrQuery(query)
882 .setStart((pageNum - 1) * size)
883 .setRows(size)
884 .addSort(Const.ATH_PROP_POSITION, SolrQuery.ORDER.asc);
885
886 QueryResponse response = solrClient.query(Const.SOLR_CORE_ATH_DOC_SEGMENTS, solrQuery);
887 SolrDocumentList docList = response.getResults();
888
889 List<DocumentSegment> segments = docList.stream()
890 .map(this::toDocumentSegment)
891 .filter(Objects::nonNull)
892 .collect(Collectors.toList());
893
894
895
896 PaginationInfo pagination = new PaginationInfo()
897 .page(pageNum)
898 .pageSize(size)
899 .totalItems(totalItems)
900 .totalPages(totalPages)
901 .hasNext(pageNum < totalPages)
902 .hasPrevious(pageNum > 1);
903
904 DocumentSegmentsWrapper wrapper = new DocumentSegmentsWrapper()
905 .documentSegments(segments)
906 .pagination(pagination);
907
908 return Response.success(200, wrapper);
909
910 } catch (Exception e) {
911 return Response.error(500, e, "Error fetching document segments");
912 }
913 }
914
915
916
917
918
919
920
921
922
923
924
925
926 public ResponseContext createDocumentSegment(RequestContext request, UUID docId,
927 JsonNode bodyNode) {
928
929 if (!ControllerUtil.checkParam(docId)) {
930 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
931 }
932
933 if (!ControllerUtil.checkParam(bodyNode)) {
934 return Response.error(400, "Invalid request parameter, bodyNode is null");
935 }
936
937 CreateDocumentSegmentRequest body = AthUtil.safeFromJsonNode(bodyNode,
938 CreateDocumentSegmentRequest.class, null);
939
940 if (body == null) {
941 return Response.error(400, "Invalid request body");
942 }
943
944 try {
945 LayeredTextX source = body.getSource();
946 LayeredTextX target = body.getTarget();
947 Long position = body.getPosition();
948 Origin origin = body.getOrigin();
949 UUID userId = body.getUserId();
950
951 SolrDocument solrDoc = SolrUtil.getDocumentByDocId(docId);
952
953 if (solrDoc == null) {
954 return Response.error(404, "Document not found, docId: " + docId);
955 }
956
957
958
959 SolrUtil.moveDocSegmentsBelow(docId, position);
960
961 DocumentSegment segment = new DocumentSegment();
962
963 segment.setSource(source);
964 segment.setTarget(target);
965 segment.setPosition(position);
966 segment.setOrigin(origin);
967
968 segment.setId(SolrUtil.buildDocSegSolrId(docId, position));
969 segment.setDocFileName(SolrUtil.safeGetField(solrDoc, Const.ATH_PROP_DOC_FILE_NAME, null));
970
971 segment.setDocId(docId);
972 segment.setDocSegId(UUID.randomUUID());
973 segment.setSrcLang(SolrUtil.safeGetField(solrDoc, Const.ATH_PROP_SRC_LANG, null));
974 segment.setTrgLang(SolrUtil.safeGetField(solrDoc, Const.ATH_PROP_TRG_LANG, null));
975
976 segment.setCreatedBy(userId);
977 segment.setCreatedAt(new Date());
978
979
980 SolrInputDocument segDoc = toSolrDoc(segment);
981 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOC_SEGMENTS, segDoc);
982 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOC_SEGMENTS);
983
984 return Response.success(201, "Document segment created successfully");
985
986 } catch (Exception e) {
987 return Response.error(500, "Error creating document segment -- " + e.getMessage());
988 }
989 }
990
991 public ResponseContext getDocumentSegment(RequestContext request, UUID docId,
992 UUID docSegId) {
993
994 if (!ControllerUtil.checkParam(docId)) {
995 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
996 }
997
998 if (!ControllerUtil.checkParam(docSegId)) {
999 return Response.error(400, "Invalid request parameter Segment Id: " + docSegId);
1000 }
1001
1002 String query = Log.format("docId:\"{}\" AND docSegId:\"{}\"", docId, docSegId);
1003
1004 try {
1005 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query, null,
1006 QueryResponse.class);
1007
1008 SolrDocumentList docList = response.getResults();
1009
1010 if (docList.isEmpty()) {
1011 return Response.error(404, "Document segment not found");
1012 }
1013
1014 SolrDocument doc = docList.get(0);
1015 DocumentSegment segment = toDocumentSegment(doc);
1016
1017 if (segment == null) {
1018 return Response.error(500, "Error parsing segment data");
1019 }
1020
1021 DocumentSegmentWrapper wrapper = new DocumentSegmentWrapper();
1022 wrapper.setDocumentSegment(segment);
1023
1024 return Response.success(200, wrapper);
1025
1026 } catch (Exception e) {
1027 return Response.error(500, "Error fetching document segment -- " + e.getMessage());
1028 }
1029 }
1030
1031 public ResponseContext updateDocumentSegment(RequestContext request, UUID docId,
1032 UUID docSegId, JsonNode bodyNode) {
1033
1034 if (!ControllerUtil.checkParam(docId)) {
1035 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
1036 }
1037
1038 if (!ControllerUtil.checkParam(docSegId)) {
1039 return Response.error(400, "Invalid request parameter Segment Id: " + docSegId);
1040 }
1041
1042 if (!ControllerUtil.checkParam(bodyNode)) {
1043 return Response.error(400, "Invalid request, bodyNode is null");
1044 }
1045
1046 UpdateDocumentSegmentRequest body = AthUtil.safeFromJsonNode(bodyNode,
1047 UpdateDocumentSegmentRequest.class, null);
1048
1049 if (body == null) {
1050 return Response.error(400, "Invalid request body");
1051 }
1052
1053 try {
1054 LayeredTextX target = body.getTarget();
1055 Origin origin = body.getOrigin();
1056 UUID userId = body.getUserId();
1057
1058 SolrDocument solrDoc = SolrUtil.getDocumentSegment(docId, docSegId);
1059
1060
1061 DocumentSegment segment = toDocumentSegment(solrDoc);
1062
1063 segment.setTarget(target);
1064 segment.setOrigin(origin);
1065
1066 segment.setUpdatedBy(userId);
1067 segment.setUpdatedAt(new Date());
1068
1069
1070 SolrInputDocument segDoc = toSolrDoc(segment);
1071 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOC_SEGMENTS, segDoc);
1072 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOC_SEGMENTS);
1073
1074 return Response.success(200, "Document segment updated successfully");
1075
1076 } catch (Exception e) {
1077 return Response.error(500, "Error updating document segment -- " + e.getMessage());
1078 }
1079 }
1080
1081 public ResponseContext deleteDocumentSegment(RequestContext request, UUID docId,
1082 UUID docSegId) {
1083
1084 if (!ControllerUtil.checkParam(docId)) {
1085 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
1086 }
1087
1088 if (!ControllerUtil.checkParam(docSegId)) {
1089 return Response.error(400, "Invalid request parameter Segment Id: " + docSegId);
1090 }
1091
1092 String query = Log.format("docId:\"{}\" AND docSegId:\"{}\"", docId, docSegId);
1093
1094 try {
1095 if (SolrUtil.getNumDocuments(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query) <= 0) {
1096 return Response.error(404, "Document segment not found");
1097 }
1098
1099 AthIndex.deleteByQuery(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query);
1100
1101 return Response.success(204, "Document segment deleted successfully");
1102
1103 } catch (Exception e) {
1104 String st = Log.format("Error deleting document segment (docId={}, segId={}) -- {}",
1105 docId, docSegId, e.getMessage());
1106 Log.error(this.getClass(), e, st);
1107 return Response.error(500, st);
1108 }
1109 }
1110
1111
1112
1113 private DocumentSegment toDocumentSegment(SolrDocument doc) {
1114 try {
1115 DocumentSegment segment = new DocumentSegment();
1116
1117
1118 segment.setId(SolrUtil.safeGetField(doc, Const.ATH_PROP_SOLR_ID, null));
1119
1120 segment.setDocId(AthUtil.safeToUuid(
1121 SolrUtil.safeGetField(doc, Const.ATH_PROP_DOC_ID, null), null));
1122
1123 segment.setDocSegId(AthUtil.safeToUuid(
1124 SolrUtil.safeGetField(doc, Const.ATH_PROP_DOC_SEG_ID, null), null));
1125
1126 segment.setTuId(SolrUtil.safeGetField(doc, Const.ATH_PROP_TU_ID, null));
1127
1128 segment.setDocFileName(SolrUtil.safeGetField(doc, Const.ATH_PROP_DOC_FILE_NAME, null));
1129 segment.setSrcLang(SolrUtil.safeGetField(doc, Const.ATH_PROP_SRC_LANG, null));
1130 segment.setTrgLang(SolrUtil.safeGetField(doc, Const.ATH_PROP_TRG_LANG, null));
1131
1132
1133 segment.setPosition(SolrUtil.safeGetLongField(doc, Const.ATH_PROP_POSITION, null));
1134
1135
1136 String sourceJson = doc.getFieldValue(Const.ATH_PROP_SOURCE_JSON) != null
1137 ? doc.getFieldValue(Const.ATH_PROP_SOURCE_JSON).toString()
1138 : null;
1139
1140 String targetJson = doc.getFieldValue(Const.ATH_PROP_TARGET_JSON) != null
1141 ? doc.getFieldValue(Const.ATH_PROP_TARGET_JSON).toString()
1142 : null;
1143
1144 if (sourceJson != null) {
1145 LayeredTextX slt = JacksonUtil.fromJson(sourceJson, LayeredTextX.class);
1146 segment.setSource(slt);
1147 }
1148
1149 if (targetJson != null) {
1150 LayeredTextX tlt = JacksonUtil.fromJson(targetJson, LayeredTextX.class);
1151 segment.setTarget(tlt);
1152 }
1153
1154
1155 segment.setOrigin(Origin.HT);
1156 segment.setTmMatchScore(0);
1157 segment.setMtConfidenceScore(0d);
1158
1159 segment.setOrigin(AthUtil.safeToEnum(
1160 SolrUtil.safeGetField(doc, Const.ATH_PROP_ORIGIN, null),
1161 Origin.class, null));
1162
1163 if (segment.getOrigin() == Origin.TM) {
1164 segment.setTmMatchScore(AthUtil.safeToInt(
1165 SolrUtil.safeGetField(doc, Const.ATH_PROP_TM_MATCH_SCORE, null), 0));
1166
1167 } else if (segment.getOrigin() == Origin.MT) {
1168 segment.setMtConfidenceScore(AthUtil.safeToDouble(
1169 doc.getFieldValue(Const.ATH_PROP_MT_CONFIDENCE_SCORE), 0d));
1170 }
1171
1172 String altTransJson = doc.getFieldValue(Const.ATH_PROP_ALT_TRANS_JSON) != null
1173 ? doc.getFieldValue(Const.ATH_PROP_ALT_TRANS_JSON).toString()
1174 : null;
1175
1176 if (altTransJson != null) {
1177 TypeReference<List<MtTargetInfo>> ref = new TypeReference<>() {
1178 };
1179
1180 List<MtTargetInfo> altTrans = JacksonUtil.fromJson(altTransJson, ref);
1181 segment.setMtTargets(altTrans);
1182 }
1183
1184 segment.setMtTargetIndex(SolrUtil.safeGetIntField(doc, Const.ATH_PROP_ALT_TRANS_INDEX, -1));
1185
1186 if (segment.getMtTargetIndex() == -1
1187 && segment.getMtTargets() != null
1188 && segment.getMtTargets().size() > 0) {
1189
1190 segment.setMtTargetIndex(0);
1191
1192 if (segment.getMtConfidenceScore() == 0) {
1193 MtTargetInfo mti = segment.getMtTargets().get(0);
1194 segment.setMtConfidenceScore(mti.getMtConfidenceScore());
1195 }
1196 }
1197
1198
1199 segment.setCreatedAt(AthUtil.safeToDate(doc.get(Const.ATH_PROP_CREATED_AT), null));
1200 segment.setUpdatedAt(AthUtil.safeToDate(doc.get(Const.ATH_PROP_UPDATED_AT), null));
1201
1202 segment.setCreatedBy(AthUtil.safeToUuid(
1203 SolrUtil.safeGetField(doc, Const.ATH_PROP_CREATED_BY, null), null));
1204
1205 segment.setUpdatedBy(AthUtil.safeToUuid(
1206 SolrUtil.safeGetField(doc, Const.ATH_PROP_UPDATED_BY, null), null));
1207
1208 return segment;
1209
1210 } catch (Exception e) {
1211 Log.error(this.getClass(), e, "Error converting Solr document to DocumentSegment");
1212 return null;
1213 }
1214 }
1215
1216 private SolrInputDocument toSolrDoc(DocumentSegment segment) throws AthException {
1217 SolrInputDocument doc = new SolrInputDocument();
1218
1219 if (segment.getId() != null) {
1220 doc.addField(Const.ATH_PROP_SOLR_ID, segment.getId().toString());
1221 }
1222
1223 if (segment.getDocSegId() != null) {
1224 doc.addField(Const.ATH_PROP_DOC_SEG_ID, segment.getDocSegId().toString());
1225 }
1226
1227 if (segment.getDocId() != null) {
1228 doc.addField(Const.ATH_PROP_DOC_ID, segment.getDocId().toString());
1229 }
1230
1231 if (segment.getTuId() != null) {
1232 doc.addField(Const.ATH_PROP_TU_ID, segment.getTuId());
1233 }
1234
1235 if (segment.getDocFileName() != null) {
1236 doc.addField(Const.ATH_PROP_DOC_FILE_NAME, segment.getDocFileName());
1237 }
1238
1239 if (segment.getSrcLang() != null) {
1240 doc.addField(Const.ATH_PROP_SRC_LANG, segment.getSrcLang());
1241 }
1242
1243 if (segment.getTrgLang() != null) {
1244 doc.addField(Const.ATH_PROP_TRG_LANG, segment.getTrgLang());
1245 }
1246
1247 if (segment.getPosition() != null) {
1248 doc.addField(Const.ATH_PROP_POSITION, segment.getPosition().toString());
1249 }
1250
1251
1252 if (segment.getSource() != null) {
1253 String sourceJson = JacksonUtil.toJson(segment.getSource(), false);
1254 doc.addField(Const.ATH_PROP_SOURCE_JSON, sourceJson);
1255
1256
1257 if (segment.getSource().getText() != null) {
1258 doc.addField(Const.ATH_PROP_SOURCE, segment.getSource().getText());
1259 }
1260
1261
1262 if (segment.getSource().getTextWithCodes() != null) {
1263 doc.addField(Const.ATH_PROP_SOURCE_WITH_CODES,
1264 segment.getSource().getTextWithCodes());
1265 }
1266 }
1267
1268 if (segment.getTarget() != null) {
1269 String targetJson = JacksonUtil.toJson(segment.getTarget(), false);
1270 doc.addField(Const.ATH_PROP_TARGET_JSON, targetJson);
1271
1272
1273 if (segment.getTarget().getText() != null) {
1274 doc.addField(Const.ATH_PROP_TARGET, segment.getTarget().getText());
1275 }
1276
1277
1278 if (segment.getTarget().getTextWithCodes() != null) {
1279 doc.addField(Const.ATH_PROP_TARGET_WITH_CODES,
1280 segment.getTarget().getTextWithCodes());
1281 }
1282 }
1283
1284 if (segment.getOrigin() != null) {
1285 doc.addField(Const.ATH_PROP_ORIGIN, segment.getOrigin().name());
1286 }
1287
1288 List<MtTargetInfo> altTrans = segment.getMtTargets();
1289
1290 if (altTrans != null) {
1291 String altTransJson = JacksonUtil.toJson(altTrans, false);
1292 doc.addField(Const.ATH_PROP_ALT_TRANS_JSON, altTransJson);
1293 }
1294
1295 doc.addField(Const.ATH_PROP_ALT_TRANS_INDEX, -1);
1296
1297 if (segment.getTmMatchScore() != null) {
1298 doc.addField(Const.ATH_PROP_TM_MATCH_SCORE, segment.getTmMatchScore());
1299 }
1300
1301 if (segment.getMtConfidenceScore() != null) {
1302 doc.addField(Const.ATH_PROP_MT_CONFIDENCE_SCORE, segment.getMtConfidenceScore());
1303 }
1304
1305
1306 if (segment.getCreatedAt() != null) {
1307 doc.addField(Const.ATH_PROP_CREATED_AT, segment.getCreatedAt());
1308 }
1309
1310 if (segment.getUpdatedAt() != null) {
1311 doc.addField(Const.ATH_PROP_UPDATED_AT, segment.getUpdatedAt());
1312 }
1313
1314
1315 if (segment.getCreatedBy() != null) {
1316 doc.addField(Const.ATH_PROP_CREATED_BY, segment.getCreatedBy().toString());
1317 }
1318
1319 if (segment.getUpdatedBy() != null) {
1320 doc.addField(Const.ATH_PROP_UPDATED_BY, segment.getUpdatedBy().toString());
1321 }
1322
1323 return doc;
1324 }
1325
1326 public ResponseContext getModifiedSegmentsSummary(RequestContext request, UUID docId,
1327 Integer page, Integer pageSize) {
1328
1329 if (!ControllerUtil.checkParam(docId)) {
1330 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
1331 }
1332
1333 try {
1334 SolrDocument docInfo = SolrUtil.getDocumentByDocId(docId);
1335
1336 if (docInfo == null) {
1337 return Response.error(404, "Document not found");
1338 }
1339
1340 Date docUpdatedAt = AthUtil.safeToDate(docInfo.get(Const.ATH_PROP_UPDATED_AT), null);
1341
1342
1343 String query;
1344 if (docUpdatedAt == null) {
1345
1346
1347 query = Log.format("docId:\"{}\" AND updatedAt:[* TO *]", docId);
1348 } else {
1349
1350
1351 query = Log.format("docId:\"{}\" AND updatedAt:{{} TO *}", docId,
1352 AthUtil.dateToString(Const.QUARTZ_DATE_FORMAT, docUpdatedAt));
1353 }
1354
1355 long totalItems = SolrUtil.getNumDocuments(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query);
1356
1357 int pageNum = (page != null && page >= 1) ? page : 1;
1358 int size = (pageSize != null) ? Math.max(1, Math.min(100, pageSize)) : (int) totalItems;
1359 int totalPages = (int) Math.ceil((double) totalItems / size);
1360
1361 if (pageNum > totalPages && totalPages > 0) {
1362 pageNum = totalPages;
1363 }
1364
1365 SolrQuery solrQuery = new SolrQuery(query)
1366 .setRows(size)
1367 .setStart((pageNum - 1) * size)
1368 .addSort(Const.ATH_PROP_POSITION, SolrQuery.ORDER.asc);
1369
1370 QueryResponse resp = AthIndex.getSolr().getClient().query(Const.SOLR_CORE_ATH_DOC_SEGMENTS,
1371 solrQuery);
1372
1373 List<DocumentSegment> segments = resp.getResults().stream()
1374 .map(this::toDocumentSegment)
1375 .filter(Objects::nonNull)
1376 .collect(Collectors.toList());
1377
1378 PaginationInfo pagination = new PaginationInfo()
1379 .page(pageNum)
1380 .pageSize(size)
1381 .totalItems(totalItems)
1382 .totalPages(totalPages)
1383 .hasNext(pageNum < totalPages)
1384 .hasPrevious(pageNum > 1);
1385
1386 ModifiedSegmentsWrapper wrapper = new ModifiedSegmentsWrapper()
1387 .modSegments(segments)
1388 .pagination(pagination);
1389
1390 return Response.success(200, wrapper);
1391
1392 } catch (Exception e) {
1393 return Response.error(500, e, "Error fetching modified segments");
1394 }
1395 }
1396
1397 public ResponseContext clearModifiedSegmentsSummary(RequestContext request, UUID docId) {
1398 if (!ControllerUtil.checkParam(docId)) {
1399 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
1400 }
1401
1402 try {
1403
1404 SolrDocument existingDoc = SolrUtil.getDocumentByDocId(docId);
1405
1406 if (existingDoc == null) {
1407 return Response.error(404, "Document not found, docId: " + docId);
1408 }
1409
1410
1411 SolrInputDocument doc = SolrUtil.toInputDocument(existingDoc);
1412 doc.setField(Const.ATH_PROP_UPDATED_AT, new Date());
1413
1414 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, doc);
1415 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
1416
1417 return Response.success(200, "Modified segments summary cleared successfully");
1418
1419 } catch (Exception e) {
1420 return Response.error(500, "Error clearing modified segments summary -- " + e.getMessage());
1421 }
1422 }
1423
1424 public ResponseContext createDocument(RequestContext request, File docFile, String docUrl,
1425 String docFileName, String docEncoding, UUID docId, String docGcsUrl, UUID userId) {
1426
1427
1428
1429
1430 docFileName = docFile != null
1431 ? Util.getFilename(docFile.getAbsolutePath(), true) :
1432 Util.getFilename(docGcsUrl, true);
1433
1434 if (!ControllerUtil.checkParam(docFileName)) {
1435 return Response.error(400, "Invalid request, doc_file_name is not specified");
1436 }
1437
1438 if (!ControllerUtil.checkParam(docId)) {
1439 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
1440 }
1441
1442 if (!ControllerUtil.checkParam(docGcsUrl)) {
1443 return Response.error(400, "Invalid request, doc_gcs_url is not specified");
1444 }
1445
1446 if (!ControllerUtil.checkParam(userId)) {
1447 return Response.error(400, "Invalid request parameter User Id: " + userId);
1448 }
1449
1450
1451 fileToImport = null;
1452
1453 try {
1454
1455 if (docFile != null && docFile.exists()) {
1456 fileToImport = docFile;
1457 isTempFile = false;
1458 Log.info(getClass(), "Using uploaded file: {}", docFile.getAbsolutePath());
1459 }
1460
1461
1462 else if (!Util.isEmpty(docUrl)) {
1463 URI uri = AthUtil.toURI(docUrl);
1464
1465 if (!uri.getScheme().matches("^(http|https)$")) {
1466 return Response.error(400, "doc_url must be HTTP or HTTPS");
1467 }
1468
1469 File tempFile = AthUtil.createTempFile();
1470
1471 if (tempFile == null) {
1472 return Response.error(500, "Failed to create temp file for URL download");
1473 }
1474
1475 try (CloseableHttpClient client = HttpClients.createDefault()) {
1476 HttpGet httpGet = new HttpGet(docUrl);
1477
1478 client.execute(httpGet, response -> {
1479 if (response.getStatusLine().getStatusCode() >= 400) {
1480 throw new IOException("HTTP " + response.getStatusLine().getStatusCode());
1481 }
1482
1483 try (InputStream in = response.getEntity().getContent()) {
1484 FileUtils.copyInputStreamToFile(in, tempFile);
1485 }
1486
1487 return null;
1488 });
1489 }
1490
1491 fileToImport = tempFile;
1492 isTempFile = true;
1493 Log.info(getClass(), "Downloaded from URL to temp file: {}", tempFile.getAbsolutePath());
1494 }
1495
1496
1497 String query = Log.format("docId:\"{}\"", docId);
1498 SolrDocument existingDoc = null;
1499
1500 try {
1501 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
1502 QueryResponse.class);
1503
1504 if (!response.getResults().isEmpty()) {
1505 existingDoc = response.getResults().get(0);
1506 }
1507
1508 } catch (Exception e) {
1509 Log.error(this.getClass(), e, "Error checking document existence");
1510 }
1511
1512
1513 if (fileToImport != null) {
1514 AthStorage.storeFile(AthUtil.toURI(docGcsUrl), OkapiUtil.getMimeType(docGcsUrl),
1515 fileToImport);
1516 }
1517
1518
1519 SolrInputDocument doc = existingDoc == null ? new SolrInputDocument()
1520 : SolrUtil.toInputDocument(existingDoc);
1521
1522 doc.setField(Const.ATH_PROP_DOC_ID, docId.toString());
1523 doc.setField(Const.ATH_PROP_DOC_FILE_NAME, docFileName);
1524 doc.setField(Const.ATH_PROP_DOC_STORAGE_NAME, docGcsUrl.toString());
1525 doc.setField(Const.ATH_PROP_DOC_FILE_ENCODING, docEncoding);
1526 doc.setField(Const.ATH_PROP_CREATED_BY, userId.toString());
1527 doc.setField(Const.ATH_PROP_CREATED_AT, new Date());
1528
1529 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, doc);
1530 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
1531
1532
1533 if (isTempFile && fileToImport != null && fileToImport.exists()) {
1534 try {
1535 Files.deleteIfExists(fileToImport.toPath());
1536 Log.info(getClass(), "Deleted temp file: {}", fileToImport.getAbsolutePath());
1537
1538 } catch (Exception ex) {
1539 Log.warn(getClass(), "Failed to delete temp file: {}", ex.getMessage());
1540 }
1541 }
1542
1543 return Response.success(201, "Document uploaded successfully");
1544
1545 } catch (Exception e) {
1546 return Response.error(500, e, "Error uploading document");
1547
1548 } finally {
1549 if (isTempFile && fileToImport != null && fileToImport.exists()) {
1550 try {
1551 Files.deleteIfExists(fileToImport.toPath());
1552
1553 } catch (Exception ignored) {
1554 }
1555 }
1556 }
1557 }
1558
1559 public ResponseContext alignDocument(RequestContext request, UUID docId, JsonNode bodyNode) {
1560
1561 if (!ControllerUtil.checkParam(docId)) {
1562 return Response.error(400, "Invalid request parameter Doc Id: " + docId);
1563 }
1564
1565 if (!ControllerUtil.checkParam(bodyNode)) {
1566 return Response.error(400, "Invalid request parameter, bodyNode is null");
1567 }
1568
1569 AlignDocumentRequest body = AthUtil.safeFromJsonNode(bodyNode,
1570 AlignDocumentRequest.class, null);
1571
1572 if (body == null) {
1573 return Response.error(400, "Invalid request body");
1574 }
1575
1576 String srcLang = body.getSrcLang();
1577 String trgLang = body.getTrgLang();
1578 URI docTrlGcsUrl = body.getDocTrlGcsUrl();
1579 String docTrlEncoding = body.getDocTrlEncoding();
1580 String srcSrx = body.getSrcSrx();
1581 String trgSrx = body.getTrgSrx();
1582 Boolean useAlignmentModel = body.getUseAlignmentModel();
1583 String alignmentModelName = body.getAlignmentModelName();
1584 Boolean useCodesReinsertionModel = body.getUseCodesReinsertionModel();
1585 String codesReinsertionModelName = body.getCodesReinsertionModelName();
1586 UUID userId = body.getUserId();
1587
1588 if (!ControllerUtil.checkParam(srcLang)) {
1589 return Response.error(400, "Invalid request, srcLang is not specified");
1590 }
1591
1592 if (!ControllerUtil.checkParam(trgLang)) {
1593 return Response.error(400, "Invalid request, trgLang is not specified");
1594 }
1595
1596 if (!ControllerUtil.checkParam(docTrlGcsUrl)) {
1597 return Response.error(400, "Invalid request, doc_trl_gcs_url is not specified");
1598 }
1599
1600 if (!ControllerUtil.checkParam(userId)) {
1601 return Response.error(400, "Invalid request parameter User Id: " + userId);
1602 }
1603
1604 if (!AthStorage.exists(docTrlGcsUrl)) {
1605 return Response.error(404, "Translation object not found in GCS: " + docTrlGcsUrl);
1606 }
1607
1608 String query = Log.format("docId:\"{}\"", docId);
1609
1610 try {
1611 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
1612 QueryResponse.class);
1613
1614 if (response.getResults().isEmpty()) {
1615 return Response.error(404, "Document not found, docId: " + docId);
1616 }
1617
1618 SolrDocument existingDoc = response.getResults().get(0);
1619 String status = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_STATUS, null);
1620
1621 if (DocumentStatus.IMPORTING.toString().equals(status)
1622 || DocumentStatus.EXPORTING.toString().equals(status)) {
1623
1624 ConflictResponse conflict = new ConflictResponse();
1625 conflict.setError("Processing already in progress");
1626 conflict.setStatus(AthUtil.safeToEnum(status, ConflictResponse.StatusEnum.class, null));
1627 conflict.setDocId(docId);
1628 conflict.setStatusUrl(Log.format("/document/{}/status", docId));
1629
1630 return Response.builder()
1631 .status(Status.CONFLICT)
1632 .header("Location", Log.format("/document/{}/status", docId))
1633 .entity(conflict)
1634 .build();
1635 }
1636
1637
1638 String docFileName = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_DOC_FILE_NAME, null);
1639
1640 URI docGcsUrl = AthUtil
1641 .toURI(SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_DOC_STORAGE_NAME, null));
1642
1643 String docEncoding = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_DOC_FILE_ENCODING,
1644 null);
1645
1646 String filterId = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_FILTER_ID, null);
1647 String filterParams = SolrUtil.safeGetField(existingDoc, Const.ATH_PROP_FILTER_PARAMS, null);
1648
1649
1650 if (Util.isEmpty(filterId)) {
1651 String fileExt = Util.getExtension(docFileName);
1652 filterId = ControllerUtil.getFilterId(fileExt);
1653
1654 if (Util.isEmpty(filterId)) {
1655 return Response.error(400, "Cannot determine filter for file: " + docFileName);
1656 }
1657 }
1658
1659
1660 if (Util.isEmpty(docTrlEncoding)) {
1661 docTrlEncoding = docEncoding;
1662 }
1663
1664
1665 SolrInputDocument doc = SolrUtil.toInputDocument(existingDoc);
1666
1667 doc.setField(Const.ATH_PROP_SRC_LANG, srcLang);
1668 doc.setField(Const.ATH_PROP_TRG_LANG, trgLang);
1669 doc.setField(Const.ATH_PROP_DOC_TRL_STORAGE_NAME, docTrlGcsUrl.toString());
1670 doc.setField(Const.ATH_PROP_DOC_TRL_FILE_ENCODING, docTrlEncoding);
1671
1672 SolrUtil.safeSetField(doc, Const.ATH_PROP_FILTER_ID, filterId);
1673 SolrUtil.safeSetField(doc, Const.ATH_PROP_FILTER_PARAMS, filterParams);
1674 SolrUtil.safeSetField(doc, Const.ATH_PROP_SRC_SRX, srcSrx);
1675 SolrUtil.safeSetField(doc, Const.ATH_PROP_TRG_SRX, trgSrx);
1676
1677 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.IMPORTING.toString());
1678 doc.setField(Const.ATH_PROP_PROCESSED_BY, userId.toString());
1679 doc.setField(Const.ATH_PROP_STARTED_AT, new Date());
1680 doc.setField(Const.ATH_PROP_FINISHED_AT, null);
1681 doc.setField(Const.ATH_PROP_UPDATED_BY, userId.toString());
1682 doc.setField(Const.ATH_PROP_UPDATED_AT, new Date());
1683
1684 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, doc);
1685 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
1686
1687 final String docTrlEncodingRef = docTrlEncoding;
1688 final String filterIdRef = filterId;
1689
1690
1691 EXECUTOR.submit(() -> {
1692 ResponseContext res = Response.success(200);
1693
1694 try {
1695 res = ControllerUtil.alignFile(
1696 doc,
1697 docId,
1698 null,
1699 docFileName,
1700 docGcsUrl,
1701 docEncoding,
1702 docTrlGcsUrl,
1703 docTrlEncodingRef,
1704 srcLang,
1705 trgLang,
1706 filterIdRef,
1707 filterParams,
1708 srcSrx,
1709 trgSrx,
1710 true,
1711 useAlignmentModel != null ? useAlignmentModel : false,
1712 alignmentModelName,
1713 useCodesReinsertionModel != null ? useCodesReinsertionModel : false,
1714 codesReinsertionModelName,
1715 userId);
1716
1717 if (res.getStatus() == 200) {
1718 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.IMPORT_COMPLETED.toString());
1719
1720 } else {
1721 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.FAILED.toString());
1722 doc.setField(Const.ATH_PROP_ERROR_MESSAGE, Response.getMessage(res));
1723 }
1724
1725 } catch (Exception e) {
1726 Log.error(getClass(), e, "Alignment failed");
1727 doc.setField(Const.ATH_PROP_STATUS, DocumentStatus.FAILED.toString());
1728 doc.setField(Const.ATH_PROP_ERROR_MESSAGE, e.getMessage());
1729 res = Response.error(500, e, "Alignment error");
1730
1731 } finally {
1732 doc.setField(Const.ATH_PROP_FINISHED_AT, new Date());
1733
1734 try {
1735 AthIndex.getSolr().getClient().add(Const.SOLR_CORE_ATH_DOCS, doc);
1736 AthIndex.getSolr().getClient().commit(Const.SOLR_CORE_ATH_DOCS);
1737
1738 } catch (Exception e) {
1739 Log.error(getClass(), e, "Failed to update Solr after alignment");
1740 }
1741 }
1742
1743 return res.getStatus() == 200;
1744 });
1745
1746 ProcessingResponse processingResponse = new ProcessingResponse();
1747 processingResponse.setStatus(ProcessingResponse.StatusEnum.IMPORTING);
1748 processingResponse.setDocId(docId);
1749 processingResponse.setStatusUrl(Log.format("/document/{}/status", docId));
1750 processingResponse.setSubmittedAt(new Date());
1751
1752 return Response.builder()
1753 .status(Status.ACCEPTED)
1754 .header("Location", Log.format("/document/{}/status", docId))
1755 .entity(processingResponse)
1756 .build();
1757
1758 } catch (Exception e) {
1759 return Response.error(500, "Error aligning document -- " + e.getMessage());
1760 }
1761 }
1762 }