View Javadoc
1   package com.acumenvelocity.ath.solr.doc;
2   
3   import java.util.Comparator;
4   import java.util.HashMap;
5   import java.util.List;
6   import java.util.Map;
7   
8   import org.apache.solr.client.solrj.response.QueryResponse;
9   import org.apache.solr.common.SolrDocument;
10  import org.apache.solr.common.SolrDocumentList;
11  
12  import com.acumenvelocity.ath.common.Const;
13  import com.acumenvelocity.ath.common.ConversionUtil;
14  import com.acumenvelocity.ath.common.JacksonUtil;
15  import com.acumenvelocity.ath.common.Log;
16  import com.acumenvelocity.ath.common.exception.AthRuntimeException;
17  import com.acumenvelocity.ath.model.x.LayeredTextX;
18  import com.acumenvelocity.ath.solr.AthIndex;
19  import com.acumenvelocity.ath.solr.Solr;
20  import com.acumenvelocity.ath.steps.BaseMergerStep;
21  
22  import net.sf.okapi.common.LocaleId;
23  import net.sf.okapi.common.resource.ITextUnit;
24  import net.sf.okapi.common.resource.Segment;
25  import net.sf.okapi.common.resource.TextContainer;
26  import net.sf.okapi.common.resource.TextFragment;
27  
28  public class SolrDocMergerStep extends BaseMergerStep {
29  
30    private String docId;
31  
32    public SolrDocMergerStep(String docId) {
33      super();
34      this.docId = docId;
35    }
36  
37    @Override
38    public String getName() {
39      return "Solr Document Merger Step";
40    }
41  
42    @Override
43    public String getDescription() {
44      return "Merges Text Unit resources with translated segments from Solr";
45    }
46  
47    @Override
48    protected void processTu(ITextUnit tu) {
49      try {
50        LocaleId targetLocale = super.getTargetLocale();
51  
52        // Check if target exists for this locale, if not create empty target
53        if (!tu.hasTarget(targetLocale)) {
54          // Create empty target without copying source segmentation
55          tu.createTarget(targetLocale, false, ITextUnit.CREATE_EMPTY);
56        }
57  
58        String query = Log.format("docId:\"{}\" AND tuId:\"{}\"",
59            docId,
60            tu.getId());
61  
62        // Set up query parameters
63        Map<String, Object> queryParams = new HashMap<>();
64  
65        queryParams.put(Solr.FIELDS, new String[] {
66            Const.ATH_PROP_TARGET_JSON,
67            Const.ATH_PROP_POSITION // Included for sorting
68        });
69  
70        // Ensure we get all segments for this text unit
71        queryParams.put(Solr.ROWS, Integer.MAX_VALUE);
72  
73        // Query the ath_doc_segments Solr core
74        QueryResponse response = AthIndex.getMany(
75            Const.SOLR_CORE_ATH_DOC_SEGMENTS,
76            query,
77            queryParams,
78            QueryResponse.class);
79  
80        SolrDocumentList docList = response.getResults();
81  
82        if (docList.getNumFound() > 0) {
83          // Sort documents by position field
84          List<SolrDocument> sortedDocs = new java.util.ArrayList<>(docList);
85  
86          sortedDocs.sort(Comparator.comparingLong(doc -> {
87            Object posObj = doc.get(Const.ATH_PROP_POSITION);
88  
89            if (posObj instanceof Long) {
90              return (Long) posObj;
91  
92            } else if (posObj instanceof Integer) {
93              return ((Integer) posObj).longValue();
94            }
95  
96            return 0L;
97          }));
98  
99          // Always create a new empty target
100         TextContainer target = tu.setTarget(getTargetLocale(), new TextContainer());
101 
102         // Process all segments
103         for (int segmentIndex = 0; segmentIndex < sortedDocs.size(); segmentIndex++) {
104           SolrDocument doc = sortedDocs.get(segmentIndex);
105 
106           // Get targetJson field
107           String targetJson = doc._getStr(Const.ATH_PROP_TARGET_JSON, null);
108 
109           if (targetJson != null) {
110             // Deserialize JSON to LayeredText
111             LayeredTextX targetLt = JacksonUtil.fromJson(targetJson, LayeredTextX.class);
112 
113             if (targetLt != null) {
114               // Convert LayeredText to TextFragment
115               TextFragment targetTf = ConversionUtil.toTextFragment(targetLt);
116 
117               // Create a new segment with the loop index as ID
118               Segment targetSegment = new Segment(String.valueOf(segmentIndex), targetTf);
119 
120               // Append the segment to the target container
121               target.getSegments().append(targetSegment);
122 
123               Log.trace(this.getClass(),
124                   "Loaded target segment for docId={}, segmentIndex={}: {}",
125                   docId, segmentIndex, targetTf.getText());
126 
127             } else {
128               Log.warn(this.getClass(),
129                   "Failed to deserialize targetJson for docId={}, segmentIndex={}",
130                   docId, segmentIndex);
131             }
132 
133           } else {
134             Log.warn(this.getClass(),
135                 "No targetJson found for docId={}, segmentIndex={}",
136                 docId, segmentIndex);
137           }
138         }
139 
140       } else {
141         Log.warn(this.getClass(),
142             "No document found in ath_doc for docId={}, tuId={}",
143             docId, tu.getId());
144       }
145 
146     } catch (Exception e) {
147       AthRuntimeException.logAndThrow(this.getClass(), e);
148     }
149   }
150 
151   @Override
152   protected void setTargetSegment(ITextUnit tu, Segment sseg, Segment tseg, long position) {
153     // Not used, the TU is processed the whole
154   }
155 
156 }