View Javadoc
1   package com.acumenvelocity.ath.solr.doc;
2   
3   import java.util.Date;
4   import java.util.List;
5   import java.util.UUID;
6   
7   import org.apache.solr.client.solrj.SolrClient;
8   import org.apache.solr.common.SolrInputDocument;
9   
10  import com.acumenvelocity.ath.common.Const;
11  import com.acumenvelocity.ath.common.ConversionUtil;
12  import com.acumenvelocity.ath.common.JacksonUtil;
13  import com.acumenvelocity.ath.common.Log;
14  import com.acumenvelocity.ath.common.SolrUtil;
15  import com.acumenvelocity.ath.model.MtTargetInfo;
16  import com.acumenvelocity.ath.model.Origin;
17  import com.acumenvelocity.ath.model.x.LayeredTextX;
18  import com.acumenvelocity.ath.solr.AthIndex;
19  import com.acumenvelocity.ath.solr.SolrIndexWriterStep;
20  
21  import net.sf.okapi.common.Event;
22  import net.sf.okapi.common.LocaleId;
23  import net.sf.okapi.common.annotation.AltTranslation;
24  import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
25  import net.sf.okapi.common.query.MatchType;
26  import net.sf.okapi.common.resource.ITextUnit;
27  import net.sf.okapi.common.resource.Segment;
28  
29  public class SolrDocWriterStep extends SolrIndexWriterStep {
30  
31    private UUID docId;
32    private String docFileName;
33    private UUID userId;
34    private boolean newDoc;
35    private long position;
36  
37    public SolrDocWriterStep(UUID docId, String docFileName, UUID userId, boolean newDoc) {
38      super(AthIndex.getIndex().getClient(), Const.ATH_SOLR_BATCH_SIZE,
39          Const.SOLR_CORE_ATH_DOC_SEGMENTS, true, false);
40  
41      this.docId = docId;
42      this.docFileName = docFileName;
43      this.userId = userId;
44      this.newDoc = newDoc;
45    }
46  
47    @Override
48    public String getName() {
49      return "Solr Document Writer Step";
50    }
51  
52    @Override
53    public String getDescription() {
54      return "Writes text units to Solr in batches";
55    }
56  
57    @Override
58    protected Event handleStartDocument(Event event) {
59      // Reset the position within the document
60      position = 1;
61      return super.handleStartDocument(event);
62    }
63  
64    @Override
65    protected SolrInputDocument getSolrDocument(SolrClient solrClient, String coreName,
66        String sourceWithCodes) {
67  
68      SolrInputDocument doc = null;
69  
70      if (newDoc) {
71        doc = new SolrInputDocument();
72  
73      } else {
74        try {
75          doc = SolrUtil.getDocumentBySolrId(solrClient, coreName,
76              SolrUtil.buildDocSegSolrId(docId, position));
77  
78        } catch (Exception e) {
79          Log.warn(getClass(), "Error getting a Solr document for '{}' (position: {}) -- {}",
80              sourceWithCodes, position, e.getMessage());
81        }
82  
83        if (doc == null) {
84          doc = new SolrInputDocument();
85        }
86      }
87  
88      return doc;
89    }
90  
91    @Override
92    protected void populateSolrDocument(SolrInputDocument doc, ITextUnit tu, Segment sseg,
93        Segment tseg, LocaleId srcLoc, LocaleId trgLoc) throws Exception {
94  
95      // XXX Keep in sync with BaseMergerStep
96      if (sseg == null) {
97        return;
98      }
99  
100     if (tseg == null && isRequireTarget()) {
101       position++;
102       return;
103     }
104 
105     // Unique id (docId + position) to overwrite an existing doc segment if the position is the same
106     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOLR_ID,
107         SolrUtil.buildDocSegSolrId(docId, position));
108 
109     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_DOC_SEG_ID, UUID.randomUUID().toString());
110 
111     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_DOC_ID, docId.toString());
112     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TU_ID, tu.getId());
113     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_DOC_FILE_NAME, docFileName);
114 
115     doc.setField(Const.ATH_PROP_POSITION, position);
116 
117     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SRC_LANG, srcLoc.toString());
118     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TRG_LANG, trgLoc.toString());
119 
120     LayeredTextX slt = ConversionUtil.toLayeredText(sseg.getContent(), srcLoc);
121     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOURCE, slt.getText());
122     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOURCE_WITH_CODES, slt.getTextWithCodes());
123     SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOURCE_JSON, JacksonUtil.toJson(slt, false));
124 
125     if (tseg != null) {
126       LayeredTextX tlt = ConversionUtil.toLayeredText(tseg.getContent(), trgLoc);
127       SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TARGET, tlt.getText(), false);
128 
129       SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TARGET_WITH_CODES, tlt.getTextWithCodes(),
130           false);
131 
132       SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TARGET_JSON, JacksonUtil.toJson(tlt, false));
133 
134       AltTranslationsAnnotation ata = tseg.getAnnotation(AltTranslationsAnnotation.class);
135 
136       // Write origin
137       if (ata != null) {
138         ata.sort();
139         AltTranslation at = ata.getFirst();
140 
141         if (at.getType() == Const.AL_MATCH_TYPE) {
142           SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ORIGIN, Origin.AL.toString());
143 
144         } else if (at.getType() == MatchType.MT) {
145           SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ORIGIN, Origin.MT.toString());
146 
147         } else if (at.getCombinedScore() > 0) {
148           SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ORIGIN, Origin.TM.toString());
149           doc.setField(Const.ATH_PROP_TM_MATCH_SCORE, at.getCombinedScore());
150         }
151 
152         List<MtTargetInfo> altTrans = ConversionUtil.toMtTargets(ata);
153         
154         SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ALT_TRANS_JSON,
155             JacksonUtil.toJson(altTrans, false));
156         
157         doc.setField(Const.ATH_PROP_ALT_TRANS_INDEX, -1);
158       }
159     }
160 
161     if (newDoc) {
162       SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_CREATED_BY, userId.toString());
163       doc.setField(Const.ATH_PROP_CREATED_AT, new Date());
164 
165     } else {
166       SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_UPDATED_BY, userId.toString());
167       doc.setField(Const.ATH_PROP_UPDATED_AT, new Date());
168     }
169 
170     position++;
171   }
172 
173 }