1 package com.acumenvelocity.ath.solr.doc;
2
3 import java.util.Date;
4 import java.util.List;
5 import java.util.UUID;
6
7 import org.apache.solr.client.solrj.SolrClient;
8 import org.apache.solr.common.SolrInputDocument;
9
10 import com.acumenvelocity.ath.common.Const;
11 import com.acumenvelocity.ath.common.ConversionUtil;
12 import com.acumenvelocity.ath.common.JacksonUtil;
13 import com.acumenvelocity.ath.common.Log;
14 import com.acumenvelocity.ath.common.SolrUtil;
15 import com.acumenvelocity.ath.model.MtTargetInfo;
16 import com.acumenvelocity.ath.model.Origin;
17 import com.acumenvelocity.ath.model.x.LayeredTextX;
18 import com.acumenvelocity.ath.solr.AthIndex;
19 import com.acumenvelocity.ath.solr.SolrIndexWriterStep;
20
21 import net.sf.okapi.common.Event;
22 import net.sf.okapi.common.LocaleId;
23 import net.sf.okapi.common.annotation.AltTranslation;
24 import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
25 import net.sf.okapi.common.query.MatchType;
26 import net.sf.okapi.common.resource.ITextUnit;
27 import net.sf.okapi.common.resource.Segment;
28
29 public class SolrDocWriterStep extends SolrIndexWriterStep {
30
31 private UUID docId;
32 private String docFileName;
33 private UUID userId;
34 private boolean newDoc;
35 private long position;
36
37 public SolrDocWriterStep(UUID docId, String docFileName, UUID userId, boolean newDoc) {
38 super(AthIndex.getIndex().getClient(), Const.ATH_SOLR_BATCH_SIZE,
39 Const.SOLR_CORE_ATH_DOC_SEGMENTS, true, false);
40
41 this.docId = docId;
42 this.docFileName = docFileName;
43 this.userId = userId;
44 this.newDoc = newDoc;
45 }
46
47 @Override
48 public String getName() {
49 return "Solr Document Writer Step";
50 }
51
52 @Override
53 public String getDescription() {
54 return "Writes text units to Solr in batches";
55 }
56
57 @Override
58 protected Event handleStartDocument(Event event) {
59
60 position = 1;
61 return super.handleStartDocument(event);
62 }
63
64 @Override
65 protected SolrInputDocument getSolrDocument(SolrClient solrClient, String coreName,
66 String sourceWithCodes) {
67
68 SolrInputDocument doc = null;
69
70 if (newDoc) {
71 doc = new SolrInputDocument();
72
73 } else {
74 try {
75 doc = SolrUtil.getDocumentBySolrId(solrClient, coreName,
76 SolrUtil.buildDocSegSolrId(docId, position));
77
78 } catch (Exception e) {
79 Log.warn(getClass(), "Error getting a Solr document for '{}' (position: {}) -- {}",
80 sourceWithCodes, position, e.getMessage());
81 }
82
83 if (doc == null) {
84 doc = new SolrInputDocument();
85 }
86 }
87
88 return doc;
89 }
90
91 @Override
92 protected void populateSolrDocument(SolrInputDocument doc, ITextUnit tu, Segment sseg,
93 Segment tseg, LocaleId srcLoc, LocaleId trgLoc) throws Exception {
94
95
96 if (sseg == null) {
97 return;
98 }
99
100 if (tseg == null && isRequireTarget()) {
101 position++;
102 return;
103 }
104
105
106 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOLR_ID,
107 SolrUtil.buildDocSegSolrId(docId, position));
108
109 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_DOC_SEG_ID, UUID.randomUUID().toString());
110
111 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_DOC_ID, docId.toString());
112 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TU_ID, tu.getId());
113 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_DOC_FILE_NAME, docFileName);
114
115 doc.setField(Const.ATH_PROP_POSITION, position);
116
117 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SRC_LANG, srcLoc.toString());
118 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TRG_LANG, trgLoc.toString());
119
120 LayeredTextX slt = ConversionUtil.toLayeredText(sseg.getContent(), srcLoc);
121 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOURCE, slt.getText());
122 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOURCE_WITH_CODES, slt.getTextWithCodes());
123 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_SOURCE_JSON, JacksonUtil.toJson(slt, false));
124
125 if (tseg != null) {
126 LayeredTextX tlt = ConversionUtil.toLayeredText(tseg.getContent(), trgLoc);
127 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TARGET, tlt.getText(), false);
128
129 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TARGET_WITH_CODES, tlt.getTextWithCodes(),
130 false);
131
132 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_TARGET_JSON, JacksonUtil.toJson(tlt, false));
133
134 AltTranslationsAnnotation ata = tseg.getAnnotation(AltTranslationsAnnotation.class);
135
136
137 if (ata != null) {
138 ata.sort();
139 AltTranslation at = ata.getFirst();
140
141 if (at.getType() == Const.AL_MATCH_TYPE) {
142 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ORIGIN, Origin.AL.toString());
143
144 } else if (at.getType() == MatchType.MT) {
145 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ORIGIN, Origin.MT.toString());
146
147 } else if (at.getCombinedScore() > 0) {
148 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ORIGIN, Origin.TM.toString());
149 doc.setField(Const.ATH_PROP_TM_MATCH_SCORE, at.getCombinedScore());
150 }
151
152 List<MtTargetInfo> altTrans = ConversionUtil.toMtTargets(ata);
153
154 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_ALT_TRANS_JSON,
155 JacksonUtil.toJson(altTrans, false));
156
157 doc.setField(Const.ATH_PROP_ALT_TRANS_INDEX, -1);
158 }
159 }
160
161 if (newDoc) {
162 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_CREATED_BY, userId.toString());
163 doc.setField(Const.ATH_PROP_CREATED_AT, new Date());
164
165 } else {
166 SolrUtil.safeSetField(tu, doc, Const.ATH_PROP_UPDATED_BY, userId.toString());
167 doc.setField(Const.ATH_PROP_UPDATED_AT, new Date());
168 }
169
170 position++;
171 }
172
173 }