1 package com.acumenvelocity.ath.common;
2
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.List;
6 import java.util.Map;
7 import java.util.UUID;
8
9 import org.apache.solr.client.solrj.SolrClient;
10 import org.apache.solr.client.solrj.SolrQuery;
11 import org.apache.solr.client.solrj.response.QueryResponse;
12 import org.apache.solr.common.SolrDocument;
13 import org.apache.solr.common.SolrDocumentList;
14 import org.apache.solr.common.SolrInputDocument;
15
16 import com.acumenvelocity.ath.common.exception.AthException;
17 import com.acumenvelocity.ath.solr.AthIndex;
18 import com.acumenvelocity.ath.solr.Solr;
19
20 import net.sf.okapi.common.Base64Util;
21 import net.sf.okapi.common.Util;
22 import net.sf.okapi.common.resource.ITextUnit;
23
24 public class SolrUtil {
25
26 private static final int MAX_QUERY_LENGTH = Integer.MAX_VALUE;
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 public static SolrInputDocument toInputDocument(SolrDocument solrDoc) {
66 SolrInputDocument inputDoc = new SolrInputDocument();
67
68 for (String fieldName : solrDoc.getFieldNames()) {
69
70 if (fieldName.startsWith("_")) {
71 continue;
72 }
73
74
75 inputDoc.setField(fieldName, solrDoc.getFieldValue(fieldName));
76 }
77
78 return inputDoc;
79 }
80
81
82
83
84
85
86
87
88
89
90 public static SolrInputDocument getDocumentBySolrId(SolrClient solrClient, String coreName,
91 String id) throws Exception {
92
93 SolrQuery query = new SolrQuery("id:\"" + id + "\"");
94 query.setRows(1);
95
96 QueryResponse response = solrClient.query(coreName, query);
97 SolrDocumentList docs = response.getResults();
98
99 if (docs == null || docs.isEmpty()) {
100 return null;
101 }
102
103 SolrDocument solrDoc = docs.get(0);
104 SolrInputDocument inputDoc = new SolrInputDocument();
105
106 for (String fieldName : solrDoc.getFieldNames()) {
107
108 if (fieldName.startsWith("_")) {
109 continue;
110 }
111
112 inputDoc.setField(fieldName, solrDoc.getFieldValue(fieldName));
113 }
114
115 return inputDoc;
116 }
117
118 public static SolrDocument getDocumentByDocId(UUID docId) {
119 String query = Log.format("docId:\"{}\"", docId);
120
121 try {
122 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
123 QueryResponse.class);
124
125 if (response.getResults().isEmpty()) {
126 return null;
127 }
128
129 return response.getResults().get(0);
130
131 } catch (Exception e) {
132
133 }
134
135 return null;
136 }
137
138 public static SolrDocument getDocumentSegment(UUID docId, UUID docSegId) {
139 String query = Log.format("docId:\"{}\" AND docSegId:\"{}\"", docId, docSegId);
140
141 try {
142 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query, null,
143 QueryResponse.class);
144
145 SolrDocumentList docs = response.getResults();
146
147 if (docs == null || docs.isEmpty()) {
148 return null;
149 }
150
151 return docs.get(0);
152
153 } catch (Exception e) {
154 Log.warn(SolrUtil.class, "Error finding a document segment: " + e.getMessage());
155 }
156
157 return null;
158 }
159
160 public static SolrDocument getTmByTmId(UUID tmId) {
161 String query = Log.format("tmId:\"{}\"", tmId);
162
163 try {
164 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_TMS, query, null,
165 QueryResponse.class);
166
167 if (response.getResults().isEmpty()) {
168 return null;
169 }
170
171 return response.getResults().get(0);
172
173 } catch (Exception e) {
174
175 }
176
177 return null;
178 }
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258 public static void moveDocSegmentsBelow(UUID docId, Long position) {
259 int maxRetries = 3;
260 int retryCount = 0;
261
262 while (retryCount < maxRetries) {
263 try {
264
265 if (!acquireLock(docId)) {
266
267 Thread.sleep(100 * (retryCount + 1));
268 retryCount++;
269 continue;
270 }
271
272 try {
273
274 performRepositioning(docId, position);
275
276 } finally {
277
278 releaseLock(docId);
279 }
280
281
282 break;
283
284 } catch (Exception e) {
285
286 break;
287 }
288 }
289 }
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308 private static boolean acquireLock(UUID docId) throws Exception {
309 long currentTime = System.currentTimeMillis();
310 long expiredBefore = currentTime - Const.SOLR_DOC_LOCK_TIMEOUT_MS;
311
312
313 String query = Log.format("id:\"{}\"", docId);
314 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
315 QueryResponse.class);
316
317 if (response.getResults().isEmpty()) {
318 return false;
319 }
320
321 SolrDocument doc = response.getResults().get(0);
322 Long lockTimestamp = safeGetLongField(doc, "lockTimestamp", null);
323 Long version = (Long) doc.getFieldValue("_version_");
324
325
326 if (lockTimestamp != null && lockTimestamp > expiredBefore) {
327 return false;
328 }
329
330
331 Map<String, Object> updateDoc = new HashMap<>();
332 updateDoc.put("id", docId.toString());
333 updateDoc.put("lockTimestamp", Map.of("set", currentTime));
334 updateDoc.put("_version_", version);
335
336 try {
337 AthIndex.createOne(Const.SOLR_CORE_ATH_DOCS, updateDoc);
338 return true;
339 } catch (Exception e) {
340
341 return false;
342 }
343 }
344
345
346
347
348
349
350
351
352
353
354
355 private static void releaseLock(UUID docId) throws Exception {
356 Map<String, Object> updateDoc = new HashMap<>();
357 updateDoc.put("id", docId.toString());
358 updateDoc.put("lockTimestamp", Map.of("set", null));
359
360 AthIndex.createOne(Const.SOLR_CORE_ATH_DOCS, updateDoc);
361 }
362
363
364
365
366
367
368
369
370
371
372
373
374 private static void performRepositioning(UUID docId, Long position) throws Exception {
375
376 String query = Log.format("docId:\"{}\" AND position:[{} TO *]", docId, position);
377
378 QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, query, null,
379 QueryResponse.class);
380
381 if (response.getResults().isEmpty()) {
382 return;
383 }
384
385
386 List<Map<String, Object>> updatedDocs = new ArrayList<>();
387
388
389 boolean hasDocsMovingToPositionOne = false;
390
391
392 for (SolrDocument solrDoc : response.getResults()) {
393
394 Map<String, Object> doc = new HashMap<>();
395
396
397 for (String fieldName : solrDoc.getFieldNames()) {
398 if (!"_version_".equals(fieldName)) {
399 doc.put(fieldName, solrDoc.getFieldValue(fieldName));
400 }
401 }
402
403
404 Long currentPosition = safeGetLongField(solrDoc, "position", 0L);
405 Long newPosition = currentPosition + 1;
406
407 if (newPosition == 1L) {
408 hasDocsMovingToPositionOne = true;
409 }
410
411
412 doc.put("position", newPosition);
413
414
415 String newId = SolrUtil.buildDocSegSolrId(docId, newPosition);
416 doc.put(Const.ATH_PROP_SOLR_ID, newId);
417
418 updatedDocs.add(doc);
419 }
420
421
422
423 if (hasDocsMovingToPositionOne) {
424
425 String shiftQuery = Log.format("docId:\"{}\" AND position:[1 TO *]", docId);
426 QueryResponse shiftResponse = AthIndex.getMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, shiftQuery,
427 null,
428 QueryResponse.class);
429
430 for (SolrDocument solrDoc : shiftResponse.getResults()) {
431 Long existingPosition = safeGetLongField(solrDoc, "position", 0L);
432
433
434 if (existingPosition >= position) {
435 continue;
436 }
437
438
439 Map<String, Object> doc = new HashMap<>();
440
441
442 for (String fieldName : solrDoc.getFieldNames()) {
443 if (!"_version_".equals(fieldName)) {
444 doc.put(fieldName, solrDoc.getFieldValue(fieldName));
445 }
446 }
447
448
449 Long newPosition = existingPosition + 1;
450 doc.put("position", newPosition);
451
452
453 String newId = SolrUtil.buildDocSegSolrId(docId, newPosition);
454 doc.put(Const.ATH_PROP_SOLR_ID, newId);
455
456 updatedDocs.add(doc);
457 }
458 }
459
460
461 if (!updatedDocs.isEmpty()) {
462 AthIndex.createMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, updatedDocs);
463 }
464 }
465
466 public static boolean checkTmFuzzyScore(int score) {
467 return score >= 0 && score <= 101;
468 }
469
470
471
472
473
474
475 public static String escapeQueryCharsNoWs(String s) {
476 StringBuilder sb = new StringBuilder();
477 for (int i = 0; i < s.length(); i++) {
478 char c = s.charAt(i);
479
480 if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')'
481 || c == ':' || c == '^'
482 || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}'
483 || c == '~' || c == '*' || c == '?'
484 || c == '|' || c == '&' || c == ';' || c == '/') {
485 sb.append('\\');
486 }
487 sb.append(c);
488 }
489 return sb.toString();
490 }
491
492 public static String normalizeQuery(String query) {
493 return normalizeQuery(query, MAX_QUERY_LENGTH);
494 }
495
496 public static String normalizeQuery(String query, int maxLen) {
497 if (Util.isEmpty(query)) {
498 return query;
499 }
500
501
502 String st = escapeQueryCharsNoWs(query);
503
504 if (maxLen < 1) {
505 maxLen = 1;
506 }
507
508 if (st.length() > maxLen) {
509 st = st.substring(0, st.charAt(maxLen - 1) == '\\' ? maxLen - 1 : maxLen);
510 Log.warn(Solr.class,
511 "Query string is longer than {} chars, truncated to '{}'", maxLen,
512 st);
513 }
514
515 return st;
516 }
517
518 public static long getNumDocuments() {
519 return getNumDocuments(Const.SOLR_CORE_ATH_TM_SEGMENTS);
520 }
521
522 public static long getNumDocuments(String coreName) {
523 return getNumDocuments(coreName, "*:*");
524 }
525
526 public static long getNumDocuments(String coreName, String query) {
527 try {
528 QueryResponse response = AthIndex.getMany(coreName, query, null,
529 QueryResponse.class);
530
531 SolrDocumentList docList = response.getResults();
532 return docList.getNumFound();
533
534 } catch (Exception e) {
535
536 return -1;
537 }
538 }
539
540 public static void safeAddField(ITextUnit tu, SolrInputDocument doc, String name, String value)
541 throws AthException {
542
543 safeAddField(tu, doc, name, value, true);
544 }
545
546 public static void safeAddField(SolrInputDocument doc, String name, String value) {
547 if (!Util.isEmpty(value)) {
548 doc.addField(name, value);
549 }
550 }
551
552 public static void safeAddField(SolrInputDocument doc, String name, UUID value) {
553 if (value != null) {
554 doc.addField(name, value.toString());
555 }
556 }
557
558 public static void safeAddField(ITextUnit tu, SolrInputDocument doc, String name, String value,
559 boolean strictValueCheck) throws AthException {
560
561 if (doc == null) {
562 AthException.logAndThrow(Solr.class, "Doc is null for TU:\n{}", tu.getId());
563 }
564
565 if (Util.isEmpty(name)) {
566 AthException.logAndThrow(Solr.class, "Null or empty name for TU:\n{}", tu.getId());
567 }
568
569 if (Util.isEmpty(value) || Util.isEmpty(value.trim())) {
570 if (strictValueCheck) {
571 AthException.logAndThrow(Solr.class, "Null or empty value of the '{}' field for TU:\n{}",
572 name, tu.getId());
573
574 } else {
575
576 return;
577 }
578 }
579
580 doc.addField(name, value);
581 }
582
583 public static void safeSetField(ITextUnit tu, SolrInputDocument doc, String name, String value)
584 throws AthException {
585
586 safeSetField(tu, doc, name, value, true);
587 }
588
589 public static void safeSetField(ITextUnit tu, SolrInputDocument doc, String name, String value,
590 boolean strictValueCheck) throws AthException {
591
592 if (doc == null) {
593 AthException.logAndThrow(Solr.class, "Doc is null for TU:\n{}", tu.getId());
594 }
595
596 if (Util.isEmpty(name)) {
597 AthException.logAndThrow(Solr.class, "Null or empty name for TU:\n{}", tu.getId());
598 }
599
600 if (Util.isEmpty(value) || Util.isEmpty(value.trim())) {
601 if (strictValueCheck) {
602 AthException.logAndThrow(Solr.class, "Null or empty value of the '{}' field for TU:\n{}",
603 name, tu.getId());
604
605 } else {
606
607 return;
608 }
609 }
610
611 doc.setField(name, value);
612 }
613
614 public static void safeSetField(SolrInputDocument doc, String name, String value) {
615 if (!Util.isEmpty(value)) {
616 doc.setField(name, value);
617 }
618 }
619
620 public static void safeSetField(SolrInputDocument doc, String name, UUID value) {
621 if (value != null) {
622 doc.setField(name, value);
623 }
624 }
625
626
627
628
629
630 public static String safeGetField(SolrDocument doc, String fieldName, String defVal) {
631 Object val = doc == null ? null : doc.getFieldValue(fieldName);
632 return val != null ? val.toString() : defVal;
633 }
634
635 public static Long safeGetLongField(SolrDocument doc, String fieldName, Long defVal) {
636 Object val = doc == null ? null : doc.getFieldValue(fieldName);
637
638 if (val == null) {
639 return defVal;
640 }
641
642 try {
643 if (val instanceof Long) {
644 return (Long) val;
645
646 } else if (val instanceof Number) {
647 return ((Number) val).longValue();
648
649 } else {
650
651 return Long.parseLong(val.toString());
652 }
653
654 } catch (NumberFormatException e) {
655 return defVal;
656 }
657 }
658
659 public static Integer safeGetIntField(SolrDocument doc, String fieldName, Integer defVal) {
660 Object val = doc == null ? null : doc.getFieldValue(fieldName);
661
662 if (val == null) {
663 return defVal;
664 }
665
666 try {
667 if (val instanceof Integer) {
668 return (Integer) val;
669
670 } else if (val instanceof Number) {
671 return ((Number) val).intValue();
672
673 } else {
674
675 return Integer.parseInt(val.toString());
676 }
677
678 } catch (NumberFormatException e) {
679 return defVal;
680 }
681 }
682
683 public static String buildDocSegSolrId(UUID docId, Long position) {
684 return Base64Util.encodeString(Log.format("{}-{}", docId.toString(), position));
685 }
686
687 public static String buildTmSegSolrId(UUID tmId, String sourceWithCodes) {
688 return Base64Util.encodeString(Log.format("{}-{}", tmId.toString(), sourceWithCodes));
689 }
690 }