// SolrUtil

package com.acumenvelocity.ath.common;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;

import com.acumenvelocity.ath.common.exception.AthException;
import com.acumenvelocity.ath.solr.AthIndex;
import com.acumenvelocity.ath.solr.Solr;

import net.sf.okapi.common.Base64Util;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.resource.ITextUnit;

public class SolrUtil {

  /**
   * Length cap that {@link #normalizeQuery(String)} passes to
   * {@link #normalizeQuery(String, int)}. Since it equals {@link Integer#MAX_VALUE}, the
   * one-argument overload never truncates in practice.
   */
  private static final int MAX_QUERY_LENGTH = Integer.MAX_VALUE;

  /**
   * Builds a {@link SolrInputDocument} from a {@link SolrDocument}, so that a document read from
   * Solr can be modified and written back.
   *
   * <p>
   * Every field whose name does not begin with an underscore is copied. Underscore-prefixed
   * fields (for example {@code _version_}, {@code _root_} or {@code _nest_path_}) are left out, so
   * they are not sent back with the copy.
   *
   * <p>
   * Values are copied with {@code setField()}, so each field name ends up in the result exactly
   * once, carrying whatever {@code getFieldValue()} returned for it.
   *
   * <p>
   * <strong>Example Usage:</strong>
   *
   * <pre>
   * SolrDocument doc = // ... retrieved from a Solr query
   * SolrInputDocument inputDoc = SolrUtil.toInputDocument(doc);
   * inputDoc.setField("status", "updated");
   * </pre>
   *
   * @param solrDoc the document to copy; must not be null
   * @return a new input document holding all fields of {@code solrDoc} except the
   *         underscore-prefixed ones
   * @throws NullPointerException if {@code solrDoc} is null
   */
  public static SolrInputDocument toInputDocument(SolrDocument solrDoc) {
    final SolrInputDocument copy = new SolrInputDocument();

    solrDoc.getFieldNames().stream()
        // Underscore-prefixed names are Solr-internal and must not be copied
        .filter(name -> !name.startsWith("_"))
        // setField (not addField) keeps a single entry per field name
        .forEach(name -> copy.setField(name, solrDoc.getFieldValue(name)));

    return copy;
  }

  /**
   * Fetches a single document by its Solr {@code id} field and converts it to a
   * {@link SolrInputDocument} with {@link #toInputDocument(SolrDocument)}.
   *
   * <p>
   * The id is placed inside a quoted phrase; backslashes and double quotes in it are escaped so
   * that an unusual id cannot terminate the phrase early and alter the query.
   *
   * @param solrClient The SolrClient instance.
   * @param coreName   The Solr core or collection name.
   * @param id         The unique document ID; a null id is reported as "not found".
   * @return The SolrInputDocument, or null if not found.
   * @throws Exception if Solr query fails.
   */
  public static SolrInputDocument getDocumentBySolrId(SolrClient solrClient, String coreName,
      String id) throws Exception {

    if (id == null) {
      // A null id cannot identify a document, so skip the round trip to Solr
      return null;
    }

    // Escape the two characters that are significant inside a quoted phrase
    String quotedId = id.replace("\\", "\\\\").replace("\"", "\\\"");

    SolrQuery query = new SolrQuery("id:\"" + quotedId + "\"");
    query.setRows(1);

    QueryResponse response = solrClient.query(coreName, query);
    SolrDocumentList docs = response.getResults();

    if (docs == null || docs.isEmpty()) {
      return null;
    }

    // Reuse the shared conversion instead of duplicating its field-copy loop here
    return toInputDocument(docs.get(0));
  }

  /**
   * Looks up a document in the {@code Const.SOLR_CORE_ATH_DOCS} core by its {@code docId} field.
   *
   * <p>
   * Lookup failures are not propagated to the caller: they are logged as a warning and reported
   * as "not found".
   *
   * @param docId the document id to search for
   * @return the first matching document, or {@code null} if nothing matched or the lookup failed
   */
  public static SolrDocument getDocumentByDocId(UUID docId) {
    String query = Log.format("docId:\"{}\"", docId);

    try {
      QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, query, null,
          QueryResponse.class);

      SolrDocumentList docs = response.getResults();

      if (docs == null || docs.isEmpty()) {
        return null;
      }

      return docs.get(0);

    } catch (Exception e) {
      // Best-effort lookup: leave a trace of the failure instead of hiding it completely
      Log.warn(SolrUtil.class, "Error finding a document: " + e.getMessage());
    }

    return null;
  }

  /**
   * Looks up a document segment in the {@code Const.SOLR_CORE_ATH_DOC_SEGMENTS} core by the
   * combination of its {@code docId} and {@code docSegId} fields.
   *
   * <p>
   * A failing lookup is logged as a warning and reported as "not found".
   *
   * @param docId    the id of the document the segment belongs to
   * @param docSegId the id of the segment
   * @return the first matching segment, or {@code null} if nothing matched or the lookup failed
   */
  public static SolrDocument getDocumentSegment(UUID docId, UUID docSegId) {
    final String criteria = Log.format("docId:\"{}\" AND docSegId:\"{}\"", docId, docSegId);
    SolrDocument found = null;

    try {
      QueryResponse answer = AthIndex.getMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, criteria, null,
          QueryResponse.class);

      SolrDocumentList hits = answer.getResults();

      if (hits != null && !hits.isEmpty()) {
        found = hits.get(0);
      }

    } catch (Exception e) {
      Log.warn(SolrUtil.class, "Error finding a document segment: " + e.getMessage());
    }

    return found;
  }

  /**
   * Looks up a TM in the {@code Const.SOLR_CORE_ATH_TMS} core by its {@code tmId} field.
   *
   * <p>
   * Lookup failures are not propagated to the caller: they are logged as a warning and reported
   * as "not found".
   *
   * @param tmId the TM id to search for
   * @return the first matching document, or {@code null} if nothing matched or the lookup failed
   */
  public static SolrDocument getTmByTmId(UUID tmId) {
    String query = Log.format("tmId:\"{}\"", tmId);

    try {
      QueryResponse response = AthIndex.getMany(Const.SOLR_CORE_ATH_TMS, query, null,
          QueryResponse.class);

      SolrDocumentList docs = response.getResults();

      if (docs == null || docs.isEmpty()) {
        return null;
      }

      return docs.get(0);

    } catch (Exception e) {
      // Best-effort lookup: leave a trace of the failure instead of hiding it completely
      Log.warn(SolrUtil.class, "Error finding a TM: " + e.getMessage());
    }

    return null;
  }

  /**
   * Shifts the segments of a document that sit at or after {@code position} one position further
   * down, making room for a new segment at {@code position}.
   *
   * <p>
   * The work is done in three steps:
   * <ul>
   * <li>{@code acquireLock(docId)} is called to take the lock on the parent document</li>
   * <li>{@code performRepositioning(docId, position)} increments the position of the affected
   * segments, recomputes their Solr IDs with {@link SolrUtil#buildDocSegSolrId(UUID, Long)} and
   * writes them back in one batch</li>
   * <li>{@code releaseLock(docId)} is called in a {@code finally} block, whether or not the
   * repositioning succeeded</li>
   * </ul>
   *
   * <p>
   * <strong>Example Usage:</strong>
   *
   * <pre>
   * // Inserting a new segment at position 3 - all segments at positions 3, 4, 5, etc.
   * // will be moved to positions 4, 5, 6, etc.
   * moveDocSegmentsBelow(docId, 3L);
   * </pre>
   *
   * <p>
   * <strong>Locking and retries:</strong> Up to 3 attempts are made to acquire the lock. After a
   * failed attempt that is not the last one, the method sleeps for 100 ms times the attempt number
   * (100 ms, then 200 ms) before trying again, i.e. the backoff grows linearly.
   *
   * <p>
   * <strong>Error Handling:</strong> No exception is thrown to the caller. Failures of the
   * repositioning or of the lock release, as well as running out of attempts, are logged as
   * warnings. If the thread is interrupted while waiting, its interrupt flag is restored and the
   * method returns without further attempts.
   *
   * @param docId    the UUID of the document whose segments should be shifted; must not be null
   * @param position the position threshold; all segments at this position or higher will be
   *                 shifted down (position incremented by 1); must not be null
   *
   * @see SolrUtil#buildDocSegSolrId(UUID, Long)
   * @see AthIndex#getMany(String, String, Map, Class)
   * @see AthIndex#createMany(String, List)
   */
  public static void moveDocSegmentsBelow(UUID docId, Long position) {
    final int maxAttempts = 3;

    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        if (acquireLock(docId)) {
          try {
            // Perform the actual repositioning work while holding the lock
            performRepositioning(docId, position);

          } finally {
            try {
              releaseLock(docId);

            } catch (Exception releaseError) {
              // A failed release must not mask an error raised by the repositioning itself
              Log.warn(SolrUtil.class, "Error releasing the lock on document {}: {}", docId,
                  releaseError.getMessage());
            }
          }

          return;
        }

        // Lock not obtained: wait a little longer before each further attempt, but do not
        // sleep when no attempt is left
        if (attempt < maxAttempts) {
          Thread.sleep(100L * attempt);
        }

      } catch (InterruptedException e) {
        // Preserve the interruption for the code further up the call stack
        Thread.currentThread().interrupt();
        Log.warn(SolrUtil.class, "Interrupted while waiting for the lock on document {}", docId);
        return;

      } catch (Exception e) {
        // Callers rely on this method not throwing, so report the failure and give up
        Log.warn(SolrUtil.class, "Error moving segments of document {}: {}", docId,
            e.getMessage());
        return;
      }
    }

    Log.warn(SolrUtil.class,
        "Could not lock document {} after {} attempts, its segments were not moved", docId,
        maxAttempts);
  }

  /**
   * Tries to take the lock on a document by writing the current time into its
   * {@code lockTimestamp} field.
   *
   * <p>
   * The document is first read from the {@code Const.SOLR_CORE_ATH_DOCS} core. The lock counts as
   * taken when {@code lockTimestamp} is set and is not older than
   * {@code Const.SOLR_DOC_LOCK_TIMEOUT_MS} milliseconds. Otherwise an update is sent that sets
   * {@code lockTimestamp} to the current time and carries the {@code _version_} value that was
   * just read.
   *
   * <p>
   * NOTE(review): the update presumably relies on Solr rejecting it when {@code _version_} no
   * longer matches (optimistic concurrency) - confirm. Any exception from the update, whatever
   * its cause, is treated as "lock not acquired".
   *
   * @param docId the UUID of the document to lock; must not be null
   * @return true if the update was accepted, false if the document was not found, the lock is
   *         currently held, or the update failed
   * @throws Exception if reading the document fails
   */
  private static boolean acquireLock(UUID docId) throws Exception {
    final long now = System.currentTimeMillis();
    final long staleBefore = now - Const.SOLR_DOC_LOCK_TIMEOUT_MS;

    // NOTE(review): the lookup is by the Solr "id" field, assumed here to equal the docId - verify
    final String lookupQuery = Log.format("id:\"{}\"", docId);
    QueryResponse lookup = AthIndex.getMany(Const.SOLR_CORE_ATH_DOCS, lookupQuery, null,
        QueryResponse.class);

    if (lookup.getResults().isEmpty()) {
      // Nothing to lock
      return false;
    }

    SolrDocument parent = lookup.getResults().get(0);
    Long heldSince = safeGetLongField(parent, "lockTimestamp", null);
    Long seenVersion = (Long) parent.getFieldValue("_version_");

    boolean lockStillValid = heldSince != null && heldSince > staleBefore;

    if (lockStillValid) {
      // Somebody else holds a lock that has not timed out yet
      return false;
    }

    // Atomic "set" of the lock timestamp, guarded by the version read above
    Map<String, Object> lockUpdate = new HashMap<>();
    lockUpdate.put("id", docId.toString());
    lockUpdate.put("lockTimestamp", Map.of("set", now));
    lockUpdate.put("_version_", seenVersion);

    boolean acquired;

    try {
      AthIndex.createOne(Const.SOLR_CORE_ATH_DOCS, lockUpdate);
      acquired = true;

    } catch (Exception e) {
      // Treated as a lost race for the lock
      acquired = false;
    }

    return acquired;
  }

  /**
   * Releases the lock on a document by sending an update that sets its {@code lockTimestamp}
   * field to null in the {@code Const.SOLR_CORE_ATH_DOCS} core.
   *
   * <p>
   * This method should always be called in a finally block to ensure locks are released
   * even if an exception occurs during processing.
   *
   * @param docId the UUID of the document to unlock; must not be null
   * @throws Exception if the Solr operation fails
   */
  private static void releaseLock(UUID docId) throws Exception {
    // Map.of() rejects null values with a NullPointerException, so the "set to null" operation
    // has to be built with a map implementation that permits null values
    Map<String, Object> clearLock = new HashMap<>();
    clearLock.put("set", null);

    Map<String, Object> updateDoc = new HashMap<>();
    updateDoc.put("id", docId.toString());
    updateDoc.put("lockTimestamp", clearLock); // Clear the lock

    AthIndex.createOne(Const.SOLR_CORE_ATH_DOCS, updateDoc);
  }

  /**
   * Increments by one the position of the segments of a document that sit at or after
   * {@code position}, and writes the changed segments back in a single batch.
   *
   * <p>
   * Each affected segment is copied without its {@code _version_} field, given its new position
   * and a Solr ID recomputed from that position with
   * {@link SolrUtil#buildDocSegSolrId(UUID, Long)}.
   *
   * <p>
   * A segment whose position cannot be read is treated as being at position 0 and therefore
   * moves to position 1. When that happens, a second query fetches the segments at position 1 or
   * higher; those below {@code position} are shifted as well, those at or above it are skipped
   * because the first query already covered them.
   *
   * <p>
   * The caller is expected to hold the document lock while this method runs.
   *
   * @param docId    the UUID of the document whose segments should be shifted
   * @param position the position threshold for shifting segments
   * @throws Exception if any Solr operation fails
   */
  private static void performRepositioning(UUID docId, Long position) throws Exception {
    // NOTE(review): getMany is called without extra parameters - verify that it returns every
    // matching segment rather than only a default-sized page
    final String tailQuery = Log.format("docId:\"{}\" AND position:[{} TO *]", docId, position);

    QueryResponse tail = AthIndex.getMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, tailQuery, null,
        QueryResponse.class);

    if (tail.getResults().isEmpty()) {
      return;
    }

    List<Map<String, Object>> shifted = new ArrayList<>();
    boolean anyLandsOnOne = false;

    for (SolrDocument segment : tail.getResults()) {
      // Segments with no readable position count as 0 and are inserted at the head
      Long target = safeGetLongField(segment, "position", 0L) + 1;

      if (target == 1L) {
        anyLandsOnOne = true;
      }

      shifted.add(copyAtPosition(segment, docId, target));
    }

    if (anyLandsOnOne) {
      // Position 1 is about to be occupied, so the segments already at 1 and above that the
      // first query did not return have to move as well
      final String headQuery = Log.format("docId:\"{}\" AND position:[1 TO *]", docId);

      QueryResponse head = AthIndex.getMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, headQuery, null,
          QueryResponse.class);

      for (SolrDocument segment : head.getResults()) {
        Long current = safeGetLongField(segment, "position", 0L);

        if (current < position) {
          shifted.add(copyAtPosition(segment, docId, current + 1));
        }
        // Otherwise the segment was already handled by the first query
      }
    }

    if (!shifted.isEmpty()) {
      AthIndex.createMany(Const.SOLR_CORE_ATH_DOC_SEGMENTS, shifted);
    }
  }

  /**
   * Copies all fields of a segment except {@code _version_} into a map, then overrides the
   * position and the Solr ID so that they reflect {@code newPosition}.
   */
  private static Map<String, Object> copyAtPosition(SolrDocument source, UUID docId,
      Long newPosition) {

    Map<String, Object> copy = new HashMap<>();

    for (String fieldName : source.getFieldNames()) {
      if ("_version_".equals(fieldName)) {
        continue;
      }

      copy.put(fieldName, source.getFieldValue(fieldName));
    }

    copy.put("position", newPosition);
    copy.put(Const.ATH_PROP_SOLR_ID, SolrUtil.buildDocSegSolrId(docId, newPosition));

    return copy;
  }

  /**
   * Tells whether a TM fuzzy score lies in the accepted range of 0 to 101, both ends included.
   *
   * @param score the score to check
   * @return {@code true} if {@code 0 <= score <= 101}, {@code false} otherwise
   */
  public static boolean checkTmFuzzyScore(int score) {
    if (score < 0) {
      return false;
    }

    return score <= 101;
  }

  /**
   * Escapes the characters that are part of the Solr query syntax by putting a backslash in front
   * of each of them. Whitespace is deliberately left untouched (see the linked discussion).
   *
   * <p>
   * The escaped characters are: {@code \ + - ! ( ) : ^ [ ] " { } ~ * ? | & ; /}
   *
   * @param s the text to escape; must not be null
   * @return {@code s} with every query-syntax character preceded by a backslash
   * @see org.apache.solr.client.solrj.util.ClientUtils
   * @see <a href=
   *      "https://stackoverflow.com/questions/44708872/why-does-solr-clientutilsescapequerychars-escape-spaces">
   *      Why does Solr ClientUtils.escapeQueryChars escape spaces</a>
   */
  public static String escapeQueryCharsNoWs(String s) {
    // Every character listed here gets a backslash in front of it
    final String syntaxChars = "\\+-!():^[]\"{}~*?|&;/";
    final StringBuilder escaped = new StringBuilder();

    for (char ch : s.toCharArray()) {
      if (syntaxChars.indexOf(ch) >= 0) {
        escaped.append('\\');
      }

      escaped.append(ch);
    }

    return escaped.toString();
  }

  /**
   * Escapes a query string with {@link #escapeQueryCharsNoWs(String)}, applying the default
   * length cap {@code MAX_QUERY_LENGTH}.
   *
   * @param query the raw query text
   * @return the escaped query, or {@code query} itself if it is null or empty
   */
  public static String normalizeQuery(String query) {
    return normalizeQuery(query, MAX_QUERY_LENGTH);
  }

  /**
   * Escapes a query string with {@link #escapeQueryCharsNoWs(String)} and truncates the escaped
   * text to at most {@code maxLen} characters, logging a warning when truncation happens.
   *
   * <p>
   * Truncation never leaves a dangling escape character at the end: if the cut would separate a
   * backslash from the character it escapes, that backslash is dropped too, so the result may be
   * one character shorter than {@code maxLen}.
   *
   * @param query  the raw query text
   * @param maxLen the maximum length of the result; values below 1 are treated as 1
   * @return the escaped and possibly truncated query, or {@code query} itself if it is null or
   *         empty
   */
  public static String normalizeQuery(String query, int maxLen) {
    if (Util.isEmpty(query)) {
      return query;
    }

    String st = escapeQueryCharsNoWs(query);
    int limit = Math.max(maxLen, 1);

    if (st.length() > limit) {
      // Count the backslashes that end the kept prefix. In the escaped text backslashes come
      // either as an escaped pair or as the escape of the following character, so an odd count
      // means the last one lost the character it escapes, while an even count is a run of
      // complete pairs that must be kept intact.
      int trailingBackslashes = 0;

      while (trailingBackslashes < limit
          && st.charAt(limit - 1 - trailingBackslashes) == '\\') {
        trailingBackslashes++;
      }

      int end = (trailingBackslashes % 2 == 1) ? limit - 1 : limit;
      st = st.substring(0, end);

      Log.warn(Solr.class,
          "Query string is longer than {} chars, truncated to '{}'", limit,
          st);
    }

    return st;
  }

  /**
   * Counts all documents in the {@code Const.SOLR_CORE_ATH_TM_SEGMENTS} core.
   *
   * @return the number of documents found, or -1 if the count could not be obtained
   */
  public static long getNumDocuments() {
    return getNumDocuments(Const.SOLR_CORE_ATH_TM_SEGMENTS);
  }

  /**
   * Counts all documents in the given core, using the match-all query {@code *:*}.
   *
   * @param coreName the Solr core to query
   * @return the number of documents found, or -1 if the count could not be obtained
   */
  public static long getNumDocuments(String coreName) {
    return getNumDocuments(coreName, "*:*");
  }

  /**
   * Counts the documents in the given core that match the given query.
   *
   * <p>
   * Failures are not propagated to the caller: they are logged as a warning and reported as -1.
   *
   * @param coreName the Solr core to query
   * @param query    the Solr query selecting the documents to count
   * @return the {@code numFound} value of the query result, or -1 if the query failed
   */
  public static long getNumDocuments(String coreName, String query) {
    try {
      QueryResponse response = AthIndex.getMany(coreName, query, null,
          QueryResponse.class);

      SolrDocumentList docList = response.getResults();
      return docList.getNumFound();

    } catch (Exception e) {
      // The -1 result alone gives no clue about the cause, so record it
      Log.warn(SolrUtil.class, "Error counting documents in '{}' for query '{}': {}", coreName,
          query, e.getMessage());

      return -1;
    }
  }

  /**
   * Adds a field value to a Solr document on behalf of a text unit, with the strict value check
   * switched on.
   *
   * @see #safeAddField(ITextUnit, SolrInputDocument, String, String, boolean)
   */
  public static void safeAddField(ITextUnit tu, SolrInputDocument doc, String name, String value)
      throws AthException {

    safeAddField(tu, doc, name, value, true);
  }

  /**
   * Adds a field value to a Solr document, doing nothing when the value is null or empty.
   *
   * @param doc   the document to add to; must not be null
   * @param name  the field name
   * @param value the value to add
   */
  public static void safeAddField(SolrInputDocument doc, String name, String value) {
    if (Util.isEmpty(value)) {
      return;
    }

    doc.addField(name, value);
  }

  /**
   * Adds the string form of a UUID to a Solr document, doing nothing when the UUID is null.
   *
   * @param doc   the document to add to; must not be null
   * @param name  the field name
   * @param value the UUID to add
   */
  public static void safeAddField(SolrInputDocument doc, String name, UUID value) {
    if (value == null) {
      return;
    }

    doc.addField(name, value.toString());
  }

  /**
   * Adds a field value to a Solr document on behalf of a text unit, validating the arguments
   * first.
   *
   * <p>
   * A null document and a null or empty field name are reported through
   * {@code AthException.logAndThrow}, with the id of the text unit in the message. A value that
   * is null, empty or only whitespace is reported the same way when {@code strictValueCheck} is
   * true, and is silently skipped otherwise.
   *
   * @param tu               the text unit the value comes from; its id is used in error messages
   * @param doc              the document to add to
   * @param name             the field name
   * @param value            the value to add
   * @param strictValueCheck whether a blank value is an error (true) or is skipped (false)
   * @throws AthException declared for the failures reported via {@code AthException.logAndThrow}
   */
  public static void safeAddField(ITextUnit tu, SolrInputDocument doc, String name, String value,
      boolean strictValueCheck) throws AthException {

    if (doc == null) {
      AthException.logAndThrow(Solr.class, "Doc is null for TU:\n{}", tu.getId());
    }

    if (Util.isEmpty(name)) {
      AthException.logAndThrow(Solr.class, "Null or empty name for TU:\n{}", tu.getId());
    }

    final boolean blankValue = Util.isEmpty(value) || Util.isEmpty(value.trim());

    if (blankValue && !strictValueCheck) {
      // Silently quit w/o adding the field
      return;
    }

    if (blankValue) {
      AthException.logAndThrow(Solr.class, "Null or empty value of the '{}' field for TU:\n{}",
          name, tu.getId());
    }

    doc.addField(name, value);
  }

  /**
   * Sets a field value on a Solr document on behalf of a text unit, with the strict value check
   * switched on.
   *
   * @see #safeSetField(ITextUnit, SolrInputDocument, String, String, boolean)
   */
  public static void safeSetField(ITextUnit tu, SolrInputDocument doc, String name, String value)
      throws AthException {

    safeSetField(tu, doc, name, value, true);
  }

  /**
   * Sets a field value on a Solr document on behalf of a text unit, validating the arguments
   * first.
   *
   * <p>
   * A null document and a null or empty field name are reported through
   * {@code AthException.logAndThrow}, with the id of the text unit in the message. A value that
   * is null, empty or only whitespace is reported the same way when {@code strictValueCheck} is
   * true, and is silently skipped otherwise.
   *
   * @param tu               the text unit the value comes from; its id is used in error messages
   * @param doc              the document to modify
   * @param name             the field name
   * @param value            the value to set
   * @param strictValueCheck whether a blank value is an error (true) or is skipped (false)
   * @throws AthException declared for the failures reported via {@code AthException.logAndThrow}
   */
  public static void safeSetField(ITextUnit tu, SolrInputDocument doc, String name, String value,
      boolean strictValueCheck) throws AthException {

    if (doc == null) {
      AthException.logAndThrow(Solr.class, "Doc is null for TU:\n{}", tu.getId());
    }

    if (Util.isEmpty(name)) {
      AthException.logAndThrow(Solr.class, "Null or empty name for TU:\n{}", tu.getId());
    }

    if (Util.isEmpty(value) || Util.isEmpty(value.trim())) {
      if (strictValueCheck) {
        AthException.logAndThrow(Solr.class, "Null or empty value of the '{}' field for TU:\n{}",
            name, tu.getId());

      } else {
        // Silently quit w/o setting the field
        return;
      }
    }

    doc.setField(name, value);
  }

  /**
   * Sets a field value on a Solr document, doing nothing when the value is null or empty.
   *
   * @param doc   the document to modify; must not be null
   * @param name  the field name
   * @param value the value to set
   */
  public static void safeSetField(SolrInputDocument doc, String name, String value) {
    if (!Util.isEmpty(value)) {
      doc.setField(name, value);
    }
  }

  /**
   * Sets the string form of a UUID on a Solr document, doing nothing when the UUID is null.
   *
   * @param doc   the document to modify; must not be null
   * @param name  the field name
   * @param value the UUID to set
   */
  public static void safeSetField(SolrInputDocument doc, String name, UUID value) {
    if (value != null) {
      // Store the textual form, exactly like safeAddField(SolrInputDocument, String, UUID) does.
      // A raw UUID object is not a Solr field type, so how it would be serialized is left to the
      // request writer in use.
      doc.setField(name, value.toString());
    }
  }

  /**
   * Reads a field from a SolrDocument and returns it converted with {@code toString()}.
   *
   * <p>
   * {@code defVal} is returned when the document is null or the field has no value. A value whose
   * string form is empty is returned as is, not replaced by {@code defVal}.
   *
   * @param doc       the document to read from; may be null
   * @param fieldName the name of the field to read
   * @param defVal    the value to return when the field cannot be read
   * @return the string form of the field value, or {@code defVal}
   */
  public static String safeGetField(SolrDocument doc, String fieldName, String defVal) {
    if (doc == null) {
      return defVal;
    }

    final Object raw = doc.getFieldValue(fieldName);

    if (raw == null) {
      return defVal;
    }

    return raw.toString();
  }

  /**
   * Reads a field from a SolrDocument as a {@code Long}.
   *
   * <p>
   * A {@code Long} value is returned unchanged, any other {@code Number} is converted with
   * {@code longValue()}, and everything else is parsed from its string form. {@code defVal} is
   * returned when the document is null, the field has no value, or the string form is not a
   * valid long.
   *
   * @param doc       the document to read from; may be null
   * @param fieldName the name of the field to read
   * @param defVal    the value to return when the field cannot be read as a long
   * @return the field value as a {@code Long}, or {@code defVal}
   */
  public static Long safeGetLongField(SolrDocument doc, String fieldName, Long defVal) {
    if (doc == null) {
      return defVal;
    }

    final Object raw = doc.getFieldValue(fieldName);

    if (raw == null) {
      return defVal;
    }

    try {
      if (raw instanceof Long) {
        return (Long) raw;
      }

      if (raw instanceof Number) {
        return ((Number) raw).longValue();
      }

      // Anything else is expected to be the text of a number (e.g., "1", "10")
      return Long.parseLong(raw.toString());

    } catch (NumberFormatException e) {
      return defVal;
    }
  }

  /**
   * Reads a field from a SolrDocument as an {@code Integer}.
   *
   * <p>
   * An {@code Integer} value is returned unchanged, any other {@code Number} is converted through
   * its {@code longValue()}, and everything else is parsed from its string form. {@code defVal}
   * is returned when the document is null, the field has no value, the string form is not a
   * valid int, or a numeric value does not fit into the int range.
   *
   * @param doc       the document to read from; may be null
   * @param fieldName the name of the field to read
   * @param defVal    the value to return when the field cannot be read as an int
   * @return the field value as an {@code Integer}, or {@code defVal}
   */
  public static Integer safeGetIntField(SolrDocument doc, String fieldName, Integer defVal) {
    Object val = doc == null ? null : doc.getFieldValue(fieldName);

    if (val == null) {
      return defVal;
    }

    try {
      if (val instanceof Integer) {
        return (Integer) val;

      } else if (val instanceof Number) {
        long asLong = ((Number) val).longValue();

        // Number.intValue() would silently truncate a value outside the int range, whereas the
        // same value given as text is rejected by Integer.parseInt() below; treat both alike
        if (asLong < Integer.MIN_VALUE || asLong > Integer.MAX_VALUE) {
          return defVal;
        }

        return (int) asLong;

      } else {
        // Assume string representation of a number (e.g., "1", "10")
        return Integer.parseInt(val.toString());
      }

    } catch (NumberFormatException e) {
      return defVal;
    }
  }

  /**
   * Builds the Solr ID of a document segment: the text {@code <docId>-<position>} passed through
   * {@code Base64Util.encodeString}.
   *
   * @param docId    the id of the document the segment belongs to; must not be null
   * @param position the position of the segment within the document
   * @return the encoded Solr ID
   */
  public static String buildDocSegSolrId(UUID docId, Long position) {
    final String plainId = Log.format("{}-{}", docId.toString(), position);

    return Base64Util.encodeString(plainId);
  }

  /**
   * Builds the Solr ID of a TM segment: the text {@code <tmId>-<sourceWithCodes>} passed through
   * {@code Base64Util.encodeString}.
   *
   * @param tmId            the id of the TM the segment belongs to; must not be null
   * @param sourceWithCodes the source text of the segment, including its codes
   * @return the encoded Solr ID
   */
  public static String buildTmSegSolrId(UUID tmId, String sourceWithCodes) {
    final String plainId = Log.format("{}-{}", tmId.toString(), sourceWithCodes);

    return Base64Util.encodeString(plainId);
  }
}