View Javadoc
1   package com.acumenvelocity.ath.openai;
2   
3   import java.util.ArrayList;
4   import java.util.Collections;
5   import java.util.List;
6   
7   import com.acumenvelocity.ath.common.AlignmentData.CombinedAlignmentInput;
8   import com.acumenvelocity.ath.common.AlignmentData.CombinedAlignmentOutput;
9   import com.acumenvelocity.ath.common.Const;
10  import com.acumenvelocity.ath.common.JacksonUtil;
11  import com.acumenvelocity.ath.common.Log;
12  import com.acumenvelocity.ath.model.InlineCodeRef;
13  import com.acumenvelocity.ath.model.LayeredSegment;
14  import com.fasterxml.jackson.core.type.TypeReference;
15  import com.openai.client.OpenAIClient;
16  import com.openai.client.okhttp.OpenAIOkHttpClient;
17  import com.openai.models.chat.completions.ChatCompletion;
18  import com.openai.models.chat.completions.ChatCompletionCreateParams;
19  
20  import net.sf.okapi.common.Util;
21  
22  public class OpenAi {
23  
24    private static final String CODES_REINSERTION_PROMPT = "For every object in the input array, "
25        + "align words and phrases in its trgText against its srcText. "
26        + "Reinsert source inline codes into trgCodes positioning them around aligned target words. "
27        + "CRITICAL REQUIREMENTS: "
28        + "1. WORD BOUNDARIES: Codes MUST be placed only at word boundaries (before the first "
29        + "character or after the last character of a word), NEVER in the middle of a word. "
30        + "A code splitting a word (e.g., 'о<code>т</code>ображать') is a CRITICAL ERROR. "
31        + "2. PAIRED CODES: Two codes with the same id are paired (OPENING and CLOSING tagType). "
32        + "If paired codes wrap only whitespace in srcText, they must wrap only whitespace in "
33        + "trgText. If they wrap text in srcText, they must wrap text in trgText. Wrapping text "
34        + "in target when source wraps only spaces is a SEVERE ERROR. "
35        + "3. NESTING: Always preserve well-formed nesting of paired codes with no overlapping. "
36        + "4. UNPAIRED CODES: If a code has no pair in the segment, treat it as an isolated "
37        + "placeholder (its pair is in another segment) and place it at the nearest appropriate "
38        + "word boundary. "
39        + "5. ORDER: Preserve the codes order wherever possible. "
40        + "VALIDATION: Before finalizing positions, verify each code is at a word boundary "
41        + "(adjacent to space, punctuation, or segment start/end), not inside a word. "
42        + "Input: JSON array of segments. "
43        + "Output: array of trgCodes JSON arrays, with length equal to the input array length. "
44        + "Return ONLY the JSON array, no explanations, no markdown, no preamble. This is an "
45        + "API call with strict response schema - make all decisions autonomously.";
46  
47    private static final String COMBINED_ALIGNMENT_PROMPT = "You are a bilingual document alignment "
48        + "expert. Perform TWO-STAGE alignment:\n\n"
49        + "STAGE 1 - PARAGRAPH ALIGNMENT:\n"
50        + "- Align sourceParagraphs with targetParagraphs\n"
51        + "- Documents may have different paragraph structures (split, merged, reordered)\n"
52        + "- Use context, segment previews, and content similarity\n"
53        + "- Types: MATCH (1:1), MULTI_MATCH (many:many), DELETED, INSERTED\n\n"
54  
55        + "STAGE 2 - SENTENCE ALIGNMENT (within each paragraph pair):\n"
56        + "- For EACH paragraph alignment, align the sentences within\n"
57        + "- Source positions refer to segment positions within SOURCE paragraphs of that pair\n"
58        + "- Target positions refer to segment positions within TARGET paragraphs of that pair\n"
59        + "- For MULTI_MATCH paragraphs, treat all segments from all source/target paragraphs as continuous lists\n"
60        + "- Types: MATCH (1:1), MULTI_MATCH (many:many), DELETED, INSERTED\n\n"
61  
62        + "CRITICAL REQUIREMENTS:\n"
63        + "1. Every paragraph must appear in exactly one paragraph alignment\n"
64        + "2. Every segment must appear in exactly one sentence alignment\n"
65        + "3. For DELETED: source segment has no target (we'll insert empty target)\n"
66        + "4. For INSERTED: target segment has no source (we'll insert empty source)\n"
67        + "5. After alignment, source and target will have EQUAL segment counts with MATCHING IDs\n"
68        + "6. Sentence positions are relative to the aligned paragraph group (0-based continuous)\n\n"
69  
70        + "OUTPUT FORMAT - Return EXACTLY this JSON structure (no additional fields):\n"
71        + "{\n"
72        + "  \"alignments\": [\n"
73        + "    {\n"
74        + "      \"paragraphAlignment\": {\n"
75        + "        \"type\": \"MATCH\",\n"
76        + "        \"sourceParagraphPositions\": [0],\n"
77        + "        \"targetParagraphPositions\": [0]\n"
78        + "      },\n"
79        + "      \"sentenceAlignments\": [\n"
80        + "        {\n"
81        + "          \"type\": \"MATCH\",\n"
82        + "          \"sourcePositions\": [0],\n"
83        + "          \"targetPositions\": [0]\n"
84        + "        },\n"
85        + "        {\n"
86        + "          \"type\": \"MATCH\",\n"
87        + "          \"sourcePositions\": [1],\n"
88        + "          \"targetPositions\": [1]\n"
89        + "        }\n"
90        + "      ]\n"
91        + "    },\n"
92        + "    {\n"
93        + "      \"paragraphAlignment\": {\n"
94        + "        \"type\": \"MULTI_MATCH\",\n"
95        + "        \"sourceParagraphPositions\": [1, 2],\n"
96        + "        \"targetParagraphPositions\": [1]\n"
97        + "      },\n"
98        + "      \"sentenceAlignments\": [\n"
99        + "        {\n"
100       + "          \"type\": \"MATCH\",\n"
101       + "          \"sourcePositions\": [0],\n"
102       + "          \"targetPositions\": [0]\n"
103       + "        },\n"
104       + "        {\n"
105       + "          \"type\": \"MULTI_MATCH\",\n"
106       + "          \"sourcePositions\": [1, 2],\n"
107       + "          \"targetPositions\": [1]\n"
108       + "        }\n"
109       + "      ]\n"
110       + "    }\n"
111       + "  ]\n"
112       + "}\n\n"
113 
114       + "CRITICAL: Use exact field names:\n"
115       + "- \"sourceParagraphPositions\" (NOT \"source_paragraphs\")\n"
116       + "- \"targetParagraphPositions\" (NOT \"target_paragraphs\")\n"
117       + "- \"sourcePositions\" (NOT \"source_positions\")\n"
118       + "- \"targetPositions\" (NOT \"target_positions\")\n"
119       + "- Values must be INTEGER ARRAYS, not objects with position/id\n\n"
120 
121       + "Never return anything else. Make all decisions independently.";
122 
123   private static OpenAIClient client;
124 
125   public static void init() throws Exception {
126     client = OpenAIOkHttpClient.builder()
127         .apiKey(Const.ATH_OPENAI_API_KEY)
128         .build();
129   }
130 
131   /**
132    * Reinserts inline codes into target segments using an LLM to align them with source codes.
133    * 
134    * <p>
135    * This method analyzes source and target text segments and uses an OpenAI model to
136    * determine the correct positions for inline codes in the target text based on word/phrase
137    * alignment with the source text.
138    * </p>
139    * 
140    * <p>
141    * The method guarantees to return a list of the same size as the input layeredSegments,
142    * with empty lists for segments that have no codes or when processing fails.
143    * </p>
144    * 
145    * @param modelName       the OpenAI model name to use (e.g., "gpt-5-mini")
146    * @param layeredSegments the list of segments containing source and target text with code
147    *                        references
148    * @return a list of code reference lists, one per input segment. Never null, always the same
149    *         size as layeredSegments. Individual lists are never null (empty lists for no codes).
150    */
151   public static List<List<InlineCodeRef>> reinsertCodes(String modelName,
152       List<LayeredSegment> layeredSegments) {
153 
154     // Validate parameters and return empty results if invalid
155     if (Util.isEmpty(modelName) || Util.isEmpty(layeredSegments)) {
156       Log.warn(OpenAi.class, "Invalid parameters: modelName='{}', layeredSegments size={}",
157           modelName, layeredSegments == null ? "null" : layeredSegments.size());
158 
159       return layeredSegments == null ? new ArrayList<>()
160           : Collections.nCopies(layeredSegments.size(), new ArrayList<>());
161     }
162 
163     // Check if all srcCodes arrays are empty - skip API call if so
164     boolean hasAnyCodes = layeredSegments.stream()
165         .anyMatch(seg -> seg.getSrcCodes() != null && !seg.getSrcCodes().isEmpty());
166 
167     if (!hasAnyCodes) {
168       Log.debug(OpenAi.class, "All source segments have no codes, skipping API call");
169       return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
170     }
171 
172     try {
173       String batchJson = JacksonUtil.toJson(layeredSegments, false);
174       Log.debug(OpenAi.class, "OpenAI input: {}", batchJson);
175 
176       ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
177           .model(modelName)
178           .addSystemMessage(CODES_REINSERTION_PROMPT)
179           .addUserMessage(batchJson)
180           .build();
181 
182       // Call the API
183       ChatCompletion completion = client.chat().completions().create(params);
184 
185       // Extract the result (the filled trgCodes JSON array)
186       String json = completion.choices().get(0).message().content()
187           .orElse("");
188 
189       if (json.isEmpty()) {
190         Log.warn(OpenAi.class, "OpenAI API returned empty content");
191         return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
192       }
193 
194       Log.debug(OpenAi.class, "OpenAI output: {}", json);
195 
196       TypeReference<List<List<InlineCodeRef>>> ref = new TypeReference<>() {
197       };
198 
199       List<List<InlineCodeRef>> trgCodesList = JacksonUtil.fromJson(json, ref);
200 
201       // Ensure the response is never null and has the correct size
202       if (trgCodesList == null) {
203         Log.error(OpenAi.class, "Failed to parse OpenAI response, returning empty results");
204         return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
205       }
206 
207       // Normalize the result to match expected size
208       if (trgCodesList.size() != layeredSegments.size()) {
209         Log.warn(OpenAi.class,
210             "OpenAI API returned wrong number of results -- expected: {}, actual: {}",
211             layeredSegments.size(), trgCodesList.size());
212 
213         // Pad or trim to match expected size
214         List<List<InlineCodeRef>> normalizedList = new ArrayList<>(layeredSegments.size());
215 
216         for (int i = 0; i < layeredSegments.size(); i++) {
217           if (i < trgCodesList.size() && trgCodesList.get(i) != null) {
218             normalizedList.add(trgCodesList.get(i));
219 
220           } else {
221             normalizedList.add(new ArrayList<>());
222           }
223         }
224 
225         return normalizedList;
226       }
227 
228       // Replace any null lists with empty lists
229       for (int i = 0; i < trgCodesList.size(); i++) {
230         if (trgCodesList.get(i) == null) {
231           trgCodesList.set(i, new ArrayList<>());
232         }
233       }
234 
235       return trgCodesList;
236 
237     } catch (Exception e) {
238       Log.error(OpenAi.class, "Exception calling OpenAI API: {}", e.getMessage(), e);
239 
240       // Return empty lists for all segments so caller can continue
241       return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
242     }
243   }
244 
245   /**
246    * Performs combined paragraph and sentence alignment using an LLM.
247    * 
248    * <p>
249    * This method sends a request to align both paragraphs and sentences in a single API call.
250    * The input contains source and target paragraphs with their segmented sentences. The LLM
251    * first aligns paragraphs (which may be split, merged, or reordered between documents),
252    * then aligns sentences within each aligned paragraph pair.
253    * </p>
254    * 
255    * @param modelName the OpenAI model name to use (e.g., "gpt-4o-mini")
256    * @param input     the alignment input containing source and target paragraphs with segments
257    * @return the alignment output with paragraph and sentence alignments
258    * @throws Exception if the API call fails or returns invalid data
259    */
260   public static CombinedAlignmentOutput alignParagraphsAndSentences(String modelName,
261       CombinedAlignmentInput input) throws Exception {
262 
263     // Validate parameters
264     if (Util.isEmpty(modelName) || input == null) {
265       Log.warn(OpenAi.class, "Invalid parameters: modelName='{}', input={}",
266           modelName, input == null ? "null" : "present");
267       throw new IllegalArgumentException("modelName and input are required");
268     }
269 
270     String inputJson = JacksonUtil.toJson(input, false);
271     Log.debug(OpenAi.class, "LLM alignment input: {}", inputJson);
272 
273     try {
274       ChatCompletionCreateParams params = ChatCompletionCreateParams.builder()
275           .model(modelName)
276           .addSystemMessage(COMBINED_ALIGNMENT_PROMPT)
277           .addUserMessage(inputJson)
278           .build();
279 
280       ChatCompletion completion = client.chat().completions().create(params);
281       String outputJson = completion.choices().get(0).message().content().orElse("");
282 
283       if (outputJson.isEmpty()) {
284         Log.warn(OpenAi.class, "OpenAI API returned empty content");
285         throw new Exception("OpenAI API returned empty content");
286       }
287 
288       Log.debug(OpenAi.class, "LLM alignment output: {}", outputJson);
289 
290       CombinedAlignmentOutput output = JacksonUtil.fromJson(outputJson,
291           CombinedAlignmentOutput.class);
292 
293       if (output == null || output.alignments == null) {
294         Log.error(OpenAi.class, "Failed to parse OpenAI response");
295         throw new Exception("Failed to parse OpenAI response");
296       }
297 
298       return output;
299 
300     } catch (Exception e) {
301       Log.error(OpenAi.class, "Exception calling OpenAI API: {}", e.getMessage(), e);
302       throw e;
303     }
304   }
305 
306   public static void done() throws Exception {
307     client.close();
308   }
309 
310   public static OpenAIClient getClient() {
311     return client;
312   }
313 }