View Javadoc
1   package com.acumenvelocity.ath.gemini;
2   
3   import java.util.ArrayList;
4   import java.util.Arrays;
5   import java.util.Collections;
6   import java.util.List;
7   
8   import com.acumenvelocity.ath.common.AlignmentData.CombinedAlignmentInput;
9   import com.acumenvelocity.ath.common.AlignmentData.CombinedAlignmentOutput;
10  import com.acumenvelocity.ath.common.Const;
11  import com.acumenvelocity.ath.common.JacksonUtil;
12  import com.acumenvelocity.ath.common.Log;
13  import com.acumenvelocity.ath.common.exception.AthException;
14  import com.acumenvelocity.ath.model.InlineCodeRef;
15  import com.acumenvelocity.ath.model.LayeredSegment;
16  import com.fasterxml.jackson.core.type.TypeReference;
17  import com.google.genai.Client;
18  import com.google.genai.types.Content;
19  import com.google.genai.types.GenerateContentConfig; // <-- ADDED
20  import com.google.genai.types.GenerateContentResponse;
21  import com.google.genai.types.Part;
22  // import com.google.genai.types.SystemInstruction; <-- REMOVED: This class is not available in
23  // 1.27.0
24  
25  import net.sf.okapi.common.Util;
26  
27  public class GenAi {
28  
29    // Prompt for the inline code reinsertion task
30    private static final String CODES_REINSERTION_PROMPT = "For every object in the input array, "
31        + "create an M:N mapping between the words in srcText and trgText. "
32        + "Copy all source inline codes into trgCodes changing the position fields according to "
33        + "their correct trgText positions based on the M:N mapping. "
34        + "CRITICAL REQUIREMENTS: "
35        + "1. WORD BOUNDARIES - MANDATORY POSITIONS: Each segment contains a trgWordBreakPositions "
36        + "array with valid word-break positions (computed by ICU4J). You MUST place ALL target "
37        + "codes ONLY at the positions that exist in that array. Placing a code at ANY position NOT "
38        + "in trgWordBreakPositions is STRICTLY PROHIBITED and will cause system failure. "
39        + "The position field in each InlineCodeRef in trgCodes MUST be a value from "
40        + "trgWordBreakPositions. NO EXCEPTIONS. "
41        + "2. PAIRED CODES: Two codes with the same id are paired (OPENING and CLOSING tagType). "
42        + "If paired codes wrap only whitespace in srcText, they must wrap only whitespace in "
43        + "trgText. If they wrap text in srcText, they must wrap text in trgText. Wrapping text "
44        + "in target when source wraps only spaces is a SEVERE ERROR. "
45        + "3. NESTING: Always preserve well-formed nesting of paired codes with no overlapping with "
46        + "other pairs of codes. "
47        + "4. UNPAIRED CODES: If a code has no pair in the segment, treat it as an isolated "
48        + "placeholder (its pair is in another segment). Find the position in trgWordBreakPositions "
49        + "that best aligns with the source code's context. This position MUST still be from "
50        + "trgWordBreakPositions - choose the closest valid position that maintains alignment and "
51        + "doesn't overlap with other paired codes. "
52        + "5. ORDER: Preserve the codes order wherever possible. "
53        + "VALIDATION CHECKLIST: "
54        + "- Verify EVERY position value in trgCodes exists in trgWordBreakPositions "
55        + "- Verify paired codes wrap appropriate content (text or whitespace) "
56        + "- Verify no code nesting violations "
57        + "Input: JSON array of segments with trgWordBreakPositions arrays. "
58        + "Output: array of trgCodes JSON arrays, with length equal to the input array length. "
59        + "Each InlineCodeRef position MUST be from the corresponding trgWordBreakPositions. "
60        + "Return ONLY the JSON array, no explanations, no markdown, no preamble. This is an "
61        + "API call with strict response schema - make all decisions autonomously.";
62  
63    // private static final String CODES_REINSERTION_PROMPT = "Align srcText with trgText and reinsert
64    // srcCodes into trgCodes. "
65    // + "RULES: 1) trgCodes positions MUST be from trgWordBreakPositions array "
66    // + "2) Paired codes (same id): preserve text/whitespace wrapping "
67    // + "3) No overlapping, maintain nesting "
68    // + "4) Preserve order. "
69    // + "Return JSON array of trgCodes, same length as input. No explanations.";
70  
71    // Prompt for the combined paragraph and sentence alignment task
72    private static final String COMBINED_ALIGNMENT_PROMPT = "You are a bilingual document alignment "
73        + "expert. Perform TWO-STAGE alignment:\n\n"
74        + "STAGE 1 - PARAGRAPH ALIGNMENT:\n"
75        + "- Align sourceParagraphs with targetParagraphs\n"
76        + "- Documents may have different paragraph structures (split, merged, reordered)\n"
77        + "- Use context, segment previews, and content similarity\n"
78        + "- Types: MATCH (1:1), MULTI_MATCH (many:many), DELETED, INSERTED\n\n"
79  
80        + "STAGE 2 - SENTENCE ALIGNMENT (within each paragraph pair):\n"
81        + "- For EACH paragraph alignment, align the sentences within\n"
82        + "- Source positions refer to segment positions within SOURCE paragraphs of that pair\n"
83        + "- Target positions refer to segment positions within TARGET paragraphs of that pair\n"
84        + "- For MULTI_MATCH paragraphs, treat all segments from all source/target paragraphs as continuous lists\n"
85        + "- Types: MATCH (1:1), MULTI_MATCH (many:many), DELETED, INSERTED\n\n"
86  
87        + "CRITICAL REQUIREMENTS:\n"
88        + "1. Every paragraph must appear in exactly one paragraph alignment\n"
89        + "2. Every segment must appear in exactly one sentence alignment\n"
90        + "3. For DELETED: source segment has no target (we'll insert empty target)\n"
91        + "4. For INSERTED: target segment has no source (we'll insert empty source)\n"
92        + "5. After alignment, source and target will have EQUAL segment counts with MATCHING IDs\n"
93        + "6. Sentence positions are relative to the aligned paragraph group (0-based continuous)\n\n"
94  
95        + "OUTPUT FORMAT - Return EXACTLY this JSON structure (no additional fields):\n"
96        + "{\n"
97        + "  \"alignments\": [\n"
98        + "    {\n"
99        + "      \"paragraphAlignment\": {\n"
100       + "        \"type\": \"MATCH\",\n"
101       + "        \"sourceParagraphPositions\": [0],\n"
102       + "        \"targetParagraphPositions\": [0]\n"
103       + "      },\n"
104       + "      \"sentenceAlignments\": [\n"
105       + "        {\n"
106       + "          \"type\": \"MATCH\",\n"
107       + "          \"sourcePositions\": [0],\n"
108       + "          \"targetPositions\": [0]\n"
109       + "        },\n"
110       + "        {\n"
111       + "          \"type\": \"MATCH\",\n"
112       + "          \"sourcePositions\": [1],\n"
113       + "          \"targetPositions\": [1]\n"
114       + "        }\n"
115       + "      ]\n"
116       + "    },\n"
117       + "    {\n"
118       + "      \"paragraphAlignment\": {\n"
119       + "        \"type\": \"MULTI_MATCH\",\n"
120       + "        \"sourceParagraphPositions\": [1, 2],\n"
121       + "        \"targetParagraphPositions\": [1]\n"
122       + "      },\n"
123       + "      \"sentenceAlignments\": [\n"
124       + "        {\n"
125       + "          \"type\": \"MATCH\",\n"
126       + "          \"sourcePositions\": [0],\n"
127       + "          \"targetPositions\": [0]\n"
128       + "        },\n"
129       + "        {\n"
130       + "          \"type\": \"MULTI_MATCH\",\n"
131       + "          \"sourcePositions\": [1, 2],\n"
132       + "          \"targetPositions\": [1]\n"
133       + "        }\n"
134       + "      ]\n"
135       + "    }\n"
136       + "  ]\n"
137       + "}\n\n"
138 
139       + "CRITICAL: Use exact field names:\n"
140       + "- \"sourceParagraphPositions\" (NOT \"source_paragraphs\")\n"
141       + "- \"targetParagraphPositions\" (NOT \"target_paragraphs\")\n"
142       + "- \"sourcePositions\" (NOT \"source_positions\")\n"
143       + "- \"targetPositions\" (NOT \"target_positions\")\n"
144       + "- Values must be INTEGER ARRAYS, not objects with position/id\n\n"
145 
146       + "Never return anything else. Make all decisions independently.";
147 
148   private static Client client;
149 
150   /**
151    * Initializes the Gemini Client using the API key from environment variables or Const.
152    * 
153    * @throws AthException
154    */
155   public static void init() throws AthException {
156     if (Const.ATH_GEMINI_API_KEY != null && !Const.ATH_GEMINI_API_KEY.isEmpty()) {
157       client = Client.builder().apiKey(Const.ATH_GEMINI_API_KEY).build();
158 
159     } else {
160       AthException.logAndThrow(GenAi.class, "Gemini API client is not initialized");
161     }
162   }
163 
164   /**
165    * Reinserts inline codes into target segments using an LLM to align them with source codes.
166    * *
167    * <p>
168    * This method analyzes source and target text segments and uses a Gemini model to
169    * determine the correct positions for inline codes in the target text based on word/phrase
170    * alignment with the source text.
171    * </p>
172    * *
173    * <p>
174    * The method guarantees to return a list of the same size as the input layeredSegments,
175    * with empty lists for segments that have no codes or when processing fails.
176    * </p>
177    * * @param modelName the Gemini model name to use (e.g., "gemini-2.5-flash")
178    * * @param layeredSegments the list of segments containing source and target text with code
179    * references
180    * 
181    * @return a list of code reference lists, one per input segment. Never null, always the same
182    *         size as layeredSegments. Individual lists are never null (empty lists for no codes).
183    */
184   public static List<List<InlineCodeRef>> reinsertCodes(String modelName,
185       List<LayeredSegment> layeredSegments) {
186 
187     // Validate parameters and return empty results if invalid
188     if (client == null || Util.isEmpty(modelName) || Util.isEmpty(layeredSegments)) {
189       Log.warn(GenAi.class,
190           "Invalid parameters or client not initialized: client= {}, modelName='{}', "
191               + "layeredSegments size={}",
192           client, modelName, layeredSegments == null ? "null" : layeredSegments.size());
193 
194       return layeredSegments == null ? new ArrayList<>()
195           : Collections.nCopies(layeredSegments.size(), new ArrayList<>());
196     }
197 
198     // Use default model if an empty modelName is passed
199     if (Util.isEmpty(modelName)) {
200       modelName = Const.GEMINI_CODE_REINSERTION_MODEL;
201     }
202 
203     // Check if all srcCodes arrays are empty - skip API call if so
204     boolean hasAnyCodes = layeredSegments.stream()
205         .anyMatch(seg -> seg.getSrcCodes() != null && !seg.getSrcCodes().isEmpty());
206 
207     if (!hasAnyCodes) {
208       Log.debug(GenAi.class, "All source segments have no codes, skipping API call");
209       return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
210     }
211 
212     try {
213       String batchJson = JacksonUtil.toJson(layeredSegments, false);
214       Log.debug(GenAi.class, "Gemini reinsert codes input: {}", batchJson);
215 
216       // --- FIX START: Use Content object for system instruction ---
217       // 1. Construct the System Instruction as a Content object (correct for v1.27.0)
218       Content systemInstructionContent = Content.builder()
219           .parts(Arrays.asList(Part.builder().text(CODES_REINSERTION_PROMPT).build()))
220           .build();
221 
222       // 2. Configure System Instruction in GenerateContentConfig
223       GenerateContentConfig config = GenerateContentConfig.builder()
224           .systemInstruction(systemInstructionContent)
225           .temperature(0.0f)
226           .responseMimeType("application/json")
227           .build();
228 
229       // 3. Construct the Content list for the request (Only user content is needed)
230       List<Content> contents = Arrays.asList(
231           Content.builder().role("user")
232               .parts(Arrays.asList(Part.builder().text(batchJson).build())).build());
233 
234       // 4. Pass the config object as the third argument
235       GenerateContentResponse response = client.models.generateContent(modelName, contents, config);
236       // --- FIX END ---
237 
238       // Extract the result (the filled trgCodes JSON array)
239       String json = response.text();
240 
241       if (json == null || json.isEmpty()) {
242         Log.warn(GenAi.class, "Gemini API returned empty content");
243         return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
244       }
245 
246       Log.debug(GenAi.class, "Gemini reinsert codes output: {}", json);
247 
248       TypeReference<List<List<InlineCodeRef>>> ref = new TypeReference<>() {
249       };
250 
251       List<List<InlineCodeRef>> trgCodesList = JacksonUtil.fromJson(json, ref);
252 
253       // Ensure the response is never null and has the correct size
254       if (trgCodesList == null) {
255         Log.error(GenAi.class, "Failed to parse Gemini response, returning empty results");
256         return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
257       }
258 
259       // Normalize the result to match expected size
260       if (trgCodesList.size() != layeredSegments.size()) {
261         Log.warn(GenAi.class,
262             "Gemini API returned wrong number of results -- expected: {}, actual: {}",
263             layeredSegments.size(), trgCodesList.size());
264 
265         // Pad or trim to match expected size
266         List<List<InlineCodeRef>> normalizedList = new ArrayList<>(layeredSegments.size());
267 
268         for (int i = 0; i < layeredSegments.size(); i++) {
269           if (i < trgCodesList.size() && trgCodesList.get(i) != null) {
270             normalizedList.add(trgCodesList.get(i));
271 
272           } else {
273             normalizedList.add(new ArrayList<>());
274           }
275         }
276 
277         return normalizedList;
278       }
279 
280       // Replace any null lists with empty lists
281       for (int i = 0; i < trgCodesList.size(); i++) {
282         if (trgCodesList.get(i) == null) {
283           trgCodesList.set(i, new ArrayList<>());
284         }
285       }
286 
287       return trgCodesList;
288 
289     } catch (Exception e) {
290       Log.error(GenAi.class, "Exception calling Gemini API: {}", e.getMessage(), e);
291 
292       // Return empty lists for all segments so caller can continue
293       return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
294     }
295   }
296 
297   /**
298    * Performs combined paragraph and sentence alignment using an LLM.
299    * *
300    * <p>
301    * This method sends a request to align both paragraphs and sentences in a single API call.
302    * The input contains source and target paragraphs with their segmented sentences. The LLM
303    * first aligns paragraphs (which may be split, merged, or reordered between documents),
304    * then aligns sentences within each aligned paragraph pair.
305    * </p>
306    * * @param modelName the Gemini model name to use (e.g., "gemini-2.5-pro")
307    * * @param input the alignment input containing source and target paragraphs with segments
308    * 
309    * @return the alignment output with paragraph and sentence alignments
310    * @throws Exception if the API call fails or returns invalid data
311    */
312   public static CombinedAlignmentOutput alignParagraphsAndSentences(String modelName,
313       CombinedAlignmentInput input) throws Exception {
314 
315     // Validate parameters
316     if (client == null || Util.isEmpty(modelName) || input == null) {
317       Log.warn(GenAi.class,
318           "Invalid parameters or client not initialized: modelName='{}', input={}",
319           modelName, input == null ? "null" : "present");
320       
321       throw new IllegalArgumentException(
322           "Client must be initialized, and modelName and input are required");
323     }
324 
325     // Use default model if an empty modelName is passed
326     if (Util.isEmpty(modelName)) {
327       modelName = Const.GEMINI_ALIGNMENT_MODEL;
328     }
329 
330     String inputJson = JacksonUtil.toJson(input, false);
331     Log.debug(GenAi.class, "Gemini alignment input: {}", inputJson);
332 
333     try {
334       // --- FIX START: Use Content object for system instruction ---
335       // 1. Construct the System Instruction as a Content object (correct for v1.27.0)
336       Content systemInstructionContent = Content.builder()
337           .parts(Arrays.asList(Part.builder().text(COMBINED_ALIGNMENT_PROMPT).build()))
338           .build();
339 
340       // 2. Configure System Instruction in GenerateContentConfig
341       GenerateContentConfig config = GenerateContentConfig.builder()
342           .systemInstruction(systemInstructionContent)
343           .temperature(0.0f)
344           .responseMimeType("application/json")
345           .build();
346 
347       // 3. Construct the Content list for the request (Only user content is needed)
348       List<Content> contents = Arrays.asList(
349           Content.builder().role("user")
350               .parts(Arrays.asList(Part.builder().text(inputJson).build())).build());
351 
352       // 4. Pass the config object as the third argument
353       GenerateContentResponse response = client.models.generateContent(modelName, contents, config);
354       // --- FIX END ---
355 
356       String outputJson = response.text();
357 
358       if (outputJson == null || outputJson.isEmpty()) {
359         Log.warn(GenAi.class, "Gemini API returned empty content");
360         throw new Exception("Gemini API returned empty content");
361       }
362 
363       Log.debug(GenAi.class, "Gemini alignment output: {}", outputJson);
364 
365       CombinedAlignmentOutput output = JacksonUtil.fromJson(outputJson,
366           CombinedAlignmentOutput.class);
367 
368       if (output == null || output.alignments == null) {
369         Log.error(GenAi.class, "Failed to parse Gemini response");
370         throw new Exception("Failed to parse Gemini response");
371       }
372 
373       return output;
374 
375     } catch (Exception e) {
376       Log.error(GenAi.class, "Exception calling Gemini API: {}", e.getMessage(), e);
377       throw e;
378     }
379   }
380 
381   /**
382    * Closes the Gemini Client connection.
383    */
384   public static void done() {
385     if (client != null) {
386       // client.close() is available on the Client interface
387       client.close();
388     }
389   }
390 
391   /**
392    * Returns the initialized Gemini Client.
393    * * @return the Gemini Client instance
394    */
395   public static Client getClient() {
396     return client;
397   }
398 }