1 package com.acumenvelocity.ath.gemini;
2
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.Collections;
6 import java.util.List;
7
8 import com.acumenvelocity.ath.common.AlignmentData.CombinedAlignmentInput;
9 import com.acumenvelocity.ath.common.AlignmentData.CombinedAlignmentOutput;
10 import com.acumenvelocity.ath.common.Const;
11 import com.acumenvelocity.ath.common.JacksonUtil;
12 import com.acumenvelocity.ath.common.Log;
13 import com.acumenvelocity.ath.common.exception.AthException;
14 import com.acumenvelocity.ath.model.InlineCodeRef;
15 import com.acumenvelocity.ath.model.LayeredSegment;
16 import com.fasterxml.jackson.core.type.TypeReference;
17 import com.google.genai.Client;
18 import com.google.genai.types.Content;
19 import com.google.genai.types.GenerateContentConfig;
20 import com.google.genai.types.GenerateContentResponse;
21 import com.google.genai.types.Part;
22
23
24
25 import net.sf.okapi.common.Util;
26
27 public class GenAi {
28
29
30 private static final String CODES_REINSERTION_PROMPT = "For every object in the input array, "
31 + "create an M:N mapping between the words in srcText and trgText. "
32 + "Copy all source inline codes into trgCodes changing the position fields according to "
33 + "their correct trgText positions based on the M:N mapping. "
34 + "CRITICAL REQUIREMENTS: "
35 + "1. WORD BOUNDARIES - MANDATORY POSITIONS: Each segment contains a trgWordBreakPositions "
36 + "array with valid word-break positions (computed by ICU4J). You MUST place ALL target "
37 + "codes ONLY at the positions that exist in that array. Placing a code at ANY position NOT "
38 + "in trgWordBreakPositions is STRICTLY PROHIBITED and will cause system failure. "
39 + "The position field in each InlineCodeRef in trgCodes MUST be a value from "
40 + "trgWordBreakPositions. NO EXCEPTIONS. "
41 + "2. PAIRED CODES: Two codes with the same id are paired (OPENING and CLOSING tagType). "
42 + "If paired codes wrap only whitespace in srcText, they must wrap only whitespace in "
43 + "trgText. If they wrap text in srcText, they must wrap text in trgText. Wrapping text "
44 + "in target when source wraps only spaces is a SEVERE ERROR. "
45 + "3. NESTING: Always preserve well-formed nesting of paired codes with no overlapping with "
46 + "other pairs of codes. "
47 + "4. UNPAIRED CODES: If a code has no pair in the segment, treat it as an isolated "
48 + "placeholder (its pair is in another segment). Find the position in trgWordBreakPositions "
49 + "that best aligns with the source code's context. This position MUST still be from "
50 + "trgWordBreakPositions - choose the closest valid position that maintains alignment and "
51 + "doesn't overlap with other paired codes. "
52 + "5. ORDER: Preserve the codes order wherever possible. "
53 + "VALIDATION CHECKLIST: "
54 + "- Verify EVERY position value in trgCodes exists in trgWordBreakPositions "
55 + "- Verify paired codes wrap appropriate content (text or whitespace) "
56 + "- Verify no code nesting violations "
57 + "Input: JSON array of segments with trgWordBreakPositions arrays. "
58 + "Output: array of trgCodes JSON arrays, with length equal to the input array length. "
59 + "Each InlineCodeRef position MUST be from the corresponding trgWordBreakPositions. "
60 + "Return ONLY the JSON array, no explanations, no markdown, no preamble. This is an "
61 + "API call with strict response schema - make all decisions autonomously.";
62
63
64
65
66
67
68
69
70
71
72 private static final String COMBINED_ALIGNMENT_PROMPT = "You are a bilingual document alignment "
73 + "expert. Perform TWO-STAGE alignment:\n\n"
74 + "STAGE 1 - PARAGRAPH ALIGNMENT:\n"
75 + "- Align sourceParagraphs with targetParagraphs\n"
76 + "- Documents may have different paragraph structures (split, merged, reordered)\n"
77 + "- Use context, segment previews, and content similarity\n"
78 + "- Types: MATCH (1:1), MULTI_MATCH (many:many), DELETED, INSERTED\n\n"
79
80 + "STAGE 2 - SENTENCE ALIGNMENT (within each paragraph pair):\n"
81 + "- For EACH paragraph alignment, align the sentences within\n"
82 + "- Source positions refer to segment positions within SOURCE paragraphs of that pair\n"
83 + "- Target positions refer to segment positions within TARGET paragraphs of that pair\n"
84 + "- For MULTI_MATCH paragraphs, treat all segments from all source/target paragraphs as continuous lists\n"
85 + "- Types: MATCH (1:1), MULTI_MATCH (many:many), DELETED, INSERTED\n\n"
86
87 + "CRITICAL REQUIREMENTS:\n"
88 + "1. Every paragraph must appear in exactly one paragraph alignment\n"
89 + "2. Every segment must appear in exactly one sentence alignment\n"
90 + "3. For DELETED: source segment has no target (we'll insert empty target)\n"
91 + "4. For INSERTED: target segment has no source (we'll insert empty source)\n"
92 + "5. After alignment, source and target will have EQUAL segment counts with MATCHING IDs\n"
93 + "6. Sentence positions are relative to the aligned paragraph group (0-based continuous)\n\n"
94
95 + "OUTPUT FORMAT - Return EXACTLY this JSON structure (no additional fields):\n"
96 + "{\n"
97 + " \"alignments\": [\n"
98 + " {\n"
99 + " \"paragraphAlignment\": {\n"
100 + " \"type\": \"MATCH\",\n"
101 + " \"sourceParagraphPositions\": [0],\n"
102 + " \"targetParagraphPositions\": [0]\n"
103 + " },\n"
104 + " \"sentenceAlignments\": [\n"
105 + " {\n"
106 + " \"type\": \"MATCH\",\n"
107 + " \"sourcePositions\": [0],\n"
108 + " \"targetPositions\": [0]\n"
109 + " },\n"
110 + " {\n"
111 + " \"type\": \"MATCH\",\n"
112 + " \"sourcePositions\": [1],\n"
113 + " \"targetPositions\": [1]\n"
114 + " }\n"
115 + " ]\n"
116 + " },\n"
117 + " {\n"
118 + " \"paragraphAlignment\": {\n"
119 + " \"type\": \"MULTI_MATCH\",\n"
120 + " \"sourceParagraphPositions\": [1, 2],\n"
121 + " \"targetParagraphPositions\": [1]\n"
122 + " },\n"
123 + " \"sentenceAlignments\": [\n"
124 + " {\n"
125 + " \"type\": \"MATCH\",\n"
126 + " \"sourcePositions\": [0],\n"
127 + " \"targetPositions\": [0]\n"
128 + " },\n"
129 + " {\n"
130 + " \"type\": \"MULTI_MATCH\",\n"
131 + " \"sourcePositions\": [1, 2],\n"
132 + " \"targetPositions\": [1]\n"
133 + " }\n"
134 + " ]\n"
135 + " }\n"
136 + " ]\n"
137 + "}\n\n"
138
139 + "CRITICAL: Use exact field names:\n"
140 + "- \"sourceParagraphPositions\" (NOT \"source_paragraphs\")\n"
141 + "- \"targetParagraphPositions\" (NOT \"target_paragraphs\")\n"
142 + "- \"sourcePositions\" (NOT \"source_positions\")\n"
143 + "- \"targetPositions\" (NOT \"target_positions\")\n"
144 + "- Values must be INTEGER ARRAYS, not objects with position/id\n\n"
145
146 + "Never return anything else. Make all decisions independently.";
147
148 private static Client client;
149
150
151
152
153
154
155 public static void init() throws AthException {
156 if (Const.ATH_GEMINI_API_KEY != null && !Const.ATH_GEMINI_API_KEY.isEmpty()) {
157 client = Client.builder().apiKey(Const.ATH_GEMINI_API_KEY).build();
158
159 } else {
160 AthException.logAndThrow(GenAi.class, "Gemini API client is not initialized");
161 }
162 }
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184 public static List<List<InlineCodeRef>> reinsertCodes(String modelName,
185 List<LayeredSegment> layeredSegments) {
186
187
188 if (client == null || Util.isEmpty(modelName) || Util.isEmpty(layeredSegments)) {
189 Log.warn(GenAi.class,
190 "Invalid parameters or client not initialized: client= {}, modelName='{}', "
191 + "layeredSegments size={}",
192 client, modelName, layeredSegments == null ? "null" : layeredSegments.size());
193
194 return layeredSegments == null ? new ArrayList<>()
195 : Collections.nCopies(layeredSegments.size(), new ArrayList<>());
196 }
197
198
199 if (Util.isEmpty(modelName)) {
200 modelName = Const.GEMINI_CODE_REINSERTION_MODEL;
201 }
202
203
204 boolean hasAnyCodes = layeredSegments.stream()
205 .anyMatch(seg -> seg.getSrcCodes() != null && !seg.getSrcCodes().isEmpty());
206
207 if (!hasAnyCodes) {
208 Log.debug(GenAi.class, "All source segments have no codes, skipping API call");
209 return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
210 }
211
212 try {
213 String batchJson = JacksonUtil.toJson(layeredSegments, false);
214 Log.debug(GenAi.class, "Gemini reinsert codes input: {}", batchJson);
215
216
217
218 Content systemInstructionContent = Content.builder()
219 .parts(Arrays.asList(Part.builder().text(CODES_REINSERTION_PROMPT).build()))
220 .build();
221
222
223 GenerateContentConfig config = GenerateContentConfig.builder()
224 .systemInstruction(systemInstructionContent)
225 .temperature(0.0f)
226 .responseMimeType("application/json")
227 .build();
228
229
230 List<Content> contents = Arrays.asList(
231 Content.builder().role("user")
232 .parts(Arrays.asList(Part.builder().text(batchJson).build())).build());
233
234
235 GenerateContentResponse response = client.models.generateContent(modelName, contents, config);
236
237
238
239 String json = response.text();
240
241 if (json == null || json.isEmpty()) {
242 Log.warn(GenAi.class, "Gemini API returned empty content");
243 return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
244 }
245
246 Log.debug(GenAi.class, "Gemini reinsert codes output: {}", json);
247
248 TypeReference<List<List<InlineCodeRef>>> ref = new TypeReference<>() {
249 };
250
251 List<List<InlineCodeRef>> trgCodesList = JacksonUtil.fromJson(json, ref);
252
253
254 if (trgCodesList == null) {
255 Log.error(GenAi.class, "Failed to parse Gemini response, returning empty results");
256 return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
257 }
258
259
260 if (trgCodesList.size() != layeredSegments.size()) {
261 Log.warn(GenAi.class,
262 "Gemini API returned wrong number of results -- expected: {}, actual: {}",
263 layeredSegments.size(), trgCodesList.size());
264
265
266 List<List<InlineCodeRef>> normalizedList = new ArrayList<>(layeredSegments.size());
267
268 for (int i = 0; i < layeredSegments.size(); i++) {
269 if (i < trgCodesList.size() && trgCodesList.get(i) != null) {
270 normalizedList.add(trgCodesList.get(i));
271
272 } else {
273 normalizedList.add(new ArrayList<>());
274 }
275 }
276
277 return normalizedList;
278 }
279
280
281 for (int i = 0; i < trgCodesList.size(); i++) {
282 if (trgCodesList.get(i) == null) {
283 trgCodesList.set(i, new ArrayList<>());
284 }
285 }
286
287 return trgCodesList;
288
289 } catch (Exception e) {
290 Log.error(GenAi.class, "Exception calling Gemini API: {}", e.getMessage(), e);
291
292
293 return Collections.nCopies(layeredSegments.size(), new ArrayList<>());
294 }
295 }
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312 public static CombinedAlignmentOutput alignParagraphsAndSentences(String modelName,
313 CombinedAlignmentInput input) throws Exception {
314
315
316 if (client == null || Util.isEmpty(modelName) || input == null) {
317 Log.warn(GenAi.class,
318 "Invalid parameters or client not initialized: modelName='{}', input={}",
319 modelName, input == null ? "null" : "present");
320
321 throw new IllegalArgumentException(
322 "Client must be initialized, and modelName and input are required");
323 }
324
325
326 if (Util.isEmpty(modelName)) {
327 modelName = Const.GEMINI_ALIGNMENT_MODEL;
328 }
329
330 String inputJson = JacksonUtil.toJson(input, false);
331 Log.debug(GenAi.class, "Gemini alignment input: {}", inputJson);
332
333 try {
334
335
336 Content systemInstructionContent = Content.builder()
337 .parts(Arrays.asList(Part.builder().text(COMBINED_ALIGNMENT_PROMPT).build()))
338 .build();
339
340
341 GenerateContentConfig config = GenerateContentConfig.builder()
342 .systemInstruction(systemInstructionContent)
343 .temperature(0.0f)
344 .responseMimeType("application/json")
345 .build();
346
347
348 List<Content> contents = Arrays.asList(
349 Content.builder().role("user")
350 .parts(Arrays.asList(Part.builder().text(inputJson).build())).build());
351
352
353 GenerateContentResponse response = client.models.generateContent(modelName, contents, config);
354
355
356 String outputJson = response.text();
357
358 if (outputJson == null || outputJson.isEmpty()) {
359 Log.warn(GenAi.class, "Gemini API returned empty content");
360 throw new Exception("Gemini API returned empty content");
361 }
362
363 Log.debug(GenAi.class, "Gemini alignment output: {}", outputJson);
364
365 CombinedAlignmentOutput output = JacksonUtil.fromJson(outputJson,
366 CombinedAlignmentOutput.class);
367
368 if (output == null || output.alignments == null) {
369 Log.error(GenAi.class, "Failed to parse Gemini response");
370 throw new Exception("Failed to parse Gemini response");
371 }
372
373 return output;
374
375 } catch (Exception e) {
376 Log.error(GenAi.class, "Exception calling Gemini API: {}", e.getMessage(), e);
377 throw e;
378 }
379 }
380
381
382
383
384 public static void done() {
385 if (client != null) {
386
387 client.close();
388 }
389 }
390
391
392
393
394
395 public static Client getClient() {
396 return client;
397 }
398 }