1 package net.sf.okapi.steps.llmsentencealigner;
2
3 import com.acumenvelocity.ath.common.Const;
4
5 import net.sf.okapi.common.ParametersDescription;
6 import net.sf.okapi.common.StringParameters;
7
8 public class LlmSentenceAlignerParameters extends StringParameters {
9
10 private static final String ALIGNMENT_MODEL_NAME = "alignmentModelName";
11 private static final String MAX_PARAGRAPHS_PER_REQUEST = "maxParagraphsPerRequest";
12 private static final String LOG_ALIGNMENT_DETAILS = "logAlignmentDetails";
13
14 private static final String SEGMENT_SOURCE = "segmentSource";
15 private static final String USE_CUSTOM_SOURCE_RULES = "useCustomSourceRules";
16 private static final String CUSTOM_SOURCE_RULES_PATH = "customSourceRulesPath";
17 private static final String SEGMENT_TARGET = "segmentTarget";
18 private static final String USE_CUSTOM_TARGET_RULES = "useCustomTargetRules";
19 private static final String CUSTOM_TARGET_RULES_PATH = "customTargetRulesPath";
20 private static final String COLLAPSE_WHITESPACE = "collapseWhitespace";
21
22 private static final String USE_CODES_REINSERTION_MODEL = "useCodesReinsertionModel";
23 private static final String CODES_REINSERTION_MODEL_NAME = "codesReinsertionModelName";
24
25 public String getAlignmentModelName() {
26 return getString(ALIGNMENT_MODEL_NAME);
27 }
28
29 public void setAlignmentModelName(String alignmentModelName) {
30 setString(ALIGNMENT_MODEL_NAME, alignmentModelName);
31 }
32
33 public int getMaxParagraphsPerRequest() {
34 return getInteger(MAX_PARAGRAPHS_PER_REQUEST);
35 }
36
37 public void setMaxParagraphsPerRequest(int maxParagraphsPerRequest) {
38 setInteger(MAX_PARAGRAPHS_PER_REQUEST, maxParagraphsPerRequest);
39 }
40
41 public boolean isLogAlignmentDetails() {
42 return getBoolean(LOG_ALIGNMENT_DETAILS);
43 }
44
45 public void setLogAlignmentDetails(boolean logAlignmentDetails) {
46 setBoolean(LOG_ALIGNMENT_DETAILS, logAlignmentDetails);
47 }
48
49 public boolean isSegmentSource() {
50 return getBoolean(SEGMENT_SOURCE);
51 }
52
53 public void setSegmentSource(boolean segmentSource) {
54 setBoolean(SEGMENT_SOURCE, segmentSource);
55 }
56
57 public boolean isUseCustomSourceRules() {
58 return getBoolean(USE_CUSTOM_SOURCE_RULES);
59 }
60
61 public void setUseCustomSourceRules(boolean useCustomSourceRules) {
62 setBoolean(USE_CUSTOM_SOURCE_RULES, useCustomSourceRules);
63 }
64
65 public String getCustomSourceRulesPath() {
66 return getString(CUSTOM_SOURCE_RULES_PATH);
67 }
68
69 public void setCustomSourceRulesPath(String customSourceRulesPath) {
70 setString(CUSTOM_SOURCE_RULES_PATH, customSourceRulesPath);
71 }
72
73 public boolean isSegmentTarget() {
74 return getBoolean(SEGMENT_TARGET);
75 }
76
77 public void setSegmentTarget(boolean segmentTarget) {
78 setBoolean(SEGMENT_TARGET, segmentTarget);
79 }
80
81 public boolean isUseCustomTargetRules() {
82 return getBoolean(USE_CUSTOM_TARGET_RULES);
83 }
84
85 public void setUseCustomTargetRules(boolean useCustomTargetRules) {
86 setBoolean(USE_CUSTOM_TARGET_RULES, useCustomTargetRules);
87 }
88
89 public String getCustomTargetRulesPath() {
90 return getString(CUSTOM_TARGET_RULES_PATH);
91 }
92
93 public void setCustomTargetRulesPath(String customTargetRulesPath) {
94 setString(CUSTOM_TARGET_RULES_PATH, customTargetRulesPath);
95 }
96
97 public boolean isCollapseWhitespace() {
98 return getBoolean(COLLAPSE_WHITESPACE);
99 }
100
101 public void setCollapseWhitespace(boolean collapseWhitespace) {
102 setBoolean(COLLAPSE_WHITESPACE, collapseWhitespace);
103 }
104
105 public boolean isUseCodesReinsertionModel() {
106 return getBoolean(USE_CODES_REINSERTION_MODEL);
107 }
108
109 public void setUseCodesReinsertionModel(boolean useCodesReinsertionModel) {
110 setBoolean(USE_CODES_REINSERTION_MODEL, useCodesReinsertionModel);
111 }
112
113 public String getCodesReinsertionModelName() {
114 return getString(CODES_REINSERTION_MODEL_NAME);
115 }
116
117 public void setCodesReinsertionModelName(String modelName) {
118 setString(CODES_REINSERTION_MODEL_NAME, modelName);
119 }
120
121 @Override
122 public void reset() {
123 super.reset();
124
125 setAlignmentModelName(Const.GEMINI_ALIGNMENT_MODEL);
126 setMaxParagraphsPerRequest(1000);
127 setLogAlignmentDetails(true);
128
129 setSegmentSource(true);
130 setUseCustomSourceRules(false);
131 setCustomSourceRulesPath("");
132
133 setSegmentTarget(true);
134 setUseCustomTargetRules(false);
135 setCustomTargetRulesPath("");
136 setCollapseWhitespace(false);
137
138 setUseCodesReinsertionModel(false);
139 setCodesReinsertionModelName(Const.GEMINI_CODE_REINSERTION_MODEL);
140 }
141
142 @Override
143 public ParametersDescription getParametersDescription() {
144 ParametersDescription desc = new ParametersDescription(this);
145
146 desc.add(ALIGNMENT_MODEL_NAME,
147 "LLM model to use for alignment",
148 "The Gemini AI model name (e.g., 'gpt-4o-mini', 'gpt-4o')");
149
150 desc.add(MAX_PARAGRAPHS_PER_REQUEST,
151 "Maximum paragraphs per LLM request",
152 "For very large documents, split into batches of this many paragraphs to avoid token limits.");
153
154 desc.add(LOG_ALIGNMENT_DETAILS,
155 "Log detailed alignment information",
156 "If true, logs detailed information about alignments (useful for debugging but verbose).");
157
158 desc.add(SEGMENT_SOURCE,
159 "Segment the source content (overriding possible existing segmentation)",
160 null);
161
162 desc.add(USE_CUSTOM_SOURCE_RULES,
163 "Use custom source segmentation rules (instead of the default ones)",
164 null);
165
166 desc.add(CUSTOM_SOURCE_RULES_PATH,
167 "SRX path for the source",
168 "Full path of the SRX document to use for the source");
169
170 desc.add(SEGMENT_TARGET,
171 "Segment the target content (overriding possible existing segmentation)",
172 null);
173
174 desc.add(USE_CUSTOM_TARGET_RULES,
175 "Use custom target segmentation rules (instead of the default ones)",
176 null);
177
178 desc.add(CUSTOM_TARGET_RULES_PATH,
179 "SRX path for the target",
180 "Full path of the SRX document to use for the target");
181
182 desc.add(COLLAPSE_WHITESPACE,
183 "Collapse whitespace",
184 "Collapse whitespace (space, newline etc.) to a single space before segmentation and alignment");
185
186 desc.add(USE_CODES_REINSERTION_MODEL,
187 "Use LLM-based target codes reinsertion",
188 "True to use LLM-based target codes reinsertion, false to have Okapi reinsert. " +
189 "This option has effect only when plainTextMT is true.");
190
191 desc.add(CODES_REINSERTION_MODEL_NAME,
192 "Gemini AI model name for target codes reinsertion",
193 "Gemini model to use for reinserting inline codes in the target text.");
194
195 return desc;
196 }
197 }