1 package net.sf.okapi.steps.sentencealigner;
2
3 import static org.junit.jupiter.api.Assertions.*;
4
5 import java.util.ArrayList;
6 import java.util.List;
7
8 import org.junit.jupiter.api.BeforeEach;
9 import org.junit.jupiter.api.Test;
10
11 import net.sf.okapi.common.LocaleId;
12 import net.sf.okapi.common.resource.ISegments;
13 import net.sf.okapi.common.resource.ITextUnit;
14 import net.sf.okapi.common.resource.Segment;
15 import net.sf.okapi.common.resource.TextContainer;
16 import net.sf.okapi.common.resource.TextFragment;
17 import net.sf.okapi.common.resource.TextPart;
18 import net.sf.okapi.common.resource.TextUnit;
19 import net.sf.okapi.steps.gcaligner.AlignmentScorer;
20
21 class TestSentenceAligner {
22
23 private SentenceAligner aligner;
24 private final LocaleId srcLoc = LocaleId.fromString("en");
25 private final LocaleId trgLoc = LocaleId.fromString("ru");
26
27 @BeforeEach
28 void setUp() {
29 List<AlignmentScorer<Segment>> scorers = new ArrayList<>();
30 scorers.add(new FlexibleScorer());
31 aligner = new SentenceAligner(scorers);
32 }
33
34 @Test
35 void test1to1Alignment() {
36 ITextUnit src = createTextUnit("Hello world.", "s1");
37 ITextUnit trg = createTextUnit("Привет мир.", "t1");
38
39 ITextUnit result = aligner.align(src, trg, srcLoc, trgLoc, false);
40
41 ISegments srcSegs = result.getSource().getSegments();
42 ISegments trgSegs = result.getTarget(trgLoc).getSegments();
43
44 assertEquals(1, srcSegs.count());
45 assertEquals(1, trgSegs.count());
46
47 Segment srcSeg = srcSegs.get(0);
48 Segment trgSeg = trgSegs.get(0);
49
50 assertEquals("s1", srcSeg.getId());
51 assertEquals("s1", trgSeg.getId());
52 assertEquals("Hello world.", srcSeg.getContent().toText());
53 assertEquals("Привет мир.", trgSeg.getContent().toText());
54 }
55
56 @Test
57 void test1to2Alignment() {
58 ITextUnit src = createTextUnit("Click OK to continue.", "s1");
59 ITextUnit trg = createTextUnit(
60 "Нажмите «ОК» для продолжения. " +
61 "После индикатора установка завершится.",
62 "t1", "t2");
63
64 ITextUnit result = aligner.align(src, trg, srcLoc, trgLoc, false);
65
66 ISegments srcSegs = result.getSource().getSegments();
67 ISegments trgSegs = result.getTarget(trgLoc).getSegments();
68
69 assertEquals(1, srcSegs.count());
70 assertEquals(1, trgSegs.count());
71
72 Segment srcSeg = srcSegs.get(0);
73 Segment trgSeg = trgSegs.get(0);
74
75 assertEquals("s1", srcSeg.getId());
76 assertEquals("s1", trgSeg.getId());
77
78 assertEquals("Click OK to continue.", srcSeg.getContent().toText());
79
80 String trgContent = trgSeg.getContent().toText();
81 assertTrue(trgContent.contains("Нажмите") && trgContent.contains("После"));
82 }
83
84 @Test
85 void test2to1Alignment() {
86 ITextUnit src = createTextUnit("First sentence. Second sentence.", "s1", "s2");
87 ITextUnit trg = createTextUnit("Первое и второе предложение.", "t1");
88
89 ITextUnit result = aligner.align(src, trg, srcLoc, trgLoc, false);
90
91 ISegments srcSegs = result.getSource().getSegments();
92 ISegments trgSegs = result.getTarget(trgLoc).getSegments();
93
94 assertEquals(1, srcSegs.count());
95 assertEquals(1, trgSegs.count());
96
97 Segment srcSeg = srcSegs.get(0);
98 Segment trgSeg = trgSegs.get(0);
99
100 assertEquals("s1", srcSeg.getId());
101 assertEquals("s1", trgSeg.getId());
102
103 String srcContent = srcSeg.getContent().toText();
104 assertTrue(srcContent.contains("First") && srcContent.contains("Second"));
105 assertEquals("Первое и второе предложение.", trgSeg.getContent().toText());
106 }
107
108 @Test
109 void test2to2Alignment() {
110 ITextUnit src = createTextUnit("A. B.", "s1", "s2");
111 ITextUnit trg = createTextUnit("А. Б.", "t1", "t2");
112
113 ITextUnit result = aligner.align(src, trg, srcLoc, trgLoc, false);
114
115 ISegments srcSegs = result.getSource().getSegments();
116 ISegments trgSegs = result.getTarget(trgLoc).getSegments();
117
118 assertEquals(2, srcSegs.count());
119 assertEquals(2, trgSegs.count());
120
121 assertEquals("s1", srcSegs.get(0).getId());
122 assertEquals("s1", trgSegs.get(0).getId());
123 assertEquals("s2", srcSegs.get(1).getId());
124 assertEquals("s2", trgSegs.get(1).getId());
125 }
126
127 @Test
128 void testPreserveInterSegmentSpace() {
129 ITextUnit src = createTextUnit("One. Two.", "s1", "s2");
130 ITextUnit trg = createTextUnit("Один. Два.", "t1", "t2");
131
132 ITextUnit result = aligner.align(src, trg, srcLoc, trgLoc, false);
133
134 List<TextPart> parts = result.getTarget(trgLoc).getParts();
135
136 boolean foundSpace = false;
137 for (TextPart part : parts) {
138 if (!part.isSegment() && " ".equals(part.getContent().toText())) {
139 foundSpace = true;
140 break;
141 }
142 }
143
144 assertTrue(foundSpace, "Inter-segment space should be preserved");
145 }
146
147 @Test
148 void testNoSourceToTargetContentCopy() {
149 ITextUnit src = createTextUnit("Source only.", "s1");
150 ITextUnit trg = createTextUnit("Target only.", "t1");
151
152 ITextUnit result = aligner.align(src, trg, srcLoc, trgLoc, false);
153
154 String srcText = result.getSource().toString();
155 String trgText = result.getTarget(trgLoc).toString();
156
157 assertTrue(srcText.contains("Source only."));
158 assertTrue(trgText.contains("Target only."));
159 assertFalse(trgText.contains("Source only."));
160 assertFalse(srcText.contains("Target only."));
161 }
162
163 private static class FlexibleScorer implements AlignmentScorer<Segment> {
164 @SuppressWarnings("unused")
165 private LocaleId srcLoc, trgLoc;
166
167 @Override
168 public void setLocales(LocaleId sourceLocale, LocaleId targetLocale) {
169 this.srcLoc = sourceLocale;
170 this.trgLoc = targetLocale;
171 }
172
173 @Override
174 public int substitutionScore(Segment s, Segment t) {
175 if (s.getContent().toText().isEmpty() || t.getContent().toText().isEmpty()) {
176 return 0;
177 }
178 return 100;
179 }
180
181 @Override
182 public int deletionScore(Segment s) {
183 return -50;
184 }
185
186 @Override
187 public int insertionScore(Segment t) {
188 return -50;
189 }
190
191 @Override
192 public int contractionScore(Segment s1, Segment s2, Segment t) {
193 if (s1.getContent().toText().isEmpty() ||
194 s2.getContent().toText().isEmpty() ||
195 t.getContent().toText().isEmpty()) {
196 return 0;
197 }
198 return 90;
199 }
200
201 @Override
202 public int expansionScore(Segment s, Segment t1, Segment t2) {
203 if (s.getContent().toText().isEmpty() ||
204 t1.getContent().toText().isEmpty() ||
205 t2.getContent().toText().isEmpty()) {
206 return 0;
207 }
208 return 90;
209 }
210
211 @Override
212 public int meldingScore(Segment s1, Segment s2, Segment t1, Segment t2) {
213 if (s1.getContent().toText().isEmpty() ||
214 s2.getContent().toText().isEmpty() ||
215 t1.getContent().toText().isEmpty() ||
216 t2.getContent().toText().isEmpty()) {
217 return 0;
218 }
219 return 180;
220 }
221 }
222
223 private ITextUnit createTextUnit(String text, String... segIds) {
224 ITextUnit tu = new TextUnit("tu1");
225 TextContainer tc = tu.getSource();
226
227 String[] sentences = text.split("(?<=[.!?])\\s+");
228
229 tc.append(new TextPart(""));
230
231 int textPos = 0;
232
233 for (int i = 0; i < sentences.length; i++) {
234 String sentence = sentences[i];
235
236 int start = text.indexOf(sentence, textPos);
237 int end = start + sentence.length();
238
239 Segment seg = new Segment(segIds[i], new TextFragment(sentence));
240 tc.getSegments().append(seg);
241
242 if (i < sentences.length - 1) {
243 String interSegment = text.substring(end, text.indexOf(sentences[i + 1], end));
244 tc.append(new TextPart(interSegment));
245 } else {
246 tc.append(new TextPart(""));
247 }
248
249 textPos = end;
250 }
251
252 tc.setHasBeenSegmentedFlag(true);
253 return tu;
254 }
255 }