1 package com.acumenvelocity.ath.common;
2
3 import net.sf.okapi.common.LocaleId;
4 import org.junit.jupiter.api.Test;
5 import org.junit.jupiter.params.ParameterizedTest;
6 import org.junit.jupiter.params.provider.Arguments;
7 import org.junit.jupiter.params.provider.MethodSource;
8
9 import java.util.List;
10 import java.util.stream.Stream;
11
12 import static org.junit.jupiter.api.Assertions.*;
13
14 class TestOkapiWordBreaker {
15
16 private static final LocaleId EN = LocaleId.fromString("en");
17
18 @ParameterizedTest(name = "{0}")
19 @MethodSource("wordBreakProvider")
20 void wordBreaksCorrectly(String description, String input, LocaleId locale,
21 List<Integer> expected) {
22 List<Integer> result = OkapiWordBreaker.getWordBreakPositions(input, locale);
23 assertEquals(expected, result, "Failed for: " + description);
24 }
25
26 static Stream<Arguments> wordBreakProvider() {
27 return Stream.of(
28
29 Arguments.of("Hello, world!", "Hello, world!", EN,
30 List.of(0, 5, 6, 7, 12, 13)),
31
32
33 Arguments.of("Multiple spaces", "Hi there friend", EN,
34 List.of(0, 2, 3, 4, 9, 10, 11, 17)),
35
36
37 Arguments.of("Full sentence", "Hello, world! This is a test.", EN,
38 List.of(0, 5, 6, 7, 12, 13, 14, 18, 19, 21, 22, 23, 24, 28, 29)),
39
40
41 Arguments.of("Single word", "Hello", EN,
42 List.of(0, 5)),
43
44
45 Arguments.of("Multiple punctuation", "What?!", EN,
46 List.of(0, 4, 5, 6)),
47
48
49 Arguments.of("Punctuation with spaces", "Yes , no .", EN,
50 List.of(0, 3, 4, 5, 6, 8, 9, 10))
51 );
52 }
53
54 @Test
55 void emptyTextReturnsEmptyList() {
56 assertTrue(OkapiWordBreaker.getWordBreakPositions("", EN).isEmpty());
57 assertTrue(OkapiWordBreaker.getWordBreakPositions(null, EN).isEmpty());
58 }
59
60 @Test
61 void visualizeBreaks() {
62 String text = "Hello, world!";
63 List<Integer> breaks = OkapiWordBreaker.getWordBreakPositions(text, EN);
64
65 System.out.println("Text: \"" + text + "\"");
66 System.out.println("Positions: " + breaks);
67 System.out.println("\nVisualization:");
68
69 for (int pos : breaks) {
70 String before = text.substring(0, pos);
71 String after = pos < text.length() ? text.substring(pos) : "";
72 System.out.printf("Position %2d: \"%s|%s\"%n", pos, before, after);
73 }
74 }
75 }