View Javadoc
1   package com.acumenvelocity.ath.common;
2   
3   import net.sf.okapi.common.LocaleId;
4   import org.junit.jupiter.api.Test;
5   import org.junit.jupiter.params.ParameterizedTest;
6   import org.junit.jupiter.params.provider.Arguments;
7   import org.junit.jupiter.params.provider.MethodSource;
8   
9   import java.util.List;
10  import java.util.stream.Stream;
11  
12  import static org.junit.jupiter.api.Assertions.*;
13  
14  class TestOkapiWordBreaker {
15  
16    private static final LocaleId EN = LocaleId.fromString("en");
17  
18    @ParameterizedTest(name = "{0}")
19    @MethodSource("wordBreakProvider")
20    void wordBreaksCorrectly(String description, String input, LocaleId locale,
21        List<Integer> expected) {
22      List<Integer> result = OkapiWordBreaker.getWordBreakPositions(input, locale);
23      assertEquals(expected, result, "Failed for: " + description);
24    }
25  
26    static Stream<Arguments> wordBreakProvider() {
27      return Stream.of(
28          // Original requirement
29          Arguments.of("Hello, world!", "Hello, world!", EN,
30              List.of(0, 5, 6, 7, 12, 13)),
31  
32          // Multiple spaces (each space gets a boundary)
33          Arguments.of("Multiple spaces", "Hi  there  friend", EN,
34              List.of(0, 2, 3, 4, 9, 10, 11, 17)),
35  
36          // Full sentence
37          Arguments.of("Full sentence", "Hello, world! This is a test.", EN,
38              List.of(0, 5, 6, 7, 12, 13, 14, 18, 19, 21, 22, 23, 24, 28, 29)),
39  
40          // Just a word
41          Arguments.of("Single word", "Hello", EN,
42              List.of(0, 5)),
43  
44          // Multiple punctuation
45          Arguments.of("Multiple punctuation", "What?!", EN,
46              List.of(0, 4, 5, 6)),
47  
48          // Punctuation with spaces
49          Arguments.of("Punctuation with spaces", "Yes , no .", EN,
50              List.of(0, 3, 4, 5, 6, 8, 9, 10))
51      );
52    }
53  
54    @Test
55    void emptyTextReturnsEmptyList() {
56      assertTrue(OkapiWordBreaker.getWordBreakPositions("", EN).isEmpty());
57      assertTrue(OkapiWordBreaker.getWordBreakPositions(null, EN).isEmpty());
58    }
59  
60    @Test
61    void visualizeBreaks() {
62      String text = "Hello, world!";
63      List<Integer> breaks = OkapiWordBreaker.getWordBreakPositions(text, EN);
64      
65      System.out.println("Text: \"" + text + "\"");
66      System.out.println("Positions: " + breaks);
67      System.out.println("\nVisualization:");
68      
69      for (int pos : breaks) {
70        String before = text.substring(0, pos);
71        String after = pos < text.length() ? text.substring(pos) : "";
72        System.out.printf("Position %2d: \"%s|%s\"%n", pos, before, after);
73      }
74    }
75  }