View Javadoc
1   package com.acumenvelocity.ath.srx;
2   
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import net.sf.okapi.common.LocaleId;
7   
8   public class SrxFileMapper {
9   
10    private static final Map<String, String> ISO_TO_SRX_MAP = new HashMap<>();
11  
12    static {
13      // Initialize the mapping from 2/3-char ISO codes to 3-char SRX codes
14      ISO_TO_SRX_MAP.put("af", "afr");
15      ISO_TO_SRX_MAP.put("ak", "aka");
16      ISO_TO_SRX_MAP.put("sq", "alb");
17      ISO_TO_SRX_MAP.put("am", "amh");
18      ISO_TO_SRX_MAP.put("ar", "ara");
19      ISO_TO_SRX_MAP.put("an", "arg");
20      ISO_TO_SRX_MAP.put("as", "asm");
21      ISO_TO_SRX_MAP.put("ast", "ast");
22      ISO_TO_SRX_MAP.put("az", "aze");
23      ISO_TO_SRX_MAP.put("eu", "baq");
24      ISO_TO_SRX_MAP.put("be", "bel");
25      ISO_TO_SRX_MAP.put("bn", "ben");
26      ISO_TO_SRX_MAP.put("bi", "bis");
27      ISO_TO_SRX_MAP.put("bs", "bos");
28      ISO_TO_SRX_MAP.put("br", "bre");
29      ISO_TO_SRX_MAP.put("bg", "bul");
30      ISO_TO_SRX_MAP.put("ca", "cat");
31      ISO_TO_SRX_MAP.put("ceb", "ceb");
32      ISO_TO_SRX_MAP.put("ckb", "ckb");
33      ISO_TO_SRX_MAP.put("cs", "cze");
34      ISO_TO_SRX_MAP.put("da", "dan");
35      ISO_TO_SRX_MAP.put("din", "din");
36      ISO_TO_SRX_MAP.put("nl", "dut");
37      ISO_TO_SRX_MAP.put("en", "eng");
38      ISO_TO_SRX_MAP.put("eo", "epo");
39      ISO_TO_SRX_MAP.put("et", "est");
40      ISO_TO_SRX_MAP.put("fa", "fas");
41      ISO_TO_SRX_MAP.put("fj", "fij");
42      ISO_TO_SRX_MAP.put("fil", "fil");
43      ISO_TO_SRX_MAP.put("fi", "fin");
44      ISO_TO_SRX_MAP.put("fr", "fre");
45      ISO_TO_SRX_MAP.put("fy", "fry");
46      ISO_TO_SRX_MAP.put("ff", "ful");
47      ISO_TO_SRX_MAP.put("de", "ger");
48      ISO_TO_SRX_MAP.put("gil", "gil");
49      ISO_TO_SRX_MAP.put("gd", "gla");
50      ISO_TO_SRX_MAP.put("ga", "gle");
51      ISO_TO_SRX_MAP.put("gl", "glg");
52      ISO_TO_SRX_MAP.put("el", "gre");
53      ISO_TO_SRX_MAP.put("gn", "grn");
54      ISO_TO_SRX_MAP.put("gu", "guj");
55      ISO_TO_SRX_MAP.put("ht", "hat");
56      ISO_TO_SRX_MAP.put("ha", "hau");
57      ISO_TO_SRX_MAP.put("haw", "haw");
58      ISO_TO_SRX_MAP.put("he", "heb");
59      ISO_TO_SRX_MAP.put("hil", "hil");
60      ISO_TO_SRX_MAP.put("hi", "hin");
61      ISO_TO_SRX_MAP.put("hmn", "hmn");
62      ISO_TO_SRX_MAP.put("hr", "hrv");
63      ISO_TO_SRX_MAP.put("hu", "hun");
64      ISO_TO_SRX_MAP.put("hy", "hye");
65      ISO_TO_SRX_MAP.put("ig", "ibo");
66      ISO_TO_SRX_MAP.put("is", "ice");
67      ISO_TO_SRX_MAP.put("ilo", "ilo");
68      ISO_TO_SRX_MAP.put("id", "ind");
69      ISO_TO_SRX_MAP.put("it", "ita");
70      ISO_TO_SRX_MAP.put("jv", "jav");
71      ISO_TO_SRX_MAP.put("ja", "jpn");
72      ISO_TO_SRX_MAP.put("kl", "kal");
73      ISO_TO_SRX_MAP.put("kn", "kan");
74      ISO_TO_SRX_MAP.put("ks", "kas");
75      ISO_TO_SRX_MAP.put("ka", "kat");
76      ISO_TO_SRX_MAP.put("kk", "kaz");
77      ISO_TO_SRX_MAP.put("kea", "kea");
78      ISO_TO_SRX_MAP.put("km", "khm");
79      ISO_TO_SRX_MAP.put("rw", "kin");
80      ISO_TO_SRX_MAP.put("ky", "kir");
81      ISO_TO_SRX_MAP.put("ko", "kor");
82      ISO_TO_SRX_MAP.put("ku", "kur");
83      ISO_TO_SRX_MAP.put("lo", "lao");
84      ISO_TO_SRX_MAP.put("la", "lat");
85      ISO_TO_SRX_MAP.put("lv", "lav");
86      ISO_TO_SRX_MAP.put("ln", "lin");
87      ISO_TO_SRX_MAP.put("lt", "lit");
88      ISO_TO_SRX_MAP.put("lb", "ltz");
89      ISO_TO_SRX_MAP.put("mk", "mac");
90      ISO_TO_SRX_MAP.put("mh", "mah");
91      ISO_TO_SRX_MAP.put("ml", "mal");
92      ISO_TO_SRX_MAP.put("mr", "mar");
93      ISO_TO_SRX_MAP.put("mg", "mlg");
94      ISO_TO_SRX_MAP.put("mt", "mlt");
95      ISO_TO_SRX_MAP.put("mi", "mri");
96      ISO_TO_SRX_MAP.put("ms", "msa");
97      ISO_TO_SRX_MAP.put("my", "mya");
98      ISO_TO_SRX_MAP.put("na", "nau");
99      ISO_TO_SRX_MAP.put("nv", "nav");
100     ISO_TO_SRX_MAP.put("ne", "nep");
101     ISO_TO_SRX_MAP.put("nn", "nno");
102     ISO_TO_SRX_MAP.put("no", "nor");
103     ISO_TO_SRX_MAP.put("nb", "nor");
104     ISO_TO_SRX_MAP.put("oc", "oci");
105     ISO_TO_SRX_MAP.put("cu", "ocs");
106     ISO_TO_SRX_MAP.put("or", "ori");
107     ISO_TO_SRX_MAP.put("om", "orm");
108     ISO_TO_SRX_MAP.put("pa", "pan");
109     ISO_TO_SRX_MAP.put("ps", "pbu");
110     ISO_TO_SRX_MAP.put("pl", "pol");
111     ISO_TO_SRX_MAP.put("pon", "pon");
112     ISO_TO_SRX_MAP.put("pt", "por");
113     ISO_TO_SRX_MAP.put("qu", "quz");
114     ISO_TO_SRX_MAP.put("ro", "rum");
115     ISO_TO_SRX_MAP.put("rn", "run");
116     ISO_TO_SRX_MAP.put("ru", "rus");
117     ISO_TO_SRX_MAP.put("sa", "san");
118     ISO_TO_SRX_MAP.put("sr", "scc");
119     ISO_TO_SRX_MAP.put("shn", "shn");
120     ISO_TO_SRX_MAP.put("si", "sin");
121     ISO_TO_SRX_MAP.put("sk", "slo");
122     ISO_TO_SRX_MAP.put("sl", "slv");
123     ISO_TO_SRX_MAP.put("sm", "smo");
124     ISO_TO_SRX_MAP.put("so", "som");
125     ISO_TO_SRX_MAP.put("st", "sot");
126     ISO_TO_SRX_MAP.put("es", "spa");
127     ISO_TO_SRX_MAP.put("su", "sun");
128     ISO_TO_SRX_MAP.put("sw", "swa");
129     ISO_TO_SRX_MAP.put("sv", "swe");
130     ISO_TO_SRX_MAP.put("ta", "tam");
131     ISO_TO_SRX_MAP.put("tt", "tat");
132     ISO_TO_SRX_MAP.put("te", "tel");
133     ISO_TO_SRX_MAP.put("tg", "tgk");
134     ISO_TO_SRX_MAP.put("tl", "tgl");
135     ISO_TO_SRX_MAP.put("th", "tha");
136     ISO_TO_SRX_MAP.put("ti", "tir");
137     ISO_TO_SRX_MAP.put("to", "ton");
138     ISO_TO_SRX_MAP.put("tpi", "tpi");
139     ISO_TO_SRX_MAP.put("tn", "tsn");
140     ISO_TO_SRX_MAP.put("tk", "tuk");
141     ISO_TO_SRX_MAP.put("tr", "tur");
142     ISO_TO_SRX_MAP.put("tvl", "tvl");
143     ISO_TO_SRX_MAP.put("tw", "twi");
144     ISO_TO_SRX_MAP.put("tzm", "tzm");
145     ISO_TO_SRX_MAP.put("uk", "ukr");
146     ISO_TO_SRX_MAP.put("ur", "urd");
147     ISO_TO_SRX_MAP.put("uz", "uzb");
148     ISO_TO_SRX_MAP.put("vi", "vie");
149     ISO_TO_SRX_MAP.put("cy", "wel");
150     ISO_TO_SRX_MAP.put("wo", "wol");
151     ISO_TO_SRX_MAP.put("xh", "xho");
152     ISO_TO_SRX_MAP.put("yi", "yid");
153     ISO_TO_SRX_MAP.put("yo", "yor");
154     ISO_TO_SRX_MAP.put("zh", "zho");
155     ISO_TO_SRX_MAP.put("zu", "zul");
156 
157     // Special mappings
158     ISO_TO_SRX_MAP.put("ihm", "him"); // srx special case
159   }
160 
161   /**
162    * Returns the SRX file name for a given ISO language code.
163    * Supports both 2-character and 3-character ISO codes, as well as
164    * language-region combinations (e.g., "en-US", "zh-CN").
165    * 
166    * @param isoCode The ISO language code (e.g., "en", "en-US", "zh-CN")
167    * @return The SRX file name (e.g., "eng.srx", "eng-US.srx", "zho-CN.srx"),
168    *         or null if no mapping exists
169    */
170   public static String getSrxFileName(String isoCode) {
171     if (isoCode == null || isoCode.isEmpty()) {
172       return null;
173     }
174 
175     // Normalize the input (trim and convert to lowercase)
176     String normalized = isoCode.trim().toLowerCase();
177 
178     // Check if it contains a region code (e.g., "en-US", "zh-CN")
179     if (normalized.contains("-")) {
180       String[] parts = normalized.split("-", 2);
181       String languageCode = parts[0];
182       String regionCode = parts[1].toUpperCase();
183 
184       // Map the language code to 3-char code
185       String srxLangCode = ISO_TO_SRX_MAP.get(languageCode);
186       
187       if (srxLangCode != null) {
188         return srxLangCode + "-" + regionCode + ".srx";
189       }
190       
191       return null;
192     }
193 
194     // Simple language code without region
195     String srxCode = ISO_TO_SRX_MAP.get(normalized);
196     
197     if (srxCode != null) {
198       return srxCode + ".srx";
199     }
200 
201     // If it's already a 3-char code, return as-is with .srx extension
202     if (normalized.length() == 3) {
203       return normalized + ".srx";
204     }
205 
206     return null;
207   }
208 
209   /**
210    * Convenience method that works with Okapi LocaleId objects.
211    * 
212    * @param localeId The Okapi LocaleId object
213    * @return The SRX file name, or null if no mapping exists
214    */
215   public static String getSrxFileName(LocaleId localeId) {
216     if (localeId == null) {
217       return null;
218     }
219 
220     // Use toString() to get the BCP-47 tag from LocaleId
221     String localeString = localeId.toString();
222     return getSrxFileName(localeString);
223   }
224 
225   // Example usage
226   public static void main(String[] args) {
227     // Test cases
228     System.out.println(getSrxFileName("en")); // eng.srx
229     System.out.println(getSrxFileName("en-US")); // eng-US.srx
230     System.out.println(getSrxFileName("zh")); // zho.srx
231     System.out.println(getSrxFileName("zh-CN")); // zho-CN.srx
232     System.out.println(getSrxFileName("fr")); // fre.srx
233     System.out.println(getSrxFileName("fr-CA")); // fre-CA.srx
234     System.out.println(getSrxFileName("ara")); // ara.srx (already 3-char)
235     System.out.println(getSrxFileName("invalid")); // null
236   }
237 }