src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerMap.java (1865B) - raw
1 package au.com.miskinhill.search.analysis;
2
3 import java.util.ArrayList;
4 import java.util.List;
5 import java.util.logging.Logger;
6
7 /**
8 * Returns an analyzer according to based on the language of the text
9 * being analysed. The default sub-analyzer is given in the constructor; this is
10 * used when the language is not specified, or when a language is specified for
11 * which we have no specific sub-analyzer. Use
12 * {@link #addAnalyzer(String, Analyzer)} to add a sub-analyzer for a specific
13 * language.
14 * <p>
15 * Note that languages are matched by prefix, so that if a sub-analyzer has been
16 * added for "en" (but not "en-AU"), it will be returned for "en-AU".
17 */
18 public class PerLanguageAnalyzerMap {
19
20 private static final Logger LOG = Logger.getLogger(PerLanguageAnalyzerMap.class.getName());
21
22 protected Trie<Analyzer> analyzers;
23 private List<Analyzer> analyzersList = new ArrayList<Analyzer>(); // easier than traversing the trie
24
25 public PerLanguageAnalyzerMap(Analyzer defaultAnalyzer) {
26 analyzers = new Trie<Analyzer>(defaultAnalyzer);
27 analyzersList.add(defaultAnalyzer);
28 }
29
30 public void addAnalyzer(String language, Analyzer analyzer) {
31 analyzers.put(language, analyzer);
32 analyzersList.add(analyzer);
33 }
34
35 /**
36 * Returns a list of all sub-analyzers in this analyzer (including the default one).
37 */
38 public List<Analyzer> getAnalyzers() {
39 return analyzersList;
40 }
41
42 /**
43 * Returns an appropriate analyzer for the given language.
44 *
45 * @param language ISO-639 language identifier
46 */
47 // XXX TODO use java.util.Locale eventually (maybe with Locale#forLanguageTag added in 1.7?)
48 public Analyzer getAnalyzer(String language) {
49 if (language == null) language = "";
50 Analyzer a = analyzers.get(language);
51 if (a == analyzersList.get(0))
52 LOG.warning("Using default analyzer for language " + language);
53 return a;
54 }
55
56 }