src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerMap.java (1865B) - raw
package au.com.miskinhill.search.analysis;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Logger;

/**
 * Returns an analyzer based on the language of the text being analysed. The
 * default sub-analyzer is given in the constructor; this is used when the
 * language is not specified, or when a language is specified for which we have
 * no specific sub-analyzer. Use {@link #addAnalyzer(String, Analyzer)} to add a
 * sub-analyzer for a specific language.
 * <p>
 * Note that languages are matched by prefix, so that if a sub-analyzer has been
 * added for "en" (but not "en-AU"), it will be returned for "en-AU".
 */
public class PerLanguageAnalyzerMap {

    private static final Logger LOG = Logger.getLogger(PerLanguageAnalyzerMap.class.getName());

    /** Language-keyed lookup structure, constructed with the default analyzer. */
    protected Trie<Analyzer> analyzers;

    /**
     * The analyzer passed to the constructor. Kept in its own field so that
     * {@link #getAnalyzer(String)} does not depend on the ordering of
     * {@link #analyzersList}.
     */
    private final Analyzer defaultAnalyzer;

    /** Every registered analyzer, default first; easier than traversing the trie. */
    private final List<Analyzer> analyzersList = new ArrayList<Analyzer>();

    /**
     * Creates a map that initially contains only the default analyzer.
     *
     * @param defaultAnalyzer analyzer handed to the underlying trie as its default
     */
    public PerLanguageAnalyzerMap(Analyzer defaultAnalyzer) {
        this.defaultAnalyzer = defaultAnalyzer;
        analyzers = new Trie<Analyzer>(defaultAnalyzer);
        analyzersList.add(defaultAnalyzer);
    }

    /**
     * Registers a sub-analyzer for the given language.
     *
     * @param language language identifier used as the trie key
     * @param analyzer analyzer to associate with that language
     */
    public void addAnalyzer(String language, Analyzer analyzer) {
        analyzers.put(language, analyzer);
        analyzersList.add(analyzer);
    }

    /**
     * Returns a list of all sub-analyzers in this analyzer (including the default one).
     * <p>
     * The returned list is a read-only view: it reflects analyzers added later
     * through {@link #addAnalyzer(String, Analyzer)}, but callers cannot modify
     * this object's internal bookkeeping through it.
     */
    public List<Analyzer> getAnalyzers() {
        return Collections.unmodifiableList(analyzersList);
    }

    /**
     * Returns an appropriate analyzer for the given language. A warning is
     * logged whenever the analyzer found is the default one.
     *
     * @param language ISO-639 language identifier; {@code null} is treated as
     *            the empty string
     */
    // XXX TODO use java.util.Locale eventually (maybe with Locale#forLanguageTag added in 1.7?)
    public Analyzer getAnalyzer(String language) {
        if (language == null) {
            language = "";
        }
        Analyzer a = analyzers.get(language);
        // Identity comparison is intentional: we want to know whether the
        // lookup handed back the very instance supplied as the default.
        if (a == defaultAnalyzer) {
            LOG.warning("Using default analyzer for language " + language);
        }
        return a;
    }

}