lucene-multilingual

Multilingual enhancements for the Lucene text search library
git clone https://code.djc.id.au/git/lucene-multilingual/

src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerMap.java (1865B) - raw

      1 package au.com.miskinhill.search.analysis;
      2 
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Logger;
      6 
      7 /**
      8  * Returns an analyzer according to based on the language of the text
      9  * being analysed. The default sub-analyzer is given in the constructor; this is
     10  * used when the language is not specified, or when a language is specified for
     11  * which we have no specific sub-analyzer. Use
     12  * {@link #addAnalyzer(String, Analyzer)} to add a sub-analyzer for a specific
     13  * language.
     14  * <p>
     15  * Note that languages are matched by prefix, so that if a sub-analyzer has been
     16  * added for "en" (but not "en-AU"), it will be returned for "en-AU".
     17  */
     18 public class PerLanguageAnalyzerMap {
     19     
     20     private static final Logger LOG = Logger.getLogger(PerLanguageAnalyzerMap.class.getName());
     21 
     22 	protected Trie<Analyzer> analyzers;
     23 	private List<Analyzer> analyzersList = new ArrayList<Analyzer>(); // easier than traversing the trie
     24 	
     25 	public PerLanguageAnalyzerMap(Analyzer defaultAnalyzer) {
     26 		analyzers = new Trie<Analyzer>(defaultAnalyzer);
     27 		analyzersList.add(defaultAnalyzer);
     28 	}
     29 	
     30 	public void addAnalyzer(String language, Analyzer analyzer) {
     31 		analyzers.put(language, analyzer);
     32 		analyzersList.add(analyzer);
     33 	}
     34 	
     35 	/**
     36 	 * Returns a list of all sub-analyzers in this analyzer (including the default one).
     37 	 */
     38 	public List<Analyzer> getAnalyzers() {
     39 		return analyzersList;
     40 	}
     41 	
     42 	/**
     43 	 * Returns an appropriate analyzer for the given language.
     44 	 * 
     45 	 * @param language ISO-639 language identifier
     46 	 */
     47 	// XXX TODO use java.util.Locale eventually (maybe with Locale#forLanguageTag added in 1.7?)
     48 	public Analyzer getAnalyzer(String language) {
     49 		if (language == null) language = "";
     50 		Analyzer a = analyzers.get(language);
     51 		if (a == analyzersList.get(0))
     52 		    LOG.warning("Using default analyzer for language " + language);
     53 		return a;
     54 	}
     55 
     56 }