lucene-multilingual

Multilingual enhancements for the Lucene text search library
git clone https://code.djc.id.au/git/lucene-multilingual/
commit 8112f747c63524b334a5069d8c5b4c07d9687dd0
parent 5f32b83b0259ad497bfae0e9eb8b4bb47cfd10e8
Author: Dan Callaghan <djc@djc.id.au>
Date:   Wed, 31 Dec 2008 23:03:21 +1000

more oops

--HG--
extra : convert_revision : 43e12581b619d830f570c2d95af139d70ba3abd9

Diffstat:
M pom.xml | 5 +++++
M src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapper.java | 1 +
A src/test/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapperUnitTest.java | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/pom.xml b/pom.xml
@@ -25,5 +25,10 @@
   		<artifactId>lucene-core</artifactId>
   		<version>2.4.0</version>
   	</dependency>
+  	<dependency>
+  		<groupId>org.easymock</groupId>
+  		<artifactId>easymockclassextension</artifactId>
+  		<version>2.4</version>
+  	</dependency>
   </dependencies>
 </project>
diff --git a/src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapper.java b/src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapper.java
@@ -49,6 +49,7 @@ public class PerLanguageAnalyzerWrapper extends Analyzer {
 	}
 	
 	public TokenStream tokenStream(String language, String fieldName, Reader reader) {
+		if (language == null) language = "";
 		Analyzer a = analyzers.get(language);
 		return a.tokenStream(fieldName, reader);
 	}
diff --git a/src/test/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapperUnitTest.java b/src/test/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapperUnitTest.java
@@ -0,0 +1,81 @@
+package au.com.miskinhill.search.analysis;
+
+import static org.easymock.classextension.EasyMock.*;
+import static org.junit.Assert.assertThat;
+import static org.junit.matchers.JUnitMatchers.hasItems;
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.junit.Test;
+
+public class PerLanguageAnalyzerWrapperUnitTest {
+	// Mocks must be passed to verify(...) explicitly: a bare verify() checks no mock at all.
+	private final Analyzer defaultAnalyzer = createMock(Analyzer.class);
+	private final Analyzer enAnalyzer = createMock(Analyzer.class);
+	private final Analyzer ruAnalyzer = createMock(Analyzer.class);
+
+	@Test
+	public void testGetAnalyzers() {
+		PerLanguageAnalyzerWrapper plaw = 
+				new PerLanguageAnalyzerWrapper(defaultAnalyzer);
+		plaw.addAnalyzer("en", enAnalyzer);
+		plaw.addAnalyzer("ru", ruAnalyzer);
+		assertThat(plaw.getAnalyzers(), 
+				hasItems(defaultAnalyzer, enAnalyzer, ruAnalyzer));
+	}
+	// No language argument: only the default analyzer is expected to be asked for a stream.
+	@Test
+	public void testTokenStreamNoLanguage() {
+		expect(defaultAnalyzer.tokenStream(
+				isA(String.class), isA(Reader.class))).andReturn(null);
+		replay(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+		PerLanguageAnalyzerWrapper plaw = 
+				new PerLanguageAnalyzerWrapper(defaultAnalyzer);
+		plaw.addAnalyzer("en", enAnalyzer);
+		plaw.addAnalyzer("ru", ruAnalyzer);
+		plaw.tokenStream("asdf", new StringReader(""));
+		verify(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+	}
+	// Empty-string language: only the default analyzer is expected to be asked for a stream.
+	@Test
+	public void testTokenStreamEmptyLanguage() {
+		expect(defaultAnalyzer.tokenStream(
+				isA(String.class), isA(Reader.class))).andReturn(null);
+		replay(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+		PerLanguageAnalyzerWrapper plaw = 
+				new PerLanguageAnalyzerWrapper(defaultAnalyzer);
+		plaw.addAnalyzer("en", enAnalyzer);
+		plaw.addAnalyzer("ru", ruAnalyzer);
+		plaw.tokenStream("", "asdf", new StringReader(""));
+		verify(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+	}
+	// Null language: only the default analyzer is expected to be asked for a stream.
+	@Test
+	public void testTokenStreamNullLanguage() {
+		expect(defaultAnalyzer.tokenStream(
+				isA(String.class), isA(Reader.class))).andReturn(null);
+		replay(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+		PerLanguageAnalyzerWrapper plaw = 
+				new PerLanguageAnalyzerWrapper(defaultAnalyzer);
+		plaw.addAnalyzer("en", enAnalyzer);
+		plaw.addAnalyzer("ru", ruAnalyzer);
+		plaw.tokenStream(null, "asdf", new StringReader(""));
+		verify(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+	}
+	// Language "en": only the analyzer registered under "en" is expected to be asked for a stream.
+	@Test
+	public void testTokenStreamSomeLanguage() {
+		expect(enAnalyzer.tokenStream(
+				isA(String.class), isA(Reader.class))).andReturn(null);
+		replay(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+		PerLanguageAnalyzerWrapper plaw = 
+				new PerLanguageAnalyzerWrapper(defaultAnalyzer);
+		plaw.addAnalyzer("en", enAnalyzer);
+		plaw.addAnalyzer("ru", ruAnalyzer);
+		plaw.tokenStream("en", "asdf", new StringReader(""));
+		verify(defaultAnalyzer, enAnalyzer, ruAnalyzer);
+	}
+	
+}