commit fccbb79c1ae504089a5158749d45268fd356a023 parent 540cd77a71ff85c8d813407171b9525e8fa8994b Author: Dan Callaghan <djc@djc.id.au> Date: Sat, 19 Mar 2011 15:48:42 +1000 upgrade to Lucene 3.0 Diffstat:
9 files changed, 317 insertions(+), 297 deletions(-) diff --git a/pom.xml b/pom.xml @@ -65,17 +65,17 @@ <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>4.8.1</version> + <version>4.8.2</version> <scope>test</scope> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> - <version>2.4.0</version> + <version>3.0.2</version> </dependency> <dependency> <groupId>org.easymock</groupId> - <artifactId>easymockclassextension</artifactId> + <artifactId>easymock</artifactId> <version>2.5.2</version> </dependency> </dependencies> diff --git a/src/main/java/au/com/miskinhill/search/analysis/Analyzer.java b/src/main/java/au/com/miskinhill/search/analysis/Analyzer.java @@ -0,0 +1,20 @@ +package au.com.miskinhill.search.analysis; + +import java.io.Reader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.AttributeSource; + +/** + * Same as {@link org.apache.lucene.analysis.Analyzer Lucene's Analyzer} but + * with a saner API. + */ +public interface Analyzer { + + TokenStream tokenizer(Reader input); + + TokenStream tokenizer(AttributeSource attributeSource, Reader input); + + TokenStream applyFilters(TokenStream input); + +} diff --git a/src/main/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilter.java b/src/main/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilter.java @@ -1,46 +1,57 @@ package au.com.miskinhill.search.analysis; import java.io.IOException; +import java.nio.CharBuffer; import java.util.HashMap; import java.util.Map; +import java.util.regex.Pattern; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; /** * Assumes that tokens have already been lower-cased. 
*/ public class CyrillicTransliteratingFilter extends TokenFilter { - private static final String CYRILLIC_PATTERN = ".*[а-я]+.*"; - - private Token transliterated = null; + private static final Pattern CYRILLIC_PATTERN = Pattern.compile("[а-я]+"); + + private final TermAttribute termAttribute; + private final PositionIncrementAttribute posIncAttribute; + private String transliterated = null; + private State transliteratedState = null; protected CyrillicTransliteratingFilter(TokenStream input) { super(input); + this.termAttribute = addAttribute(TermAttribute.class); + this.posIncAttribute = addAttribute(PositionIncrementAttribute.class); } @Override - public Token next(Token reusableToken) throws IOException { - Token tok; + public boolean incrementToken() throws IOException { if (transliterated == null) { - tok = input.next(reusableToken); - if (tok == null) return null; - if (needsTransliterating(tok.term())) { - transliterated = (Token) tok.clone(); - transliterated.setTermBuffer(transliterate(transliterated.term())); - transliterated.setPositionIncrement(0); + if (!input.incrementToken()) + return false; + CharSequence text = CharBuffer.wrap(termAttribute.termBuffer(), + 0, termAttribute.termLength()); + if (needsTransliterating(text)) { + transliterated = transliterate(text); + transliteratedState = captureState(); } } else { - tok = transliterated; + restoreState(transliteratedState); + termAttribute.setTermBuffer(transliterated); + posIncAttribute.setPositionIncrement(0); transliterated = null; + transliteratedState = null; } - return tok; + return true; } - private static boolean needsTransliterating(String text) { - return (text.matches(CYRILLIC_PATTERN)); + private static boolean needsTransliterating(CharSequence text) { + return (CYRILLIC_PATTERN.matcher(text).find()); } private static final Map<Character, String> TRANSLITERATION_TABLE = new HashMap<Character, String>(); diff --git a/src/main/java/au/com/miskinhill/search/analysis/OffsetTokenFilter.java 
b/src/main/java/au/com/miskinhill/search/analysis/OffsetTokenFilter.java @@ -2,27 +2,32 @@ package au.com.miskinhill.search.analysis; import java.io.IOException; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; public class OffsetTokenFilter extends TokenFilter { - + + private final OffsetAttribute offsetAttribute; private int offset; protected OffsetTokenFilter(TokenStream input, int offset) { super(input); this.offset = offset; + this.offsetAttribute = addAttribute(OffsetAttribute.class); } - @Override - public Token next(Token reusableToken) throws IOException { - Token retval = input.next(reusableToken); - if (retval != null && offset != 0) { - retval.setStartOffset(retval.startOffset() + offset); - retval.setEndOffset(retval.endOffset() + offset); - } - return retval; - } + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (offset != 0) { + offsetAttribute.setOffset(offsetAttribute.startOffset() + offset, + offsetAttribute.endOffset() + offset); + } + return true; + } else { + return false; + } + } } diff --git a/src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerMap.java b/src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerMap.java @@ -0,0 +1,56 @@ +package au.com.miskinhill.search.analysis; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +/** + * Returns an analyzer based on the language of the text + * being analysed. The default sub-analyzer is given in the constructor; this is + * used when the language is not specified, or when a language is specified for + * which we have no specific sub-analyzer. Use + * {@link #addAnalyzer(String, Analyzer)} to add a sub-analyzer for a specific + * language. 
+ * <p> + * Note that languages are matched by prefix, so that if a sub-analyzer has been + * added for "en" (but not "en-AU"), it will be returned for "en-AU". + */ +public class PerLanguageAnalyzerMap { + + private static final Logger LOG = Logger.getLogger(PerLanguageAnalyzerMap.class.getName()); + + protected Trie<Analyzer> analyzers; + private List<Analyzer> analyzersList = new ArrayList<Analyzer>(); // easier than traversing the trie + + public PerLanguageAnalyzerMap(Analyzer defaultAnalyzer) { + analyzers = new Trie<Analyzer>(defaultAnalyzer); + analyzersList.add(defaultAnalyzer); + } + + public void addAnalyzer(String language, Analyzer analyzer) { + analyzers.put(language, analyzer); + analyzersList.add(analyzer); + } + + /** + * Returns a list of all sub-analyzers in this analyzer (including the default one). + */ + public List<Analyzer> getAnalyzers() { + return analyzersList; + } + + /** + * Returns an appropriate analyzer for the given language. + * + * @param language ISO-639 language identifier + */ + // XXX TODO use java.util.Locale eventually (maybe with Locale#forLanguageTag added in 1.7?) 
+ public Analyzer getAnalyzer(String language) { + if (language == null) language = ""; + Analyzer a = analyzers.get(language); + if (a == analyzersList.get(0)) + LOG.warning("Using default analyzer for language " + language); + return a; + } + +} diff --git a/src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapper.java b/src/main/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapper.java @@ -1,63 +0,0 @@ -package au.com.miskinhill.search.analysis; - -import java.io.Reader; -import java.util.ArrayList; -import java.util.List; -import java.util.logging.Logger; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; - -/** - * In the same vein as - * {@link org.apache.lucene.analysis.PerFieldAnalyzerWrapper}, this analyzer - * delegates to a sub-analyzer according to based on the language of the text - * being analysed. The default sub-analyzer is given in the constructor; this is - * used when the language is not specified, or when a language is specified for - * which we have no specific sub-analyzer. Use - * {@link #addAnalyzer(String, Analyzer)} to add a sub-analyzer for a specific - * language. - * <p> - * Note that languages are matched by prefix, so that if a sub-analyzer has been - * added for "en" (but not "en-AU"), it will be selected when analysing text - * whose language is given as "en-AU". 
- */ -public class PerLanguageAnalyzerWrapper extends Analyzer { - - private static final Logger LOG = Logger.getLogger(PerLanguageAnalyzerWrapper.class.getName()); - - protected Trie<Analyzer> analyzers; - private List<Analyzer> analyzersList = new ArrayList<Analyzer>(); // easier than traversing the trie - - public PerLanguageAnalyzerWrapper(Analyzer defaultAnalyzer) { - analyzers = new Trie<Analyzer>(defaultAnalyzer); - analyzersList.add(defaultAnalyzer); - } - - public void addAnalyzer(String language, Analyzer analyzer) { - analyzers.put(language, analyzer); - analyzersList.add(analyzer); - } - - /** - * Returns a list of all sub-analyzers in this analyzer (including the default one). - */ - public List<Analyzer> getAnalyzers() { - return analyzersList; - } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - LOG.warning("Using default analyzer"); - return tokenStream("", fieldName, reader); - } - - public TokenStream tokenStream(String language, String fieldName, Reader reader) { - if (language == null) language = ""; - Analyzer a = analyzers.get(language); - if (a == analyzersList.get(0)) - LOG.warning("Using default analyzer for language " + language); - return a.tokenStream(fieldName, reader); - } - -} diff --git a/src/main/java/au/com/miskinhill/search/analysis/XMLTokenizer.java b/src/main/java/au/com/miskinhill/search/analysis/XMLTokenizer.java @@ -17,123 +17,122 @@ import javax.xml.stream.events.Characters; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; public class XMLTokenizer extends TokenStream { - - private static final XMLInputFactory factory = XMLInputFactory.newInstance(); - static { - factory.setProperty("javax.xml.stream.isCoalescing", true); - } + + private static final XMLInputFactory factory = XMLInputFactory.newInstance(); + static { + 
factory.setProperty("javax.xml.stream.isCoalescing", true); + } public static XMLInputFactory getXMLInputFactory() { return factory; } - - private static final String XHTML_NS_URI = "http://www.w3.org/1999/xhtml"; - - private static class LangStack extends Stack<String> { - private static final long serialVersionUID = 7020093255092191463L; - private String current = null; - @Override - public String push(String item) { - if (item != null) - current = item; - super.push(current); - return item; - } - @Override - public synchronized String pop() { - String top = super.pop(); - current = empty() ? null : peek(); - return top; - } - public String getCurrent() { - return current; - } - } - - private XMLEventReader r; - private PerLanguageAnalyzerWrapper analyzer; - private LangStack langs = new LangStack(); - - /** Current delegate in use (null if none currently) */ - private TokenStream delegate = null; - - public XMLTokenizer(Reader reader, PerLanguageAnalyzerWrapper analyzer) - throws XMLStreamException { - this.analyzer = analyzer; - r = factory.createXMLEventReader(reader); - } - - public XMLTokenizer(InputStream in, PerLanguageAnalyzerWrapper analyzer) - throws XMLStreamException { - this.analyzer = analyzer; - r = factory.createXMLEventReader(in); - } - - @Override - public Token next(Token reusableToken) throws IOException { - // first try our current string delegate, if we have one - if (delegate != null) { - Token retval = delegate.next(reusableToken); - if (retval != null) - return retval; - else - delegate = null; - } - - while (r.hasNext()) { - XMLEvent event; - try { - event = r.nextEvent(); - } catch (XMLStreamException e) { - throw new IOException(e); - } - switch (event.getEventType()) { - case XMLStreamConstants.START_ELEMENT: - StartElement se = event.asStartElement(); - langs.push(getLang(se)); - break; - case XMLStreamConstants.CHARACTERS: - Characters chars = event.asCharacters(); - if (chars.isWhiteSpace()) - break; // don't care - delegate = new 
OffsetTokenFilter( - analyzer.tokenStream(langs.getCurrent(), - null, new StringReader(chars.getData())), - event.getLocation().getCharacterOffset()); - Token retval = delegate.next(reusableToken); - if (retval != null) - return retval; - else - delegate = null; - break; - case XMLStreamConstants.END_ELEMENT: - langs.pop(); - break; - } - } - return null; - } - - private String getLang(StartElement se) { - // xml:lang takes precedence - QName xmlLangQName = new QName( - se.getNamespaceURI("") == XMLConstants.XML_NS_URI ? "" : XMLConstants.XML_NS_URI, - "lang"); - Attribute xmlLang = se.getAttributeByName(xmlLangQName); - if (xmlLang != null) - return xmlLang.getValue(); - - QName xhtmlLangQName = new QName( - se.getNamespaceURI("") == XHTML_NS_URI ? "" : XHTML_NS_URI, - "lang"); - Attribute xhtmlLang = se.getAttributeByName(xhtmlLangQName); - if (xhtmlLang != null) - return xhtmlLang.getValue(); - - return null; - } - + + private static final String XHTML_NS_URI = "http://www.w3.org/1999/xhtml"; + + private static class LangStack extends Stack<String> { + private static final long serialVersionUID = 7020093255092191463L; + private String current = null; + + public LangStack() { + } + + @Override + public String push(String item) { + if (item != null) + current = item; + super.push(current); + return item; + } + + @Override + public synchronized String pop() { + String top = super.pop(); + current = empty() ? 
null : peek(); + return top; + } + + public String getCurrent() { + return current; + } + } + + private XMLEventReader r; + private PerLanguageAnalyzerMap analyzerMap; + private LangStack langs = new LangStack(); + + /** Current delegate in use (null if none currently) */ + private TokenStream delegate = null; + + public XMLTokenizer(Reader reader, PerLanguageAnalyzerMap analyzerMap) throws XMLStreamException { + this.analyzerMap = analyzerMap; + r = factory.createXMLEventReader(reader); + } + + public XMLTokenizer(InputStream in, PerLanguageAnalyzerMap analyzerMap) throws XMLStreamException { + this.analyzerMap = analyzerMap; + r = factory.createXMLEventReader(in); + } + + @Override + public boolean incrementToken() throws IOException { + clearAttributes(); + + // first try our current string delegate, if we have one + if (delegate != null) { + if (delegate.incrementToken()) + return true; + else + delegate = null; + } + + while (r.hasNext()) { + XMLEvent event; + try { + event = r.nextEvent(); + } catch (XMLStreamException e) { + throw new IOException(e); + } + switch (event.getEventType()) { + case XMLStreamConstants.START_ELEMENT: + StartElement se = event.asStartElement(); + langs.push(getLang(se)); + break; + case XMLStreamConstants.CHARACTERS: + Characters chars = event.asCharacters(); + if (chars.isWhiteSpace()) + break; // don't care + Analyzer analyzer = analyzerMap.getAnalyzer(langs.getCurrent()); + delegate = new OffsetTokenFilter(analyzer.applyFilters(analyzer.tokenizer(this, new StringReader( + chars.getData()))), event.getLocation().getCharacterOffset()); + if (delegate.incrementToken()) + return true; + else + delegate = null; + break; + case XMLStreamConstants.END_ELEMENT: + langs.pop(); + break; + } + } + return false; + } + + private String getLang(StartElement se) { + // xml:lang takes precedence + QName xmlLangQName = new QName( + se.getNamespaceURI("") == XMLConstants.XML_NS_URI ? 
"" : XMLConstants.XML_NS_URI, "lang"); + Attribute xmlLang = se.getAttributeByName(xmlLangQName); + if (xmlLang != null) + return xmlLang.getValue(); + + QName xhtmlLangQName = new QName(se.getNamespaceURI("") == XHTML_NS_URI ? "" : XHTML_NS_URI, "lang"); + Attribute xhtmlLang = se.getAttributeByName(xhtmlLangQName); + if (xhtmlLang != null) + return xhtmlLang.getValue(); + + return null; + } + } diff --git a/src/test/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilterUnitTest.java b/src/test/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilterUnitTest.java @@ -1,19 +1,49 @@ package au.com.miskinhill.search.analysis; -import static org.junit.Assert.assertThat; -import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.*; +import static org.junit.Assert.*; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Iterator; -import java.util.List; +import java.util.LinkedList; +import java.util.Queue; import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.util.AttributeSource; import org.junit.Test; public class CyrillicTransliteratingFilterUnitTest { + + private static final class FakeTokenStream extends TokenStream { + private final TermAttribute termAttribute; + private final OffsetAttribute offsetAttribute; + private final PositionIncrementAttribute posIncAttribute; + private final Queue<Token> tokens; + + public FakeTokenStream(Token... 
tokens) { + this.tokens = new LinkedList<Token>(Arrays.asList(tokens)); + this.termAttribute = addAttribute(TermAttribute.class); + this.offsetAttribute = addAttribute(OffsetAttribute.class); + this.posIncAttribute = addAttribute(PositionIncrementAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (tokens.isEmpty()) + return false; + clearAttributes(); + Token next = tokens.remove(); + termAttribute.setTermBuffer(next.term()); + offsetAttribute.setOffset(next.startOffset(), next.endOffset()); + posIncAttribute.setPositionIncrement(next.getPositionIncrement()); + return true; + } + } @Test public void shouldPassOnTokensWithoutCyrillicUntouched() throws IOException { @@ -21,8 +51,11 @@ public class CyrillicTransliteratingFilterUnitTest { asdf.setTermBuffer("asdf"); asdf.setStartOffset(1); asdf.setEndOffset(4); - assertThat(filter(Arrays.asList(asdf)), - equalTo(Arrays.asList(asdf))); + TokenFilter filter = new CyrillicTransliteratingFilter( + new FakeTokenStream(asdf)); + assertTrue(filter.incrementToken()); + assertAttributes(filter, "asdf", 1, 4, 1); + assertFalse(filter.incrementToken()); } @Test @@ -31,13 +64,13 @@ public class CyrillicTransliteratingFilterUnitTest { igraCyrillic.setTermBuffer("игра"); igraCyrillic.setStartOffset(1); igraCyrillic.setEndOffset(4); - Token igraLatin = new Token(); - igraLatin.setTermBuffer("igra"); - igraLatin.setStartOffset(1); - igraLatin.setEndOffset(4); - igraLatin.setPositionIncrement(0); - assertThat(filter(Arrays.asList(igraCyrillic)), - equalTo(Arrays.asList(igraCyrillic, igraLatin))); + TokenFilter filter = new CyrillicTransliteratingFilter( + new FakeTokenStream(igraCyrillic)); + assertTrue(filter.incrementToken()); + assertAttributes(filter, "игра", 1, 4, 1); + assertTrue(filter.incrementToken()); + assertAttributes(filter, "igra", 1, 4, 0); + assertFalse(filter.incrementToken()); } @Test @@ -45,33 +78,26 @@ public class CyrillicTransliteratingFilterUnitTest { Token mixed = new 
Token(); mixed.setTermBuffer("interнет"); mixed.setStartOffset(1); - mixed.setEndOffset(4); - Token latin = new Token(); - latin.setTermBuffer("internet"); - latin.setStartOffset(1); - latin.setEndOffset(4); - latin.setPositionIncrement(0); - assertThat(filter(Arrays.asList(mixed)), - equalTo(Arrays.asList(mixed, latin))); + mixed.setEndOffset(8); + TokenFilter filter = new CyrillicTransliteratingFilter( + new FakeTokenStream(mixed)); + assertTrue(filter.incrementToken()); + assertAttributes(filter, "interнет", 1, 8, 1); + assertTrue(filter.incrementToken()); + assertAttributes(filter, "internet", 1, 8, 0); + assertFalse(filter.incrementToken()); } - private List<Token> filter(List<Token> input) throws IOException { - final Iterator<Token> inputIt = input.iterator(); - TokenStream inputStream = new TokenStream() { - @Override - public Token next(Token reusableToken) throws IOException { - if (!inputIt.hasNext()) return null; - else return inputIt.next(); - } - }; - CyrillicTransliteratingFilter filter = new CyrillicTransliteratingFilter(inputStream); - List<Token> output = new ArrayList<Token>(); - while (true) { - Token next = filter.next(new Token()); - if (next == null) break; - output.add(next); - } - return output; + private void assertAttributes(AttributeSource source, String term, + int start, int end, int posInc) { + assertThat(source.getAttribute(TermAttribute.class).term(), + equalTo(term)); + assertThat(source.getAttribute(OffsetAttribute.class).startOffset(), + equalTo(start)); + assertThat(source.getAttribute(OffsetAttribute.class).endOffset(), + equalTo(end)); + assertThat(source.getAttribute(PositionIncrementAttribute.class) + .getPositionIncrement(), equalTo(posInc)); } } diff --git a/src/test/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapperUnitTest.java b/src/test/java/au/com/miskinhill/search/analysis/PerLanguageAnalyzerWrapperUnitTest.java @@ -1,13 +1,12 @@ package au.com.miskinhill.search.analysis; -import static 
org.easymock.classextension.EasyMock.*; -import static org.junit.Assert.assertThat; -import static org.junit.matchers.JUnitMatchers.hasItems; +import static org.easymock.EasyMock.*; +import static org.hamcrest.CoreMatchers.*; +import static org.junit.Assert.*; -import java.io.Reader; -import java.io.StringReader; +import java.util.Arrays; -import org.apache.lucene.analysis.Analyzer; +import org.junit.Before; import org.junit.Test; public class PerLanguageAnalyzerWrapperUnitTest { @@ -15,67 +14,34 @@ public class PerLanguageAnalyzerWrapperUnitTest { private Analyzer defaultAnalyzer = createMock(Analyzer.class); private Analyzer enAnalyzer = createMock(Analyzer.class); private Analyzer ruAnalyzer = createMock(Analyzer.class); + private PerLanguageAnalyzerMap plam; + + @Before + public void setUp() { + plam = new PerLanguageAnalyzerMap(defaultAnalyzer); + plam.addAnalyzer("en", enAnalyzer); + plam.addAnalyzer("ru", ruAnalyzer); + } @Test public void testGetAnalyzers() { - PerLanguageAnalyzerWrapper plaw = - new PerLanguageAnalyzerWrapper(defaultAnalyzer); - plaw.addAnalyzer("en", enAnalyzer); - plaw.addAnalyzer("ru", ruAnalyzer); - assertThat(plaw.getAnalyzers(), - hasItems(defaultAnalyzer, enAnalyzer, ruAnalyzer)); - } - - @Test - public void testTokenStreamNoLanguage() { - expect(defaultAnalyzer.tokenStream( - isA(String.class), isA(Reader.class))).andReturn(null); - replay(defaultAnalyzer, enAnalyzer, ruAnalyzer); - PerLanguageAnalyzerWrapper plaw = - new PerLanguageAnalyzerWrapper(defaultAnalyzer); - plaw.addAnalyzer("en", enAnalyzer); - plaw.addAnalyzer("ru", ruAnalyzer); - plaw.tokenStream("asdf", new StringReader("")); - verify(); + assertThat(plam.getAnalyzers(), + equalTo(Arrays.asList(defaultAnalyzer, enAnalyzer, ruAnalyzer))); } @Test public void testTokenStreamEmptyLanguage() { - expect(defaultAnalyzer.tokenStream( - isA(String.class), isA(Reader.class))).andReturn(null); - replay(defaultAnalyzer, enAnalyzer, ruAnalyzer); - PerLanguageAnalyzerWrapper plaw 
= - new PerLanguageAnalyzerWrapper(defaultAnalyzer); - plaw.addAnalyzer("en", enAnalyzer); - plaw.addAnalyzer("ru", ruAnalyzer); - plaw.tokenStream("", "asdf", new StringReader("")); - verify(); + assertThat(plam.getAnalyzer(""), equalTo(defaultAnalyzer)); } @Test public void testTokenStreamNullLanguage() { - expect(defaultAnalyzer.tokenStream( - isA(String.class), isA(Reader.class))).andReturn(null); - replay(defaultAnalyzer, enAnalyzer, ruAnalyzer); - PerLanguageAnalyzerWrapper plaw = - new PerLanguageAnalyzerWrapper(defaultAnalyzer); - plaw.addAnalyzer("en", enAnalyzer); - plaw.addAnalyzer("ru", ruAnalyzer); - plaw.tokenStream(null, "asdf", new StringReader("")); - verify(); + assertThat(plam.getAnalyzer(null), equalTo(defaultAnalyzer)); } @Test public void testTokenStreamSomeLanguage() { - expect(enAnalyzer.tokenStream( - isA(String.class), isA(Reader.class))).andReturn(null); - replay(defaultAnalyzer, enAnalyzer, ruAnalyzer); - PerLanguageAnalyzerWrapper plaw = - new PerLanguageAnalyzerWrapper(defaultAnalyzer); - plaw.addAnalyzer("en", enAnalyzer); - plaw.addAnalyzer("ru", ruAnalyzer); - plaw.tokenStream("en", "asdf", new StringReader("")); - verify(); + assertThat(plam.getAnalyzer("en"), equalTo(enAnalyzer)); } }