commit 044f35a6abdd370d353afd20b26b91f9697357ab
parent ad54c04938acef563f3ee49130055cc3a6485660
Author: Dan Callaghan <djc@djc.id.au>
Date: Wed, 25 Apr 2012 13:51:59 +1000
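Update Lucene to 3.6.0 and other deps (JUnit 4.10, EasyMock 3.1, wagon-webdav-jackrabbit 2.2); replace TermAttribute with CharTermAttribute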
Diffstat:
5 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/pom.xml b/pom.xml
@@ -29,7 +29,7 @@
<extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-webdav-jackrabbit</artifactId>
- <version>1.0-beta-7</version>
+ <version>2.2</version>
</extension>
</extensions>
<pluginManagement>
@@ -72,18 +72,19 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>4.8.2</version>
+ <version>4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
- <version>3.0.2</version>
+ <version>3.6.0</version>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
- <version>2.5.2</version>
+ <version>3.1</version>
+ <scope>test</scope>
</dependency>
</dependencies>
</project>
diff --git a/src/main/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilter.java b/src/main/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilter.java
@@ -1,15 +1,14 @@
package au.com.miskinhill.search.analysis;
import java.io.IOException;
-import java.nio.CharBuffer;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Assumes that tokens have already been lower-cased.
@@ -18,31 +17,30 @@ public class CyrillicTransliteratingFilter extends TokenFilter {
private static final Pattern CYRILLIC_PATTERN = Pattern.compile("[а-я]+");
- private final TermAttribute termAttribute;
+ private final CharTermAttribute termAttribute;
private final PositionIncrementAttribute posIncAttribute;
private String transliterated = null;
private State transliteratedState = null;
protected CyrillicTransliteratingFilter(TokenStream input) {
super(input);
- this.termAttribute = addAttribute(TermAttribute.class);
+ this.termAttribute = addAttribute(CharTermAttribute.class);
this.posIncAttribute = addAttribute(PositionIncrementAttribute.class);
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
if (transliterated == null) {
if (!input.incrementToken())
return false;
- CharSequence text = CharBuffer.wrap(termAttribute.termBuffer(),
- 0, termAttribute.termLength());
- if (needsTransliterating(text)) {
- transliterated = transliterate(text);
+ if (needsTransliterating(termAttribute)) {
+ transliterated = transliterate(termAttribute);
transliteratedState = captureState();
}
} else {
restoreState(transliteratedState);
- termAttribute.setTermBuffer(transliterated);
+ termAttribute.setEmpty();
+ termAttribute.append(transliterated);
posIncAttribute.setPositionIncrement(0);
transliterated = null;
transliteratedState = null;
diff --git a/src/main/java/au/com/miskinhill/search/analysis/OffsetTokenFilter.java b/src/main/java/au/com/miskinhill/search/analysis/OffsetTokenFilter.java
@@ -18,7 +18,7 @@ public class OffsetTokenFilter extends TokenFilter {
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (offset != 0) {
offsetAttribute.setOffset(offsetAttribute.startOffset() + offset,
diff --git a/src/main/java/au/com/miskinhill/search/analysis/XMLTokenizer.java b/src/main/java/au/com/miskinhill/search/analysis/XMLTokenizer.java
@@ -76,7 +76,7 @@ public class XMLTokenizer extends TokenStream {
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
clearAttributes();
// first try our current string delegate, if we have one
diff --git a/src/test/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilterUnitTest.java b/src/test/java/au/com/miskinhill/search/analysis/CyrillicTransliteratingFilterUnitTest.java
@@ -11,34 +11,35 @@ import java.util.Queue;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
import org.junit.Test;
public class CyrillicTransliteratingFilterUnitTest {
private static final class FakeTokenStream extends TokenStream {
- private final TermAttribute termAttribute;
+ private final CharTermAttribute termAttribute;
private final OffsetAttribute offsetAttribute;
private final PositionIncrementAttribute posIncAttribute;
private final Queue<Token> tokens;
public FakeTokenStream(Token... tokens) {
this.tokens = new LinkedList<Token>(Arrays.asList(tokens));
- this.termAttribute = addAttribute(TermAttribute.class);
+ this.termAttribute = addAttribute(CharTermAttribute.class);
this.offsetAttribute = addAttribute(OffsetAttribute.class);
this.posIncAttribute = addAttribute(PositionIncrementAttribute.class);
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
if (tokens.isEmpty())
return false;
clearAttributes();
Token next = tokens.remove();
- termAttribute.setTermBuffer(next.term());
+ termAttribute.setEmpty();
+ termAttribute.append(next);
offsetAttribute.setOffset(next.startOffset(), next.endOffset());
posIncAttribute.setPositionIncrement(next.getPositionIncrement());
return true;
@@ -48,7 +49,7 @@ public class CyrillicTransliteratingFilterUnitTest {
@Test
public void shouldPassOnTokensWithoutCyrillicUntouched() throws IOException {
Token asdf = new Token();
- asdf.setTermBuffer("asdf");
+ asdf.append("asdf");
asdf.setStartOffset(1);
asdf.setEndOffset(4);
TokenFilter filter = new CyrillicTransliteratingFilter(
@@ -61,7 +62,7 @@ public class CyrillicTransliteratingFilterUnitTest {
@Test
public void shouldTransliterateCyrillicTokens() throws IOException {
Token igraCyrillic = new Token();
- igraCyrillic.setTermBuffer("игра");
+ igraCyrillic.append("игра");
igraCyrillic.setStartOffset(1);
igraCyrillic.setEndOffset(4);
TokenFilter filter = new CyrillicTransliteratingFilter(
@@ -76,7 +77,7 @@ public class CyrillicTransliteratingFilterUnitTest {
@Test
public void shouldTransliterateTokensWithMixedLatinAndCyrillic() throws IOException {
Token mixed = new Token();
- mixed.setTermBuffer("interнет");
+ mixed.append("interнет");
mixed.setStartOffset(1);
mixed.setEndOffset(8);
TokenFilter filter = new CyrillicTransliteratingFilter(
@@ -90,7 +91,7 @@ public class CyrillicTransliteratingFilterUnitTest {
private void assertAttributes(AttributeSource source, String term,
int start, int end, int posInc) {
- assertThat(source.getAttribute(TermAttribute.class).term(),
+ assertThat(source.getAttribute(CharTermAttribute.class).toString(),
equalTo(term));
assertThat(source.getAttribute(OffsetAttribute.class).startOffset(),
equalTo(start));