diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java index 4e6786c..49dc51b 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java @@ -10,10 +10,14 @@ *******************************************************************************/ package org.eclipse.help.internal.search; import java.io.*; -import java.util.HashSet; -import java.util.Set; +import java.util.ArrayList; +import java.util.List; import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.en.*; +import org.apache.lucene.analysis.core.*; +import org.apache.lucene.analysis.util.*; +import org.apache.lucene.util.Version; /** * Lucene Analyzer for English. LowerCaseTokenizer->StopFilter->PorterStemFilter */ @@ -25,18 +29,22 @@ public final class Analyzer_en extends Analyzer { super(); } /** - * Creates a TokenStream which tokenizes all the text in the provided + * Creates a TokenStreamComponents which tokenizes all the text in the provided * Reader. */ - public final TokenStream tokenStream(String fieldName, Reader reader) { - return new PorterStemFilter(new StopFilter(false, new LowerCaseAndDigitsTokenizer(reader), getStopWords(), false)); + @Override + public final TokenStreamComponents createComponents(String fieldName, Reader reader) { + CharArraySet stopWordsSet = StopFilter.makeStopSet(Version.LUCENE_47, getStopWords(), true); + Tokenizer source = new LowerCaseAndDigitsTokenizer(reader); + TokenStream filter = new PorterStemFilter(new StopFilter(Version.LUCENE_47, source, stopWordsSet)); + return new TokenStreamComponents(source, filter); } - private Set stopWords; + private List stopWords; - private Set getStopWords() { + private List getStopWords() { if ( stopWords == null ) { - stopWords = new HashSet(); + stopWords = new ArrayList(); for (int i = 0; i < STOP_WORDS.length; i++) { stopWords.add(STOP_WORDS[i]); } diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java index 50258d2..f72615f 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java @@ -15,8 +15,10 @@ import java.util.StringTokenizer; import com.ibm.icu.text.BreakIterator; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.Version; import org.eclipse.help.internal.base.HelpBasePlugin; @@ -82,11 +84,14 @@ public final class DefaultAnalyzer extends Analyzer { } /** - * Creates a TokenStream which tokenizes all the text in the provided + * Creates a TokenStreamComponents which tokenizes all the text in the provided * Reader. */ - public final TokenStream tokenStream(String fieldName, Reader reader) { - return new LowerCaseFilter(new WordTokenStream(fieldName, reader, locale)); + @Override + public final TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer source = new WordTokenStream(fieldName, reader, locale); + LowerCaseFilter filter = new LowerCaseFilter(Version.LUCENE_47, source); + return new TokenStreamComponents(source, filter); } /** diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java index 0dd3943..d101ae9 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java @@ -12,7 +12,8 @@ package org.eclipse.help.internal.search; import java.io.*; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.util.*; +import org.apache.lucene.util.Version; /** * Tokenizer breaking words around letters or digits. @@ -20,13 +21,14 @@ import org.apache.lucene.analysis.*; public class LowerCaseAndDigitsTokenizer extends CharTokenizer { public LowerCaseAndDigitsTokenizer(Reader input) { - super(input); + super(Version.LUCENE_47, input); } protected char normalize(char c) { return Character.toLowerCase(c); } - protected boolean isTokenChar(char c) { + @Override + public boolean isTokenChar(int c) { return Character.isLetterOrDigit(c); } diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java index 00c2799..9f9962d 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java @@ -243,16 +243,24 @@ public class QueryBuilder { private List analyzeText(Analyzer analyzer, String fieldName, String text) { List words = new ArrayList(1); Reader reader = new StringReader(text); - TokenStream tStream = analyzer.tokenStream(fieldName, reader); - CharTermAttribute termAttribute = (CharTermAttribute) tStream.getAttribute(CharTermAttribute.class); + TokenStream tStream = null; try { + tStream = analyzer.tokenStream(fieldName, reader); + tStream.reset(); + CharTermAttribute termAttribute = (CharTermAttribute) tStream.getAttribute(CharTermAttribute.class); while (tStream.incrementToken()) { String term = termAttribute.toString(); words.add(term); } - reader.close(); } catch (IOException ioe) { + } finally { + if (tStream != null) { + try { + tStream.close(); + } catch (IOException e) { + } + } } return words; diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java index e860c8d..0d5a8b6 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java @@ -31,13 +31,20 @@ import java.util.Set; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -265,10 +272,10 @@ public class SearchIndex implements IHelpSearchIndex { if (iw != null) { iw.close(); } - boolean create = false; + OpenMode create = OpenMode.APPEND; if (!indexDir.exists() || !isLuceneCompatible() || !isAnalyzerCompatible() || inconsistencyFile.exists() && firstOperation) { - create = true; + create = OpenMode.CREATE; indexDir.mkdirs(); if (!indexDir.exists()) return false; // unable to setup index directory @@ -276,9 +283,13 @@ public class SearchIndex implements IHelpSearchIndex { indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir); indexedDocs.restore(); setInconsistent(true); - MaxFieldLength max = new MaxFieldLength(1000000); - iw = new IndexWriter(luceneDirectory, analyzerDescriptor.getAnalyzer(), create, max); - iw.setMergeFactor(20); + Analyzer wrapper = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 1000000); + IndexWriterConfig iconfig = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_47, wrapper); + iconfig.setOpenMode(create); + LogDocMergePolicy policy = new LogDocMergePolicy(); + policy.setMergeFactor(20); + iconfig.setMergePolicy(policy); + iw = new IndexWriter(luceneDirectory, iconfig); return true; } catch (IOException e) { HelpBasePlugin.logError("Exception occurred in search indexing at beginAddBatch.", e); //$NON-NLS-1$ @@ -297,7 +308,7 @@ public class SearchIndex implements IHelpSearchIndex { indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir); indexedDocs.restore(); setInconsistent(true); - ir = IndexReader.open(luceneDirectory, false); + ir = DirectoryReader.open(luceneDirectory); return true; } catch (IOException e) { HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$ @@ -313,7 +324,7 @@ public class SearchIndex implements IHelpSearchIndex { if (ir != null) { ir.close(); } - ir = IndexReader.open(luceneDirectory, false); + ir = DirectoryReader.open(luceneDirectory); return true; } catch (IOException e) { HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$ @@ -331,7 +342,7 @@ public class SearchIndex implements IHelpSearchIndex { public IStatus removeDocument(String name) { Term term = new Term(FIELD_NAME, name); try { - ir.deleteDocuments(term); + iw.deleteDocuments(term); indexedDocs.remove(name); } catch (IOException e) { return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR, @@ -350,7 +361,7 @@ public class SearchIndex implements IHelpSearchIndex { if (iw == null) return false; if (optimize) - iw.optimize(); + iw.forceMerge(1); iw.close(); iw = null; // save the update info: @@ -369,7 +380,7 @@ public class SearchIndex implements IHelpSearchIndex { * know about this change. Close it so that it gets reloaded next search. */ if (searcher != null) { - searcher.close(); + searcher.getIndexReader().close(); searcher = null; } return true; @@ -401,7 +412,7 @@ public class SearchIndex implements IHelpSearchIndex { * know about this change. Close it so that it gets reloaded next search. */ if (searcher != null) { - searcher.close(); + searcher.getIndexReader().close(); searcher = null; } return true; @@ -505,8 +516,8 @@ public class SearchIndex implements IHelpSearchIndex { } Directory[] luceneDirs = dirList.toArray(new Directory[dirList.size()]); try { - iw.addIndexesNoOptimize(luceneDirs); - iw.optimize(); + iw.addIndexes(luceneDirs); + iw.forceMerge(1); } catch (IOException ioe) { HelpBasePlugin.logError("Merging search indexes failed.", ioe); //$NON-NLS-1$ return new HashMap(); @@ -515,18 +526,19 @@ public class SearchIndex implements IHelpSearchIndex { } public IStatus removeDuplicates(String name, String[] index_paths) { - TermDocs hrefDocs = null; - TermDocs indexDocs = null; - Term hrefTerm = new Term(FIELD_NAME, name); try { + AtomicReader ar = SlowCompositeReaderWrapper.wrap(ir); + DocsEnum hrefDocs = null; + DocsEnum indexDocs = null; + Term hrefTerm = new Term(FIELD_NAME, name); for (int i = 0; i < index_paths.length; i++) { Term indexTerm = new Term(FIELD_INDEX_ID, index_paths[i]); if (i == 0) { - hrefDocs = ir.termDocs(hrefTerm); - indexDocs = ir.termDocs(indexTerm); + hrefDocs = ar.termDocsEnum(hrefTerm); + indexDocs = ar.termDocsEnum(indexTerm); } else { - hrefDocs.seek(hrefTerm); - indexDocs.seek(indexTerm); + hrefDocs = ar.termDocsEnum(hrefTerm); + indexDocs = ar.termDocsEnum(indexTerm); } removeDocuments(hrefDocs, indexDocs); } @@ -535,19 +547,6 @@ public class SearchIndex implements IHelpSearchIndex { "IO exception occurred while removing duplicates of document " + name //$NON-NLS-1$ + " from index " + indexDir.getAbsolutePath() + ".", //$NON-NLS-1$ //$NON-NLS-2$ ioe); - } finally { - if (hrefDocs != null) { - try { - hrefDocs.close(); - } catch (IOException e) { - } - } - if (indexDocs != null) { - try { - indexDocs.close(); - } catch (IOException e) { - } - } } return Status.OK_STATUS; } @@ -559,33 +558,33 @@ public class SearchIndex implements IHelpSearchIndex { * @param docs2 * @throws IOException */ - private void removeDocuments(TermDocs doc1, TermDocs docs2) throws IOException { - if (!doc1.next()) { + private void removeDocuments(DocsEnum doc1, DocsEnum docs2) throws IOException { + if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) { return; } - if (!docs2.next()) { + if (docs2.nextDoc() == DocsEnum.NO_MORE_DOCS) { return; } while (true) { - if (doc1.doc() < docs2.doc()) { - if (!doc1.skipTo(docs2.doc())) { - if (!doc1.next()) { + if (doc1.docID() < docs2.docID()) { + if (doc1.advance(docs2.docID()) == DocsEnum.NO_MORE_DOCS) { + if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) { return; } } - } else if (doc1.doc() > docs2.doc()) { - if (!docs2.skipTo(doc1.doc())) { - if (!doc1.next()) { + } else if (doc1.docID() > docs2.docID()) { + if (docs2.advance(doc1.docID()) == DocsEnum.NO_MORE_DOCS) { + if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) { return; } } } - if (doc1.doc() == docs2.doc()) { - ir.deleteDocument(doc1.doc()); - if (!doc1.next()) { + if (doc1.docID() == docs2.docID()) { + iw.tryDeleteDocument(ir, doc1.docID()); + if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) { return; } - if (!docs2.next()) { + if (docs2.nextDoc() == DocsEnum.NO_MORE_DOCS) { return; } } @@ -792,7 +791,7 @@ public class SearchIndex implements IHelpSearchIndex { public void openSearcher() throws IOException { synchronized (searcherCreateLock) { if (searcher == null) { - searcher = new IndexSearcher(luceneDirectory, false); + searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory)); } } } @@ -814,7 +813,7 @@ public class SearchIndex implements IHelpSearchIndex { if (searches.isEmpty()) { if (searcher != null) { try { - searcher.close(); + searcher.getIndexReader().close(); } catch (IOException ioe) { } } @@ -888,9 +887,11 @@ public class SearchIndex implements IHelpSearchIndex { */ private void cleanOldIndex() { IndexWriter cleaner = null; - MaxFieldLength max = new MaxFieldLength(10000); try { - cleaner = new IndexWriter(luceneDirectory, analyzerDescriptor.getAnalyzer(), true, max); + Analyzer wrapper = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 10000); + IndexWriterConfig iconfig = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_47, wrapper); + iconfig.setOpenMode(OpenMode.CREATE); + cleaner = new IndexWriter(luceneDirectory, iconfig); } catch (IOException ioe) { } finally { try { diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java index d0a7bb7..df31d89 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java @@ -18,7 +18,7 @@ import org.apache.lucene.analysis.*; * Smart Analyzer. Chooses underlying implementation based on the field which * text is analyzed. */ -public final class SmartAnalyzer extends Analyzer { +public final class SmartAnalyzer extends AnalyzerWrapper { Analyzer pluggedInAnalyzer; Analyzer exactAnalyzer; @@ -31,14 +31,14 @@ public final class SmartAnalyzer extends Analyzer { this.exactAnalyzer = new DefaultAnalyzer(locale); } /** - * Creates a TokenStream which tokenizes all the text in the provided - * Reader. Delegates to DefaultAnalyzer when field used to search for exact + * Delegates to DefaultAnalyzer when field used to search for exact * match, and to plugged-in analyzer for other fields. */ - public final TokenStream tokenStream(String fieldName, Reader reader) { + @Override + public final Analyzer getWrappedAnalyzer(String fieldName) { if (fieldName != null && fieldName.startsWith("exact_")) { //$NON-NLS-1$ - return exactAnalyzer.tokenStream(fieldName, reader); + return exactAnalyzer; } - return pluggedInAnalyzer.tokenStream(fieldName, reader); + return pluggedInAnalyzer; } } diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java index 1360599..f622417 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java @@ -35,6 +35,7 @@ public final class WordTokenStream extends Tokenizer { * Constructor */ public WordTokenStream(String fieldName, Reader reader, Locale locale) { + super(reader); this.reader = reader; boundary = BreakIterator.getWordInstance(locale); @@ -105,6 +106,7 @@ public final class WordTokenStream extends Tokenizer { } public void close() throws IOException { + super.close(); /// Unlikely to be called as this is a reused if (this.reader != null) { this.reader.close(); --- a/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java 2014-11-29 17:08:45.937034662 +0000 +++ b/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java 2014-11-29 17:12:29.324984554 +0000 @@ -23,6 +23,7 @@ import org.osgi.framework.Bundle; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; @@ -155,7 +156,7 @@ IndexSearcher searcher = null; try { luceneDirectory = new NIOFSDirectory(new File(filePath)); - searcher = new IndexSearcher(luceneDirectory, true); + searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory)); TopDocs hits = searcher.search(luceneQuery, 500); assertEquals(hits.totalHits, 1); } finally { @@ -165,7 +166,7 @@ } catch (IOException x) { } if (searcher != null) - searcher.close(); + searcher.getIndexReader().close(); } } else { fail("Cannot resolve to file protocol");