From e9b5adb1d0477a50111afe5cf7736c542b7e4998 Mon Sep 17 00:00:00 2001 From: Sopot Cela Date: Thu, 21 May 2015 13:48:41 +0200 Subject: [PATCH] Bug 466829 - Upgrade platform.ua to Lucene 5.1.0 Change-Id: I882188205c2c1e2cc1106108680dd4e94570a975 Signed-off-by: Sopot Cela --- diff --git a/eclipse.platform.ua/org.eclipse.help.base/META-INF/MANIFEST.MF b/org.eclipse.help.base/META-INF/MANIFEST.MF index ee34c8e..e4bd703 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/META-INF/MANIFEST.MF +++ b/eclipse.platform.ua/org.eclipse.help.base/META-INF/MANIFEST.MF @@ -43,9 +43,11 @@ org.eclipse.core.runtime;bundle-version="[3.11.0,4.0.0)", org.eclipse.help;bundle-version="[3.5.0,4.0.0)";visibility:=reexport, org.eclipse.core.expressions;bundle-version="[3.4.200,4.0.0)", - org.apache.lucene.analysis;bundle-version="[3.5.0,4.0.0)", - org.apache.lucene.core;bundle-version="[3.5.0,4.0.0)", - org.eclipse.core.net;bundle-version="1.2.200" + org.eclipse.core.net;bundle-version="1.2.200", + org.apache.lucene.analyzers-common;bundle-version="5.1.0", + org.apache.lucene.core;bundle-version="5.1.0", + org.apache.lucene.queryparser;bundle-version="5.1.0", + org.apache.lucene.analyzers-smartcn;bundle-version="5.1.0" Import-Package: com.ibm.icu.text, org.eclipse.equinox.http.jetty;resolution:=optional Bundle-RequiredExecutionEnvironment: JavaSE-1.7 diff --git a/eclipse.platform.ua/org.eclipse.help.base/plugin.xml b/org.eclipse.help.base/plugin.xml index 07a5a22..4daf3f0 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/plugin.xml +++ b/eclipse.platform.ua/org.eclipse.help.base/plugin.xml @@ -83,7 +83,7 @@ + class="org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer"> StopFilter->PorterStemFilter */ public final class Analyzer_en extends Analyzer { + /** * Constructor for Analyzer_en. 
*/ @@ -27,19 +34,25 @@ super(); } /** - * Creates a TokenStream which tokenizes all the text in the provided + * Creates a TokenStreamComponents which tokenizes all the text in the provided * Reader. */ @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return new PorterStemFilter(new StopFilter(Version.LUCENE_30, new LowerCaseAndDigitsTokenizer(reader), getStopWords(), false)); + public final TokenStreamComponents createComponents(String fieldName) { + final Tokenizer source; + source = new LowerCaseAndDigitsTokenizer(); + TokenStream result = new EnglishPossessiveFilter(source); + result = new StopFilter(result, new CharArraySet(getStopWords(), false)); + result = new KeywordRepeatFilter(result); + result = new PorterStemFilter(result); + return new TokenStreamComponents(source, result); } - private Set stopWords; + private List stopWords; - private Set getStopWords() { + private List getStopWords() { if ( stopWords == null ) { - stopWords = new HashSet<>(); + stopWords = new ArrayList<>(); for (int i = 0; i < STOP_WORDS.length; i++) { stopWords.add(STOP_WORDS[i]); } diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java index 4109474..e3c8722 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java @@ -7,22 +7,20 @@ * Contributors: * IBM Corporation - initial API and implementation * Alexander Kurtakov - Bug 460787 + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package org.eclipse.help.internal.search; -import java.io.Reader; import java.util.Locale; import java.util.StringTokenizer; -import com.ibm.icu.text.BreakIterator; - import 
org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.util.Version; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.eclipse.core.runtime.Platform; import org.eclipse.help.internal.base.HelpBasePlugin; -import org.eclipse.core.runtime.Platform; +import com.ibm.icu.text.BreakIterator; /** @@ -84,12 +82,14 @@ } /** - * Creates a TokenStream which tokenizes all the text in the provided + * Creates a TokenStreamComponents which tokenizes all the text in the provided * Reader. */ @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return new LowerCaseFilter(Version.LUCENE_30, new WordTokenStream(fieldName, reader, locale)); + public final TokenStreamComponents createComponents(String fieldName) { + Tokenizer source = new WordTokenStream(locale); + LowerCaseFilter filter = new LowerCaseFilter(source); + return new TokenStreamComponents(source, filter); } /** diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java index a475688..cbb2472 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java @@ -8,29 +8,27 @@ * Contributors: * IBM Corporation - initial API and implementation * Alexander Kurtakov - Bug 460787 + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package org.eclipse.help.internal.search; -import java.io.*; - -import org.apache.lucene.analysis.*; -import org.apache.lucene.util.Version; +import 
org.apache.lucene.analysis.util.CharTokenizer; /** * Tokenizer breaking words around letters or digits. */ public class LowerCaseAndDigitsTokenizer extends CharTokenizer { - public LowerCaseAndDigitsTokenizer(Reader input) { - super(Version.LUCENE_30, input); + public LowerCaseAndDigitsTokenizer() { + super(); } @Override - protected char normalize(char c) { + protected int normalize(int c) { return Character.toLowerCase(c); } @Override - protected boolean isTokenChar(char c) { + public boolean isTokenChar(int c) { return Character.isLetterOrDigit(c); } diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java index 60a545d..804bf5e 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java @@ -7,6 +7,7 @@ * * Contributors: * IBM Corporation - initial API and implementation + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package org.eclipse.help.internal.search; @@ -16,6 +17,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; import org.eclipse.help.search.ISearchDocument; /** @@ -32,25 +35,25 @@ @Override public void setTitle(String title) { - doc.add(new Field("title", title, Field.Store.NO, Field.Index.ANALYZED)); //$NON-NLS-1$ - doc.add(new Field("exact_title", title, Field.Store.NO, Field.Index.ANALYZED)); //$NON-NLS-1$ - doc.add(new Field("raw_title", title, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$ + doc.add(new TextField("title", title, Field.Store.NO)); //$NON-NLS-1$ + doc.add(new TextField("exact_title", title, 
Field.Store.NO)); //$NON-NLS-1$ + doc.add(new StoredField("raw_title", title)); //$NON-NLS-1$ } @Override public void setSummary(String summary) { - doc.add(new Field("summary", summary, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$ + doc.add(new StoredField("summary", summary)); //$NON-NLS-1$ } @Override public void addContents(String contents) { - doc.add(new Field("contents", new StringReader(contents))); //$NON-NLS-1$ - doc.add(new Field("exact_contents", new StringReader(contents))); //$NON-NLS-1$ + doc.add(new TextField("contents", new StringReader(contents))); //$NON-NLS-1$ + doc.add(new TextField("exact_contents", new StringReader(contents))); //$NON-NLS-1$ } @Override public void setHasFilters(boolean hasFilters) { - doc.add(new Field("filters", Boolean.toString(hasFilters), Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$ + doc.add(new StoredField("filters", Boolean.toString(hasFilters))); //$NON-NLS-1$ } public Document getDocument() { @@ -59,8 +62,8 @@ @Override public void addContents(Reader contents, Reader exactContents) { - doc.add(new Field("contents", contents)); //$NON-NLS-1$ - doc.add(new Field("exact_contents", exactContents)); //$NON-NLS-1$ + doc.add(new TextField("contents", contents)); //$NON-NLS-1$ + doc.add(new TextField("exact_contents", exactContents)); //$NON-NLS-1$ } } diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java index ca9cd67..7952c8d 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java @@ -7,6 +7,7 @@ * * Contributors: * IBM Corporation - initial API and implementation + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package 
org.eclipse.help.internal.search; @@ -150,6 +151,7 @@ .getProperty(SearchIndex.DEPENDENCIES_KEY_ANALYZER); if (!targetIndex.isLuceneCompatible(lucene) || !targetIndex.isAnalyzerCompatible(analyzer)) { + HelpBasePlugin.logError("Error trying to consume Lucene index from bundle "+bundle.toString()+". Please use an index built with Lucene 5 or higher.", null); //$NON-NLS-1$ //$NON-NLS-2$ return false; } } catch (MalformedURLException mue) { diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java deleted file mode 100644 index 08cf58a..0000000 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java +++ /dev/null @@ -1,455 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2016 IBM Corporation and others. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - * Chris Torrence - patch for bug Bug 107648 - *******************************************************************************/ -package org.eclipse.help.internal.search; -import java.io.*; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; -import java.util.StringTokenizer; - -import org.apache.lucene.analysis.*; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; -import org.eclipse.help.internal.base.*; -/** - * Build query acceptable by the search engine. 
- */ -public class QueryBuilder { - // Maximum allowed number of terms - private static final int MAX_TERMS = 10; - // Maximum allowed number of ORs - private static final int MAX_UNIONS = 4; - // Maximum allowed number terms with wild cards - private static final int MAX_WILD_TERMS = 2; - // Query from user - private String searchWords; - // Descriptor of Analyzer to process the query words - private AnalyzerDescriptor analyzerDesc; - // Analyzer to process the query words - private Analyzer analyzer; - // List of QueryWordsToken - private List analyzedTokens; - // List of words to highlight - private List highlightWords = new ArrayList<>(); - private Locale locale; - /** - * Creates a query builder for the search word. The search word is processed - * by a lexical analyzer. - */ - public QueryBuilder(String searchWords, AnalyzerDescriptor analyzerDesc) { - this.searchWords = searchWords; - String language = analyzerDesc.getLang(); - if (language.length() >= 5) { - this.locale = new Locale(language.substring(0, 2), language - .substring(3, 5)); - } else { - this.locale = new Locale(language.substring(0, 2), ""); //$NON-NLS-1$ - } - this.analyzerDesc = analyzerDesc; - this.analyzer = analyzerDesc.getAnalyzer(); - } - /** - * Splits user query into tokens and returns a list of QueryWordsToken's. 
- */ - private List tokenizeUserQuery(String searchWords) { - List tokenList = new ArrayList<>(); - //Divide along quotation marks - //StringTokenizer qTokenizer = new StringTokenizer(searchWords.trim(), - // "\"", true); //$NON-NLS-1$ - boolean withinQuotation = false; - String quotedString = ""; //$NON-NLS-1$ - int termCount = 0;// keep track of number of terms to disallow too many - - int fromIndex = -1; - searchWords = searchWords.trim(); - while((fromIndex = searchWords.indexOf("\"", fromIndex+1))!= -1){ //$NON-NLS-1$ - withinQuotation = !withinQuotation; - } - if( withinQuotation ) { - searchWords = searchWords + "\""; //$NON-NLS-1$ - withinQuotation = !withinQuotation; - } - - StringTokenizer qTokenizer = new StringTokenizer(searchWords,"\"",true); //$NON-NLS-1$ - int orCount = 0; // keep track of number of ORs to disallow too many - while (qTokenizer.hasMoreTokens()) { - String curToken = qTokenizer.nextToken(); - if (curToken.equals("\"")) { //$NON-NLS-1$ - if (withinQuotation) { - // check for too many terms - if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER - && ++termCount > MAX_TERMS) { - throw new QueryTooComplexException(); - } - tokenList.add(QueryWordsToken.exactPhrase(quotedString)); - } else { - quotedString = ""; //$NON-NLS-1$ - } - withinQuotation = !withinQuotation; - continue; - } else if (withinQuotation) { - quotedString = curToken; - continue; - } else { - //divide unquoted strings along white space - StringTokenizer parser = new StringTokenizer(curToken.trim()); - while (parser.hasMoreTokens()) { - String token = parser.nextToken(); - if (token.equalsIgnoreCase(QueryWordsToken.AND().value)) { - tokenList.add(QueryWordsToken.AND()); - } else if (token - .equalsIgnoreCase(QueryWordsToken.OR().value)) { - // Check for too many OR terms - if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER - && ++orCount > MAX_UNIONS) { - throw new QueryTooComplexException(); - } - tokenList.add(QueryWordsToken.OR()); - } else if 
(token - .equalsIgnoreCase(QueryWordsToken.NOT().value)) { - tokenList.add(QueryWordsToken.NOT()); - } else { - // check for too many terms - if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER - && ++termCount > MAX_TERMS) { - throw new QueryTooComplexException(); - } - tokenList.add(QueryWordsToken.word(token)); - } - } - } - } - return tokenList; - } - /** - * Apply the Analyzer to the search tokens and return the list of processed - * QueryWordsToken's. - */ - private List analyzeTokens(List tokens) { - boolean isTokenAfterNot = false; - List newTokens = new ArrayList<>(); - int wildCardTermCount = 0; - for (int i = 0; i < tokens.size(); i++) { - QueryWordsToken token = tokens.get(i); - if (token.type == QueryWordsToken.WORD) { - int questionMIndex = token.value.indexOf('?'); - int starIndex = token.value.indexOf('*'); - if (starIndex >= 0 || questionMIndex >= 0) { - if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER - && ++wildCardTermCount > MAX_WILD_TERMS) { - throw new QueryTooComplexException(); - } - newTokens.add(QueryWordsToken.word(token.value - .toLowerCase(locale))); - // add word to the list of words to highlight - if (!isTokenAfterNot && !highlightWords.contains(token.value)) { - highlightWords.add(token.value); - } - } else { - List wordList = analyzeText(analyzer, "contents", //$NON-NLS-1$ - token.value); - if (wordList.size() > 0) { - if (!isTokenAfterNot && !highlightWords.contains(token.value)) { - // add original word to the list of words to - // highlight - highlightWords.add(token.value); - } - if (wordList.size() == 1) { - String word = wordList.get(0); - newTokens.add(QueryWordsToken.word(word)); - // add analyzed word to the list of words to - // highlight - // this is required to highlight stemmed words - if (!isTokenAfterNot && !highlightWords.contains(word)) { - highlightWords.add(word); - } - } else { - QueryWordsPhrase phrase = QueryWordsToken.phrase(); - for (Iterator it = wordList.iterator(); it - 
.hasNext();) { - String word = it.next(); - phrase.addWord(word); - // add each analyzed word to the list of words - // to highlight - // this is only required to highlight stemmed - // words. - // Adding words should not be done when - // DefaultAnalyzer is used, - // because it does not perform stemming and - // common words removal - // which would result in common characters - // highlighted all over (bug 30263) - if (!analyzerDesc.getId().startsWith( - HelpBasePlugin.PLUGIN_ID + "#")) { //$NON-NLS-1$ - if (!isTokenAfterNot && !highlightWords.contains(word)) { - highlightWords.add(word); - } - } - } - newTokens.add(phrase); - } - } - } - } else if (// forget ANDs - /* - * token.type == SearchQueryToken.AND || - */ - token.type == QueryWordsToken.OR - || token.type == QueryWordsToken.NOT) - newTokens.add(token); - else if (token.type == QueryWordsToken.EXACT_PHRASE) { - List wordList = analyzeText(analyzer, "exact_contents", //$NON-NLS-1$ - token.value); - if (wordList.size() > 0) { - if (!isTokenAfterNot && !highlightWords.contains(token.value)) { - // add original word to the list of words to highlight - highlightWords.add(token.value); - } - } - QueryWordsExactPhrase phrase = QueryWordsToken.exactPhrase(); - for (Iterator it = wordList.iterator(); it.hasNext();) { - String word = it.next(); - phrase.addWord(word); - // add analyzed word to the list of words to highlight - // if (!highlightWords.contains(word)) - // highlightWords.add(word); - } - // add phrase only if not empty - if (phrase.getWords().size() > 0) { - newTokens.add(phrase); - } - } - isTokenAfterNot = (token.type == QueryWordsToken.NOT); - } - return newTokens; - } - /** - * Get a list of tokens corresponding to a search word or phrase - * - * @return List of String - */ - private List analyzeText(Analyzer analyzer, String fieldName, String text) { - List words = new ArrayList<>(1); - try (Reader reader = new StringReader(text); TokenStream tStream = analyzer.tokenStream(fieldName, reader)) { 
- CharTermAttribute termAttribute = tStream.getAttribute(CharTermAttribute.class); - while (tStream.incrementToken()) { - String term = termAttribute.toString(); - words.add(term); - } - } catch (IOException ioe) { - } - - return words; - } - /** - * Obtains Lucene Query from tokens - * - * @return Query or null if no query could be created - */ - private Query createLuceneQuery(List searchTokens, String[] fieldNames, - float[] boosts) { - // Get queries for parts separated by OR - List requiredQueries = getRequiredQueries(searchTokens, fieldNames, - boosts); - if (requiredQueries.size() == 0) - return null; - else if (requiredQueries.size() <= 1) - return requiredQueries.get(0); - else - /* if (requiredQueries.size() > 1) */ - // OR queries - return (orQueries(requiredQueries)); - } - /** - * Obtains Lucene queries for token sequences separated at OR. - * - * @return List of Query (could be empty) - */ - private List getRequiredQueries(List tokens, String[] fieldNames, - float[] boosts) { - List oredQueries = new ArrayList<>(); - ArrayList requiredQueryTokens = new ArrayList<>(); - for (int i = 0; i < tokens.size(); i++) { - QueryWordsToken token = tokens.get(i); - if (token.type != QueryWordsToken.OR) { - requiredQueryTokens.add(token); - } else { - Query reqQuery = getRequiredQuery(requiredQueryTokens, - fieldNames, boosts); - if (reqQuery != null) - oredQueries.add(reqQuery); - requiredQueryTokens = new ArrayList<>(); - } - } - Query reqQuery = getRequiredQuery(requiredQueryTokens, fieldNames, - boosts); - if (reqQuery != null) - oredQueries.add(reqQuery); - return oredQueries; - } - private Query orQueries(Collection queries) { - BooleanQuery bq = new BooleanQuery(); - for (Iterator it = queries.iterator(); it.hasNext();) { - Query q = it.next(); - bq.add(q, BooleanClause.Occur.SHOULD); - } - return bq; - } - /** - * Obtains Lucene Query for tokens containing only AND and NOT operators. 
- * - * @return BooleanQuery or null if no query could be created from the tokens - */ - private Query getRequiredQuery(List requiredTokens, String[] fieldNames, - float[] boosts) { - BooleanQuery retQuery = new BooleanQuery(); - boolean requiredTermExist = false; - // Parse tokens left to right - QueryWordsToken operator = null; - for (int i = 0; i < requiredTokens.size(); i++) { - QueryWordsToken token = requiredTokens.get(i); - if (token.type == QueryWordsToken.AND - || token.type == QueryWordsToken.NOT) { - operator = token; - continue; - } - // Creates queries for all fields - Query qs[] = new Query[fieldNames.length]; - for (int f = 0; f < fieldNames.length; f++) { - qs[f] = token.createLuceneQuery(fieldNames[f], boosts[f]); - } - // creates the boolean query of all fields - Query q = qs[0]; - if (fieldNames.length > 1) { - BooleanQuery allFieldsQuery = new BooleanQuery(); - for (int f = 0; f < fieldNames.length; f++) - allFieldsQuery.add(qs[f], BooleanClause.Occur.SHOULD); - q = allFieldsQuery; - } - if (operator != null && operator.type == QueryWordsToken.NOT) { - retQuery.add(q, BooleanClause.Occur.MUST_NOT); // add as prohibited - } else { - retQuery.add(q, BooleanClause.Occur.MUST); // add as required - requiredTermExist = true; - } - } - if (!requiredTermExist) { - return null; // cannot search for prohibited only - } - return retQuery; - } - private Query getLuceneQuery(String[] fieldNames, float[] boosts) { - Query luceneQuery = createLuceneQuery(analyzedTokens, fieldNames, - boosts); - return luceneQuery; - } - /** - * @param fieldNames - - * Collection of field names of type String (e.g. 
"h1"); the - * search will be performed on the given fields - * @param fieldSearchOnly - - * boolean indicating if field only search should be performed; - * if set to false, default field "contents" and all other fields - * will be searched - */ - public Query getLuceneQuery(Collection fieldNames, boolean fieldSearchOnly) - throws QueryTooComplexException { - // split search query into tokens - List userTokens = tokenizeUserQuery(searchWords); - analyzedTokens = analyzeTokens(userTokens); - return buildLuceneQuery(fieldNames, fieldSearchOnly); - } - /** - * @param fieldNames - - * Collection of field names of type String (e.g. "h1"); the - * search will be performed on the given fields - * @param fieldSearchOnly - - * boolean indicating if field only search should be performed; - * if set to false, default field "contents" and all other fields - * will be searched - */ - private Query buildLuceneQuery(Collection fieldNames, - boolean fieldSearchOnly) { - String[] fields; - float[] boosts; - if (fieldSearchOnly) { - fields = new String[fieldNames.size()]; - boosts = new float[fieldNames.size()]; - Iterator fieldNamesIt = fieldNames.iterator(); - for (int i = 0; i < fieldNames.size(); i++) { - fields[i] = fieldNamesIt.next(); - boosts[i] = 5.0f; - } - } else { - fields = new String[fieldNames.size() + 2]; - boosts = new float[fieldNames.size() + 2]; - Iterator fieldNamesIt = fieldNames.iterator(); - for (int i = 0; i < fieldNames.size(); i++) { - fields[i] = fieldNamesIt.next(); - boosts[i] = 5.0f; - } - fields[fieldNames.size()] = "contents"; //$NON-NLS-1$ - boosts[fieldNames.size()] = 1.0f; - fields[fieldNames.size()+1] = "title"; //$NON-NLS-1$ - boosts[fieldNames.size()+1] = 1.0f; - } - Query query = getLuceneQuery(fields, boosts); - query = improveRankingForUnqotedPhrase(query, fields, boosts); - return query; - } - /** - * If user query contained only words (no quotaions nor operators) extends - * query with term phrase representing entire user query i.e for 
user string - * a b, the query a AND b will be extended to "a b" OR a AND b - */ - private Query improveRankingForUnqotedPhrase(Query query, String[] fields, - float[] boosts) { - if (query == null) - return query; - // check if all tokens are words - for (int i = 0; i < analyzedTokens.size(); i++) - if (analyzedTokens.get(i).type != QueryWordsToken.WORD) - return query; - // Create phrase query for all tokens and OR with original query - BooleanQuery booleanQuery = new BooleanQuery(); - booleanQuery.add(query, BooleanClause.Occur.SHOULD); - PhraseQuery[] phraseQueries = new PhraseQuery[fields.length]; - for (int f = 0; f < fields.length; f++) { - phraseQueries[f] = new PhraseQuery(); - for (int i = 0; i < analyzedTokens.size(); i++) { - Term t = new Term(fields[f], analyzedTokens - .get(i).value); - phraseQueries[f].add(t); - } - phraseQueries[f].setBoost(10 * boosts[f]); - booleanQuery.add(phraseQueries[f], BooleanClause.Occur.SHOULD); - } - return booleanQuery; - } - /** - * Obtains analyzed terms from query as one string. Words are double quoted, - * and separated by space. The analyzed words are needed for highlighting - * word roots. - */ - public String gethighlightTerms() { - StringBuffer buf = new StringBuffer(); - for (Iterator it = highlightWords.iterator(); it.hasNext();) { - buf.append('"'); - buf.append(it.next()); - buf.append("\" "); //$NON-NLS-1$ - } - return buf.toString(); - } -} diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java deleted file mode 100644 index 324b8e1..0000000 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java +++ /dev/null @@ -1,51 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2015 IBM Corporation and others. - * All rights reserved. 
This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - *******************************************************************************/ -package org.eclipse.help.internal.search; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; -/** - * Represents a quoted token in user search query words - */ -public class QueryWordsExactPhrase extends QueryWordsToken { - private List words; - public QueryWordsExactPhrase() { - super(QueryWordsToken.EXACT_PHRASE, ""); //$NON-NLS-1$ - words = new ArrayList<>(); - } - public void addWord(String word) { - words.add(word); - if (words.size() <= 1) - value = word; - else - value += " " + word; //$NON-NLS-1$ - } - public List getWords() { - return words; - } - /** - * Creates a lucene query for a field - */ - @Override - public Query createLuceneQuery(String field, float boost) { - PhraseQuery q = new PhraseQuery(); - for (Iterator it = getWords().iterator(); it.hasNext();) { - String word = it.next(); - Term t = new Term("exact_" + field, word); //$NON-NLS-1$ - q.add(t); - q.setBoost(boost); - } - return q; - } -} diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java deleted file mode 100644 index 8a94e89..0000000 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java +++ /dev/null @@ -1,52 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2015 IBM Corporation and others. - * All rights reserved. 
This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - *******************************************************************************/ -package org.eclipse.help.internal.search; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; -/** - * Represents a phrase (not quoted) token in user search query words It consists - * of several words created by an analyzer - */ -public class QueryWordsPhrase extends QueryWordsToken { - private List words; - public QueryWordsPhrase() { - super(QueryWordsToken.PHRASE, ""); //$NON-NLS-1$ - words = new ArrayList<>(); - } - public void addWord(String word) { - words.add(word); - if (words.size() <= 1) - value = word; - else - value += " " + word; //$NON-NLS-1$ - } - public List getWords() { - return words; - } - /** - * Creates a lucene query for a field - */ - @Override - public Query createLuceneQuery(String field, float boost) { - PhraseQuery q = new PhraseQuery(); - for (Iterator it = getWords().iterator(); it.hasNext();) { - String word = it.next(); - Term t = new Term(field, word); - q.add(t); - q.setBoost(boost); - } - return q; - } -} diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java deleted file mode 100644 index 6ba76f2..0000000 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java +++ /dev/null @@ -1,81 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2007 IBM Corporation and others. - * All rights reserved. 
This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - *******************************************************************************/ -package org.eclipse.help.internal.search; -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; -/** - * Represents a token in user search query words - */ -public class QueryWordsToken { - public static final int AND = 0; - public static final int OR = 1; - public static final int NOT = 2; - public static final int EXACT_PHRASE = 3; - public static final int PHRASE = 4; - public static final int WORD = 5; - private static final QueryWordsToken fAND = new QueryWordsToken(AND, "AND"); //$NON-NLS-1$ - private static final QueryWordsToken fOR = new QueryWordsToken(OR, "OR"); //$NON-NLS-1$ - private static final QueryWordsToken fNOT = new QueryWordsToken(NOT, "NOT"); //$NON-NLS-1$ - public int type; - public String value; - protected QueryWordsToken(int type, String value) { - this.type = type; - this.value = value; - } - /** - * Creates a lucene query for a field - */ - public Query createLuceneQuery(String field, float boost) { - Query q; - int questionPos = value.indexOf('?'); - int starPos = value.indexOf('*'); - if (questionPos >= 0 || starPos >= 0) { - if (questionPos == -1 && starPos == value.length() - 1) { - Term t = new Term("exact_" + field, value.substring(0, starPos)); //$NON-NLS-1$ - q = new PrefixQuery(t); - ((PrefixQuery) q).setBoost(boost); - } else { - Term t = new Term("exact_" + field, value); //$NON-NLS-1$ - q = new WildcardQuery(t); - ((WildcardQuery) q).setBoost(boost); - } - } else { - Term t = new Term(field, value); - q = new TermQuery(t); - ((TermQuery) q).setBoost(boost); - } - // after updating Lucene, set boost on a Query class - return q; - } - 
public static QueryWordsToken AND() { - return fAND; - } - public static QueryWordsToken OR() { - return fOR; - } - public static QueryWordsToken NOT() { - return fNOT; - } - public static QueryWordsToken word(String word) { - return new QueryWordsToken(QueryWordsToken.WORD, word); - } - public static QueryWordsPhrase phrase() { - return new QueryWordsPhrase(); - } - public static QueryWordsExactPhrase exactPhrase() { - return new QueryWordsExactPhrase(); - } - public static QueryWordsExactPhrase exactPhrase(String word) { - QueryWordsExactPhrase token = new QueryWordsExactPhrase(); - token.addWord(word); - return token; - } -} diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java index 67963a5..131ac65 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java @@ -17,6 +16,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; +import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; import java.nio.channels.FileLock; @@ -33,19 +33,30 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import org.apache.lucene.analysis.LimitTokenCountAnalyzer; +import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; import 
org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.FastCharStream; +import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.queryparser.classic.QueryParserTokenManager; +import org.apache.lucene.queryparser.classic.Token; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; @@ -59,7 +70,6 @@ import org.eclipse.core.runtime.OperationCanceledException; import org.eclipse.core.runtime.Platform; import org.eclipse.core.runtime.Status; -import org.eclipse.help.internal.HelpPlugin; import org.eclipse.help.internal.base.BaseHelpSystem; import org.eclipse.help.internal.base.HelpBasePlugin; import org.eclipse.help.internal.base.util.HelpProperties; @@ -86,7 +96,7 @@ private File indexDir; - private Directory luceneDirectory; + public Directory luceneDirectory; private String locale; @@ -170,7 +180,7 @@ inconsistencyFile = new File(indexDir.getParentFile(), locale + ".inconsistent"); //$NON-NLS-1$ htmlSearchParticipant = new HTMLSearchParticipant(indexDir.getAbsolutePath()); try { - luceneDirectory = new NIOFSDirectory(indexDir); + luceneDirectory = new NIOFSDirectory(indexDir.toPath()); } catch (IOException e) { } if (!exists()) { @@ -203,11 +213,11 @@ public IStatus addDocument(String name, URL url) { try { Document doc = new Document(); - doc.add(new Field(FIELD_NAME, name, Field.Store.YES, 
Field.Index.NOT_ANALYZED)); + doc.add(new StringField(FIELD_NAME, name, Field.Store.YES)); addExtraFields(doc); String pluginId = LocalSearchManager.getPluginId(name); if (relativePath != null) { - doc.add(new Field(FIELD_INDEX_ID, relativePath, Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new StringField(FIELD_INDEX_ID, relativePath, Field.Store.YES)); } // check for the explicit search participant. SearchParticipant participant = null; @@ -220,20 +230,22 @@ if (participant == null) participant = BaseHelpSystem.getLocalSearchManager().getParticipant(pluginId, name); if (participant != null) { - IStatus status = participant.addDocument(this, pluginId, name, url, id, new LuceneSearchDocument(doc)); + IStatus status = participant.addDocument(this, pluginId, name, url, id, + new LuceneSearchDocument(doc)); if (status.getSeverity() == IStatus.OK) { String filters = doc.get("filters"); //$NON-NLS-1$ indexedDocs.put(name, filters != null ? filters : "0"); //$NON-NLS-1$ if (id != null) - doc.add(new Field("id", id, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$ + doc.add(new StoredField("id", id)); //$NON-NLS-1$ if (pid != null) - doc.add(new Field("participantId", pid, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$ + doc.add(new StoredField("participantId", pid)); //$NON-NLS-1$ iw.addDocument(doc); } return status; } // default to html - IStatus status = htmlSearchParticipant.addDocument(this, pluginId, name, url, id, new LuceneSearchDocument(doc)); + IStatus status = htmlSearchParticipant.addDocument(this, pluginId, name, url, id, + new LuceneSearchDocument(doc)); if (status.getSeverity() == IStatus.OK) { String filters = doc.get("filters"); //$NON-NLS-1$ indexedDocs.put(name, filters != null ? 
filters : "0"); //$NON-NLS-1$ @@ -245,20 +257,21 @@ "IO exception occurred while adding document " + name //$NON-NLS-1$ + " to index " + indexDir.getAbsolutePath() + ".", //$NON-NLS-1$ //$NON-NLS-2$ e); - } - catch (Exception e) { + } catch (Exception e) { return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR, "An unexpected internal error occurred while adding document " //$NON-NLS-1$ + name + " to index " + indexDir.getAbsolutePath() //$NON-NLS-1$ - + ".", e); //$NON-NLS-1$ + + ".", //$NON-NLS-1$ + e); } } /** - * Add any extra fields that need to be added to this document. Subclasses - * should override to add more fields. + * Add any extra fields that need to be added to this document. Subclasses should override to + * add more fields. * - * @param doc the document to add fields to + * @param doc + * the document to add fields to */ protected void addExtraFields(Document doc) { } @@ -282,8 +295,9 @@ indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir); indexedDocs.restore(); setInconsistent(true); - LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 1000000); - IndexWriterConfig writerConfig = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_31, analyzer); + LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), + 1000000); + IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer); writerConfig.setOpenMode(create ? 
OpenMode.CREATE : OpenMode.APPEND); LogMergePolicy mergePolicy = new LogByteSizeMergePolicy(); mergePolicy.setMergeFactor(20); @@ -307,7 +321,7 @@ indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir); indexedDocs.restore(); setInconsistent(true); - ir = IndexReader.open(luceneDirectory, false); + ir = DirectoryReader.open(luceneDirectory); return true; } catch (IOException e) { HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$ @@ -323,7 +337,7 @@ if (ir != null) { ir.close(); } - ir = IndexReader.open(luceneDirectory, false); + ir = DirectoryReader.open(luceneDirectory); return true; } catch (IOException e) { HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$ @@ -334,14 +348,14 @@ /** * Deletes a single document from the index. * - * @param name - - * document name + * @param name + * - document name * @return IStatus */ public IStatus removeDocument(String name) { Term term = new Term(FIELD_NAME, name); try { - ir.deleteDocuments(term); + iw.deleteDocuments(term); indexedDocs.remove(name); } catch (IOException e) { return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR, @@ -375,11 +389,11 @@ } /* - * The searcher's index reader has it's stuff in memory so it won't - * know about this change. Close it so that it gets reloaded next search. + * The searcher's index reader has its stuff in memory so it won't know about this + * change. Close it so that it gets reloaded next search. */ if (searcher != null) { - searcher.close(); + searcher.getIndexReader().close(); searcher = null; } return true; @@ -407,11 +421,11 @@ saveDependencies(); /* - * The searcher's index reader has it's stuff in memory so it won't - * know about this change. Close it so that it gets reloaded next search. + * The searcher's index reader has its stuff in memory so it won't know about this + * change. Close it so that it gets reloaded next search.
*/ if (searcher != null) { - searcher.close(); + searcher.getIndexReader().close(); searcher = null; } return true; @@ -469,11 +483,11 @@ String indexId = indexIds.get(i); String indexPath = indexPaths.get(i); try { - dirList.add(new NIOFSDirectory(new File(indexPath))); + dirList.add(new NIOFSDirectory(new File(indexPath).toPath())); } catch (IOException ioe) { - HelpBasePlugin - .logError( - "Help search indexing directory could not be created for directory " + indexPath, ioe); //$NON-NLS-1$ + HelpBasePlugin.logError( + "Help search indexing directory could not be created for directory " + indexPath, //$NON-NLS-1$ + ioe); continue; } @@ -525,18 +539,19 @@ } public IStatus removeDuplicates(String name, String[] index_paths) { - TermDocs hrefDocs = null; - TermDocs indexDocs = null; - Term hrefTerm = new Term(FIELD_NAME, name); try { + LeafReader ar = SlowCompositeReaderWrapper.wrap(ir); + PostingsEnum hrefDocs = null; + PostingsEnum indexDocs = null; + Term hrefTerm = new Term(FIELD_NAME, name); for (int i = 0; i < index_paths.length; i++) { Term indexTerm = new Term(FIELD_INDEX_ID, index_paths[i]); if (i == 0) { - hrefDocs = ir.termDocs(hrefTerm); - indexDocs = ir.termDocs(indexTerm); + hrefDocs = ar.postings(hrefTerm); + indexDocs = ar.postings(indexTerm); } else { - hrefDocs.seek(hrefTerm); - indexDocs.seek(indexTerm); + hrefDocs = ar.postings(hrefTerm); + indexDocs = ar.postings(indexTerm); } removeDocuments(hrefDocs, indexDocs); } @@ -545,19 +560,6 @@ "IO exception occurred while removing duplicates of document " + name //$NON-NLS-1$ + " from index " + indexDir.getAbsolutePath() + ".", //$NON-NLS-1$ //$NON-NLS-2$ ioe); - } finally { - if (hrefDocs != null) { - try { - hrefDocs.close(); - } catch (IOException e) { - } - } - if (indexDocs != null) { - try { - indexDocs.close(); - } catch (IOException e) { - } - } } return Status.OK_STATUS; } @@ -569,33 +571,33 @@ * @param docs2 * @throws IOException */ - private void removeDocuments(TermDocs doc1, TermDocs 
docs2) throws IOException { - if (!doc1.next()) { + private void removeDocuments(PostingsEnum doc1, PostingsEnum docs2) throws IOException { + if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) { return; } - if (!docs2.next()) { + if (docs2.nextDoc() == PostingsEnum.NO_MORE_DOCS) { return; } while (true) { - if (doc1.doc() < docs2.doc()) { - if (!doc1.skipTo(docs2.doc())) { - if (!doc1.next()) { + if (doc1.docID() < docs2.docID()) { + if (doc1.advance(docs2.docID()) == PostingsEnum.NO_MORE_DOCS) { + if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) { return; } } - } else if (doc1.doc() > docs2.doc()) { - if (!docs2.skipTo(doc1.doc())) { - if (!doc1.next()) { + } else if (doc1.docID() > docs2.docID()) { + if (docs2.advance(doc1.docID()) == PostingsEnum.NO_MORE_DOCS) { + if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) { return; } } } - if (doc1.doc() == docs2.doc()) { - ir.deleteDocument(doc1.doc()); - if (!doc1.next()) { + if (doc1.docID() == docs2.docID()) { + iw.tryDeleteDocument(ir, doc1.docID()); + if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) { return; } - if (!docs2.next()) { + if (docs2.nextDoc() == PostingsEnum.NO_MORE_DOCS) { return; } } @@ -623,20 +625,72 @@ registerSearch(Thread.currentThread()); if (closed) return; - QueryBuilder queryBuilder = new QueryBuilder(searchQuery.getSearchWord(), analyzerDescriptor); - Query luceneQuery = queryBuilder.getLuceneQuery(searchQuery.getFieldNames(), searchQuery - .isFieldSearch()); - if (HelpPlugin.DEBUG_SEARCH) { - System.out.println("Search Query: " + luceneQuery.toString()); //$NON-NLS-1$ + + String[] fields; + if (searchQuery.isFieldSearch()){ + //sometimes you might want to search other than the default fields + fields = (String[]) searchQuery.getFieldNames().toArray(); + }else { + fields = new String[]{"contents","title"}; //$NON-NLS-1$ //$NON-NLS-2$ } - String highlightTerms = queryBuilder.gethighlightTerms(); - if (luceneQuery != null) { + + //prepare the parser + + MultiFieldQueryParser qb = new 
MultiFieldQueryParser(fields,analyzerDescriptor.getAnalyzer()); + qb.setAllowLeadingWildcard(true); + qb.setAnalyzeRangeTerms(true); + qb.setAutoGeneratePhraseQueries(true); + qb.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE); + qb.setLowercaseExpandedTerms(true); + qb.setLocale(new Locale(analyzerDescriptor.getLang())); + + //parse the "pure" query (no boosting) + Query luceneQuery = qb.parse(searchQuery.getSearchWord()); + + //we'll merge the pure query with some boosted queries + Query mergedQuery; + + if (!isWildcardQuery(searchQuery.getSearchWord())){ + mergedQuery = new BooleanQuery(); //merge for all fields before merging with luceneQuery + for (int i=0;i set = new HashSet(); + + if (mergedQuery != null) { if (searcher == null) { openSearcher(); } - TopDocs topDocs = searcher.search(luceneQuery, null, 1000); - collector.addHits(LocalSearchManager.asList(topDocs, searcher), highlightTerms); + TopDocs topDocs = searcher.search(mergedQuery, 1000); + + String highlight=null; + QueryParserTokenManager manager = new QueryParserTokenManager(new FastCharStream(new StringReader(searchQuery.getSearchWord()))); + while (true){ + Token nextToken = manager.getNextToken(); + String toHighlight = null; + if (nextToken.kind==0) break; + String image = nextToken.image; + toHighlight=image; + if ((image.startsWith("\""))&&( image.endsWith("\""))){ //$NON-NLS-1$//$NON-NLS-2$ + toHighlight = image.substring(1,image.length()-1); + } + if (image.equals("AND") || image.equals("OR")) //$NON-NLS-1$ //$NON-NLS-2$ + continue; + set .add(toHighlight); + + } + highlight = buildHighlight(set); + collector.addHits(LocalSearchManager.asList(topDocs, searcher), highlight==null?"":highlight); //$NON-NLS-1$ } + } catch (BooleanQuery.TooManyClauses tmc) { collector.addQTCException(new QueryTooComplexException()); } catch (QueryTooComplexException qe) { @@ -647,6 +701,18 @@ } finally { unregisterSearch(Thread.currentThread()); } + } + + private boolean
isWildcardQuery(String searchWord) { + return searchWord.contains("?")|| searchWord.contains("*"); //$NON-NLS-1$//$NON-NLS-2$ + } + + private String buildHighlight(Set set) { + StringBuilder sb = new StringBuilder(); + for (String string : set) { + sb.append("\""+string+"\""); //$NON-NLS-1$//$NON-NLS-2$ + } + return sb.toString(); } @Override @@ -718,25 +785,27 @@ /** * Determines whether an index can be read by the Lucene bundle - * @param indexVersionString The version of an Index directory + * + * @param indexVersionString + * The version of an Index directory * @return */ public boolean isLuceneCompatible(String indexVersionString) { - if (indexVersionString==null) return false; + if (indexVersionString == null) + return false; String luceneVersionString = ""; //$NON-NLS-1$ Bundle luceneBundle = Platform.getBundle(LUCENE_BUNDLE_ID); if (luceneBundle != null) { - luceneVersionString += luceneBundle.getHeaders() - .get(Constants.BUNDLE_VERSION); + luceneVersionString += luceneBundle.getHeaders().get(Constants.BUNDLE_VERSION); } Version luceneVersion = new Version(luceneVersionString); Version indexVersion = new Version(indexVersionString); - Version v191 = new Version(1, 9, 1); - if (indexVersion.compareTo(v191) < 0) { - // index is older than Lucene 1.9.1 + Version v500 = new Version(5, 0, 0); + if (indexVersion.compareTo(v500) < 0) { + // index is older than Lucene 5.0.0 return false; } - if ( luceneVersion.compareTo(indexVersion) >= 0 ) { + if (luceneVersion.compareTo(indexVersion) >= 0) { // Lucene bundle is newer than the index return true; } @@ -801,7 +870,7 @@ public void openSearcher() throws IOException { synchronized (searcherCreateLock) { if (searcher == null) { - searcher = new IndexSearcher(IndexReader.open(luceneDirectory, false)); + searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory)); } } } @@ -819,7 +888,7 @@ if (searches.isEmpty()) { if (searcher != null) { try { - searcher.close(); + searcher.getIndexReader().close(); } catch 
(IOException ioe) { } } @@ -837,7 +906,8 @@ * Finds and unzips prebuild index specified in preferences */ private void unzipProductIndex() { - String indexPluginId = Platform.getPreferencesService().getString(HelpBasePlugin.PLUGIN_ID, "productIndex", null, null); //$NON-NLS-1$ + String indexPluginId = Platform.getPreferencesService().getString(HelpBasePlugin.PLUGIN_ID, + "productIndex", null, null); //$NON-NLS-1$ if (indexPluginId == null || indexPluginId.length() <= 0) { return; } @@ -899,7 +969,7 @@ private void cleanOldIndex() { try (LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 10000); IndexWriter cleaner = new IndexWriter(luceneDirectory, - new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_31, analyzer) + new IndexWriterConfig(analyzer) .setOpenMode(OpenMode.CREATE))) { } catch (IOException ioe) { diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java index d0a7bb7..444b66c 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java @@ -7,10 +7,9 @@ * * Contributors: * IBM Corporation - initial API and implementation + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package org.eclipse.help.internal.search; - -import java.io.*; import org.apache.lucene.analysis.*; @@ -18,7 +17,7 @@ * Smart Analyzer. Chooses underlying implementation based on the field which * text is analyzed. */ -public final class SmartAnalyzer extends Analyzer { +public final class SmartAnalyzer extends AnalyzerWrapper { Analyzer pluggedInAnalyzer; Analyzer exactAnalyzer; @@ -26,20 +25,19 @@ * Constructor for SmartAnalyzer. 
*/ public SmartAnalyzer(String locale, Analyzer pluggedInAnalyzer) { - super(); + super(pluggedInAnalyzer.getReuseStrategy()); this.pluggedInAnalyzer = pluggedInAnalyzer; this.exactAnalyzer = new DefaultAnalyzer(locale); } /** - * Creates a TokenStream which tokenizes all the text in the provided - * Reader. Delegates to DefaultAnalyzer when field used to search for exact + * Delegates to DefaultAnalyzer when field used to search for exact * match, and to plugged-in analyzer for other fields. */ @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { + public final Analyzer getWrappedAnalyzer(String fieldName) { if (fieldName != null && fieldName.startsWith("exact_")) { //$NON-NLS-1$ - return exactAnalyzer.tokenStream(fieldName, reader); + return exactAnalyzer; } - return pluggedInAnalyzer.tokenStream(fieldName, reader); + return pluggedInAnalyzer; } } diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java index 0b70cf7..79d5592 100644 --- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java +++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java @@ -7,16 +7,18 @@ * * Contributors: * IBM Corporation - initial API and implementation + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package org.eclipse.help.internal.search; -import com.ibm.icu.text.BreakIterator; import java.io.IOException; -import java.io.Reader; import java.util.Locale; + import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +import com.ibm.icu.text.BreakIterator; /** * WordTokenStream obtains tokens containing words appropriate for use with @@ 
-24,7 +26,6 @@ */ public final class WordTokenStream extends Tokenizer { private static final int BUF_LEN = 4096; - private final Reader reader; private final BreakIterator boundary; private StringBuffer strbuf; @@ -34,8 +35,8 @@ /** * Constructor */ - public WordTokenStream(String fieldName, Reader reader, Locale locale) { - this.reader = reader; + public WordTokenStream(Locale locale) { + super(); boundary = BreakIterator.getWordInstance(locale); } @@ -52,9 +53,9 @@ if(strbuf == null) { int available; char[] cbuf = new char[BUF_LEN]; - while ((available = reader.read(cbuf)) <= 0) { + while ((available = input.read(cbuf)) <= 0) { if (available < 0) { - reader.close(); + input.close(); return false; } } @@ -62,7 +63,7 @@ strbuf.append(cbuf, 0, available); // read more until white space (or EOF) int c; - while (0 <= (c = reader.read())) { + while (0 <= (c = input.read())) { strbuf.append((char) c); if (c == ' ' || c == '\r' || c == '\n' || c == '\t') { break; @@ -70,7 +71,7 @@ } if (c < 0) { - reader.close(); + input.close(); } boundary.setText(strbuf.toString()); @@ -107,9 +108,10 @@ @Override public void close() throws IOException { + super.close(); /// Unlikely to be called as this is a reused - if (this.reader != null) { - this.reader.close(); + if (this.input != null) { + this.input.close(); } } } diff --git a/eclipse.platform.ua/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml b/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml index 8438100..174e92a 100644 --- a/eclipse.platform.ua/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml +++ b/eclipse.platform.ua/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml @@ -74,20 +74,6 @@ unpack="false"/> - - - - + + + + + + + + diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/META-INF/MANIFEST.MF b/org.eclipse.ua.tests/META-INF/MANIFEST.MF index 6bcf9bc..23f8910 100644 --- 
a/eclipse.platform.ua/org.eclipse.ua.tests/META-INF/MANIFEST.MF +++ b/eclipse.platform.ua/org.eclipse.ua.tests/META-INF/MANIFEST.MF @@ -19,14 +19,13 @@ org.eclipse.ui.forms, org.eclipse.ui.browser;bundle-version="3.2.300", org.eclipse.equinox.jsp.jasper;bundle-version="1.0.200", - org.eclipse.equinox.jsp.jasper.registry;bundle-version="1.0.100" + org.eclipse.equinox.jsp.jasper.registry;bundle-version="1.0.100", + org.apache.lucene.analyzers-common;bundle-version="5.1.0", + org.apache.lucene.core;bundle-version="5.1.0" Bundle-ActivationPolicy: lazy Bundle-Vendor: Eclipse.org Import-Package: javax.servlet;version="3.1.0", - javax.servlet.http;version="3.1.0", - org.apache.lucene.index;core=split;version="[3.5.0,4.0.0)", - org.apache.lucene.search;core=split;version="[3.5.0,4.0.0)", - org.apache.lucene.store;core=split;version="[3.5.0,4.0.0)" + javax.servlet.http;version="3.1.0" Bundle-RequiredExecutionEnvironment: JavaSE-1.8 Export-Package: org.eclipse.ua.tests, org.eclipse.ua.tests.browser, diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index291/write.lock b/org.eclipse.ua.tests/data/help/searchindex/index291/write.lock new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index291/write.lock diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfe b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfe new file mode 100644 index 0000000..09ec2c9 --- /dev/null +++ b/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfe Binary files differ diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfs b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfs new file mode 100644 index 0000000..3aa288a --- /dev/null +++ b/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfs Binary files differ diff --git 
a/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/_0.si b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.si new file mode 100644 index 0000000..d897bfc --- /dev/null +++ b/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/_0.si Binary files differ diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/segments_1 b/org.eclipse.ua.tests/data/help/searchindex/index510/segments_1 new file mode 100644 index 0000000..4878901 --- /dev/null +++ b/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/segments_1 Binary files differ diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/write.lock b/org.eclipse.ua.tests/data/help/searchindex/index510/write.lock new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/eclipse.platform.ua/org.eclipse.ua.tests/data/help/searchindex/index510/write.lock diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java index 1dd4876..02b2e5b 100644 --- a/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java +++ b/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java @@ -7,6 +7,7 @@ * * Contributors: * IBM Corporation - initial API and implementation + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package org.eclipse.ua.tests.help.search; @@ -36,7 +37,7 @@ @Test public void testUtf8Hebrew() { SearchTestUtils.searchOneLocale("\u05D0\u05B7\u05E1\u05B0\u05D8\u05B0\u05E8\u05D5\u05B9\u05E0\u05D5\u05B9\u05DE" - + "\u05B0\u05D9\u05B8\u05D4) \u05DC\u05B4\u05E7\u05BC\u05D5\u05BC\u05D9 (\u05D9\u05E8\u05D7 \u05D0\u05D5 \u05E9\u05DE\u05E9", new String[] 
{"/org.eclipse.ua.tests/data/help/search/testnlUTF8.htm" }, "en"); + + "\u05B0\u05D9\u05B8\u05D4\\) \u05DC\u05B4\u05E7\u05BC\u05D5\u05BC\u05D9 \\(\u05D9\u05E8\u05D7 \u05D0\u05D5 \u05E9\u05DE\u05E9", new String[] {"/org.eclipse.ua.tests/data/help/search/testnlUTF8.htm" }, "en"); } diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java index 640d4c9..33ae5ba 100644 --- a/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java +++ b/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java @@ -22,20 +22,21 @@ import java.net.URL; import java.util.ArrayList; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.util.QueryBuilder; import org.eclipse.core.runtime.FileLocator; import org.eclipse.core.runtime.NullProgressMonitor; import org.eclipse.core.runtime.Path; import org.eclipse.help.internal.base.BaseHelpSystem; import org.eclipse.help.internal.search.AnalyzerDescriptor; import org.eclipse.help.internal.search.PluginIndex; -import org.eclipse.help.internal.search.QueryBuilder; import org.eclipse.help.internal.search.SearchIndexWithIndexingProgress; import org.eclipse.ua.tests.plugin.UserAssistanceTestPlugin; import org.junit.Test; @@ -56,28 +57,12 @@ public class PrebuiltIndexCompatibility { /** - * Test index built with Lucene 1.9.1 - */ - @Test - public void test1_9_1_IndexReadable() throws Exception { - 
checkReadable("data/help/searchindex/index191"); - } - - /** - * Test index built with Lucene 2.9.1 - */ - @Test - public void test2_9_1_IndexReadable() throws Exception { - checkReadable("data/help/searchindex/index291"); - } - - /** ** Test compatibility of Lucene 1.9.1 index with current Lucene */ @Test public void test1_9_1Compatible() { - checkCompatible("data/help/searchindex/index191", true); + checkCompatible("data/help/searchindex/index191", false); } /** @@ -86,13 +71,13 @@ @Test public void test2_9_1Compatible() { - checkCompatible("data/help/searchindex/index291", true); + checkCompatible("data/help/searchindex/index291", false); } @Test public void test1_9_1LuceneCompatible() { - checkLuceneCompatible("1.9.1", true); + checkLuceneCompatible("1.9.1", false); } @Test @@ -104,7 +89,12 @@ @Test public void test2_9_1LuceneCompatible() { - checkLuceneCompatible("2.9.1", true); + checkLuceneCompatible("2.9.1", false); + } + + @Test + public void test5_1_0LuceneCompatible() { + checkLuceneCompatible("5.1.0", true); } @Test @@ -143,6 +133,11 @@ assertFalse(index1.equals(index2)); } + @Test + public void test5_1_0IndexReadable() throws CorruptIndexException, IOException { + checkReadable("data/help/searchindex/index510"); + } + /* * Verifies that a prebuilt index can be searched */ @@ -154,10 +149,11 @@ URL resolved = FileLocator.resolve(url); if ("file".equals(resolved.getProtocol())) { //$NON-NLS-1$ String filePath = resolved.getFile(); - QueryBuilder queryBuilder = new QueryBuilder("eclipse", new AnalyzerDescriptor("en-us")); - Query luceneQuery = queryBuilder.getLuceneQuery(new ArrayList() , false); - try (Directory luceneDirectory = new NIOFSDirectory(new File(filePath)); - IndexSearcher searcher = new IndexSearcher(IndexReader.open(luceneDirectory, true))) { + StandardAnalyzer standardAnalyzer = new StandardAnalyzer(); + QueryBuilder builder = new QueryBuilder(standardAnalyzer); + Query luceneQuery = builder.createBooleanQuery("contents", "eclipse"); + try 
(Directory luceneDirectory = new NIOFSDirectory(new File(filePath).toPath())) { + IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory)); TopDocs hits = searcher.search(luceneQuery, 500); assertEquals(hits.totalHits, 1); } diff --git a/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java index 223e42a..2e782c3 100644 --- a/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java +++ b/eclipse.platform.ua/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java @@ -7,6 +7,7 @@ * * Contributors: * IBM Corporation - initial API and implementation + * Sopot Cela - Bug 466829 - Migration to Lucene 5 *******************************************************************************/ package org.eclipse.ua.tests.help.search; @@ -35,7 +36,7 @@ @Test public void testSearchUsingAndInSeparateDocs() { - SearchTestUtils.searchAllLocales("jduehdye and olhoykk", new String[0]); + SearchTestUtils.searchAllLocales("jduehdye AND olhoykk", new String[0]); } @Test --- a/eclipse.platform.common/bundles/org.eclipse.platform.doc.isv/pom.xml +++ b/eclipse.platform.common/bundles/org.eclipse.platform.doc.isv/pom.xml @@ -101,7 +101,7 @@ eclipse-plugin - org.apache.lucene.analysis + org.apache.lucene.analyzers-common 0.0.0