diff --git a/.gitignore b/.gitignore index c13c043..cd911c2 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ /solr-poms-4.10.3.tar.xz /solr-4.10.3-src.tgz /solr-4.10.4-clean.tar.xz +/solr-5.3.0-clean.tar.xz diff --git a/solr-4.10.4-SOLR-4839.patch b/solr-4.10.4-SOLR-4839.patch deleted file mode 100644 index 62c69dd..0000000 --- a/solr-4.10.4-SOLR-4839.patch +++ /dev/null @@ -1,197 +0,0 @@ ---- solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java 2014-08-07 07:21:33.000000000 +0200 -+++ solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java.SOLR-4839 2015-05-08 07:01:41.290938338 +0200 -@@ -41,17 +41,18 @@ - - import org.apache.solr.servlet.SolrDispatchFilter; - import org.eclipse.jetty.server.Connector; -+import org.eclipse.jetty.server.HttpConfiguration; -+import org.eclipse.jetty.server.HttpConnectionFactory; -+import org.eclipse.jetty.server.LowResourceMonitor; -+import org.eclipse.jetty.server.SecureRequestCustomizer; - import org.eclipse.jetty.server.Server; --import org.eclipse.jetty.server.bio.SocketConnector; --import org.eclipse.jetty.server.handler.GzipHandler; --import org.eclipse.jetty.server.nio.SelectChannelConnector; -+import org.eclipse.jetty.server.ServerConnector; -+import org.eclipse.jetty.server.SslConnectionFactory; - import org.eclipse.jetty.server.session.HashSessionIdManager; --import org.eclipse.jetty.server.ssl.SslConnector; --import org.eclipse.jetty.server.ssl.SslSelectChannelConnector; --import org.eclipse.jetty.server.ssl.SslSocketConnector; - import org.eclipse.jetty.servlet.FilterHolder; - import org.eclipse.jetty.servlet.ServletContextHandler; - import org.eclipse.jetty.servlet.ServletHolder; -+import org.eclipse.jetty.servlets.GzipFilter; - import org.eclipse.jetty.util.component.LifeCycle; - import org.eclipse.jetty.util.log.Logger; - import org.eclipse.jetty.util.ssl.SslContextFactory; -@@ -203,17 +204,19 @@ - - private void init(String solrHome, String context, int port, boolean stopAtShutdown) { - this.context = context; -- server = new Server(port); -+ QueuedThreadPool qtp = new QueuedThreadPool(); -+ qtp.setMaxThreads(10000); -+ qtp.setIdleTimeout((int) TimeUnit.SECONDS.toMillis(5)); -+ qtp.setStopTimeout((int) TimeUnit.MINUTES.toMillis(1)); -+ server = new Server(qtp); -+ server.manage(qtp); - - this.solrHome = solrHome; - this.stopAtShutdown = stopAtShutdown; - server.setStopAtShutdown(stopAtShutdown); -- if (!stopAtShutdown) { -- server.setGracefulShutdown(0); -- } -- System.setProperty("solr.solr.home", solrHome); -+ - if (System.getProperty("jetty.testMode") != null) { -- final String connectorName = System.getProperty("tests.jettyConnector", "SelectChannel"); -+ - - // if this property is true, then jetty will be configured to use SSL - // leveraging the same system properties as java to specify -@@ -228,52 +231,39 @@ - final SslContextFactory sslcontext = new SslContextFactory(false); - sslInit(useSsl, sslcontext); - -- final Connector connector; -- if ("SelectChannel".equals(connectorName)) { -- final SelectChannelConnector c = useSsl -- ? new SslSelectChannelConnector(sslcontext) -- : new SelectChannelConnector(); -- c.setReuseAddress(true); -- c.setLowResourcesMaxIdleTime(1500); -- c.setSoLingerTime(0); -- connector = c; -- } else if ("Socket".equals(connectorName)) { -- final SocketConnector c = useSsl -- ? new SslSocketConnector(sslcontext) -- : new SocketConnector(); -- c.setReuseAddress(true); -- c.setSoLingerTime(0); -- connector = c; -+ ServerConnector connector; -+ if (useSsl) { -+ HttpConfiguration configuration = new HttpConfiguration(); -+ configuration.setSecureScheme("https"); -+ configuration.addCustomizer(new SecureRequestCustomizer()); -+ connector = new ServerConnector(server, new SslConnectionFactory(sslcontext, "http/1.1"), -+ new HttpConnectionFactory(configuration)); - } else { -- throw new IllegalArgumentException("Illegal value for system property 'tests.jettyConnector': " + connectorName); -+ connector = new ServerConnector(server, new HttpConnectionFactory()); - } - -+ connector.setReuseAddress(true); -+ connector.setSoLingerTime(0); - connector.setPort(port); - connector.setHost("127.0.0.1"); - -- // Connectors by default inherit server's thread pool. -- QueuedThreadPool qtp = new QueuedThreadPool(); -- qtp.setMaxThreads(10000); -- qtp.setMaxIdleTimeMs((int) TimeUnit.SECONDS.toMillis(5)); -- qtp.setMaxStopTimeMs((int) TimeUnit.MINUTES.toMillis(1)); -- server.setThreadPool(qtp); -+ // Enable Low Resources Management -+ LowResourceMonitor lowResources = new LowResourceMonitor(server); -+ lowResources.setLowResourcesIdleTimeout(1500); -+ lowResources.setMaxConnections(10000); -+ server.addBean(lowResources); - - server.setConnectors(new Connector[] {connector}); - server.setSessionIdManager(new HashSessionIdManager(new Random())); - } else { -- if (server.getThreadPool() == null) { -- // Connectors by default inherit server's thread pool. -- QueuedThreadPool qtp = new QueuedThreadPool(); -- qtp.setMaxThreads(10000); -- qtp.setMaxIdleTimeMs((int) TimeUnit.SECONDS.toMillis(5)); -- qtp.setMaxStopTimeMs((int) TimeUnit.SECONDS.toMillis(1)); -- server.setThreadPool(qtp); -- } -+ ServerConnector connector = new ServerConnector(server, new HttpConnectionFactory()); -+ connector.setPort(port); -+ server.setConnectors(new Connector[] {connector}); - } - - // Initialize the servlets - final ServletContextHandler root = new ServletContextHandler(server,context,ServletContextHandler.SESSIONS); -- root.setHandler(new GzipHandler()); -+ root.addFilter(GzipFilter.class, "*", EnumSet.of(DispatcherType.REQUEST)); - server.addLifeCycleListener(new LifeCycle.Listener() { - - @Override -@@ -307,14 +297,16 @@ - extraFilters = new LinkedList<>(); - for (Class filterClass : extraRequestFilters.keySet()) { - extraFilters.add(root.addFilter(filterClass, extraRequestFilters.get(filterClass), -- EnumSet.of(DispatcherType.REQUEST))); -+ EnumSet.of(DispatcherType.REQUEST))); - } - } -- dispatchFilter = root.addFilter(SolrDispatchFilter.class, "*", EnumSet.of(DispatcherType.REQUEST) ); - for (ServletHolder servletHolder : extraServlets.keySet()) { - String pathSpec = extraServlets.get(servletHolder); - root.addServlet(servletHolder, pathSpec); - } -+ -+ dispatchFilter = root.addFilter(SolrDispatchFilter.class, "*", EnumSet.of(DispatcherType.REQUEST) ); -+ - if (solrConfigFilename != null) System.clearProperty("solrconfig"); - if (schemaFilename != null) System.clearProperty("schema"); - System.clearProperty("solr.solr.home"); -@@ -340,7 +332,7 @@ - sslcontext.setKeyStorePassword(sslConfig.getKeyStorePassword()); - } - if (null != sslConfig.getTrustStore()) { -- sslcontext.setTrustStore(System -+ sslcontext.setTrustStorePath(System - .getProperty(sslConfig.getTrustStore())); - } - if (null != sslConfig.getTrustStorePassword()) { -@@ -360,7 +352,7 @@ - (System.getProperty("javax.net.ssl.keyStorePassword")); - } - if (null != System.getProperty("javax.net.ssl.trustStore")) { -- sslcontext.setTrustStore -+ sslcontext.setTrustStorePath - (System.getProperty("javax.net.ssl.trustStore")); - } - if (null != System.getProperty("javax.net.ssl.trustStorePassword")) { -@@ -465,7 +457,7 @@ - if (0 == conns.length) { - throw new RuntimeException("Jetty Server has no Connectors"); - } -- return (proxyPort != -1) ? proxyPort : conns[0].getLocalPort(); -+ return (proxyPort != -1) ? proxyPort : ((ServerConnector) conns[0]).getLocalPort(); - } - - /** -@@ -500,12 +492,12 @@ - if (0 == conns.length) { - throw new IllegalStateException("Jetty Server has no Connectors"); - } -- Connector c = conns[0]; -+ ServerConnector c = (ServerConnector) conns[0]; - if (c.getLocalPort() < 0) { - throw new IllegalStateException("Jetty Connector is not open: " + - c.getLocalPort()); - } -- protocol = (c instanceof SslConnector) ? "https" : "http"; -+ protocol = c.getDefaultProtocol().equals("SSL-http/1.1") ? "https" : "http"; - return new URL(protocol, c.getHost(), c.getLocalPort(), context); - - } catch (MalformedURLException e) { -@@ -615,6 +607,10 @@ - - } - -+ public void debug(String s, long l) { -+ -+ } -+ - @Override - public String getName() { - return toString(); diff --git a/solr-5.3.0-jetty9.3.3.patch b/solr-5.3.0-jetty9.3.3.patch new file mode 100644 index 0000000..6b96b58 --- /dev/null +++ b/solr-5.3.0-jetty9.3.3.patch @@ -0,0 +1,20 @@ +diff -Nru solr-5.3.0/solr/test-framework/src/java/org/apache/solr/util/SSLTestConfig.java solr-5.3.0.jetty9/solr/test-framework/src/java/org/apache/solr/util/SSLTestConfig.java +--- solr-5.3.0/solr/test-framework/src/java/org/apache/solr/util/SSLTestConfig.java 2015-01-21 16:15:34.000000000 +0100 ++++ solr-5.3.0.jetty9/solr/test-framework/src/java/org/apache/solr/util/SSLTestConfig.java 2015-09-16 13:13:34.697018018 +0200 +@@ -36,6 +36,7 @@ + import org.apache.solr.client.solrj.impl.HttpClientConfigurer; + import org.apache.solr.common.params.SolrParams; + import org.eclipse.jetty.util.security.CertificateUtils; ++import org.eclipse.jetty.util.resource.Resource; + + public class SSLTestConfig extends SSLConfig { + public static File TEST_KEYSTORE = ExternalPaths.SERVER_HOME == null ? null +@@ -82,7 +83,7 @@ + + protected static KeyStore buildKeyStore(String keyStoreLocation, String password) { + try { +- return CertificateUtils.getKeyStore(null, keyStoreLocation, "JKS", null, password); ++ return CertificateUtils.getKeyStore(Resource.newResource(keyStoreLocation), keyStoreLocation, "JKS", password); + } catch (Exception ex) { + throw new IllegalStateException("Unable to build KeyStore from file: " + keyStoreLocation, ex); + } diff --git a/solr-5.3.0-use-system-hll.patch b/solr-5.3.0-use-system-hll.patch new file mode 100644 index 0000000..01a596f --- /dev/null +++ b/solr-5.3.0-use-system-hll.patch @@ -0,0 +1,3154 @@ +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsField.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsField.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsField.java 2015-07-17 14:10:40.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsField.java 2015-09-14 18:11:56.330769125 +0200 +@@ -55,8 +55,8 @@ + import org.apache.solr.search.QueryParsing; + import org.apache.solr.search.SolrIndexSearcher; + import org.apache.solr.search.SyntaxError; +-import org.apache.solr.util.hll.HLL; +-import org.apache.solr.util.hll.HLLType; ++import net.agkn.hll.HLL; ++import net.agkn.hll.HLLType; + + import com.google.common.hash.Hashing; + import com.google.common.hash.HashFunction; +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java 2015-07-17 14:10:40.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java 2015-09-14 18:12:12.835927178 +0200 +@@ -33,12 +33,12 @@ + import org.apache.solr.schema.*; + + import com.tdunning.math.stats.AVLTreeDigest; ++ ++import net.agkn.hll.HLL; ++import net.agkn.hll.HLLType; + import com.google.common.hash.Hashing; + import com.google.common.hash.HashFunction; + +-import org.apache.solr.util.hll.HLL; +-import org.apache.solr.util.hll.HLLType; +- + /** + * Factory class for creating instance of + * {@link org.apache.solr.handler.component.StatsValues} +@@ -824,7 +824,7 @@ + @Override + public long hash(String v) { + // NOTE: renamed hashUnencodedChars starting with guava 15 +- return hasher.hashString(v).asLong(); ++ return hasher.hashUnencodedChars(v).asLong(); + } + + @Override +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java 2015-07-17 14:10:40.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java 2015-09-15 07:26:56.977497188 +0200 +@@ -23,8 +23,8 @@ + import java.util.List; + import java.util.Set; + +-import org.apache.solr.util.hll.HLL; +-import org.apache.solr.util.hll.HLLType; ++import net.agkn.hll.HLL; ++import net.agkn.hll.HLLType; + import org.apache.lucene.index.DocValues; + import org.apache.lucene.index.LeafReaderContext; + import org.apache.lucene.index.NumericDocValues; +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java 2015-07-17 14:10:40.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java 2015-09-16 09:31:52.773252909 +0200 +@@ -21,7 +21,7 @@ + import java.util.ArrayList; + import java.util.List; + +-import org.apache.solr.util.hll.HLL; ++import net.agkn.hll.HLL; + import org.apache.lucene.index.LeafReaderContext; + import org.apache.lucene.index.MultiDocValues; + import org.apache.lucene.index.SortedDocValues; +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,173 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A corresponding deserializer for {@link BigEndianAscendingWordSerializer}. +- */ +-class BigEndianAscendingWordDeserializer implements IWordDeserializer { +- // The number of bits per byte. +- private static final int BITS_PER_BYTE = 8; +- +- // long mask for the maximum value stored in a byte +- private static final long BYTE_MASK = (1L << BITS_PER_BYTE) - 1L; +- +- // ************************************************************************ +- // The length in bits of the words to be read. +- private final int wordLength; +- +- // The byte array to which the words are serialized. +- private final byte[] bytes; +- +- // The number of leading padding bytes in 'bytes' to be ignored. +- private final int bytePadding; +- +- // The number of words that the byte array contains. +- private final int wordCount; +- +- // The current read state. +- private int currentWordIndex; +- +- // ======================================================================== +- /** +- * @param wordLength the length in bits of the words to be deserialized. Must +- * be less than or equal to 64 and greater than or equal to 1. +- * @param bytePadding the number of leading bytes that pad the serialized words. +- * Must be greater than or equal to zero. +- * @param bytes the byte array containing the serialized words. Cannot be +- * null. +- */ +- public BigEndianAscendingWordDeserializer(final int wordLength, final int bytePadding, final byte[] bytes) { +- if((wordLength < 1) || (wordLength > 64)) { +- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")"); +- } +- +- if(bytePadding < 0) { +- throw new IllegalArgumentException("Byte padding must be >= zero. (was: " + bytePadding + ")"); +- } +- +- this.wordLength = wordLength; +- this.bytes = bytes; +- this.bytePadding = bytePadding; +- +- final int dataBytes = (bytes.length - bytePadding); +- final long dataBits = (dataBytes * BITS_PER_BYTE); +- +- this.wordCount = (int)(dataBits/wordLength); +- +- currentWordIndex = 0; +- } +- +- // ======================================================================== +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IWordDeserializer#readWord() +- */ +- @Override +- public long readWord() { +- final long word = readWord(currentWordIndex); +- currentWordIndex++; +- +- return word; +- } +- +- // ------------------------------------------------------------------------ +- /** +- * Reads the word at the specified sequence position (zero-indexed). +- * +- * @param position the zero-indexed position of the word to be read. This +- * must be greater than or equal to zero. +- * @return the value of the serialized word at the specified position. +- */ +- private long readWord(final int position) { +- if(position < 0) { +- throw new ArrayIndexOutOfBoundsException(position); +- } +- +- // First bit of the word +- final long firstBitIndex = (position * wordLength); +- final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE)); +- final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE); +- +- // Last bit of the word +- final long lastBitIndex = (firstBitIndex + wordLength - 1); +- final int lastByteIndex = (bytePadding + (int)(lastBitIndex / BITS_PER_BYTE)); +- final int lastByteBitsToConsume; +- +- final int bitsAfterByteBoundary = (int)((lastBitIndex + 1) % BITS_PER_BYTE); +- // If the word terminates at the end of the last byte, consume the whole +- // last byte. +- if(bitsAfterByteBoundary == 0) { +- lastByteBitsToConsume = BITS_PER_BYTE; +- } else { +- // Otherwise, only consume what is necessary. +- lastByteBitsToConsume = bitsAfterByteBoundary; +- } +- +- if(lastByteIndex >= bytes.length) { +- throw new ArrayIndexOutOfBoundsException("Word out of bounds of backing array."); +- } +- +- // Accumulator +- long value = 0; +- +- // -------------------------------------------------------------------- +- // First byte +- final int bitsRemainingInFirstByte = (BITS_PER_BYTE - firstByteSkipBits); +- final int bitsToConsumeInFirstByte = Math.min(bitsRemainingInFirstByte, wordLength); +- long firstByte = (long)bytes[firstByteIndex]; +- +- // Mask off the bits to skip in the first byte. +- final long firstByteMask = ((1L << bitsRemainingInFirstByte) - 1L); +- firstByte &= firstByteMask; +- // Right-align relevant bits of first byte. +- firstByte >>>= (bitsRemainingInFirstByte - bitsToConsumeInFirstByte); +- +- value |= firstByte; +- +- // If the first byte contains the whole word, short-circuit. +- if(firstByteIndex == lastByteIndex) { +- return value; +- } +- +- // -------------------------------------------------------------------- +- // Middle bytes +- final int middleByteCount = (lastByteIndex - firstByteIndex - 1); +- for(int i=0; i>= (BITS_PER_BYTE - lastByteBitsToConsume); +- value <<= lastByteBitsToConsume; +- value |= lastByte; +- return value; +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount() +- */ +- @Override +- public int totalWordCount() { +- return wordCount; +- } +-} +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,174 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A serializer that writes a sequence of fixed bit-width 'words' to a byte array. +- * Bitwise OR is used to write words into bytes, so a low bit in a word is also +- * a low bit in a byte. However, a high byte in a word is written at a lower index +- * in the array than a low byte in a word. The first word is written at the lowest +- * array index. Each serializer is one time use and returns its backing byte +- * array.

+- * +- * This encoding was chosen so that when reading bytes as octets in the typical +- * first-octet-is-the-high-nibble fashion, an octet-to-binary conversion +- * would yield a high-to-low, left-to-right view of the "short words".

+- * +- * Example:

+- * +- * Say short words are 5 bits wide. Our word sequence is the values +- * [31, 1, 5]. In big-endian binary format, the values are +- * [0b11111, 0b00001, 0b00101]. We use 15 of 16 bits in two bytes +- * and pad the last (lowest) bit of the last byte with a zero: +- * +- * +- * [0b11111000, 0b01001010] = [0xF8, 0x4A] +- * . +- */ +-class BigEndianAscendingWordSerializer implements IWordSerializer { +- // The number of bits per byte. +- private static final int BITS_PER_BYTE = 8; +- +- // ************************************************************************ +- // The length in bits of the words to be written. +- private final int wordLength; +- // The number of words to be written. +- private final int wordCount; +- +- // The byte array to which the words are serialized. +- private final byte[] bytes; +- +- // ------------------------------------------------------------------------ +- // Write state +- // Number of bits that remain writable in the current byte. +- private int bitsLeftInByte; +- // Index of byte currently being written to. +- private int byteIndex; +- // Number of words written. +- private int wordsWritten; +- +- // ======================================================================== +- /** +- * @param wordLength the length in bits of the words to be serialized. Must +- * be greater than or equal to 1 and less than or equal to 64. +- * @param wordCount the number of words to be serialized. Must be greater than +- * or equal to zero. +- * @param bytePadding the number of leading bytes that should pad the +- * serialized words. Must be greater than or equal to zero. +- */ +- public BigEndianAscendingWordSerializer(final int wordLength, final int wordCount, final int bytePadding) { +- if((wordLength < 1) || (wordLength > 64)) { +- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")"); +- } +- if(wordCount < 0) { +- throw new IllegalArgumentException("Word count must be >= 0. (was: " + wordCount + ")"); +- } +- if(bytePadding < 0) { +- throw new IllegalArgumentException("Byte padding must be must be >= 0. (was: " + bytePadding + ")"); +- } +- +- this.wordLength = wordLength; +- this.wordCount = wordCount; +- +- final long bitsRequired = (wordLength * wordCount); +- final boolean leftoverBits = ((bitsRequired % BITS_PER_BYTE) != 0); +- final int bytesRequired = (int)(bitsRequired / BITS_PER_BYTE) + (leftoverBits ? 1 : 0) + bytePadding; +- bytes = new byte[bytesRequired]; +- +- bitsLeftInByte = BITS_PER_BYTE; +- byteIndex = bytePadding; +- wordsWritten = 0; +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IWordSerializer#writeWord(long) +- * @throws RuntimeException if the number of words written is greater than the +- * wordCount parameter in the constructor. +- */ +- @Override +- public void writeWord(final long word) { +- if(wordsWritten == wordCount) { +- throw new RuntimeException("Cannot write more words, backing array full!"); +- } +- +- int bitsLeftInWord = wordLength; +- +- while(bitsLeftInWord > 0) { +- // Move to the next byte if the current one is fully packed. +- if(bitsLeftInByte == 0) { +- byteIndex++; +- bitsLeftInByte = BITS_PER_BYTE; +- } +- +- final long consumedMask; +- if(bitsLeftInWord == 64) { +- consumedMask = ~0L; +- } else { +- consumedMask = ((1L << bitsLeftInWord) - 1L); +- } +- +- // Fix how many bits will be written in this cycle. Choose the +- // smaller of the remaining bits in the word or byte. +- final int numberOfBitsToWrite = Math.min(bitsLeftInByte, bitsLeftInWord); +- final int bitsInByteRemainingAfterWrite = (bitsLeftInByte - numberOfBitsToWrite); +- +- // In general, we write the highest bits of the word first, so we +- // strip the highest bits that were consumed in previous cycles. +- final long remainingBitsOfWordToWrite = (word & consumedMask); +- +- final long bitsThatTheByteCanAccept; +- // If there is more left in the word than can be written to this +- // byte, shift off the bits that can't be written off the bottom. +- if(bitsLeftInWord > numberOfBitsToWrite) { +- bitsThatTheByteCanAccept = (remainingBitsOfWordToWrite >>> (bitsLeftInWord - bitsLeftInByte)); +- } else { +- // If the byte can accept all remaining bits, there is no need +- // to shift off the bits that won't be written in this cycle. +- bitsThatTheByteCanAccept = remainingBitsOfWordToWrite; +- } +- +- // Align the word bits to write up against the byte bits that have +- // already been written. This shift may do nothing if the remainder +- // of the byte is being consumed in this cycle. +- final long alignedBits = (bitsThatTheByteCanAccept << bitsInByteRemainingAfterWrite); +- +- // Update the byte with the alignedBits. +- bytes[byteIndex] |= (byte)alignedBits; +- +- // Update state with bit count written. +- bitsLeftInWord -= numberOfBitsToWrite; +- bitsLeftInByte = bitsInByteRemainingAfterWrite; +- } +- +- wordsWritten ++; +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IWordSerializer#getBytes() +- * @throws RuntimeException if the number of words written is fewer than the +- * wordCount parameter in the constructor. +- */ +- @Override +- public byte[] getBytes() { +- if(wordsWritten < wordCount) { +- throw new RuntimeException("Not all words have been written! (" + wordsWritten + "/" + wordCount + ")"); +- } +- +- return bytes; +- } +-} +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,71 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A collection of bit utilities. +- */ +-class BitUtil { +- /** +- * The set of least-significant bits for a given byte. -1 +- * is used if no bits are set (so as to not be confused with "index of zero" +- * meaning that the least significant bit is the 0th (1st) bit). +- * +- * @see #leastSignificantBit(long) +- */ +- private static final int[] LEAST_SIGNIFICANT_BIT = { +- -1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +- }; +- +- /** +- * Computes the least-significant bit of the specified long +- * that is set to 1. Zero-indexed. +- * +- * @param value the long whose least-significant bit is desired. +- * @return the least-significant bit of the specified long. +- * -1 is returned if there are no bits set. +- */ +- // REF: http://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set +- // REF: http://www-graphics.stanford.edu/~seander/bithacks.html +- public static int leastSignificantBit(final long value) { +- if(value == 0L) return -1/*by contract*/; +- if((value & 0xFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 0) & 0xFF)] + 0; +- if((value & 0xFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 8) & 0xFF)] + 8; +- if((value & 0xFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 16) & 0xFF)] + 16; +- if((value & 0xFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 24) & 0xFF)] + 24; +- if((value & 0xFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 32) & 0xFF)] + 32; +- if((value & 0xFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 40) & 0xFF)] + 40; +- if((value & 0xFFFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 48) & 0xFF)] + 48; +- return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 56) & 0xFFL)] + 56; +- } +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,259 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A vector (array) of bits that is accessed in units ("registers") of width +- * bits which are stored as 64bit "words" (longs). In this context +- * a register is at most 64bits. +- */ +-class BitVector implements Cloneable { +- // NOTE: in this context, a word is 64bits +- +- // rather than doing division to determine how a bit index fits into 64bit +- // words (i.e. longs), bit shifting is used +- private static final int LOG2_BITS_PER_WORD = 6/*=>64bits*/; +- private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD; +- private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1; +- +- // ditto from above but for bytes (for output) +- private static final int LOG2_BITS_PER_BYTE = 3/*=>8bits*/; +- public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE; +- +- // ======================================================================== +- public static final int BYTES_PER_WORD = 8/*8 bytes in a long*/; +- +- // ************************************************************************ +- // 64bit words +- private final long[] words; +- public final long[] words() { return words; } +- public final int wordCount() { return words.length; } +- public final int byteCount() { return wordCount() * BYTES_PER_WORD; } +- +- // the width of a register in bits (this cannot be more than 64 (the word size)) +- private final int registerWidth; +- public final int registerWidth() { return registerWidth; } +- +- private final long count; +- +- // ------------------------------------------------------------------------ +- private final long registerMask; +- +- // ======================================================================== +- /** +- * @param width the width of each register. This cannot be negative or +- * zero or greater than 63 (the signed word size). +- * @param count the number of registers. This cannot be negative or zero +- */ +- public BitVector(final int width, final long count) { +- // ceil((width * count)/BITS_PER_WORD) +- this.words = new long[(int)(((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)]; +- this.registerWidth = width; +- this.count = count; +- +- this.registerMask = (1L << width) - 1; +- } +- +- // ======================================================================== +- /** +- * @param registerIndex the index of the register whose value is to be +- * retrieved. This cannot be negative. +- * @return the value at the specified register index +- * @see #setRegister(long, long) +- * @see #setMaxRegister(long, long) +- */ +- // NOTE: if this changes then setMaxRegister() must change +- public long getRegister(final long registerIndex) { +- final long bitIndex = registerIndex * registerWidth; +- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; +- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; +- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; +- +- if(firstWordIndex == secondWordIndex) +- return ((words[firstWordIndex] >>> bitRemainder) & registerMask); +- /* else -- register spans words */ +- return (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/ +- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask; +- } +- +- /** +- * @param registerIndex the index of the register whose value is to be set. +- * This cannot be negative +- * @param value the value to set in the register +- * @see #getRegister(long) +- * @see #setMaxRegister(long, long) +- */ +- // NOTE: if this changes then setMaxRegister() must change +- public void setRegister(final long registerIndex, final long value) { +- final long bitIndex = registerIndex * registerWidth; +- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; +- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; +- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; +- +- final long words[] = this.words/*for convenience/performance*/; +- if(firstWordIndex == secondWordIndex) { +- // clear then set +- words[firstWordIndex] &= ~(registerMask << bitRemainder); +- words[firstWordIndex] |= (value << bitRemainder); +- } else {/*register spans words*/ +- // clear then set each partial word +- words[firstWordIndex] &= (1L << bitRemainder) - 1; +- words[firstWordIndex] |= (value << bitRemainder); +- +- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder)); +- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder)); +- } +- } +- +- // ------------------------------------------------------------------------ +- /** +- * @return a LongIterator for iterating starting at the register +- * with index zero. This will never be null. +- */ +- public LongIterator registerIterator() { +- return new LongIterator() { +- final int registerWidth = BitVector.this.registerWidth; +- final long[] words = BitVector.this.words; +- final long registerMask = BitVector.this.registerMask; +- +- // register setup +- long registerIndex = 0; +- int wordIndex = 0; +- int remainingWordBits = BITS_PER_WORD; +- long word = words[wordIndex]; +- +- @Override public long next() { +- long register; +- if(remainingWordBits >= registerWidth) { +- register = word & registerMask; +- +- // shift to the next register +- word >>>= registerWidth; +- remainingWordBits -= registerWidth; +- } else { /*insufficient bits remaining in current word*/ +- wordIndex++/*move to the next word*/; +- +- register = (word | (words[wordIndex] << remainingWordBits)) & registerMask; +- +- // shift to the next partial register (word) +- word = words[wordIndex] >>> (registerWidth - remainingWordBits); +- remainingWordBits += BITS_PER_WORD - registerWidth; +- } +- registerIndex++; +- return register; +- } +- +- @Override public boolean hasNext() { +- return registerIndex < count; +- } +- }; +- } +- +- // ------------------------------------------------------------------------ +- // composite accessors +- /** +- * Sets the value of the specified index register if and only if the specified +- * value is greater than the current value in the register. This is equivalent +- * to but much more performant than:

+- * +- *

vector.setRegister(index, Math.max(vector.getRegister(index), value));
+- * +- * @param registerIndex the index of the register whose value is to be set. +- * This cannot be negative +- * @param value the value to set in the register if and only if this value +- * is greater than the current value in the register +- * @return true if and only if the specified value is greater +- * than or equal to the current register value. false +- * otherwise. +- * @see #getRegister(long) +- * @see #setRegister(long, long) +- * @see java.lang.Math#max(long, long) +- */ +- // NOTE: if this changes then setRegister() must change +- public boolean setMaxRegister(final long registerIndex, final long value) { +- final long bitIndex = registerIndex * registerWidth; +- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; +- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; +- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; +- +- // NOTE: matches getRegister() +- final long registerValue; +- final long words[] = this.words/*for convenience/performance*/; +- if(firstWordIndex == secondWordIndex) +- registerValue = ((words[firstWordIndex] >>> bitRemainder) & registerMask); +- else /*register spans words*/ +- registerValue = (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/ +- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask; +- +- // determine which is the larger and update as necessary +- if(value > registerValue) { +- // NOTE: matches setRegister() +- if(firstWordIndex == secondWordIndex) { +- // clear then set +- words[firstWordIndex] &= ~(registerMask << bitRemainder); +- words[firstWordIndex] |= (value << bitRemainder); +- } else {/*register spans words*/ +- // clear then set each partial word +- words[firstWordIndex] &= (1L << bitRemainder) - 1; +- words[firstWordIndex] |= (value << bitRemainder); +- +- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder)); +- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder)); +- } +- } /* else -- the register value is greater (or equal) so nothing needs to be done */ +- +- return (value >= registerValue); +- } +- +- // ======================================================================== +- /** +- * Fills this bit vector with the specified bit value. This can be used to +- * clear the vector by specifying 0. +- * +- * @param value the value to set all bits to (only the lowest bit is used) +- */ +- public void fill(final long value) { +- for(long i=0; inull. +- */ +- public void getRegisterContents(final IWordSerializer serializer) { +- for(final LongIterator iter = registerIterator(); iter.hasNext();) { +- serializer.writeWord(iter.next()); +- } +- } +- +- /** +- * Creates a deep copy of this vector. +- * +- * @see java.lang.Object#clone() +- */ +- @Override +- public BitVector clone() { +- final BitVector copy = new BitVector(registerWidth, count); +- System.arraycopy(words, 0, copy.words, 0, words.length); +- return copy; +- } +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java 2015-07-16 13:14:59.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,1071 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-import java.util.Arrays; +- +-import com.carrotsearch.hppc.IntByteOpenHashMap; +-import com.carrotsearch.hppc.LongOpenHashSet; +-import com.carrotsearch.hppc.cursors.IntByteCursor; +-import com.carrotsearch.hppc.cursors.LongCursor; +- +-/** +- * A probabilistic set of hashed long elements. Useful for computing +- * the approximate cardinality of a stream of data in very small storage. +- * +- * A modified version of the +- * 'HyperLogLog' data structure and algorithm is used, which combines both +- * probabilistic and non-probabilistic techniques to improve the accuracy and +- * storage requirements of the original algorithm. +- * +- * More specifically, initializing and storing a new {@link HLL} will +- * allocate a sentinel value symbolizing the empty set ({@link HLLType#EMPTY}). +- * After adding the first few values, a sorted list of unique integers is +- * stored in a {@link HLLType#EXPLICIT} hash set. When configured, accuracy can +- * be sacrificed for memory footprint: the values in the sorted list are +- * "promoted" to a "{@link HLLType#SPARSE}" map-based HyperLogLog structure. +- * Finally, when enough registers are set, the map-based HLL will be converted +- * to a bit-packed "{@link HLLType#FULL}" HyperLogLog structure. +- * +- * This data structure is interoperable with the implementations found at: +- * +- * when properly serialized. +- */ +-public class HLL implements Cloneable { +- // minimum and maximum values for the log-base-2 of the number of registers +- // in the HLL +- public static final int MINIMUM_LOG2M_PARAM = 4; +- public static final int MAXIMUM_LOG2M_PARAM = 30; +- +- // minimum and maximum values for the register width of the HLL +- public static final int MINIMUM_REGWIDTH_PARAM = 1; +- public static final int MAXIMUM_REGWIDTH_PARAM = 8; +- +- // minimum and maximum values for the 'expthresh' parameter of the +- // constructor that is meant to match the PostgreSQL implementation's +- // constructor and parameter names +- public static final int MINIMUM_EXPTHRESH_PARAM = -1; +- public static final int MAXIMUM_EXPTHRESH_PARAM = 18; +- public static final int MAXIMUM_EXPLICIT_THRESHOLD = (1 << (MAXIMUM_EXPTHRESH_PARAM - 1)/*per storage spec*/); +- +- // ************************************************************************ +- // Storage +- // storage used when #type is EXPLICIT, null otherwise +- LongOpenHashSet explicitStorage; +- // storage used when #type is SPARSE, null otherwise +- IntByteOpenHashMap sparseProbabilisticStorage; +- // storage used when #type is FULL, null otherwise +- BitVector probabilisticStorage; +- +- // current type of this HLL instance, if this changes then so should the +- // storage used (see above) +- private HLLType type; +- +- // ------------------------------------------------------------------------ +- // Characteristic parameters +- // NOTE: These members are named to match the PostgreSQL implementation's +- // parameters. +- // log2(the number of probabilistic HLL registers) +- private final int log2m; +- // the size (width) each register in bits +- private final int regwidth; +- +- // ------------------------------------------------------------------------ +- // Computed constants +- // ........................................................................ +- // EXPLICIT-specific constants +- // flag indicating if the EXPLICIT representation should NOT be used +- private final boolean explicitOff; +- // flag indicating that the promotion threshold from EXPLICIT should be +- // computed automatically +- // NOTE: this only has meaning when 'explicitOff' is false +- private final boolean explicitAuto; +- // threshold (in element count) at which a EXPLICIT HLL is converted to a +- // SPARSE or FULL HLL, always greater than or equal to zero and always a +- // power of two OR simply zero +- // NOTE: this only has meaning when 'explicitOff' is false +- private final int explicitThreshold; +- +- // ........................................................................ +- // SPARSE-specific constants +- // the computed width of the short words +- private final int shortWordLength; +- // flag indicating if the SPARSE representation should not be used +- private final boolean sparseOff; +- // threshold (in register count) at which a SPARSE HLL is converted to a +- // FULL HLL, always greater than zero +- private final int sparseThreshold; +- +- // ........................................................................ +- // Probabilistic algorithm constants +- // the number of registers, will always be a power of 2 +- private final int m; +- // a mask of the log2m bits set to one and the rest to zero +- private final int mBitsMask; +- // a mask as wide as a register (see #fromBytes()) +- private final int valueMask; +- // mask used to ensure that p(w) does not overflow register (see #Constructor() and #addRaw()) +- private final long pwMaxMask; +- // alpha * m^2 (the constant in the "'raw' HyperLogLog estimator") +- private final double alphaMSquared; +- // the cutoff value of the estimator for using the "small" range cardinality +- // correction formula +- private final double smallEstimatorCutoff; +- // the cutoff value of the estimator for using the "large" range cardinality +- // correction formula +- private final double largeEstimatorCutoff; +- +- // ======================================================================== +- /** +- * NOTE: Arguments here are named and structured identically to those in the +- * PostgreSQL implementation, which can be found +- * here. +- * +- * @param log2m log-base-2 of the number of registers used in the HyperLogLog +- * algorithm. Must be at least 4 and at most 30. +- * @param regwidth number of bits used per register in the HyperLogLog +- * algorithm. Must be at least 1 and at most 8. +- * @param expthresh tunes when the {@link HLLType#EXPLICIT} to +- * {@link HLLType#SPARSE} promotion occurs, +- * based on the set's cardinality. Must be at least -1 and at most 18. +- * @param sparseon Flag indicating if the {@link HLLType#SPARSE} +- * representation should be used. +- * @param type the type in the promotion hierarchy which this instance should +- * start at. This cannot be null. +- */ +- public HLL(final int log2m, final int regwidth, final int expthresh, final boolean sparseon, final HLLType type) { +- this.log2m = log2m; +- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) { +- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")"); +- } +- +- this.regwidth = regwidth; +- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) { +- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")"); +- } +- +- this.m = (1 << log2m); +- this.mBitsMask = m - 1; +- this.valueMask = (1 << regwidth) - 1; +- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth); +- this.alphaMSquared = HLLUtil.alphaMSquared(m); +- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m); +- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth); +- +- if(expthresh == -1) { +- this.explicitAuto = true; +- this.explicitOff = false; +- +- // NOTE: This math matches the size calculation in the PostgreSQL impl. +- final long fullRepresentationSize = (this.regwidth * (long)this.m + 7/*round up to next whole byte*/)/Byte.SIZE; +- final int numLongs = (int)(fullRepresentationSize / 8/*integer division to round down*/); +- +- if(numLongs > MAXIMUM_EXPLICIT_THRESHOLD) { +- this.explicitThreshold = MAXIMUM_EXPLICIT_THRESHOLD; +- } else { +- this.explicitThreshold = numLongs; +- } +- } else if(expthresh == 0) { +- this.explicitAuto = false; +- this.explicitOff = true; +- this.explicitThreshold = 0; +- } else if((expthresh > 0) && (expthresh <= MAXIMUM_EXPTHRESH_PARAM)){ +- this.explicitAuto = false; +- this.explicitOff = false; +- this.explicitThreshold = (1 << (expthresh - 1)); +- } else { +- throw new IllegalArgumentException("'expthresh' must be at least " + MINIMUM_EXPTHRESH_PARAM + " and at most " + MAXIMUM_EXPTHRESH_PARAM + " (was: " + expthresh + ")"); +- } +- +- this.shortWordLength = (regwidth + log2m); +- this.sparseOff = !sparseon; +- if(this.sparseOff) { +- this.sparseThreshold = 0; +- } else { +- // TODO improve this cutoff to include the cost overhead of Java +- // members/objects +- final int largestPow2LessThanCutoff = +- (int)NumberUtil.log2((this.m * this.regwidth) / this.shortWordLength); +- this.sparseThreshold = (1 << largestPow2LessThanCutoff); +- } +- +- initializeStorage(type); +- } +- +- /** +- * Construct an empty HLL with the given {@code log2m} and {@code regwidth}. +- * +- * This is equivalent to calling HLL(log2m, regwidth, -1, true, HLLType.EMPTY). +- * +- * @param log2m log-base-2 of the number of registers used in the HyperLogLog +- * algorithm. Must be at least 4 and at most 30. +- * @param regwidth number of bits used per register in the HyperLogLog +- * algorithm. Must be at least 1 and at most 8. +- * +- * @see #HLL(int, int, int, boolean, HLLType) +- */ +- public HLL(final int log2m, final int regwidth) { +- this(log2m, regwidth, -1, true, HLLType.EMPTY); +- } +- +- // ------------------------------------------------------------------------- +- /** +- * Convenience constructor for testing. Assumes that both {@link HLLType#EXPLICIT} +- * and {@link HLLType#SPARSE} representations should be enabled. +- * +- * @param log2m log-base-2 of the number of registers used in the HyperLogLog +- * algorithm. Must be at least 4 and at most 30. +- * @param regwidth number of bits used per register in the HyperLogLog +- * algorithm. Must be at least 1 and at most 8. +- * @param explicitThreshold cardinality threshold at which the {@link HLLType#EXPLICIT} +- * representation should be promoted to {@link HLLType#SPARSE}. +- * This must be greater than zero and less than or equal to {@value #MAXIMUM_EXPLICIT_THRESHOLD}. +- * @param sparseThreshold register count threshold at which the {@link HLLType#SPARSE} +- * representation should be promoted to {@link HLLType#FULL}. +- * This must be greater than zero. +- * @param type the type in the promotion hierarchy which this instance should +- * start at. This cannot be null. +- */ +- /*package, for testing*/ HLL(final int log2m, final int regwidth, final int explicitThreshold, final int sparseThreshold, final HLLType type) { +- this.log2m = log2m; +- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) { +- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")"); +- } +- +- this.regwidth = regwidth; +- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) { +- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")"); +- } +- +- this.m = (1 << log2m); +- this.mBitsMask = m - 1; +- this.valueMask = (1 << regwidth) - 1; +- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth); +- this.alphaMSquared = HLLUtil.alphaMSquared(m); +- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m); +- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth); +- +- this.explicitAuto = false; +- this.explicitOff = false; +- this.explicitThreshold = explicitThreshold; +- if((explicitThreshold < 1) || (explicitThreshold > MAXIMUM_EXPLICIT_THRESHOLD)) { +- throw new IllegalArgumentException("'explicitThreshold' must be at least 1 and at most " + MAXIMUM_EXPLICIT_THRESHOLD + " (was: " + explicitThreshold + ")"); +- } +- +- this.shortWordLength = (regwidth + log2m); +- this.sparseOff = false; +- this.sparseThreshold = sparseThreshold; +- +- initializeStorage(type); +- } +- +- /** +- * @return the type in the promotion hierarchy of this instance. This will +- * never be null. +- */ +- public HLLType getType() { return type; } +- +- // ======================================================================== +- // Add +- /** +- * Adds rawValue directly to the HLL. +- * +- * @param rawValue the value to be added. It is very important that this +- * value already be hashed with a strong (but not +- * necessarily cryptographic) hash function. For instance, the +- * Murmur3 implementation in +- * +- * Google's Guava library is an excellent hash function for this +- * purpose and, for seeds greater than zero, matches the output +- * of the hash provided in the PostgreSQL implementation. +- */ +- public void addRaw(final long rawValue) { +- switch(type) { +- case EMPTY: { +- // NOTE: EMPTY type is always promoted on #addRaw() +- if(explicitThreshold > 0) { +- initializeStorage(HLLType.EXPLICIT); +- explicitStorage.add(rawValue); +- } else if(!sparseOff) { +- initializeStorage(HLLType.SPARSE); +- addRawSparseProbabilistic(rawValue); +- } else { +- initializeStorage(HLLType.FULL); +- addRawProbabilistic(rawValue); +- } +- return; +- } +- case EXPLICIT: { +- explicitStorage.add(rawValue); +- +- // promotion, if necessary +- if(explicitStorage.size() > explicitThreshold) { +- if(!sparseOff) { +- initializeStorage(HLLType.SPARSE); +- for (LongCursor c : explicitStorage) { +- addRawSparseProbabilistic(c.value); +- } +- } else { +- initializeStorage(HLLType.FULL); +- for (LongCursor c : explicitStorage) { +- addRawProbabilistic(c.value); +- } +- } +- explicitStorage = null; +- } +- return; +- } +- case SPARSE: { +- addRawSparseProbabilistic(rawValue); +- +- // promotion, if necessary +- if(sparseProbabilisticStorage.size() > sparseThreshold) { +- initializeStorage(HLLType.FULL); +- for(IntByteCursor c : sparseProbabilisticStorage) { +- final int registerIndex = c.key; +- final byte registerValue = c.value; +- probabilisticStorage.setMaxRegister(registerIndex, registerValue); +- } +- sparseProbabilisticStorage = null; +- } +- return; +- } +- case FULL: +- addRawProbabilistic(rawValue); +- return; +- default: +- throw new RuntimeException("Unsupported HLL type " + type); +- } +- } +- +- // ------------------------------------------------------------------------ +- // #addRaw(..) helpers +- /** +- * Adds the raw value to the {@link #sparseProbabilisticStorage}. +- * {@link #type} must be {@link HLLType#SPARSE}. +- * +- * @param rawValue the raw value to add to the sparse storage. +- */ +- private void addRawSparseProbabilistic(final long rawValue) { +- // p(w): position of the least significant set bit (one-indexed) +- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value) +- // +- // By construction of pwMaxMask (see #Constructor()), +- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2, +- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2, +- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1. +- final long substreamValue = (rawValue >>> log2m); +- final byte p_w; +- +- if(substreamValue == 0L) { +- // The paper does not cover p(0x0), so the special value 0 is used. +- // 0 is the original initialization value of the registers, so by +- // doing this the multiset simply ignores it. This is acceptable +- // because the probability is 1/(2^(2^registerSizeInBits)). +- p_w = 0; +- } else { +- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask)); +- } +- +- // Short-circuit if the register is being set to zero, since algorithmically +- // this corresponds to an "unset" register, and "unset" registers aren't +- // stored to save memory. (The very reason this sparse implementation +- // exists.) If a register is set to zero it will break the #algorithmCardinality +- // code. +- if(p_w == 0) { +- return; +- } +- +- // NOTE: no +1 as in paper since 0-based indexing +- final int j = (int)(rawValue & mBitsMask); +- +- final byte currentValue; +- if (sparseProbabilisticStorage.containsKey(j)) { +- currentValue = sparseProbabilisticStorage.lget(); +- } else { +- currentValue = 0; +- } +- +- if(p_w > currentValue) { +- sparseProbabilisticStorage.put(j, p_w); +- } +- } +- +- /** +- * Adds the raw value to the {@link #probabilisticStorage}. +- * {@link #type} must be {@link HLLType#FULL}. +- * +- * @param rawValue the raw value to add to the full probabilistic storage. +- */ +- private void addRawProbabilistic(final long rawValue) { +- // p(w): position of the least significant set bit (one-indexed) +- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value) +- // +- // By construction of pwMaxMask (see #Constructor()), +- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2, +- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2, +- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1. +- final long substreamValue = (rawValue >>> log2m); +- final byte p_w; +- +- if (substreamValue == 0L) { +- // The paper does not cover p(0x0), so the special value 0 is used. +- // 0 is the original initialization value of the registers, so by +- // doing this the multiset simply ignores it. This is acceptable +- // because the probability is 1/(2^(2^registerSizeInBits)). +- p_w = 0; +- } else { +- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask)); +- } +- +- // Short-circuit if the register is being set to zero, since algorithmically +- // this corresponds to an "unset" register, and "unset" registers aren't +- // stored to save memory. (The very reason this sparse implementation +- // exists.) If a register is set to zero it will break the #algorithmCardinality +- // code. +- if(p_w == 0) { +- return; +- } +- +- // NOTE: no +1 as in paper since 0-based indexing +- final int j = (int)(rawValue & mBitsMask); +- +- probabilisticStorage.setMaxRegister(j, p_w); +- } +- +- // ------------------------------------------------------------------------ +- // Storage helper +- /** +- * Initializes storage for the specified {@link HLLType} and changes the +- * instance's {@link #type}. +- * +- * @param type the {@link HLLType} to initialize storage for. This cannot be +- * null and must be an instantiable type. +- */ +- private void initializeStorage(final HLLType type) { +- this.type = type; +- switch(type) { +- case EMPTY: +- // nothing to be done +- break; +- case EXPLICIT: +- this.explicitStorage = new LongOpenHashSet(); +- break; +- case SPARSE: +- this.sparseProbabilisticStorage = new IntByteOpenHashMap(); +- break; +- case FULL: +- this.probabilisticStorage = new BitVector(regwidth, m); +- break; +- default: +- throw new RuntimeException("Unsupported HLL type " + type); +- } +- } +- +- // ======================================================================== +- // Cardinality +- /** +- * Computes the cardinality of the HLL. +- * +- * @return the cardinality of HLL. This will never be negative. +- */ +- public long cardinality() { +- switch(type) { +- case EMPTY: +- return 0/*by definition*/; +- case EXPLICIT: +- return explicitStorage.size(); +- case SPARSE: +- return (long)Math.ceil(sparseProbabilisticAlgorithmCardinality()); +- case FULL: +- return (long)Math.ceil(fullProbabilisticAlgorithmCardinality()); +- default: +- throw new RuntimeException("Unsupported HLL type " + type); +- } +- } +- +- // ------------------------------------------------------------------------ +- // Cardinality helpers +- /** +- * Computes the exact cardinality value returned by the HLL algorithm when +- * represented as a {@link HLLType#SPARSE} HLL. Kept +- * separate from {@link #cardinality()} for testing purposes. {@link #type} +- * must be {@link HLLType#SPARSE}. +- * +- * @return the exact, unrounded cardinality given by the HLL algorithm +- */ +- /*package, for testing*/ double sparseProbabilisticAlgorithmCardinality() { +- final int m = this.m/*for performance*/; +- +- // compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the +- // 'j'th register value +- double sum = 0; +- int numberOfZeroes = 0/*"V" in the paper*/; +- for(int j=0; jclear does NOT handle +- * transitions between {@link HLLType}s - a probabilistic type will remain +- * probabilistic after being cleared. +- */ +- public void clear() { +- switch(type) { +- case EMPTY: +- return /*do nothing*/; +- case EXPLICIT: +- explicitStorage.clear(); +- return; +- case SPARSE: +- sparseProbabilisticStorage.clear(); +- return; +- case FULL: +- probabilisticStorage.fill(0); +- return; +- default: +- throw new RuntimeException("Unsupported HLL type " + type); +- } +- } +- +- // ======================================================================== +- // Union +- /** +- * Computes the union of HLLs and stores the result in this instance. +- * +- * @param other the other {@link HLL} instance to union into this one. This +- * cannot be null. +- */ +- public void union(final HLL other) { +- // TODO: verify HLLs are compatible +- final HLLType otherType = other.getType(); +- +- if(type.equals(otherType)) { +- homogeneousUnion(other); +- return; +- } else { +- heterogenousUnion(other); +- return; +- } +- } +- +- // ------------------------------------------------------------------------ +- // Union helpers +- /** +- * Computes the union of two HLLs, of different types, and stores the +- * result in this instance. +- * +- * @param other the other {@link HLL} instance to union into this one. This +- * cannot be null. +- */ +- /*package, for testing*/ void heterogenousUnion(final HLL other) { +- /* +- * The logic here is divided into two sections: unions with an EMPTY +- * HLL, and unions between EXPLICIT/SPARSE/FULL +- * HLL. +- * +- * Between those two sections, all possible heterogeneous unions are +- * covered. Should another type be added to HLLType whose unions +- * are not easily reduced (say, as EMPTY's are below) this may be more +- * easily implemented as Strategies. However, that is unnecessary as it +- * stands. +- */ +- +- // .................................................................... +- // Union with an EMPTY +- if(HLLType.EMPTY.equals(type)) { +- // NOTE: The union of empty with non-empty HLL is just a +- // clone of the non-empty. +- +- switch(other.getType()) { +- case EXPLICIT: { +- // src: EXPLICIT +- // dest: EMPTY +- +- if(other.explicitStorage.size() <= explicitThreshold) { +- type = HLLType.EXPLICIT; +- explicitStorage = other.explicitStorage.clone(); +- } else { +- if(!sparseOff) { +- initializeStorage(HLLType.SPARSE); +- } else { +- initializeStorage(HLLType.FULL); +- } +- for(LongCursor c : other.explicitStorage) { +- addRaw(c.value); +- } +- } +- return; +- } +- case SPARSE: { +- // src: SPARSE +- // dest: EMPTY +- +- if(!sparseOff) { +- type = HLLType.SPARSE; +- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone(); +- } else { +- initializeStorage(HLLType.FULL); +- for(IntByteCursor c : other.sparseProbabilisticStorage) { +- final int registerIndex = c.key; +- final byte registerValue = c.value; +- probabilisticStorage.setMaxRegister(registerIndex, registerValue); +- } +- } +- return; +- } +- default/*case FULL*/: { +- // src: FULL +- // dest: EMPTY +- +- type = HLLType.FULL; +- probabilisticStorage = other.probabilisticStorage.clone(); +- return; +- } +- } +- } else if (HLLType.EMPTY.equals(other.getType())) { +- // source is empty, so just return destination since it is unchanged +- return; +- } /* else -- both of the sets are not empty */ +- +- // .................................................................... +- // NOTE: Since EMPTY is handled above, the HLLs are non-EMPTY below +- switch(type) { +- case EXPLICIT: { +- // src: FULL/SPARSE +- // dest: EXPLICIT +- // "Storing into destination" cannot be done (since destination +- // is by definition of smaller capacity than source), so a clone +- // of source is made and values from destination are inserted +- // into that. +- +- // Determine source and destination storage. +- // NOTE: destination storage may change through promotion if +- // source is SPARSE. +- if(HLLType.SPARSE.equals(other.getType())) { +- if(!sparseOff) { +- type = HLLType.SPARSE; +- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone(); +- } else { +- initializeStorage(HLLType.FULL); +- for(IntByteCursor c : other.sparseProbabilisticStorage) { +- final int registerIndex = c.key; +- final byte registerValue = c.value; +- probabilisticStorage.setMaxRegister(registerIndex, registerValue); +- } +- } +- } else /*source is HLLType.FULL*/ { +- type = HLLType.FULL; +- probabilisticStorage = other.probabilisticStorage.clone(); +- } +- for(LongCursor c : explicitStorage) { +- addRaw(c.value); +- } +- explicitStorage = null; +- return; +- } +- case SPARSE: { +- if(HLLType.EXPLICIT.equals(other.getType())) { +- // src: EXPLICIT +- // dest: SPARSE +- // Add the raw values from the source to the destination. +- +- for(LongCursor c : other.explicitStorage) { +- addRaw(c.value); +- } +- // NOTE: addRaw will handle promotion cleanup +- } else /*source is HLLType.FULL*/ { +- // src: FULL +- // dest: SPARSE +- // "Storing into destination" cannot be done (since destination +- // is by definition of smaller capacity than source), so a +- // clone of source is made and registers from the destination +- // are merged into the clone. +- +- type = HLLType.FULL; +- probabilisticStorage = other.probabilisticStorage.clone(); +- for(IntByteCursor c : sparseProbabilisticStorage) { +- final int registerIndex = c.key; +- final byte registerValue = c.value; +- probabilisticStorage.setMaxRegister(registerIndex, registerValue); +- } +- sparseProbabilisticStorage = null; +- } +- return; +- } +- default/*destination is HLLType.FULL*/: { +- if(HLLType.EXPLICIT.equals(other.getType())) { +- // src: EXPLICIT +- // dest: FULL +- // Add the raw values from the source to the destination. +- // Promotion is not possible, so don't bother checking. +- +- for(LongCursor c : other.explicitStorage) { +- addRaw(c.value); +- } +- } else /*source is HLLType.SPARSE*/ { +- // src: SPARSE +- // dest: FULL +- // Merge the registers from the source into the destination. +- // Promotion is not possible, so don't bother checking. +- +- for(IntByteCursor c : other.sparseProbabilisticStorage) { +- final int registerIndex = c.key; +- final byte registerValue = c.value; +- probabilisticStorage.setMaxRegister(registerIndex, registerValue); +- } +- } +- } +- } +- } +- +- /** +- * Computes the union of two HLLs of the same type, and stores the +- * result in this instance. +- * +- * @param other the other {@link HLL} instance to union into this one. This +- * cannot be null. +- */ +- private void homogeneousUnion(final HLL other) { +- switch(type) { +- case EMPTY: +- // union of empty and empty is empty +- return; +- case EXPLICIT: +- for(LongCursor c : other.explicitStorage) { +- addRaw(c.value); +- } +- // NOTE: #addRaw() will handle promotion, if necessary +- return; +- case SPARSE: +- for(IntByteCursor c : other.sparseProbabilisticStorage) { +- final int registerIndex = c.key; +- final byte registerValue = c.value; +- final byte currentRegisterValue = sparseProbabilisticStorage.get(registerIndex); +- if(registerValue > currentRegisterValue) { +- sparseProbabilisticStorage.put(registerIndex, registerValue); +- } +- } +- +- // promotion, if necessary +- if(sparseProbabilisticStorage.size() > sparseThreshold) { +- initializeStorage(HLLType.FULL); +- for(IntByteCursor c : sparseProbabilisticStorage) { +- final int registerIndex = c.key; +- final byte registerValue = c.value; +- probabilisticStorage.setMaxRegister(registerIndex, registerValue); +- } +- sparseProbabilisticStorage = null; +- } +- return; +- case FULL: +- for(int i=0; inull or empty. +- */ +- public byte[] toBytes() { +- return toBytes(SerializationUtil.DEFAULT_SCHEMA_VERSION); +- } +- +- /** +- * Serializes the HLL to an array of bytes in correspondence with the format +- * of the specified schema version. +- * +- * @param schemaVersion the schema version dictating the serialization format +- * @return the array of bytes representing the HLL. This will never be +- * null or empty. +- */ +- public byte[] toBytes(final ISchemaVersion schemaVersion) { +- final byte[] bytes; +- switch(type) { +- case EMPTY: +- bytes = new byte[schemaVersion.paddingBytes(type)]; +- break; +- case EXPLICIT: { +- final IWordSerializer serializer = +- schemaVersion.getSerializer(type, Long.SIZE, explicitStorage.size()); +- +- final long[] values = explicitStorage.toArray(); +- Arrays.sort(values); +- for(final long value : values) { +- serializer.writeWord(value); +- } +- +- bytes = serializer.getBytes(); +- break; +- } +- case SPARSE: { +- final IWordSerializer serializer = +- schemaVersion.getSerializer(type, shortWordLength, sparseProbabilisticStorage.size()); +- +- final int[] indices = sparseProbabilisticStorage.keys().toArray(); +- Arrays.sort(indices); +- for(final int registerIndex : indices) { +- assert sparseProbabilisticStorage.containsKey(registerIndex); +- final long registerValue = sparseProbabilisticStorage.get(registerIndex); +- // pack index and value into "short word" +- final long shortWord = ((registerIndex << regwidth) | registerValue); +- serializer.writeWord(shortWord); +- } +- +- bytes = serializer.getBytes(); +- break; +- } +- case FULL: { +- final IWordSerializer serializer = schemaVersion.getSerializer(type, regwidth, m); +- probabilisticStorage.getRegisterContents(serializer); +- +- bytes = serializer.getBytes(); +- break; +- } +- default: +- throw new RuntimeException("Unsupported HLL type " + type); +- } +- +- final IHLLMetadata metadata = new HLLMetadata(schemaVersion.schemaVersionNumber(), +- type, +- log2m, +- regwidth, +- (int)NumberUtil.log2(explicitThreshold), +- explicitOff, +- explicitAuto, +- !sparseOff); +- schemaVersion.writeMetadata(bytes, metadata); +- +- return bytes; +- } +- +- /** +- * Deserializes the HLL (in {@link #toBytes(ISchemaVersion)} format) serialized +- * into bytes. +- * +- * @param bytes the serialized bytes of new HLL +- * @return the deserialized HLL. This will never be null. +- * +- * @see #toBytes(ISchemaVersion) +- */ +- public static HLL fromBytes(final byte[] bytes) { +- final ISchemaVersion schemaVersion = SerializationUtil.getSchemaVersion(bytes); +- final IHLLMetadata metadata = schemaVersion.readMetadata(bytes); +- +- final HLLType type = metadata.HLLType(); +- final int regwidth = metadata.registerWidth(); +- final int log2m = metadata.registerCountLog2(); +- final boolean sparseon = metadata.sparseEnabled(); +- +- final int expthresh; +- if(metadata.explicitAuto()) { +- expthresh = -1; +- } else if(metadata.explicitOff()) { +- expthresh = 0; +- } else { +- // NOTE: take into account that the postgres-compatible constructor +- // subtracts one before taking a power of two. +- expthresh = metadata.log2ExplicitCutoff() + 1; +- } +- +- final HLL hll = new HLL(log2m, regwidth, expthresh, sparseon, type); +- +- // Short-circuit on empty, which needs no other deserialization. +- if(HLLType.EMPTY.equals(type)) { +- return hll; +- } +- +- final int wordLength; +- switch(type) { +- case EXPLICIT: +- wordLength = Long.SIZE; +- break; +- case SPARSE: +- wordLength = hll.shortWordLength; +- break; +- case FULL: +- wordLength = hll.regwidth; +- break; +- default: +- throw new RuntimeException("Unsupported HLL type " + type); +- } +- +- final IWordDeserializer deserializer = +- schemaVersion.getDeserializer(type, wordLength, bytes); +- switch(type) { +- case EXPLICIT: +- // NOTE: This should not exceed expthresh and this will always +- // be exactly the number of words that were encoded, +- // because the word length is at least a byte wide. +- // SEE: IWordDeserializer#totalWordCount() +- for(int i=0; i>> hll.regwidth), registerValue); +- } +- } +- break; +- case FULL: +- // NOTE: Iteration is done using m (register count) and NOT +- // deserializer#totalWordCount() because regwidth may be +- // less than 8 and as such the padding on the 'last' byte +- // may be larger than regwidth, causing an extra register +- // to be read. +- // SEE: IWordDeserializer#totalWordCount() +- for(long i=0; inull. +- * @param registerCountLog2 the log-base-2 register count parameter for +- * probabilistic HLLs. This must be greater than or equal to zero. +- * @param registerWidth the register width parameter for probabilistic +- * HLLs. This must be greater than or equal to zero. +- * @param log2ExplicitCutoff the log-base-2 of the explicit cardinality cutoff, +- * if it is explicitly defined. (If explicitOff or +- * explicitAuto is true then this has no +- * meaning.) +- * @param explicitOff the flag for 'explicit off'-mode, where the +- * {@link HLLType#EXPLICIT} representation is not used. Both this and +- * explicitAuto cannot be true at the same +- * time. +- * @param explicitAuto the flag for 'explicit auto'-mode, where the +- * {@link HLLType#EXPLICIT} representation's promotion cutoff is +- * determined based on in-memory size automatically. Both this and +- * explicitOff cannot be true at the same +- * time. +- * @param sparseEnabled the flag for 'sparse-enabled'-mode, where the +- * {@link HLLType#SPARSE} representation is used. +- */ +- public HLLMetadata(final int schemaVersion, +- final HLLType type, +- final int registerCountLog2, +- final int registerWidth, +- final int log2ExplicitCutoff, +- final boolean explicitOff, +- final boolean explicitAuto, +- final boolean sparseEnabled) { +- this.schemaVersion = schemaVersion; +- this.type = type; +- this.registerCountLog2 = registerCountLog2; +- this.registerWidth = registerWidth; +- this.log2ExplicitCutoff = log2ExplicitCutoff; +- this.explicitOff = explicitOff; +- this.explicitAuto = explicitAuto; +- this.sparseEnabled = sparseEnabled; +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#schemaVersion() +- */ +- @Override +- public int schemaVersion() { return schemaVersion; } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#HLLType() +- */ +- @Override +- public HLLType HLLType() { return type; } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#registerCountLog2() +- */ +- @Override +- public int registerCountLog2() { return registerCountLog2; } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#registerWidth() +- */ +- @Override +- public int registerWidth() { return registerWidth; } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff() +- */ +- @Override +- public int log2ExplicitCutoff() { return log2ExplicitCutoff; } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#explicitOff() +- */ +- @Override +- public boolean explicitOff() { +- return explicitOff; +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#explicitAuto() +- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff() +- */ +- @Override +- public boolean explicitAuto() { +- return explicitAuto; +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.IHLLMetadata#sparseEnabled() +- */ +- @Override +- public boolean sparseEnabled() { return sparseEnabled; } +- +- /* (non-Javadoc) +- * @see java.lang.Object#toString() +- */ +- @Override +- public String toString() { +- return ""; +- } +-} +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,29 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * The types of algorithm/data structure that {@link HLL} can utilize. For more +- * information, see the Javadoc for {@link HLL}. +- */ +-public enum HLLType { +- EMPTY, +- EXPLICIT, +- SPARSE, +- FULL; +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,199 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * Static functions for computing constants and parameters used in the HLL +- * algorithm. +- */ +-final class HLLUtil { +- /** +- * Precomputed pwMaxMask values indexed by registerSizeInBits. +- * Calculated with this formula: +- *
+-     *     int maxRegisterValue = (1 << registerSizeInBits) - 1;
+-     *     // Mask with all bits set except for (maxRegisterValue - 1) least significant bits (see #addRaw())
+-     *     return ~((1L << (maxRegisterValue - 1)) - 1);
+-     * 
+- * +- * @see #pwMaxMask(int) +- */ +- private static final long[] PW_MASK = { +- ~((1L << (((1 << 0) - 1) - 1)) - 1), +- ~((1L << (((1 << 1) - 1) - 1)) - 1), +- ~((1L << (((1 << 2) - 1) - 1)) - 1), +- ~((1L << (((1 << 3) - 1) - 1)) - 1), +- ~((1L << (((1 << 4) - 1) - 1)) - 1), +- ~((1L << (((1 << 5) - 1) - 1)) - 1), +- ~((1L << (((1 << 6) - 1) - 1)) - 1), +- ~((1L << (((1 << 7) - 1) - 1)) - 1), +- ~((1L << (((1 << 8) - 1) - 1)) - 1) +- }; +- +- /** +- * Precomputed twoToL values indexed by a linear combination of +- * regWidth and log2m. +- * +- * The array is one-dimensional and can be accessed by using index +- * (REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m +- * for regWidth and log2m between the specified +- * HLL.{MINIMUM,MAXIMUM}_{REGWIDTH,LOG2M}_PARAM constants. +- * +- * @see #largeEstimator(int, int, double) +- * @see #largeEstimatorCutoff(int, int) +- * @see "Blog post with section on 2^L" +- */ +- private static final double[] TWO_TO_L = new double[(HLL.MAXIMUM_REGWIDTH_PARAM + 1) * (HLL.MAXIMUM_LOG2M_PARAM + 1)]; +- +- /** +- * Spacing constant used to compute offsets into {@link #TWO_TO_L}. +- */ +- private static final int REG_WIDTH_INDEX_MULTIPLIER = HLL.MAXIMUM_LOG2M_PARAM + 1; +- +- static { +- for(int regWidth = HLL.MINIMUM_REGWIDTH_PARAM; regWidth <= HLL.MAXIMUM_REGWIDTH_PARAM; regWidth++) { +- for(int log2m = HLL.MINIMUM_LOG2M_PARAM ; log2m <= HLL.MAXIMUM_LOG2M_PARAM; log2m++) { +- int maxRegisterValue = (1 << regWidth) - 1; +- +- // Since 1 is added to p(w) in the insertion algorithm, only +- // (maxRegisterValue - 1) bits are inspected hence the hash +- // space is one power of two smaller. +- final int pwBits = (maxRegisterValue - 1); +- final int totalBits = (pwBits + log2m); +- final double twoToL = Math.pow(2, totalBits); +- TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m] = twoToL; +- } +- } +- } +- +- // ************************************************************************ +- /** +- * Computes the bit-width of HLL registers necessary to estimate a set of +- * the specified cardinality. +- * +- * @param expectedUniqueElements an upper bound on the number of unique +- * elements that are expected. This must be greater than zero. +- * @return a register size in bits (i.e. log2(log2(n))) +- */ +- public static int registerBitSize(final long expectedUniqueElements) { +- return Math.max(HLL.MINIMUM_REGWIDTH_PARAM, +- (int)Math.ceil(NumberUtil.log2(NumberUtil.log2(expectedUniqueElements)))); +- } +- +- // ======================================================================== +- /** +- * Computes the 'alpha-m-squared' constant used by the HyperLogLog algorithm. +- * +- * @param m this must be a power of two, cannot be less than +- * 16 (24), and cannot be greater than 65536 (216). +- * @return gamma times registerCount squared where gamma is +- * based on the value of registerCount. +- * @throws IllegalArgumentException if registerCount is less +- * than 16. +- */ +- public static double alphaMSquared(final int m) { +- switch(m) { +- case 1/*2^0*/: +- case 2/*2^1*/: +- case 4/*2^2*/: +- case 8/*2^3*/: +- throw new IllegalArgumentException("'m' cannot be less than 16 (" + m + " < 16)."); +- +- case 16/*2^4*/: +- return 0.673 * m * m; +- +- case 32/*2^5*/: +- return 0.697 * m * m; +- +- case 64/*2^6*/: +- return 0.709 * m * m; +- +- default/*>2^6*/: +- return (0.7213 / (1.0 + 1.079 / m)) * m * m; +- } +- } +- +- // ======================================================================== +- /** +- * Computes a mask that prevents overflow of HyperLogLog registers. +- * +- * @param registerSizeInBits the size of the HLL registers, in bits. +- * @return mask a long mask to prevent overflow of the registers +- * @see #registerBitSize(long) +- */ +- public static long pwMaxMask(final int registerSizeInBits) { +- return PW_MASK[registerSizeInBits]; +- } +- +- // ======================================================================== +- /** +- * The cutoff for using the "small range correction" formula, in the +- * HyperLogLog algorithm. +- * +- * @param m the number of registers in the HLL. m in the paper. +- * @return the cutoff for the small range correction. +- * @see #smallEstimator(int, int) +- */ +- public static double smallEstimatorCutoff(final int m) { +- return ((double)m * 5) / 2; +- } +- +- /** +- * The "small range correction" formula from the HyperLogLog algorithm. Only +- * appropriate if both the estimator is smaller than
(5/2) * m
and +- * there are still registers that have the zero value. +- * +- * @param m the number of registers in the HLL. m in the paper. +- * @param numberOfZeroes the number of registers with value zero. V +- * in the paper. +- * @return a corrected cardinality estimate. +- */ +- public static double smallEstimator(final int m, final int numberOfZeroes) { +- return m * Math.log((double)m / numberOfZeroes); +- } +- +- /** +- * The cutoff for using the "large range correction" formula, from the +- * HyperLogLog algorithm, adapted for 64 bit hashes. +- * +- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper. +- * @param registerSizeInBits the size of the HLL registers, in bits. +- * @return the cutoff for the large range correction. +- * @see #largeEstimator(int, int, double) +- * @see "Blog post with section on 64 bit hashes and 'large range correction' cutoff" +- */ +- public static double largeEstimatorCutoff(final int log2m, final int registerSizeInBits) { +- return (TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]) / 30.0; +- } +- +- /** +- * The "large range correction" formula from the HyperLogLog algorithm, adapted +- * for 64 bit hashes. Only appropriate for estimators whose value exceeds +- * the return of {@link #largeEstimatorCutoff(int, int)}. +- * +- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper. +- * @param registerSizeInBits the size of the HLL registers, in bits. +- * @param estimator the original estimator ("E" in the paper). +- * @return a corrected cardinality estimate. +- * @see "Blog post with section on 64 bit hashes and 'large range correction'" +- */ +- public static double largeEstimator(final int log2m, final int registerSizeInBits, final double estimator) { +- final double twoToL = TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]; +- return -1 * twoToL * Math.log(1.0 - (estimator/twoToL)); +- } +-} +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,71 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * The metadata and parameters associated with a HLL. +- */ +-interface IHLLMetadata { +- /** +- * @return the schema version of the HLL. This will never be null. +- */ +- int schemaVersion(); +- +- /** +- * @return the type of the HLL. This will never be null. +- */ +- HLLType HLLType(); +- +- /** +- * @return the log-base-2 of the register count parameter of the HLL. This +- * will always be greater than or equal to 4 and less than or equal +- * to 31. +- */ +- int registerCountLog2(); +- +- /** +- * @return the register width parameter of the HLL. This will always be +- * greater than or equal to 1 and less than or equal to 8. +- */ +- int registerWidth(); +- +- /** +- * @return the log-base-2 of the explicit cutoff cardinality. This will always +- * be greater than or equal to zero and less than 31, per the specification. +- */ +- int log2ExplicitCutoff(); +- +- /** +- * @return true if the {@link HLLType#EXPLICIT} representation +- * has been disabled. false otherwise. +- */ +- boolean explicitOff(); +- +- /** +- * @return true if the {@link HLLType#EXPLICIT} representation +- * cutoff cardinality is set to be automatically chosen, +- * false otherwise. +- */ +- boolean explicitAuto(); +- +- /** +- * @return true if the {@link HLLType#SPARSE} representation +- * is enabled. +- */ +- boolean sparseEnabled(); +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 2015-07-16 13:22:50.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,85 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A serialization schema for HLLs. Reads and writes HLL metadata to +- * and from byte[] representations. +- */ +-interface ISchemaVersion { +- /** +- * The number of metadata bytes required for a serialized HLL of the +- * specified type. +- * +- * @param type the type of the serialized HLL +- * @return the number of padding bytes needed in order to fully accommodate +- * the needed metadata. +- */ +- int paddingBytes(HLLType type); +- +- /** +- * Writes metadata bytes to serialized HLL. +- * +- * @param bytes the padded data bytes of the HLL +- * @param metadata the metadata to write to the padding bytes +- */ +- void writeMetadata(byte[] bytes, IHLLMetadata metadata); +- +- /** +- * Reads the metadata bytes of the serialized HLL. +- * +- * @param bytes the serialized HLL +- * @return the HLL metadata +- */ +- IHLLMetadata readMetadata(byte[] bytes); +- +- /** +- * Builds an HLL serializer that matches this schema version. +- * +- * @param type the HLL type that will be serialized. This cannot be +- * null. +- * @param wordLength the length of the 'words' that comprise the data of the +- * HLL. Words must be at least 5 bits and at most 64 bits long. +- * @param wordCount the number of 'words' in the HLL's data. +- * @return a byte array serializer used to serialize a HLL according +- * to this schema version's specification. +- * @see #paddingBytes(HLLType) +- * @see IWordSerializer +- */ +- IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount); +- +- /** +- * Builds an HLL deserializer that matches this schema version. +- * +- * @param type the HLL type that will be deserialized. This cannot be +- * null. +- * @param wordLength the length of the 'words' that comprise the data of the +- * serialized HLL. Words must be at least 5 bits and at most 64 +- * bits long. +- * @param bytes the serialized HLL to deserialize. This cannot be +- * null. +- * @return a byte array deserializer used to deserialize a HLL serialized +- * according to this schema version's specification. +- */ +- IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes); +- +- /** +- * @return the schema version number. +- */ +- int schemaVersionNumber(); +-} +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 2015-07-16 13:14:59.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,41 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * Reads 'words' of a fixed width, in sequence, from a byte array. +- */ +-public interface IWordDeserializer { +- /** +- * @return the next word in the sequence. Should not be called more than +- * {@link #totalWordCount()} times. +- */ +- long readWord(); +- +- /** +- * Returns the number of words that could be encoded in the sequence. +- * +- * NOTE: the sequence that was encoded may be shorter than the value this +- * method returns due to padding issues within bytes. This guarantees +- * only an upper bound on the number of times {@link #readWord()} +- * can be called. +- * +- * @return the maximum number of words that could be read from the sequence. +- */ +- int totalWordCount(); +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,39 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * Writes 'words' of fixed width, in sequence, to a byte array. +- */ +-interface IWordSerializer { +- +- /** +- * Writes the word to the backing array. +- * +- * @param word the word to write. +- */ +- void writeWord(final long word); +- +- /** +- * Returns the backing array of bytes that contain the serialized +- * words. +- * @return the serialized words as a byte[]. +- */ +- byte[] getBytes(); +- +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,35 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A long-based iterator. This is not is-a {@link java.util.Iterator} +- * to prevent autoboxing between Long and long. +- */ +-interface LongIterator { +- /** +- * @return true if and only if there are more elements to +- * iterate over. false otherwise. +- */ +- boolean hasNext(); +- +- /** +- * @return the next long in the collection. +- */ +- long next(); +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,172 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A collection of utilities to work with numbers. +- */ +-class NumberUtil { +- // loge(2) (log-base e of 2) +- public static final double LOGE_2 = 0.6931471805599453; +- +- // ************************************************************************ +- /** +- * Computes the log2 (log-base-two) of the specified value. +- * +- * @param value the double for which the log2 is +- * desired. +- * @return the log2 of the specified value +- */ +- public static double log2(final double value) { +- // REF: http://en.wikipedia.org/wiki/Logarithmic_scale (conversion of bases) +- return Math.log(value) / LOGE_2; +- } +- +- // ======================================================================== +- // the hex characters +- private static final char[] HEX = { '0', '1', '2', '3', '4', '5', '6', '7', +- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; +- +- // ------------------------------------------------------------------------ +- /** +- * Converts the specified array of bytes into a string of +- * hex characters (low byte first). +- * +- * @param bytes the array of bytes that are to be converted. +- * This cannot be null though it may be empty. +- * @param offset the offset in bytes at which the bytes will +- * be taken. This cannot be negative and must be less than +- * bytes.length - 1. +- * @param count the number of bytes to be retrieved from the specified array. +- * This cannot be negative. If greater than bytes.length - offset +- * then that value is used. +- * @return a string of at most count characters that represents +- * the specified byte array in hex. This will never be null +- * though it may be empty if bytes is empty or count +- * is zero. +- * @throws IllegalArgumentException if offset is greater than +- * or equal to bytes.length. +- * @see #fromHex(String, int, int) +- */ +- public static String toHex(final byte[] bytes, final int offset, final int count) { +- if(offset >= bytes.length) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + bytes.length + ").")/*by contract*/; +- final int byteCount = Math.min( (bytes.length - offset), count); +- final int upperBound = byteCount + offset; +- +- final char[] chars = new char[byteCount * 2/*two chars per byte*/]; +- int charIndex = 0; +- for(int i=offset; i>> 4) & 0x0F]; +- chars[charIndex++] = HEX[value & 0x0F]; +- } +- +- return new String(chars); +- } +- +- /** +- * Converts the specified array of hex characters into an array of bytes +- * (low byte first). +- * +- * @param string the string of hex characters to be converted into bytes. +- * This cannot be null though it may be blank. +- * @param offset the offset in the string at which the characters will be +- * taken. This cannot be negative and must be less than string.length() - 1. +- * @param count the number of characters to be retrieved from the specified +- * string. This cannot be negative and must be divisible by two +- * (since there are two characters per byte). +- * @return the array of bytes that were converted from the +- * specified string (in the specified range). This will never be +- * null though it may be empty if string +- * is empty or count is zero. +- * @throws IllegalArgumentException if offset is greater than +- * or equal to string.length() or if count +- * is not divisible by two. +- * @see #toHex(byte[], int, int) +- */ +- public static byte[] fromHex(final String string, final int offset, final int count) { +- if(offset >= string.length()) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + string.length() + ").")/*by contract*/; +- if( (count & 0x01) != 0) throw new IllegalArgumentException("Count is not divisible by two (" + count + ").")/*by contract*/; +- final int charCount = Math.min((string.length() - offset), count); +- final int upperBound = offset + charCount; +- +- final byte[] bytes = new byte[charCount >>> 1/*aka /2*/]; +- int byteIndex = 0/*beginning*/; +- for(int i=offset; ibyte. +- * This cannot be a character other than [a-fA-F0-9]. +- * @return the value of the specified character. This will be a value 0 +- * through 15. +- * @throws IllegalArgumentException if the specified character is not in +- * [a-fA-F0-9] +- */ +- private static final int digit(final char character) { +- switch(character) { +- case '0': +- return 0; +- case '1': +- return 1; +- case '2': +- return 2; +- case '3': +- return 3; +- case '4': +- return 4; +- case '5': +- return 5; +- case '6': +- return 6; +- case '7': +- return 7; +- case '8': +- return 8; +- case '9': +- return 9; +- case 'a': +- case 'A': +- return 10; +- case 'b': +- case 'B': +- return 11; +- case 'c': +- case 'C': +- return 12; +- case 'd': +- case 'D': +- return 13; +- case 'e': +- case 'E': +- return 14; +- case 'f': +- case 'F': +- return 15; +- +- default: +- throw new IllegalArgumentException("Character is not in [a-fA-F0-9] ('" + character + "')."); +- } +- } +-} +\ Manca newline alla fine del file +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,24 +0,0 @@ +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A fork of Java-HyperLogLog package tweaked +- * not to depend on fastutil and with cleanups to make it lean and clean. +- */ +-package org.apache.solr.util.hll; +- +- +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 2015-07-16 13:22:50.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,154 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A concrete {@link ISchemaVersion} representing schema version one. +- */ +-class SchemaVersionOne implements ISchemaVersion { +- /** +- * The schema version number for this instance. +- */ +- public static final int SCHEMA_VERSION = 1; +- +- // ------------------------------------------------------------------------ +- // Version-specific ordinals (array position) for each of the HLL types +- private static final HLLType[] TYPE_ORDINALS = new HLLType[] { +- HLLType.EMPTY, +- HLLType.EXPLICIT, +- HLLType.SPARSE, +- HLLType.FULL +- }; +- +- // ------------------------------------------------------------------------ +- // number of header bytes for all HLL types +- private static final int HEADER_BYTE_COUNT = 3; +- +- // sentinel values from the spec for explicit off and auto +- private static final int EXPLICIT_OFF = 0; +- private static final int EXPLICIT_AUTO = 63; +- +- // ************************************************************************ +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.ISchemaVersion#paddingBytes(HLLType) +- */ +- @Override +- public int paddingBytes(final HLLType type) { +- return HEADER_BYTE_COUNT; +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.ISchemaVersion#writeMetadata(byte[], IHLLMetadata) +- */ +- @Override +- public void writeMetadata(final byte[] bytes, final IHLLMetadata metadata) { +- final HLLType type = metadata.HLLType(); +- final int typeOrdinal = getOrdinal(type); +- +- final int explicitCutoffValue; +- if(metadata.explicitOff()) { +- explicitCutoffValue = EXPLICIT_OFF; +- } else if(metadata.explicitAuto()) { +- explicitCutoffValue = EXPLICIT_AUTO; +- } else { +- explicitCutoffValue = metadata.log2ExplicitCutoff() + 1/*per spec*/; +- } +- +- bytes[0] = SerializationUtil.packVersionByte(SCHEMA_VERSION, typeOrdinal); +- bytes[1] = SerializationUtil.packParametersByte(metadata.registerWidth(), metadata.registerCountLog2()); +- bytes[2] = SerializationUtil.packCutoffByte(explicitCutoffValue, metadata.sparseEnabled()); +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.ISchemaVersion#readMetadata(byte[]) +- */ +- @Override +- public IHLLMetadata readMetadata(final byte[] bytes) { +- final byte versionByte = bytes[0]; +- final byte parametersByte = bytes[1]; +- final byte cutoffByte = bytes[2]; +- +- final int typeOrdinal = SerializationUtil.typeOrdinal(versionByte); +- final int explicitCutoffValue = SerializationUtil.explicitCutoff(cutoffByte); +- final boolean explicitOff = (explicitCutoffValue == EXPLICIT_OFF); +- final boolean explicitAuto = (explicitCutoffValue == EXPLICIT_AUTO); +- final int log2ExplicitCutoff = (explicitOff || explicitAuto) ? -1/*sentinel*/ : (explicitCutoffValue - 1/*per spec*/); +- +- return new HLLMetadata(SCHEMA_VERSION, +- getType(typeOrdinal), +- SerializationUtil.registerCountLog2(parametersByte), +- SerializationUtil.registerWidth(parametersByte), +- log2ExplicitCutoff, +- explicitOff, +- explicitAuto, +- SerializationUtil.sparseEnabled(cutoffByte)); +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.ISchemaVersion#getSerializer(HLLType, int, int) +- */ +- @Override +- public IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount) { +- return new BigEndianAscendingWordSerializer(wordLength, wordCount, paddingBytes(type)); +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.ISchemaVersion#getDeserializer(HLLType, int, byte[]) +- */ +- @Override +- public IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes) { +- return new BigEndianAscendingWordDeserializer(wordLength, paddingBytes(type), bytes); +- } +- +- /* (non-Javadoc) +- * @see net.agkn.hll.serialization.ISchemaVersion#schemaVersionNumber() +- */ +- @Override +- public int schemaVersionNumber() { +- return SCHEMA_VERSION; +- } +- +- // ======================================================================== +- // Type/Ordinal lookups +- /** +- * Gets the ordinal for the specified {@link HLLType}. +- * +- * @param type the type whose ordinal is desired +- * @return the ordinal for the specified type, to be used in the version byte. +- * This will always be non-negative. +- */ +- private static int getOrdinal(final HLLType type) { +- for(int i=0; inull. +- */ +- private static HLLType getType(final int ordinal) { +- if((ordinal < 0) || (ordinal >= TYPE_ORDINALS.length)) { +- throw new IllegalArgumentException("Invalid type ordinal '" + ordinal + "'. Only 0-" + (TYPE_ORDINALS.length - 1) + " inclusive allowed."); +- } +- return TYPE_ORDINALS[ordinal]; +- } +-} +diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java +--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 2015-07-16 12:32:07.000000000 +0200 ++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 1970-01-01 01:00:00.000000000 +0100 +@@ -1,277 +0,0 @@ +-package org.apache.solr.util.hll; +- +-/* +- * Licensed to the Apache Software Foundation (ASF) under one or more +- * contributor license agreements. See the NOTICE file distributed with +- * this work for additional information regarding copyright ownership. +- * The ASF licenses this file to You under the Apache License, Version 2.0 +- * (the "License"); you may not use this file except in compliance with +- * the License. You may obtain a copy of the License at +- * +- * http://www.apache.org/licenses/LICENSE-2.0 +- * +- * Unless required by applicable law or agreed to in writing, software +- * distributed under the License is distributed on an "AS IS" BASIS, +- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- * See the License for the specific language governing permissions and +- * limitations under the License. +- */ +- +-/** +- * A collection of constants and utilities for serializing and deserializing +- * HLLs. +- * +- * NOTE: 'package' visibility is used for many methods that only need to be +- * used by the {@link ISchemaVersion} implementations. The structure of +- * a serialized HLL's metadata should be opaque to the rest of the +- * library. +- */ +-class SerializationUtil { +- /** +- * The number of bits (of the parameters byte) dedicated to encoding the +- * width of the registers. +- */ +- /*package*/ static int REGISTER_WIDTH_BITS = 3; +- +- /** +- * A mask to cap the maximum value of the register width. +- */ +- /*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1; +- +- /** +- * The number of bits (of the parameters byte) dedicated to encoding +- * log2(registerCount). +- */ +- /*package*/ static int LOG2_REGISTER_COUNT_BITS = 5; +- +- /** +- * A mask to cap the maximum value of log2(registerCount). +- */ +- /*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1; +- +- /** +- * The number of bits (of the cutoff byte) dedicated to encoding the +- * log-base-2 of the explicit cutoff or sentinel values for +- * 'explicit-disabled' or 'auto'. +- */ +- /*package*/ static int EXPLICIT_CUTOFF_BITS = 6; +- +- /** +- * A mask to cap the maximum value of the explicit cutoff choice. +- */ +- /*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1; +- +- /** +- * Number of bits in a nibble. +- */ +- private static int NIBBLE_BITS = 4; +- +- /** +- * A mask to cap the maximum value of a nibble. +- */ +- private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1; +- +- // ************************************************************************ +- // Serialization utilities +- +- /** +- * Schema version one (v1). +- */ +- public static ISchemaVersion VERSION_ONE = new SchemaVersionOne(); +- +- /** +- * The default schema version for serializing HLLs. +- */ +- public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE; +- +- /** +- * List of registered schema versions, indexed by their version numbers. If +- * an entry is null, then no such schema version is registered. +- * Similarly, registering a new schema version simply entails assigning an +- * {@link ISchemaVersion} instance to the appropriate index of this array.

+- * +- * By default, only {@link SchemaVersionOne} is registered. Note that version +- * zero will always be reserved for internal (e.g. proprietary, legacy) schema +- * specifications/implementations and will never be assigned to in by this +- * library. +- */ +- public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16]; +- +- static { +- REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE; +- } +- +- /** +- * @param schemaVersionNumber the version number of the {@link ISchemaVersion} +- * desired. This must be a registered schema version number. +- * @return The {@link ISchemaVersion} for the given number. This will never +- * be null. +- */ +- public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) { +- if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) { +- throw new RuntimeException("Invalid schema version number " + schemaVersionNumber); +- } +- final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber]; +- if(schemaVersion == null) { +- throw new RuntimeException("Unknown schema version number " + schemaVersionNumber); +- } +- return schemaVersion; +- } +- +- /** +- * Get the appropriate {@link ISchemaVersion schema version} for the specified +- * serialized HLL. +- * +- * @param bytes the serialized HLL whose schema version is desired. +- * @return the schema version for the specified HLL. This will never +- * be null. +- */ +- public static ISchemaVersion getSchemaVersion(final byte[] bytes) { +- final byte versionByte = bytes[0]; +- final int schemaVersionNumber = schemaVersion(versionByte); +- +- return getSchemaVersion(schemaVersionNumber); +- } +- +- // ************************************************************************ +- // Package-specific shared helpers +- +- /** +- * Generates a byte that encodes the schema version and the type ordinal +- * of the HLL. +- * +- * The top nibble is the schema version and the bottom nibble is the type +- * ordinal. +- * +- * @param schemaVersion the schema version to encode. +- * @param typeOrdinal the type ordinal of the HLL to encode. +- * @return the packed version byte +- */ +- public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) { +- return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal)); +- } +- /** +- * Generates a byte that encodes the log-base-2 of the explicit cutoff +- * or sentinel values for 'explicit-disabled' or 'auto', as well as the +- * boolean indicating whether to use {@link HLLType#SPARSE} +- * in the promotion hierarchy. +- * +- * The top bit is always padding, the second highest bit indicates the +- * 'sparse-enabled' boolean, and the lowest six bits encode the explicit +- * cutoff value. +- * +- * @param explicitCutoff the explicit cutoff value to encode. +- *

    +- *
  • +- * If 'explicit-disabled' is chosen, this value should be 0. +- *
  • +- *
  • +- * If 'auto' is chosen, this value should be 63. +- *
  • +- *
  • +- * If a cutoff of 2n is desired, for 0 <= n < 31, +- * this value should be n + 1. +- *
  • +- *
+- * @param sparseEnabled whether {@link HLLType#SPARSE} +- * should be used in the promotion hierarchy to improve HLL +- * storage. +- * +- * @return the packed cutoff byte +- */ +- public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) { +- final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0); +- return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff)); +- } +- +- /** +- * Generates a byte that encodes the parameters of a +- * {@link HLLType#FULL} or {@link HLLType#SPARSE} +- * HLL.

+- * +- * The top 3 bits are used to encode registerWidth - 1 +- * (range of registerWidth is thus 1-9) and the bottom 5 +- * bits are used to encode registerCountLog2 +- * (range of registerCountLog2 is thus 0-31). +- * +- * @param registerWidth the register width (must be at least 1 and at +- * most 9) +- * @param registerCountLog2 the log-base-2 of the register count (must +- * be at least 0 and at most 31) +- * @return the packed parameters byte +- */ +- public static byte packParametersByte(final int registerWidth, final int registerCountLog2) { +- final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK); +- final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK); +- return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits); +- } +- +- /** +- * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized +- * HLL. +- * +- * @param cutoffByte the cutoff byte of the serialized HLL +- * @return the 'sparse-enabled' boolean +- */ +- public static boolean sparseEnabled(final byte cutoffByte) { +- return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1; +- } +- +- /** +- * Extracts the explicit cutoff value from the cutoff byte of a serialized +- * HLL. +- * +- * @param cutoffByte the cutoff byte of the serialized HLL +- * @return the explicit cutoff value +- */ +- public static int explicitCutoff(final byte cutoffByte) { +- return (cutoffByte & EXPLICIT_CUTOFF_MASK); +- } +- +- /** +- * Extracts the schema version from the version byte of a serialized +- * HLL. +- * +- * @param versionByte the version byte of the serialized HLL +- * @return the schema version of the serialized HLL +- */ +- public static int schemaVersion(final byte versionByte) { +- return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS); +- } +- +- /** +- * Extracts the type ordinal from the version byte of a serialized HLL. +- * +- * @param versionByte the version byte of the serialized HLL +- * @return the type ordinal of the serialized HLL +- */ +- public static int typeOrdinal(final byte versionByte) { +- return (versionByte & NIBBLE_MASK); +- } +- +- /** +- * Extracts the register width from the parameters byte of a serialized +- * {@link HLLType#FULL} HLL. +- * +- * @param parametersByte the parameters byte of the serialized HLL +- * @return the register width of the serialized HLL +- * +- * @see #packParametersByte(int, int) +- */ +- public static int registerWidth(final byte parametersByte) { +- return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1; +- } +- +- /** +- * Extracts the log2(registerCount) from the parameters byte of a +- * serialized {@link HLLType#FULL} HLL. +- * +- * @param parametersByte the parameters byte of the serialized HLL +- * @return log2(registerCount) of the serialized HLL +- * +- * @see #packParametersByte(int, int) +- */ +- public static int registerCountLog2(final byte parametersByte) { +- return (parametersByte & LOG2_REGISTER_COUNT_MASK); +- } +-} diff --git a/solr-repack.sh b/solr-repack.sh index b50c285..1df1a0b 100644 --- a/solr-repack.sh +++ b/solr-repack.sh @@ -6,7 +6,7 @@ if [ $# -ne 1 ] ; then fi VERSION=$1 rm -Rf solr-$VERSION-clean.tar.xz -#wget http://www.apache.org/dist/lucene/solr/$VERSION/solr-$VERSION-src.tgz +wget http://www.apache.org/dist/lucene/solr/$VERSION/solr-$VERSION-src.tgz tar -xf solr-$VERSION-src.tgz find solr-$VERSION -name "*.class" -print -delete @@ -23,8 +23,8 @@ pushd solr-$VERSION wget -O pom.xml http://central.maven.org/maven2/org/apache/solr/solr-${p}/${VERSION}/solr-${p}-${VERSION}.pom popd done -# analytics - for p in analysis-extras clustering dataimporthandler dataimporthandler-extras \ + + for p in analysis-extras analytics clustering dataimporthandler dataimporthandler-extras \ langid map-reduce morphlines-cell morphlines-core uima velocity; do mkdir -p solr/contrib/${p} pushd solr/contrib/${p} @@ -40,4 +40,4 @@ pushd solr-$VERSION popd tar -cJf solr-$VERSION-clean.tar.xz solr-$VERSION -#rm -rf solr-$VERSION +rm -rf solr-$VERSION diff --git a/solr.spec b/solr.spec index 2c27dc3..b270ca9 100644 --- a/solr.spec +++ b/solr.spec @@ -4,14 +4,13 @@ # Unavailable deps %bcond_with randomizedtesting %bcond_with uima -%bcond_with webapp %bcond_with kite %endif Name: solr -Version: 4.10.4 -Release: 2%{?dist} +Version: 5.3.0 +Release: 1%{?dist} Summary: Ultra-fast Lucene-based Search Server # MIT/X11 (BSD like) solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/AlphaNumericComparator.java License: ASL 2.0 and BSD @@ -19,50 +18,131 @@ URL: http://lucene.apache.org/solr/ # Use solr-repack.sh Source0: %{name}-%{version}-clean.tar.xz Source2: solr-repack.sh -# https://issues.apache.org/jira/browse/SOLR-4839 -Patch0: solr-4.10.4-SOLR-4839.patch +Patch0: solr-5.3.0-use-system-hll.patch +Patch1: solr-5.3.0-jetty9.3.3.patch +BuildRequires: maven-local +BuildRequires: mvn(com.adobe.xmp:xmpcore) +BuildRequires: mvn(com.carrotsearch:hppc) BuildRequires: mvn(com.cybozu.labs:langdetect) +BuildRequires: mvn(com.drewnoakes:metadata-extractor:2) +BuildRequires: mvn(com.fasterxml.jackson.core:jackson-core) +BuildRequires: mvn(com.fasterxml.jackson.dataformat:jackson-dataformat-smile) BuildRequires: mvn(com.google.guava:guava) +BuildRequires: mvn(com.google.protobuf:protobuf-java) BuildRequires: mvn(com.googlecode.concurrentlinkedhashmap:concurrentlinkedhashmap-lru) +BuildRequires: mvn(com.googlecode.juniversalchardet:juniversalchardet) BuildRequires: mvn(com.ibm.icu:icu4j) +BuildRequires: mvn(com.pff:java-libpst) BuildRequires: mvn(com.spatial4j:spatial4j) BuildRequires: mvn(com.sun.mail:gimap) BuildRequires: mvn(com.sun.mail:javax.mail) +# https://bugzilla.redhat.com/show_bug.cgi?id=1242405 +BuildRequires: mvn(com.tdunning:t-digest) +BuildRequires: mvn(com.thoughtworks.paranamer:paranamer) +BuildRequires: mvn(de.l3s.boilerpipe:boilerpipe) +BuildRequires: mvn(dom4j:dom4j) BuildRequires: mvn(org.aspectj:aspectjrt) -BuildRequires: mvn(org.codelibs:jhighlight) BuildRequires: mvn(commons-cli:commons-cli) BuildRequires: mvn(commons-codec:commons-codec) +BuildRequires: mvn(commons-collections:commons-collections) +BuildRequires: mvn(commons-configuration:commons-configuration) BuildRequires: mvn(commons-fileupload:commons-fileupload) BuildRequires: mvn(commons-io:commons-io) BuildRequires: mvn(commons-lang:commons-lang) +BuildRequires: mvn(dom4j:dom4j) +BuildRequires: mvn(io.netty:netty:3) BuildRequires: mvn(jakarta-regexp:jakarta-regexp) -BuildRequires: mvn(javax.servlet:servlet-api) +BuildRequires: mvn(javax.servlet:javax.servlet-api) +BuildRequires: mvn(jdom:jdom) BuildRequires: mvn(joda-time:joda-time) BuildRequires: mvn(log4j:log4j:1.2.17) +BuildRequires: mvn(net.agkn:hll) BuildRequires: mvn(net.arnx:jsonic) -BuildRequires: mvn(net.sourceforge.nekohtml:nekohtml) +BuildRequires: mvn(net.sourceforge.jmatio:jmatio) BuildRequires: mvn(org.antlr:antlr-runtime) +BuildRequires: mvn(org.apache:apache:pom:) +BuildRequires: mvn(org.apache.ant:ant) +BuildRequires: mvn(org.apache.commons:commons-exec) BuildRequires: mvn(org.apache.commons:commons-compress) +BuildRequires: mvn(org.apache.felix:maven-bundle-plugin) +# https://bugzilla.redhat.com/show_bug.cgi?id=1235420 BuildRequires: mvn(org.apache.hadoop:hadoop-annotations) BuildRequires: mvn(org.apache.hadoop:hadoop-auth) BuildRequires: mvn(org.apache.hadoop:hadoop-common) BuildRequires: mvn(org.apache.hadoop:hadoop-hdfs) BuildRequires: mvn(org.apache.httpcomponents:httpclient) BuildRequires: mvn(org.apache.httpcomponents:httpmime) +BuildRequires: mvn(org.apache.lucene:lucene-analysis-modules-aggregator:pom:) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-common) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-icu) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-kuromoji) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-morfologik) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-phonetic) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-smartcn) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-stempel) +BuildRequires: mvn(org.apache.lucene:lucene-analyzers-uima) +BuildRequires: mvn(org.apache.lucene:lucene-backward-codecs) +BuildRequires: mvn(org.apache.lucene:lucene-codecs) +BuildRequires: mvn(org.apache.lucene:lucene-core) +BuildRequires: mvn(org.apache.lucene:lucene-expressions) +BuildRequires: mvn(org.apache.lucene:lucene-grouping) +BuildRequires: mvn(org.apache.lucene:lucene-highlighter) +BuildRequires: mvn(org.apache.lucene:lucene-join) +BuildRequires: mvn(org.apache.lucene:lucene-memory) +BuildRequires: mvn(org.apache.lucene:lucene-misc) +BuildRequires: mvn(org.apache.lucene:lucene-parent:pom:) +BuildRequires: mvn(org.apache.lucene:lucene-queries) +BuildRequires: mvn(org.apache.lucene:lucene-queryparser) +BuildRequires: mvn(org.apache.lucene:lucene-replicator) +BuildRequires: mvn(org.apache.lucene:lucene-sandbox) +BuildRequires: mvn(org.apache.lucene:lucene-spatial) +BuildRequires: mvn(org.apache.lucene:lucene-suggest) +BuildRequires: mvn(org.apache.lucene:lucene-test-framework) +BuildRequires: mvn(org.apache.maven.plugins:maven-enforcer-plugin) +BuildRequires: mvn(org.apache.maven.plugins:maven-remote-resources-plugin) +BuildRequires: mvn(org.apache.maven.plugins:maven-site-plugin) +BuildRequires: mvn(org.apache.james:apache-mime4j-core) +BuildRequires: mvn(org.apache.james:apache-mime4j-dom) +BuildRequires: mvn(org.apache.james:james-project:pom:) +BuildRequires: mvn(org.apache.pdfbox:fontbox) +BuildRequires: mvn(org.apache.pdfbox:jempbox) +BuildRequires: mvn(org.apache.pdfbox:pdfbox) +BuildRequires: mvn(org.apache.poi:poi) +BuildRequires: mvn(org.apache.poi:poi-ooxml) +BuildRequires: mvn(org.apache.poi:poi-ooxml-schemas) +BuildRequires: mvn(org.apache.poi:poi-scratchpad) +BuildRequires: mvn(org.apache.tika:tika-core) BuildRequires: mvn(org.apache.tika:tika-parsers) BuildRequires: mvn(org.apache.velocity:velocity) BuildRequires: mvn(org.apache.velocity:velocity-tools) BuildRequires: mvn(org.apache.zookeeper:zookeeper) +BuildRequires: mvn(org.apache.xmlbeans:xmlbeans) +BuildRequires: mvn(org.aspectj:aspectjrt) +BuildRequires: mvn(org.carrot2:morfologik-fsa) BuildRequires: mvn(org.carrot2:morfologik-polish) +BuildRequires: mvn(org.carrot2:morfologik-stemming) +BuildRequires: mvn(org.ccil.cowan.tagsoup:tagsoup) +# https://bugzilla.redhat.com/show_bug.cgi?id=1237324 +BuildRequires: mvn(org.cloudera.htrace:htrace-core) +BuildRequires: mvn(org.codehaus.mojo:buildnumber-maven-plugin) +BuildRequires: mvn(org.codehaus.woodstox:stax2-api) BuildRequires: mvn(org.codehaus.woodstox:woodstox-core-asl) +BuildRequires: mvn(org.eclipse.jetty:jetty-continuation) BuildRequires: mvn(org.eclipse.jetty:jetty-deploy) +BuildRequires: mvn(org.eclipse.jetty:jetty-http) +BuildRequires: mvn(org.eclipse.jetty:jetty-io) BuildRequires: mvn(org.eclipse.jetty:jetty-jmx) +BuildRequires: mvn(org.eclipse.jetty:jetty-rewrite) +BuildRequires: mvn(org.eclipse.jetty:jetty-security) BuildRequires: mvn(org.eclipse.jetty:jetty-server) BuildRequires: mvn(org.eclipse.jetty:jetty-servlet) BuildRequires: mvn(org.eclipse.jetty:jetty-servlets) BuildRequires: mvn(org.eclipse.jetty:jetty-util) BuildRequires: mvn(org.eclipse.jetty:jetty-webapp) +BuildRequires: mvn(org.eclipse.jetty:jetty-xml) +BuildRequires: mvn(org.gagravarr:vorbis-java-tika) +BuildRequires: mvn(org.hamcrest:hamcrest-core) BuildRequires: mvn(org.noggit:noggit) BuildRequires: mvn(org.ow2.asm:asm) BuildRequires: mvn(org.ow2.asm:asm-commons) @@ -72,13 +152,10 @@ BuildRequires: mvn(org.slf4j:jcl-over-slf4j) BuildRequires: mvn(org.slf4j:jul-to-slf4j) BuildRequires: mvn(org.slf4j:slf4j-api) BuildRequires: mvn(org.slf4j:slf4j-log4j12) +BuildRequires: mvn(org.tukaani:xz) +BuildRequires: mvn(rome:rome) BuildRequires: mvn(xerces:xercesImpl) -%if %{?fedora} > 20 -BuildRequires: mvn(io.netty:netty:3) -%else -BuildRequires: mvn(io.netty:netty) -%endif # Optional? %if %{with carrot2} @@ -86,12 +163,14 @@ BuildRequires: mvn(io.netty:netty) # work in progress ... circular deps BuildRequires: mvn(org.carrot2:carrot2-mini:3.8.0) %endif + %if %{without randomizedtesting} # {lucene,solr}/test-framework BuildRequires: mvn(com.carrotsearch.randomizedtesting:junit4-ant) BuildRequires: mvn(com.carrotsearch.randomizedtesting:randomizedtesting-runner) BuildRequires: mvn(junit:junit) %endif + %if %{without uima} # {lucene/analysis,solr/contrib}/uima BuildRequires: mvn(org.apache.uima:uimaj-core) @@ -102,10 +181,7 @@ BuildRequires: mvn(org.apache.uima:WhitespaceTokenizer) BuildRequires: mvn(org.apache.uima:parent-pom:pom:) BuildRequires: mvn(org.apache.lucene:lucene-analyzers-uima) %endif -%if %{with webapp} -# solr/webapp -BuildRequires: mvn(org.eclipse.jetty.orbit:javax.servlet) -%endif + %if %{with kite} # solr/contrib/{map-reduce,morphlines-cell,morphlines-core} BuildRequires: mvn(com.codahale.metrics:metrics-core) @@ -129,52 +205,57 @@ BuildRequires: mvn(org.kitesdk:kite-morphlines-saxon) %endif %endif -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-common) -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-icu) -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-kuromoji) -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-morfologik) -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-phonetic) -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-smartcn) -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-stempel) -BuildRequires: mvn(org.apache.lucene:lucene-analyzers-uima) -BuildRequires: mvn(org.apache.lucene:lucene-codecs) -BuildRequires: mvn(org.apache.lucene:lucene-core) -BuildRequires: mvn(org.apache.lucene:lucene-expressions) -BuildRequires: mvn(org.apache.lucene:lucene-grouping) -BuildRequires: mvn(org.apache.lucene:lucene-highlighter) -BuildRequires: mvn(org.apache.lucene:lucene-join) -BuildRequires: mvn(org.apache.lucene:lucene-memory) -BuildRequires: mvn(org.apache.lucene:lucene-misc) -BuildRequires: mvn(org.apache.lucene:lucene-queries) -BuildRequires: mvn(org.apache.lucene:lucene-queryparser) -BuildRequires: mvn(org.apache.lucene:lucene-replicator) -BuildRequires: mvn(org.apache.lucene:lucene-spatial) -BuildRequires: mvn(org.apache.lucene:lucene-suggest) - -BuildRequires: mvn(org.apache.lucene:lucene-analysis-modules-aggregator:pom:) -BuildRequires: mvn(org.apache.lucene:lucene-parent:pom:) -BuildRequires: mvn(org.apache.lucene:lucene-test-framework) - %if 0 # test deps -BuildRequires: mvn(dom4j:dom4j) +BuildRequires: mvn(aopalliance:aopalliance) +BuildRequires: mvn(com.fasterxml.jackson.core:jackson-annotations) +BuildRequires: mvn(com.fasterxml.jackson.core:jackson-databind) +BuildRequires: mvn(com.google.inject:guice) +BuildRequires: mvn(com.google.inject.extensions:guice-servlet) +BuildRequires: mvn(com.sun.jersey:jersey-bundle:1) +BuildRequires: mvn(com.sun.jersey:jersey-core:1) +BuildRequires: mvn(com.sun.jersey:jersey-json:1) +BuildRequires: mvn(com.sun.jersey:jersey-server:1) +BuildRequires: mvn(com.sun.jersey.contribs:jersey-guice:1) +BuildRequires: mvn(com.sun.xml.bind:jaxb-impl) BuildRequires: mvn(hsqldb:hsqldb:1.8.0.10) -BuildRequires: mvn(org.apache.ant:ant:1.8.2) +BuildRequires: mvn(org.apache.avro:avro) +BuildRequires: mvn(org.apache.curator:curator-client) +BuildRequires: mvn(org.apache.curator:curator-framework) BuildRequires: mvn(org.apache.derby:derby:10.9.1.0) -BuildRequires: mvn(org.apache.hadoop:hadoop-common:tests:2.0.5-alpha) -BuildRequires: mvn(org.apache.hadoop:hadoop-hdfs:tests:2.0.5-alpha) +BuildRequires: mvn(org.apache.hadoop:hadoop-common:2.6.0:tests:) +BuildRequires: mvn(org.apache.hadoop:hadoop-hdfs:2.6.0:tests:) +BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-app) +BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-common) +BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-hs) +BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-jobclient) +BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.6.0:tests:) +BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-shuffle) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-api) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-client) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-common) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-applicationhistoryservice) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-common) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-nodemanager) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-resourcemanager) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-tests:2.6.0:tests:) +BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-web-proxy) +BuildRequires: mvn(org.apache.mrunit:mrunit::hadoop2:) +BuildRequires: mvn(org.codehaus.jackson:jackson-core-asl) +BuildRequires: mvn(org.codehaus.jackson:jackson-jaxrs) +BuildRequires: mvn(org.codehaus.jackson:jackson-mapper-asl) BuildRequires: mvn(org.easymock:easymock:3.0) +BuildRequires: mvn(org.fusesource.leveldbjni:leveldbjni) +BuildRequires: mvn(org.iq80.leveldb:leveldb) +BuildRequires: mvn(org.iq80.leveldb:leveldb-api) +BuildRequires: mvn(org.kitesdk:kite-morphlines-core::tests:) +BuildRequires: mvn(org.mockito:mockito-core) BuildRequires: mvn(org.mortbay.jetty:jetty:6.1.26) BuildRequires: mvn(org.mortbay.jetty:jetty-util:6.1.26) -%endif -BuildRequires: mvn(org.hamcrest:hamcrest-core) +BuildRequires: mvn(org.objenesis:objenesis) +BuildRequires: mvn(org.xerial.snappy:snappy-java) -BuildRequires: buildnumber-maven-plugin -BuildRequires: maven-local -BuildRequires: maven-enforcer-plugin -BuildRequires: maven-plugin-bundle -BuildRequires: maven-remote-resources-plugin -BuildRequires: maven-site-plugin +%endif BuildArch: noarch @@ -198,10 +279,17 @@ find . -name "*.class" -print -delete find . -name "*.jar" -print -delete find . -name "*.js" -print -delete +%patch0 -p1 +rm -rf solr/core/src/java/org/apache/solr/util/hll +%pom_add_dep net.agkn:hll:1.6.0 solr/core + +%if %{?fedora} > 23 +%patch1 -p1 +%endif + cp -p dev-tools/maven/solr/pom.xml.template solr/pom.xml cp -p dev-tools/maven/solr/contrib/pom.xml.template solr/contrib/pom.xml -cp -p dev-tools/maven/solr/webapp/pom.xml.template solr/webapp/pom.xml -sed -i "s/@version@/%{version}/g" solr/pom.xml solr/contrib/pom.xml solr/webapp/pom.xml +sed -i "s/@version@/%{version}/g" solr/pom.xml solr/contrib/pom.xml # Fix parent pom sed -i "s|../../../pom.xml|../pom.xml|" $(find solr -name "pom.xml") @@ -210,7 +298,7 @@ sed -i "s|../../../pom.xml|../pom.xml sed -i 's|${module-path}|${basedir}/src/java|' \ solr/core/pom.xml solr/solrj/pom.xml -for p in solr/test-framework/pom.xml solr/contrib/analysis-extras/pom.xml \ +for p in solr/test-framework/pom.xml solr/contrib/analytics/pom.xml solr/contrib/analysis-extras/pom.xml \ solr/contrib/clustering/pom.xml solr/contrib/dataimporthandler/pom.xml solr/contrib/dataimporthandler-extras/pom.xml \ solr/contrib/extraction/pom.xml solr/contrib/langid/pom.xml solr/contrib/uima/pom.xml solr/contrib/velocity/pom.xml \ solr/contrib/morphlines-core/pom.xml solr/contrib/morphlines-cell/pom.xml solr/contrib/map-reduce/pom.xml; do @@ -220,7 +308,7 @@ done sed -i 's|${module-path}/src/resources|${basedir}/src/resources|' \ solr/contrib/uima/pom.xml \ - solr/contrib/clustering/pom.xml \ + solr/contrib/analytics/pom.xml solr/contrib/clustering/pom.xml \ solr/contrib/morphlines-core/pom.xml solr/contrib/morphlines-cell/pom.xml solr/contrib/map-reduce/pom.xml sed -i 's|${module-path}/src/test-files|${basedir}/src/test-files|' \ @@ -234,16 +322,15 @@ sed -i 's|${module-path}|${basedir}/src/java${module-path}|${basedir}/src/test|' \ solr/solrj/pom.xml -# Remove unavailable plugins -%pom_remove_plugin org.codehaus.gmaven:gmaven-plugin -%pom_remove_plugin de.thetaphi:forbiddenapis -for m in solr solr/core solr/solrj solr/test-framework \ - solr/contrib/extraction solr/contrib/velocity solr/contrib/uima solr/contrib/langid solr/webapp \ - solr/contrib/morphlines-core/pom.xml solr/contrib/morphlines-cell/pom.xml solr/contrib/map-reduce/pom.xml; do +sed -i 's|now.timestamp|maven.build.timestamp|' pom.xml +sed -i 's|@spec.version@|${project.version}|' pom.xml -%pom_remove_plugin de.thetaphi:forbiddenapis ${m} +# Disable lucene +%pom_disable_module lucene -done +# Remove unavailable plugins +%pom_remove_plugin org.codehaus.gmaven:gmaven-plugin +%pom_remove_plugin -r de.thetaphi:forbiddenapis %pom_xpath_inject "pom:build/pom:pluginManagement/pom:plugins/pom:plugin[pom:artifactId = 'maven-javadoc-plugin' ]" ' @@ -274,39 +361,18 @@ done %if %{with uima} %pom_disable_module uima solr/contrib %endif -%if %{without webapp} -# Require JQuery.js and other js libraries https://bugzilla.redhat.com/show_bug.cgi?id=857992 -%pom_disable_module webapp solr -%endif + %if %{without kite} %pom_disable_module map-reduce solr/contrib %pom_disable_module morphlines-cell solr/contrib %pom_disable_module morphlines-core solr/contrib %endif -# Fix aId -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/core -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/solrj -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/test-framework -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/analysis-extras -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/langid -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/dataimporthandler -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/dataimporthandler-extras -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/extraction -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/uima -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/velocity -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/morphlines-core -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/morphlines-cell -%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/map-reduce - # Use system jvm apis -#%%pom_remove_dep javax.activation:activation solr/contrib/dataimporthandler %pom_remove_dep javax.activation:activation solr/contrib/dataimporthandler-extras + # Remove fake BR -%pom_remove_dep com.googlecode.mp4parser:isoparser -%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/extraction -%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/langid -%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/dataimporthandler-extras +%pom_remove_dep -r com.googlecode.mp4parser:isoparser %pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/dataimporthandler-extras %pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/extraction %pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/langid @@ -321,76 +387,69 @@ done %pom_remove_dep org.bouncycastle:bcprov-jdk15 solr/contrib/langid %pom_remove_dep org.bouncycastle: solr/contrib/morphlines-core %pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/morphlines-core -%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/morphlines-core %pom_remove_dep org.bouncycastle: solr/contrib/morphlines-cell %pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/morphlines-cell -%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/morphlines-cell %pom_remove_dep org.bouncycastle: solr/contrib/map-reduce %pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/map-reduce -%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/map-reduce %pom_remove_dep net.sf.saxon:Saxon-HE solr/contrib/map-reduce %pom_remove_dep org.kitesdk:kite-morphlines-saxon solr/contrib/map-reduce %pom_remove_dep org.apache.tika:tika-xmp solr/contrib/map-reduce %pom_remove_dep org.apache.tika:tika-xmp solr/contrib/morphlines-cell %pom_remove_dep org.apache.tika:tika-xmp solr/contrib/morphlines-core +%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/dataimporthandler-extras +%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/extraction +%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/langid +%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/map-reduce +%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/morphlines-cell +%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/morphlines-core -%if %{?fedora} > 20 -%pom_xpath_set "pom:dependencyManagement/pom:dependencies/pom:dependency[pom:groupId = 'io.netty']/pom:version" 3 -%endif - -# Disable lucene -%pom_disable_module lucene - -# Add jetty9 support -%patch0 -p0 -%pom_xpath_set "pom:properties/pom:jetty.version" 9.0.5.v20130815 -%pom_add_dep org.eclipse.jetty:jetty-servlets solr/core -%pom_remove_dep org.eclipse.jetty.orbit:javax.servlet solr/core -%pom_add_dep javax.servlet:javax.servlet-api solr/core - +%pom_xpath_set "pom:dependency[pom:groupId = 'io.netty']/pom:version" 3 # fix log4j version -sed -i "s|1.2.16|1.2.17|" pom.xml -for p in solr/core \ - solr/test-framework \ - solr/contrib/analysis-extras \ - solr/contrib/dataimporthandler \ - solr/contrib/dataimporthandler-extras \ - solr/contrib/extraction \ - solr/contrib/langid \ - solr/contrib/uima \ - solr/contrib/velocity \ - solr/contrib/morphlines-core \ - solr/contrib/morphlines-cell \ - solr/contrib/map-reduce;do -%pom_xpath_inject "pom:dependencies/pom:dependency[pom:artifactId = 'log4j']" "1.2.17" ${p} +for p in core \ + test-framework \ + contrib/analysis-extras \ + contrib/analytics \ + contrib/clustering \ + contrib/dataimporthandler \ + contrib/dataimporthandler-extras \ + contrib/extraction \ + contrib/langid \ + contrib/uima \ + contrib/velocity \ + contrib/morphlines-core \ + contrib/morphlines-cell \ + contrib/map-reduce;do +%pom_xpath_inject "pom:dependency[pom:artifactId = 'log4j']" "1.2.17" solr/${p} done -%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs -%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/dataimporthandler-extras -%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/extraction -%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/langid -%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/map-reduce -%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/morphlines-cell -%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/morphlines-core +# Use htrace >= 3.0.4 +%pom_xpath_set -r "pom:dependency[pom:artifactId = 'htrace-core']/pom:groupId" org.cloudera.htrace +# Use hadoop >= 2.6.0 +rm -r solr/core/src/java/org/apache/solr/security/KerberosFilter.java \ + solr/core/src/java/org/apache/solr/security/KerberosPlugin.java +sed -i "s|conf.addResource(TEST_CONF);||" \ + solr/core/src/java/org/apache/solr/util/HdfsUtil.java %build -# Test skipped for unavailable test deps {lucene,solr}/test-framework +# Test skipped for unavailable test deps %mvn_build -f %install %mvn_install %files -f .mfiles -%dir %{_javadir}/%{name} -%doc solr/CHANGES.txt README.txt +%doc solr/CHANGES.txt solr/README.txt %license LICENSE.txt NOTICE.txt %files javadoc -f .mfiles-javadoc %license LICENSE.txt NOTICE.txt %changelog +* Wed Sep 16 2015 gil cattaneo 5.3.0-1 +- update to 5.3.0 (rhbz#1240013,1235424) + * Fri Jun 19 2015 Fedora Release Engineering - 4.10.4-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild diff --git a/sources b/sources index bae9599..8e88dee 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -4376764b3f78579766ee7518b7f21e18 solr-4.10.4-clean.tar.xz +436cc205b1e58a4b6afb236042a355a5 solr-5.3.0-clean.tar.xz