diff --git a/.gitignore b/.gitignore index 9343925..1bc99b8 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ /solr-4.10.4-clean.tar.xz /solr-5.3.0-clean.tar.xz /solr-5.3.1-clean.tar.xz +/solr-5.4.0-clean.tar.xz diff --git a/solr-5.3.0-use-system-hll.patch b/solr-5.3.0-use-system-hll.patch deleted file mode 100644 index 01a596f..0000000 --- a/solr-5.3.0-use-system-hll.patch +++ /dev/null @@ -1,3154 +0,0 @@ -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsField.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsField.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsField.java 2015-07-17 14:10:40.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsField.java 2015-09-14 18:11:56.330769125 +0200 -@@ -55,8 +55,8 @@ - import org.apache.solr.search.QueryParsing; - import org.apache.solr.search.SolrIndexSearcher; - import org.apache.solr.search.SyntaxError; --import org.apache.solr.util.hll.HLL; --import org.apache.solr.util.hll.HLLType; -+import net.agkn.hll.HLL; -+import net.agkn.hll.HLLType; - - import com.google.common.hash.Hashing; - import com.google.common.hash.HashFunction; -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java 2015-07-17 14:10:40.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java 2015-09-14 18:12:12.835927178 +0200 -@@ -33,12 +33,12 @@ - import org.apache.solr.schema.*; - - import com.tdunning.math.stats.AVLTreeDigest; -+ -+import net.agkn.hll.HLL; -+import net.agkn.hll.HLLType; - import com.google.common.hash.Hashing; - import com.google.common.hash.HashFunction; - --import org.apache.solr.util.hll.HLL; --import org.apache.solr.util.hll.HLLType; -- - /** - * Factory class for creating instance of - * {@link org.apache.solr.handler.component.StatsValues} -@@ -824,7 +824,7 @@ - @Override - public long hash(String v) { - // NOTE: renamed hashUnencodedChars starting with guava 15 -- return hasher.hashString(v).asLong(); -+ return hasher.hashUnencodedChars(v).asLong(); - } - - @Override -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java 2015-07-17 14:10:40.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java 2015-09-15 07:26:56.977497188 +0200 -@@ -23,8 +23,8 @@ - import java.util.List; - import java.util.Set; - --import org.apache.solr.util.hll.HLL; --import org.apache.solr.util.hll.HLLType; -+import net.agkn.hll.HLL; -+import net.agkn.hll.HLLType; - import org.apache.lucene.index.DocValues; - import org.apache.lucene.index.LeafReaderContext; - import org.apache.lucene.index.NumericDocValues; -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java 2015-07-17 14:10:40.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java 2015-09-16 09:31:52.773252909 +0200 -@@ -21,7 +21,7 @@ - import java.util.ArrayList; - import java.util.List; - --import org.apache.solr.util.hll.HLL; -+import net.agkn.hll.HLL; - import org.apache.lucene.index.LeafReaderContext; - import org.apache.lucene.index.MultiDocValues; - import org.apache.lucene.index.SortedDocValues; -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,173 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A corresponding deserializer for {@link BigEndianAscendingWordSerializer}. -- */ --class BigEndianAscendingWordDeserializer implements IWordDeserializer { -- // The number of bits per byte. -- private static final int BITS_PER_BYTE = 8; -- -- // long mask for the maximum value stored in a byte -- private static final long BYTE_MASK = (1L << BITS_PER_BYTE) - 1L; -- -- // ************************************************************************ -- // The length in bits of the words to be read. -- private final int wordLength; -- -- // The byte array to which the words are serialized. -- private final byte[] bytes; -- -- // The number of leading padding bytes in 'bytes' to be ignored. -- private final int bytePadding; -- -- // The number of words that the byte array contains. -- private final int wordCount; -- -- // The current read state. -- private int currentWordIndex; -- -- // ======================================================================== -- /** -- * @param wordLength the length in bits of the words to be deserialized. Must -- * be less than or equal to 64 and greater than or equal to 1. -- * @param bytePadding the number of leading bytes that pad the serialized words. -- * Must be greater than or equal to zero. -- * @param bytes the byte array containing the serialized words. Cannot be -- * null. -- */ -- public BigEndianAscendingWordDeserializer(final int wordLength, final int bytePadding, final byte[] bytes) { -- if((wordLength < 1) || (wordLength > 64)) { -- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")"); -- } -- -- if(bytePadding < 0) { -- throw new IllegalArgumentException("Byte padding must be >= zero. (was: " + bytePadding + ")"); -- } -- -- this.wordLength = wordLength; -- this.bytes = bytes; -- this.bytePadding = bytePadding; -- -- final int dataBytes = (bytes.length - bytePadding); -- final long dataBits = (dataBytes * BITS_PER_BYTE); -- -- this.wordCount = (int)(dataBits/wordLength); -- -- currentWordIndex = 0; -- } -- -- // ======================================================================== -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IWordDeserializer#readWord() -- */ -- @Override -- public long readWord() { -- final long word = readWord(currentWordIndex); -- currentWordIndex++; -- -- return word; -- } -- -- // ------------------------------------------------------------------------ -- /** -- * Reads the word at the specified sequence position (zero-indexed). -- * -- * @param position the zero-indexed position of the word to be read. This -- * must be greater than or equal to zero. -- * @return the value of the serialized word at the specified position. -- */ -- private long readWord(final int position) { -- if(position < 0) { -- throw new ArrayIndexOutOfBoundsException(position); -- } -- -- // First bit of the word -- final long firstBitIndex = (position * wordLength); -- final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE)); -- final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE); -- -- // Last bit of the word -- final long lastBitIndex = (firstBitIndex + wordLength - 1); -- final int lastByteIndex = (bytePadding + (int)(lastBitIndex / BITS_PER_BYTE)); -- final int lastByteBitsToConsume; -- -- final int bitsAfterByteBoundary = (int)((lastBitIndex + 1) % BITS_PER_BYTE); -- // If the word terminates at the end of the last byte, consume the whole -- // last byte. -- if(bitsAfterByteBoundary == 0) { -- lastByteBitsToConsume = BITS_PER_BYTE; -- } else { -- // Otherwise, only consume what is necessary. -- lastByteBitsToConsume = bitsAfterByteBoundary; -- } -- -- if(lastByteIndex >= bytes.length) { -- throw new ArrayIndexOutOfBoundsException("Word out of bounds of backing array."); -- } -- -- // Accumulator -- long value = 0; -- -- // -------------------------------------------------------------------- -- // First byte -- final int bitsRemainingInFirstByte = (BITS_PER_BYTE - firstByteSkipBits); -- final int bitsToConsumeInFirstByte = Math.min(bitsRemainingInFirstByte, wordLength); -- long firstByte = (long)bytes[firstByteIndex]; -- -- // Mask off the bits to skip in the first byte. -- final long firstByteMask = ((1L << bitsRemainingInFirstByte) - 1L); -- firstByte &= firstByteMask; -- // Right-align relevant bits of first byte. -- firstByte >>>= (bitsRemainingInFirstByte - bitsToConsumeInFirstByte); -- -- value |= firstByte; -- -- // If the first byte contains the whole word, short-circuit. -- if(firstByteIndex == lastByteIndex) { -- return value; -- } -- -- // -------------------------------------------------------------------- -- // Middle bytes -- final int middleByteCount = (lastByteIndex - firstByteIndex - 1); -- for(int i=0; i>= (BITS_PER_BYTE - lastByteBitsToConsume); -- value <<= lastByteBitsToConsume; -- value |= lastByte; -- return value; -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount() -- */ -- @Override -- public int totalWordCount() { -- return wordCount; -- } --} -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,174 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A serializer that writes a sequence of fixed bit-width 'words' to a byte array. -- * Bitwise OR is used to write words into bytes, so a low bit in a word is also -- * a low bit in a byte. However, a high byte in a word is written at a lower index -- * in the array than a low byte in a word. The first word is written at the lowest -- * array index. Each serializer is one time use and returns its backing byte -- * array.

-- * -- * This encoding was chosen so that when reading bytes as octets in the typical -- * first-octet-is-the-high-nibble fashion, an octet-to-binary conversion -- * would yield a high-to-low, left-to-right view of the "short words".

-- * -- * Example:

-- * -- * Say short words are 5 bits wide. Our word sequence is the values -- * [31, 1, 5]. In big-endian binary format, the values are -- * [0b11111, 0b00001, 0b00101]. We use 15 of 16 bits in two bytes -- * and pad the last (lowest) bit of the last byte with a zero: -- * -- * -- * [0b11111000, 0b01001010] = [0xF8, 0x4A] -- * . -- */ --class BigEndianAscendingWordSerializer implements IWordSerializer { -- // The number of bits per byte. -- private static final int BITS_PER_BYTE = 8; -- -- // ************************************************************************ -- // The length in bits of the words to be written. -- private final int wordLength; -- // The number of words to be written. -- private final int wordCount; -- -- // The byte array to which the words are serialized. -- private final byte[] bytes; -- -- // ------------------------------------------------------------------------ -- // Write state -- // Number of bits that remain writable in the current byte. -- private int bitsLeftInByte; -- // Index of byte currently being written to. -- private int byteIndex; -- // Number of words written. -- private int wordsWritten; -- -- // ======================================================================== -- /** -- * @param wordLength the length in bits of the words to be serialized. Must -- * be greater than or equal to 1 and less than or equal to 64. -- * @param wordCount the number of words to be serialized. Must be greater than -- * or equal to zero. -- * @param bytePadding the number of leading bytes that should pad the -- * serialized words. Must be greater than or equal to zero. -- */ -- public BigEndianAscendingWordSerializer(final int wordLength, final int wordCount, final int bytePadding) { -- if((wordLength < 1) || (wordLength > 64)) { -- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")"); -- } -- if(wordCount < 0) { -- throw new IllegalArgumentException("Word count must be >= 0. (was: " + wordCount + ")"); -- } -- if(bytePadding < 0) { -- throw new IllegalArgumentException("Byte padding must be must be >= 0. (was: " + bytePadding + ")"); -- } -- -- this.wordLength = wordLength; -- this.wordCount = wordCount; -- -- final long bitsRequired = (wordLength * wordCount); -- final boolean leftoverBits = ((bitsRequired % BITS_PER_BYTE) != 0); -- final int bytesRequired = (int)(bitsRequired / BITS_PER_BYTE) + (leftoverBits ? 1 : 0) + bytePadding; -- bytes = new byte[bytesRequired]; -- -- bitsLeftInByte = BITS_PER_BYTE; -- byteIndex = bytePadding; -- wordsWritten = 0; -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IWordSerializer#writeWord(long) -- * @throws RuntimeException if the number of words written is greater than the -- * wordCount parameter in the constructor. -- */ -- @Override -- public void writeWord(final long word) { -- if(wordsWritten == wordCount) { -- throw new RuntimeException("Cannot write more words, backing array full!"); -- } -- -- int bitsLeftInWord = wordLength; -- -- while(bitsLeftInWord > 0) { -- // Move to the next byte if the current one is fully packed. -- if(bitsLeftInByte == 0) { -- byteIndex++; -- bitsLeftInByte = BITS_PER_BYTE; -- } -- -- final long consumedMask; -- if(bitsLeftInWord == 64) { -- consumedMask = ~0L; -- } else { -- consumedMask = ((1L << bitsLeftInWord) - 1L); -- } -- -- // Fix how many bits will be written in this cycle. Choose the -- // smaller of the remaining bits in the word or byte. -- final int numberOfBitsToWrite = Math.min(bitsLeftInByte, bitsLeftInWord); -- final int bitsInByteRemainingAfterWrite = (bitsLeftInByte - numberOfBitsToWrite); -- -- // In general, we write the highest bits of the word first, so we -- // strip the highest bits that were consumed in previous cycles. -- final long remainingBitsOfWordToWrite = (word & consumedMask); -- -- final long bitsThatTheByteCanAccept; -- // If there is more left in the word than can be written to this -- // byte, shift off the bits that can't be written off the bottom. -- if(bitsLeftInWord > numberOfBitsToWrite) { -- bitsThatTheByteCanAccept = (remainingBitsOfWordToWrite >>> (bitsLeftInWord - bitsLeftInByte)); -- } else { -- // If the byte can accept all remaining bits, there is no need -- // to shift off the bits that won't be written in this cycle. -- bitsThatTheByteCanAccept = remainingBitsOfWordToWrite; -- } -- -- // Align the word bits to write up against the byte bits that have -- // already been written. This shift may do nothing if the remainder -- // of the byte is being consumed in this cycle. -- final long alignedBits = (bitsThatTheByteCanAccept << bitsInByteRemainingAfterWrite); -- -- // Update the byte with the alignedBits. -- bytes[byteIndex] |= (byte)alignedBits; -- -- // Update state with bit count written. -- bitsLeftInWord -= numberOfBitsToWrite; -- bitsLeftInByte = bitsInByteRemainingAfterWrite; -- } -- -- wordsWritten ++; -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IWordSerializer#getBytes() -- * @throws RuntimeException if the number of words written is fewer than the -- * wordCount parameter in the constructor. -- */ -- @Override -- public byte[] getBytes() { -- if(wordsWritten < wordCount) { -- throw new RuntimeException("Not all words have been written! (" + wordsWritten + "/" + wordCount + ")"); -- } -- -- return bytes; -- } --} -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,71 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A collection of bit utilities. -- */ --class BitUtil { -- /** -- * The set of least-significant bits for a given byte. -1 -- * is used if no bits are set (so as to not be confused with "index of zero" -- * meaning that the least significant bit is the 0th (1st) bit). -- * -- * @see #leastSignificantBit(long) -- */ -- private static final int[] LEAST_SIGNIFICANT_BIT = { -- -1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, -- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -- }; -- -- /** -- * Computes the least-significant bit of the specified long -- * that is set to 1. Zero-indexed. -- * -- * @param value the long whose least-significant bit is desired. -- * @return the least-significant bit of the specified long. -- * -1 is returned if there are no bits set. -- */ -- // REF: http://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set -- // REF: http://www-graphics.stanford.edu/~seander/bithacks.html -- public static int leastSignificantBit(final long value) { -- if(value == 0L) return -1/*by contract*/; -- if((value & 0xFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 0) & 0xFF)] + 0; -- if((value & 0xFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 8) & 0xFF)] + 8; -- if((value & 0xFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 16) & 0xFF)] + 16; -- if((value & 0xFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 24) & 0xFF)] + 24; -- if((value & 0xFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 32) & 0xFF)] + 32; -- if((value & 0xFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 40) & 0xFF)] + 40; -- if((value & 0xFFFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 48) & 0xFF)] + 48; -- return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 56) & 0xFFL)] + 56; -- } --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,259 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A vector (array) of bits that is accessed in units ("registers") of width -- * bits which are stored as 64bit "words" (longs). In this context -- * a register is at most 64bits. -- */ --class BitVector implements Cloneable { -- // NOTE: in this context, a word is 64bits -- -- // rather than doing division to determine how a bit index fits into 64bit -- // words (i.e. longs), bit shifting is used -- private static final int LOG2_BITS_PER_WORD = 6/*=>64bits*/; -- private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD; -- private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1; -- -- // ditto from above but for bytes (for output) -- private static final int LOG2_BITS_PER_BYTE = 3/*=>8bits*/; -- public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE; -- -- // ======================================================================== -- public static final int BYTES_PER_WORD = 8/*8 bytes in a long*/; -- -- // ************************************************************************ -- // 64bit words -- private final long[] words; -- public final long[] words() { return words; } -- public final int wordCount() { return words.length; } -- public final int byteCount() { return wordCount() * BYTES_PER_WORD; } -- -- // the width of a register in bits (this cannot be more than 64 (the word size)) -- private final int registerWidth; -- public final int registerWidth() { return registerWidth; } -- -- private final long count; -- -- // ------------------------------------------------------------------------ -- private final long registerMask; -- -- // ======================================================================== -- /** -- * @param width the width of each register. This cannot be negative or -- * zero or greater than 63 (the signed word size). -- * @param count the number of registers. This cannot be negative or zero -- */ -- public BitVector(final int width, final long count) { -- // ceil((width * count)/BITS_PER_WORD) -- this.words = new long[(int)(((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)]; -- this.registerWidth = width; -- this.count = count; -- -- this.registerMask = (1L << width) - 1; -- } -- -- // ======================================================================== -- /** -- * @param registerIndex the index of the register whose value is to be -- * retrieved. This cannot be negative. -- * @return the value at the specified register index -- * @see #setRegister(long, long) -- * @see #setMaxRegister(long, long) -- */ -- // NOTE: if this changes then setMaxRegister() must change -- public long getRegister(final long registerIndex) { -- final long bitIndex = registerIndex * registerWidth; -- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; -- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; -- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; -- -- if(firstWordIndex == secondWordIndex) -- return ((words[firstWordIndex] >>> bitRemainder) & registerMask); -- /* else -- register spans words */ -- return (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/ -- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask; -- } -- -- /** -- * @param registerIndex the index of the register whose value is to be set. -- * This cannot be negative -- * @param value the value to set in the register -- * @see #getRegister(long) -- * @see #setMaxRegister(long, long) -- */ -- // NOTE: if this changes then setMaxRegister() must change -- public void setRegister(final long registerIndex, final long value) { -- final long bitIndex = registerIndex * registerWidth; -- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; -- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; -- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; -- -- final long words[] = this.words/*for convenience/performance*/; -- if(firstWordIndex == secondWordIndex) { -- // clear then set -- words[firstWordIndex] &= ~(registerMask << bitRemainder); -- words[firstWordIndex] |= (value << bitRemainder); -- } else {/*register spans words*/ -- // clear then set each partial word -- words[firstWordIndex] &= (1L << bitRemainder) - 1; -- words[firstWordIndex] |= (value << bitRemainder); -- -- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder)); -- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder)); -- } -- } -- -- // ------------------------------------------------------------------------ -- /** -- * @return a LongIterator for iterating starting at the register -- * with index zero. This will never be null. -- */ -- public LongIterator registerIterator() { -- return new LongIterator() { -- final int registerWidth = BitVector.this.registerWidth; -- final long[] words = BitVector.this.words; -- final long registerMask = BitVector.this.registerMask; -- -- // register setup -- long registerIndex = 0; -- int wordIndex = 0; -- int remainingWordBits = BITS_PER_WORD; -- long word = words[wordIndex]; -- -- @Override public long next() { -- long register; -- if(remainingWordBits >= registerWidth) { -- register = word & registerMask; -- -- // shift to the next register -- word >>>= registerWidth; -- remainingWordBits -= registerWidth; -- } else { /*insufficient bits remaining in current word*/ -- wordIndex++/*move to the next word*/; -- -- register = (word | (words[wordIndex] << remainingWordBits)) & registerMask; -- -- // shift to the next partial register (word) -- word = words[wordIndex] >>> (registerWidth - remainingWordBits); -- remainingWordBits += BITS_PER_WORD - registerWidth; -- } -- registerIndex++; -- return register; -- } -- -- @Override public boolean hasNext() { -- return registerIndex < count; -- } -- }; -- } -- -- // ------------------------------------------------------------------------ -- // composite accessors -- /** -- * Sets the value of the specified index register if and only if the specified -- * value is greater than the current value in the register. This is equivalent -- * to but much more performant than:

-- * -- *

vector.setRegister(index, Math.max(vector.getRegister(index), value));
-- * -- * @param registerIndex the index of the register whose value is to be set. -- * This cannot be negative -- * @param value the value to set in the register if and only if this value -- * is greater than the current value in the register -- * @return true if and only if the specified value is greater -- * than or equal to the current register value. false -- * otherwise. -- * @see #getRegister(long) -- * @see #setRegister(long, long) -- * @see java.lang.Math#max(long, long) -- */ -- // NOTE: if this changes then setRegister() must change -- public boolean setMaxRegister(final long registerIndex, final long value) { -- final long bitIndex = registerIndex * registerWidth; -- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/; -- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/; -- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/; -- -- // NOTE: matches getRegister() -- final long registerValue; -- final long words[] = this.words/*for convenience/performance*/; -- if(firstWordIndex == secondWordIndex) -- registerValue = ((words[firstWordIndex] >>> bitRemainder) & registerMask); -- else /*register spans words*/ -- registerValue = (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/ -- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask; -- -- // determine which is the larger and update as necessary -- if(value > registerValue) { -- // NOTE: matches setRegister() -- if(firstWordIndex == secondWordIndex) { -- // clear then set -- words[firstWordIndex] &= ~(registerMask << bitRemainder); -- words[firstWordIndex] |= (value << bitRemainder); -- } else {/*register spans words*/ -- // clear then set each partial word -- words[firstWordIndex] &= (1L << bitRemainder) - 1; -- words[firstWordIndex] |= (value << bitRemainder); -- -- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder)); -- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder)); -- } -- } /* else -- the register value is greater (or equal) so nothing needs to be done */ -- -- return (value >= registerValue); -- } -- -- // ======================================================================== -- /** -- * Fills this bit vector with the specified bit value. This can be used to -- * clear the vector by specifying 0. -- * -- * @param value the value to set all bits to (only the lowest bit is used) -- */ -- public void fill(final long value) { -- for(long i=0; inull. -- */ -- public void getRegisterContents(final IWordSerializer serializer) { -- for(final LongIterator iter = registerIterator(); iter.hasNext();) { -- serializer.writeWord(iter.next()); -- } -- } -- -- /** -- * Creates a deep copy of this vector. -- * -- * @see java.lang.Object#clone() -- */ -- @Override -- public BitVector clone() { -- final BitVector copy = new BitVector(registerWidth, count); -- System.arraycopy(words, 0, copy.words, 0, words.length); -- return copy; -- } --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java 2015-07-16 13:14:59.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,1071 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --import java.util.Arrays; -- --import com.carrotsearch.hppc.IntByteOpenHashMap; --import com.carrotsearch.hppc.LongOpenHashSet; --import com.carrotsearch.hppc.cursors.IntByteCursor; --import com.carrotsearch.hppc.cursors.LongCursor; -- --/** -- * A probabilistic set of hashed long elements. Useful for computing -- * the approximate cardinality of a stream of data in very small storage. -- * -- * A modified version of the -- * 'HyperLogLog' data structure and algorithm is used, which combines both -- * probabilistic and non-probabilistic techniques to improve the accuracy and -- * storage requirements of the original algorithm. -- * -- * More specifically, initializing and storing a new {@link HLL} will -- * allocate a sentinel value symbolizing the empty set ({@link HLLType#EMPTY}). -- * After adding the first few values, a sorted list of unique integers is -- * stored in a {@link HLLType#EXPLICIT} hash set. When configured, accuracy can -- * be sacrificed for memory footprint: the values in the sorted list are -- * "promoted" to a "{@link HLLType#SPARSE}" map-based HyperLogLog structure. -- * Finally, when enough registers are set, the map-based HLL will be converted -- * to a bit-packed "{@link HLLType#FULL}" HyperLogLog structure. -- * -- * This data structure is interoperable with the implementations found at: -- * -- * when properly serialized. -- */ --public class HLL implements Cloneable { -- // minimum and maximum values for the log-base-2 of the number of registers -- // in the HLL -- public static final int MINIMUM_LOG2M_PARAM = 4; -- public static final int MAXIMUM_LOG2M_PARAM = 30; -- -- // minimum and maximum values for the register width of the HLL -- public static final int MINIMUM_REGWIDTH_PARAM = 1; -- public static final int MAXIMUM_REGWIDTH_PARAM = 8; -- -- // minimum and maximum values for the 'expthresh' parameter of the -- // constructor that is meant to match the PostgreSQL implementation's -- // constructor and parameter names -- public static final int MINIMUM_EXPTHRESH_PARAM = -1; -- public static final int MAXIMUM_EXPTHRESH_PARAM = 18; -- public static final int MAXIMUM_EXPLICIT_THRESHOLD = (1 << (MAXIMUM_EXPTHRESH_PARAM - 1)/*per storage spec*/); -- -- // ************************************************************************ -- // Storage -- // storage used when #type is EXPLICIT, null otherwise -- LongOpenHashSet explicitStorage; -- // storage used when #type is SPARSE, null otherwise -- IntByteOpenHashMap sparseProbabilisticStorage; -- // storage used when #type is FULL, null otherwise -- BitVector probabilisticStorage; -- -- // current type of this HLL instance, if this changes then so should the -- // storage used (see above) -- private HLLType type; -- -- // ------------------------------------------------------------------------ -- // Characteristic parameters -- // NOTE: These members are named to match the PostgreSQL implementation's -- // parameters. -- // log2(the number of probabilistic HLL registers) -- private final int log2m; -- // the size (width) each register in bits -- private final int regwidth; -- -- // ------------------------------------------------------------------------ -- // Computed constants -- // ........................................................................ -- // EXPLICIT-specific constants -- // flag indicating if the EXPLICIT representation should NOT be used -- private final boolean explicitOff; -- // flag indicating that the promotion threshold from EXPLICIT should be -- // computed automatically -- // NOTE: this only has meaning when 'explicitOff' is false -- private final boolean explicitAuto; -- // threshold (in element count) at which a EXPLICIT HLL is converted to a -- // SPARSE or FULL HLL, always greater than or equal to zero and always a -- // power of two OR simply zero -- // NOTE: this only has meaning when 'explicitOff' is false -- private final int explicitThreshold; -- -- // ........................................................................ -- // SPARSE-specific constants -- // the computed width of the short words -- private final int shortWordLength; -- // flag indicating if the SPARSE representation should not be used -- private final boolean sparseOff; -- // threshold (in register count) at which a SPARSE HLL is converted to a -- // FULL HLL, always greater than zero -- private final int sparseThreshold; -- -- // ........................................................................ -- // Probabilistic algorithm constants -- // the number of registers, will always be a power of 2 -- private final int m; -- // a mask of the log2m bits set to one and the rest to zero -- private final int mBitsMask; -- // a mask as wide as a register (see #fromBytes()) -- private final int valueMask; -- // mask used to ensure that p(w) does not overflow register (see #Constructor() and #addRaw()) -- private final long pwMaxMask; -- // alpha * m^2 (the constant in the "'raw' HyperLogLog estimator") -- private final double alphaMSquared; -- // the cutoff value of the estimator for using the "small" range cardinality -- // correction formula -- private final double smallEstimatorCutoff; -- // the cutoff value of the estimator for using the "large" range cardinality -- // correction formula -- private final double largeEstimatorCutoff; -- -- // ======================================================================== -- /** -- * NOTE: Arguments here are named and structured identically to those in the -- * PostgreSQL implementation, which can be found -- * here. -- * -- * @param log2m log-base-2 of the number of registers used in the HyperLogLog -- * algorithm. Must be at least 4 and at most 30. -- * @param regwidth number of bits used per register in the HyperLogLog -- * algorithm. Must be at least 1 and at most 8. -- * @param expthresh tunes when the {@link HLLType#EXPLICIT} to -- * {@link HLLType#SPARSE} promotion occurs, -- * based on the set's cardinality. Must be at least -1 and at most 18. -- * @param sparseon Flag indicating if the {@link HLLType#SPARSE} -- * representation should be used. -- * @param type the type in the promotion hierarchy which this instance should -- * start at. This cannot be null. -- */ -- public HLL(final int log2m, final int regwidth, final int expthresh, final boolean sparseon, final HLLType type) { -- this.log2m = log2m; -- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) { -- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")"); -- } -- -- this.regwidth = regwidth; -- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) { -- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")"); -- } -- -- this.m = (1 << log2m); -- this.mBitsMask = m - 1; -- this.valueMask = (1 << regwidth) - 1; -- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth); -- this.alphaMSquared = HLLUtil.alphaMSquared(m); -- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m); -- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth); -- -- if(expthresh == -1) { -- this.explicitAuto = true; -- this.explicitOff = false; -- -- // NOTE: This math matches the size calculation in the PostgreSQL impl. -- final long fullRepresentationSize = (this.regwidth * (long)this.m + 7/*round up to next whole byte*/)/Byte.SIZE; -- final int numLongs = (int)(fullRepresentationSize / 8/*integer division to round down*/); -- -- if(numLongs > MAXIMUM_EXPLICIT_THRESHOLD) { -- this.explicitThreshold = MAXIMUM_EXPLICIT_THRESHOLD; -- } else { -- this.explicitThreshold = numLongs; -- } -- } else if(expthresh == 0) { -- this.explicitAuto = false; -- this.explicitOff = true; -- this.explicitThreshold = 0; -- } else if((expthresh > 0) && (expthresh <= MAXIMUM_EXPTHRESH_PARAM)){ -- this.explicitAuto = false; -- this.explicitOff = false; -- this.explicitThreshold = (1 << (expthresh - 1)); -- } else { -- throw new IllegalArgumentException("'expthresh' must be at least " + MINIMUM_EXPTHRESH_PARAM + " and at most " + MAXIMUM_EXPTHRESH_PARAM + " (was: " + expthresh + ")"); -- } -- -- this.shortWordLength = (regwidth + log2m); -- this.sparseOff = !sparseon; -- if(this.sparseOff) { -- this.sparseThreshold = 0; -- } else { -- // TODO improve this cutoff to include the cost overhead of Java -- // members/objects -- final int largestPow2LessThanCutoff = -- (int)NumberUtil.log2((this.m * this.regwidth) / this.shortWordLength); -- this.sparseThreshold = (1 << largestPow2LessThanCutoff); -- } -- -- initializeStorage(type); -- } -- -- /** -- * Construct an empty HLL with the given {@code log2m} and {@code regwidth}. -- * -- * This is equivalent to calling HLL(log2m, regwidth, -1, true, HLLType.EMPTY). -- * -- * @param log2m log-base-2 of the number of registers used in the HyperLogLog -- * algorithm. Must be at least 4 and at most 30. -- * @param regwidth number of bits used per register in the HyperLogLog -- * algorithm. Must be at least 1 and at most 8. -- * -- * @see #HLL(int, int, int, boolean, HLLType) -- */ -- public HLL(final int log2m, final int regwidth) { -- this(log2m, regwidth, -1, true, HLLType.EMPTY); -- } -- -- // ------------------------------------------------------------------------- -- /** -- * Convenience constructor for testing. Assumes that both {@link HLLType#EXPLICIT} -- * and {@link HLLType#SPARSE} representations should be enabled. -- * -- * @param log2m log-base-2 of the number of registers used in the HyperLogLog -- * algorithm. Must be at least 4 and at most 30. -- * @param regwidth number of bits used per register in the HyperLogLog -- * algorithm. Must be at least 1 and at most 8. -- * @param explicitThreshold cardinality threshold at which the {@link HLLType#EXPLICIT} -- * representation should be promoted to {@link HLLType#SPARSE}. -- * This must be greater than zero and less than or equal to {@value #MAXIMUM_EXPLICIT_THRESHOLD}. -- * @param sparseThreshold register count threshold at which the {@link HLLType#SPARSE} -- * representation should be promoted to {@link HLLType#FULL}. -- * This must be greater than zero. -- * @param type the type in the promotion hierarchy which this instance should -- * start at. This cannot be null. -- */ -- /*package, for testing*/ HLL(final int log2m, final int regwidth, final int explicitThreshold, final int sparseThreshold, final HLLType type) { -- this.log2m = log2m; -- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) { -- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")"); -- } -- -- this.regwidth = regwidth; -- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) { -- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")"); -- } -- -- this.m = (1 << log2m); -- this.mBitsMask = m - 1; -- this.valueMask = (1 << regwidth) - 1; -- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth); -- this.alphaMSquared = HLLUtil.alphaMSquared(m); -- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m); -- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth); -- -- this.explicitAuto = false; -- this.explicitOff = false; -- this.explicitThreshold = explicitThreshold; -- if((explicitThreshold < 1) || (explicitThreshold > MAXIMUM_EXPLICIT_THRESHOLD)) { -- throw new IllegalArgumentException("'explicitThreshold' must be at least 1 and at most " + MAXIMUM_EXPLICIT_THRESHOLD + " (was: " + explicitThreshold + ")"); -- } -- -- this.shortWordLength = (regwidth + log2m); -- this.sparseOff = false; -- this.sparseThreshold = sparseThreshold; -- -- initializeStorage(type); -- } -- -- /** -- * @return the type in the promotion hierarchy of this instance. This will -- * never be null. -- */ -- public HLLType getType() { return type; } -- -- // ======================================================================== -- // Add -- /** -- * Adds rawValue directly to the HLL. -- * -- * @param rawValue the value to be added. It is very important that this -- * value already be hashed with a strong (but not -- * necessarily cryptographic) hash function. For instance, the -- * Murmur3 implementation in -- * -- * Google's Guava library is an excellent hash function for this -- * purpose and, for seeds greater than zero, matches the output -- * of the hash provided in the PostgreSQL implementation. -- */ -- public void addRaw(final long rawValue) { -- switch(type) { -- case EMPTY: { -- // NOTE: EMPTY type is always promoted on #addRaw() -- if(explicitThreshold > 0) { -- initializeStorage(HLLType.EXPLICIT); -- explicitStorage.add(rawValue); -- } else if(!sparseOff) { -- initializeStorage(HLLType.SPARSE); -- addRawSparseProbabilistic(rawValue); -- } else { -- initializeStorage(HLLType.FULL); -- addRawProbabilistic(rawValue); -- } -- return; -- } -- case EXPLICIT: { -- explicitStorage.add(rawValue); -- -- // promotion, if necessary -- if(explicitStorage.size() > explicitThreshold) { -- if(!sparseOff) { -- initializeStorage(HLLType.SPARSE); -- for (LongCursor c : explicitStorage) { -- addRawSparseProbabilistic(c.value); -- } -- } else { -- initializeStorage(HLLType.FULL); -- for (LongCursor c : explicitStorage) { -- addRawProbabilistic(c.value); -- } -- } -- explicitStorage = null; -- } -- return; -- } -- case SPARSE: { -- addRawSparseProbabilistic(rawValue); -- -- // promotion, if necessary -- if(sparseProbabilisticStorage.size() > sparseThreshold) { -- initializeStorage(HLLType.FULL); -- for(IntByteCursor c : sparseProbabilisticStorage) { -- final int registerIndex = c.key; -- final byte registerValue = c.value; -- probabilisticStorage.setMaxRegister(registerIndex, registerValue); -- } -- sparseProbabilisticStorage = null; -- } -- return; -- } -- case FULL: -- addRawProbabilistic(rawValue); -- return; -- default: -- throw new RuntimeException("Unsupported HLL type " + type); -- } -- } -- -- // ------------------------------------------------------------------------ -- // #addRaw(..) helpers -- /** -- * Adds the raw value to the {@link #sparseProbabilisticStorage}. -- * {@link #type} must be {@link HLLType#SPARSE}. -- * -- * @param rawValue the raw value to add to the sparse storage. -- */ -- private void addRawSparseProbabilistic(final long rawValue) { -- // p(w): position of the least significant set bit (one-indexed) -- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value) -- // -- // By construction of pwMaxMask (see #Constructor()), -- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2, -- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2, -- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1. -- final long substreamValue = (rawValue >>> log2m); -- final byte p_w; -- -- if(substreamValue == 0L) { -- // The paper does not cover p(0x0), so the special value 0 is used. -- // 0 is the original initialization value of the registers, so by -- // doing this the multiset simply ignores it. This is acceptable -- // because the probability is 1/(2^(2^registerSizeInBits)). -- p_w = 0; -- } else { -- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask)); -- } -- -- // Short-circuit if the register is being set to zero, since algorithmically -- // this corresponds to an "unset" register, and "unset" registers aren't -- // stored to save memory. (The very reason this sparse implementation -- // exists.) If a register is set to zero it will break the #algorithmCardinality -- // code. -- if(p_w == 0) { -- return; -- } -- -- // NOTE: no +1 as in paper since 0-based indexing -- final int j = (int)(rawValue & mBitsMask); -- -- final byte currentValue; -- if (sparseProbabilisticStorage.containsKey(j)) { -- currentValue = sparseProbabilisticStorage.lget(); -- } else { -- currentValue = 0; -- } -- -- if(p_w > currentValue) { -- sparseProbabilisticStorage.put(j, p_w); -- } -- } -- -- /** -- * Adds the raw value to the {@link #probabilisticStorage}. -- * {@link #type} must be {@link HLLType#FULL}. -- * -- * @param rawValue the raw value to add to the full probabilistic storage. -- */ -- private void addRawProbabilistic(final long rawValue) { -- // p(w): position of the least significant set bit (one-indexed) -- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value) -- // -- // By construction of pwMaxMask (see #Constructor()), -- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2, -- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2, -- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1. -- final long substreamValue = (rawValue >>> log2m); -- final byte p_w; -- -- if (substreamValue == 0L) { -- // The paper does not cover p(0x0), so the special value 0 is used. -- // 0 is the original initialization value of the registers, so by -- // doing this the multiset simply ignores it. This is acceptable -- // because the probability is 1/(2^(2^registerSizeInBits)). -- p_w = 0; -- } else { -- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask)); -- } -- -- // Short-circuit if the register is being set to zero, since algorithmically -- // this corresponds to an "unset" register, and "unset" registers aren't -- // stored to save memory. (The very reason this sparse implementation -- // exists.) If a register is set to zero it will break the #algorithmCardinality -- // code. -- if(p_w == 0) { -- return; -- } -- -- // NOTE: no +1 as in paper since 0-based indexing -- final int j = (int)(rawValue & mBitsMask); -- -- probabilisticStorage.setMaxRegister(j, p_w); -- } -- -- // ------------------------------------------------------------------------ -- // Storage helper -- /** -- * Initializes storage for the specified {@link HLLType} and changes the -- * instance's {@link #type}. -- * -- * @param type the {@link HLLType} to initialize storage for. This cannot be -- * null and must be an instantiable type. -- */ -- private void initializeStorage(final HLLType type) { -- this.type = type; -- switch(type) { -- case EMPTY: -- // nothing to be done -- break; -- case EXPLICIT: -- this.explicitStorage = new LongOpenHashSet(); -- break; -- case SPARSE: -- this.sparseProbabilisticStorage = new IntByteOpenHashMap(); -- break; -- case FULL: -- this.probabilisticStorage = new BitVector(regwidth, m); -- break; -- default: -- throw new RuntimeException("Unsupported HLL type " + type); -- } -- } -- -- // ======================================================================== -- // Cardinality -- /** -- * Computes the cardinality of the HLL. -- * -- * @return the cardinality of HLL. This will never be negative. -- */ -- public long cardinality() { -- switch(type) { -- case EMPTY: -- return 0/*by definition*/; -- case EXPLICIT: -- return explicitStorage.size(); -- case SPARSE: -- return (long)Math.ceil(sparseProbabilisticAlgorithmCardinality()); -- case FULL: -- return (long)Math.ceil(fullProbabilisticAlgorithmCardinality()); -- default: -- throw new RuntimeException("Unsupported HLL type " + type); -- } -- } -- -- // ------------------------------------------------------------------------ -- // Cardinality helpers -- /** -- * Computes the exact cardinality value returned by the HLL algorithm when -- * represented as a {@link HLLType#SPARSE} HLL. Kept -- * separate from {@link #cardinality()} for testing purposes. {@link #type} -- * must be {@link HLLType#SPARSE}. -- * -- * @return the exact, unrounded cardinality given by the HLL algorithm -- */ -- /*package, for testing*/ double sparseProbabilisticAlgorithmCardinality() { -- final int m = this.m/*for performance*/; -- -- // compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the -- // 'j'th register value -- double sum = 0; -- int numberOfZeroes = 0/*"V" in the paper*/; -- for(int j=0; jclear does NOT handle -- * transitions between {@link HLLType}s - a probabilistic type will remain -- * probabilistic after being cleared. -- */ -- public void clear() { -- switch(type) { -- case EMPTY: -- return /*do nothing*/; -- case EXPLICIT: -- explicitStorage.clear(); -- return; -- case SPARSE: -- sparseProbabilisticStorage.clear(); -- return; -- case FULL: -- probabilisticStorage.fill(0); -- return; -- default: -- throw new RuntimeException("Unsupported HLL type " + type); -- } -- } -- -- // ======================================================================== -- // Union -- /** -- * Computes the union of HLLs and stores the result in this instance. -- * -- * @param other the other {@link HLL} instance to union into this one. This -- * cannot be null. -- */ -- public void union(final HLL other) { -- // TODO: verify HLLs are compatible -- final HLLType otherType = other.getType(); -- -- if(type.equals(otherType)) { -- homogeneousUnion(other); -- return; -- } else { -- heterogenousUnion(other); -- return; -- } -- } -- -- // ------------------------------------------------------------------------ -- // Union helpers -- /** -- * Computes the union of two HLLs, of different types, and stores the -- * result in this instance. -- * -- * @param other the other {@link HLL} instance to union into this one. This -- * cannot be null. -- */ -- /*package, for testing*/ void heterogenousUnion(final HLL other) { -- /* -- * The logic here is divided into two sections: unions with an EMPTY -- * HLL, and unions between EXPLICIT/SPARSE/FULL -- * HLL. -- * -- * Between those two sections, all possible heterogeneous unions are -- * covered. Should another type be added to HLLType whose unions -- * are not easily reduced (say, as EMPTY's are below) this may be more -- * easily implemented as Strategies. However, that is unnecessary as it -- * stands. -- */ -- -- // .................................................................... -- // Union with an EMPTY -- if(HLLType.EMPTY.equals(type)) { -- // NOTE: The union of empty with non-empty HLL is just a -- // clone of the non-empty. -- -- switch(other.getType()) { -- case EXPLICIT: { -- // src: EXPLICIT -- // dest: EMPTY -- -- if(other.explicitStorage.size() <= explicitThreshold) { -- type = HLLType.EXPLICIT; -- explicitStorage = other.explicitStorage.clone(); -- } else { -- if(!sparseOff) { -- initializeStorage(HLLType.SPARSE); -- } else { -- initializeStorage(HLLType.FULL); -- } -- for(LongCursor c : other.explicitStorage) { -- addRaw(c.value); -- } -- } -- return; -- } -- case SPARSE: { -- // src: SPARSE -- // dest: EMPTY -- -- if(!sparseOff) { -- type = HLLType.SPARSE; -- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone(); -- } else { -- initializeStorage(HLLType.FULL); -- for(IntByteCursor c : other.sparseProbabilisticStorage) { -- final int registerIndex = c.key; -- final byte registerValue = c.value; -- probabilisticStorage.setMaxRegister(registerIndex, registerValue); -- } -- } -- return; -- } -- default/*case FULL*/: { -- // src: FULL -- // dest: EMPTY -- -- type = HLLType.FULL; -- probabilisticStorage = other.probabilisticStorage.clone(); -- return; -- } -- } -- } else if (HLLType.EMPTY.equals(other.getType())) { -- // source is empty, so just return destination since it is unchanged -- return; -- } /* else -- both of the sets are not empty */ -- -- // .................................................................... -- // NOTE: Since EMPTY is handled above, the HLLs are non-EMPTY below -- switch(type) { -- case EXPLICIT: { -- // src: FULL/SPARSE -- // dest: EXPLICIT -- // "Storing into destination" cannot be done (since destination -- // is by definition of smaller capacity than source), so a clone -- // of source is made and values from destination are inserted -- // into that. -- -- // Determine source and destination storage. -- // NOTE: destination storage may change through promotion if -- // source is SPARSE. -- if(HLLType.SPARSE.equals(other.getType())) { -- if(!sparseOff) { -- type = HLLType.SPARSE; -- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone(); -- } else { -- initializeStorage(HLLType.FULL); -- for(IntByteCursor c : other.sparseProbabilisticStorage) { -- final int registerIndex = c.key; -- final byte registerValue = c.value; -- probabilisticStorage.setMaxRegister(registerIndex, registerValue); -- } -- } -- } else /*source is HLLType.FULL*/ { -- type = HLLType.FULL; -- probabilisticStorage = other.probabilisticStorage.clone(); -- } -- for(LongCursor c : explicitStorage) { -- addRaw(c.value); -- } -- explicitStorage = null; -- return; -- } -- case SPARSE: { -- if(HLLType.EXPLICIT.equals(other.getType())) { -- // src: EXPLICIT -- // dest: SPARSE -- // Add the raw values from the source to the destination. -- -- for(LongCursor c : other.explicitStorage) { -- addRaw(c.value); -- } -- // NOTE: addRaw will handle promotion cleanup -- } else /*source is HLLType.FULL*/ { -- // src: FULL -- // dest: SPARSE -- // "Storing into destination" cannot be done (since destination -- // is by definition of smaller capacity than source), so a -- // clone of source is made and registers from the destination -- // are merged into the clone. -- -- type = HLLType.FULL; -- probabilisticStorage = other.probabilisticStorage.clone(); -- for(IntByteCursor c : sparseProbabilisticStorage) { -- final int registerIndex = c.key; -- final byte registerValue = c.value; -- probabilisticStorage.setMaxRegister(registerIndex, registerValue); -- } -- sparseProbabilisticStorage = null; -- } -- return; -- } -- default/*destination is HLLType.FULL*/: { -- if(HLLType.EXPLICIT.equals(other.getType())) { -- // src: EXPLICIT -- // dest: FULL -- // Add the raw values from the source to the destination. -- // Promotion is not possible, so don't bother checking. -- -- for(LongCursor c : other.explicitStorage) { -- addRaw(c.value); -- } -- } else /*source is HLLType.SPARSE*/ { -- // src: SPARSE -- // dest: FULL -- // Merge the registers from the source into the destination. -- // Promotion is not possible, so don't bother checking. -- -- for(IntByteCursor c : other.sparseProbabilisticStorage) { -- final int registerIndex = c.key; -- final byte registerValue = c.value; -- probabilisticStorage.setMaxRegister(registerIndex, registerValue); -- } -- } -- } -- } -- } -- -- /** -- * Computes the union of two HLLs of the same type, and stores the -- * result in this instance. -- * -- * @param other the other {@link HLL} instance to union into this one. This -- * cannot be null. -- */ -- private void homogeneousUnion(final HLL other) { -- switch(type) { -- case EMPTY: -- // union of empty and empty is empty -- return; -- case EXPLICIT: -- for(LongCursor c : other.explicitStorage) { -- addRaw(c.value); -- } -- // NOTE: #addRaw() will handle promotion, if necessary -- return; -- case SPARSE: -- for(IntByteCursor c : other.sparseProbabilisticStorage) { -- final int registerIndex = c.key; -- final byte registerValue = c.value; -- final byte currentRegisterValue = sparseProbabilisticStorage.get(registerIndex); -- if(registerValue > currentRegisterValue) { -- sparseProbabilisticStorage.put(registerIndex, registerValue); -- } -- } -- -- // promotion, if necessary -- if(sparseProbabilisticStorage.size() > sparseThreshold) { -- initializeStorage(HLLType.FULL); -- for(IntByteCursor c : sparseProbabilisticStorage) { -- final int registerIndex = c.key; -- final byte registerValue = c.value; -- probabilisticStorage.setMaxRegister(registerIndex, registerValue); -- } -- sparseProbabilisticStorage = null; -- } -- return; -- case FULL: -- for(int i=0; inull or empty. -- */ -- public byte[] toBytes() { -- return toBytes(SerializationUtil.DEFAULT_SCHEMA_VERSION); -- } -- -- /** -- * Serializes the HLL to an array of bytes in correspondence with the format -- * of the specified schema version. -- * -- * @param schemaVersion the schema version dictating the serialization format -- * @return the array of bytes representing the HLL. This will never be -- * null or empty. -- */ -- public byte[] toBytes(final ISchemaVersion schemaVersion) { -- final byte[] bytes; -- switch(type) { -- case EMPTY: -- bytes = new byte[schemaVersion.paddingBytes(type)]; -- break; -- case EXPLICIT: { -- final IWordSerializer serializer = -- schemaVersion.getSerializer(type, Long.SIZE, explicitStorage.size()); -- -- final long[] values = explicitStorage.toArray(); -- Arrays.sort(values); -- for(final long value : values) { -- serializer.writeWord(value); -- } -- -- bytes = serializer.getBytes(); -- break; -- } -- case SPARSE: { -- final IWordSerializer serializer = -- schemaVersion.getSerializer(type, shortWordLength, sparseProbabilisticStorage.size()); -- -- final int[] indices = sparseProbabilisticStorage.keys().toArray(); -- Arrays.sort(indices); -- for(final int registerIndex : indices) { -- assert sparseProbabilisticStorage.containsKey(registerIndex); -- final long registerValue = sparseProbabilisticStorage.get(registerIndex); -- // pack index and value into "short word" -- final long shortWord = ((registerIndex << regwidth) | registerValue); -- serializer.writeWord(shortWord); -- } -- -- bytes = serializer.getBytes(); -- break; -- } -- case FULL: { -- final IWordSerializer serializer = schemaVersion.getSerializer(type, regwidth, m); -- probabilisticStorage.getRegisterContents(serializer); -- -- bytes = serializer.getBytes(); -- break; -- } -- default: -- throw new RuntimeException("Unsupported HLL type " + type); -- } -- -- final IHLLMetadata metadata = new HLLMetadata(schemaVersion.schemaVersionNumber(), -- type, -- log2m, -- regwidth, -- (int)NumberUtil.log2(explicitThreshold), -- explicitOff, -- explicitAuto, -- !sparseOff); -- schemaVersion.writeMetadata(bytes, metadata); -- -- return bytes; -- } -- -- /** -- * Deserializes the HLL (in {@link #toBytes(ISchemaVersion)} format) serialized -- * into bytes. -- * -- * @param bytes the serialized bytes of new HLL -- * @return the deserialized HLL. This will never be null. -- * -- * @see #toBytes(ISchemaVersion) -- */ -- public static HLL fromBytes(final byte[] bytes) { -- final ISchemaVersion schemaVersion = SerializationUtil.getSchemaVersion(bytes); -- final IHLLMetadata metadata = schemaVersion.readMetadata(bytes); -- -- final HLLType type = metadata.HLLType(); -- final int regwidth = metadata.registerWidth(); -- final int log2m = metadata.registerCountLog2(); -- final boolean sparseon = metadata.sparseEnabled(); -- -- final int expthresh; -- if(metadata.explicitAuto()) { -- expthresh = -1; -- } else if(metadata.explicitOff()) { -- expthresh = 0; -- } else { -- // NOTE: take into account that the postgres-compatible constructor -- // subtracts one before taking a power of two. -- expthresh = metadata.log2ExplicitCutoff() + 1; -- } -- -- final HLL hll = new HLL(log2m, regwidth, expthresh, sparseon, type); -- -- // Short-circuit on empty, which needs no other deserialization. -- if(HLLType.EMPTY.equals(type)) { -- return hll; -- } -- -- final int wordLength; -- switch(type) { -- case EXPLICIT: -- wordLength = Long.SIZE; -- break; -- case SPARSE: -- wordLength = hll.shortWordLength; -- break; -- case FULL: -- wordLength = hll.regwidth; -- break; -- default: -- throw new RuntimeException("Unsupported HLL type " + type); -- } -- -- final IWordDeserializer deserializer = -- schemaVersion.getDeserializer(type, wordLength, bytes); -- switch(type) { -- case EXPLICIT: -- // NOTE: This should not exceed expthresh and this will always -- // be exactly the number of words that were encoded, -- // because the word length is at least a byte wide. -- // SEE: IWordDeserializer#totalWordCount() -- for(int i=0; i>> hll.regwidth), registerValue); -- } -- } -- break; -- case FULL: -- // NOTE: Iteration is done using m (register count) and NOT -- // deserializer#totalWordCount() because regwidth may be -- // less than 8 and as such the padding on the 'last' byte -- // may be larger than regwidth, causing an extra register -- // to be read. -- // SEE: IWordDeserializer#totalWordCount() -- for(long i=0; inull. -- * @param registerCountLog2 the log-base-2 register count parameter for -- * probabilistic HLLs. This must be greater than or equal to zero. -- * @param registerWidth the register width parameter for probabilistic -- * HLLs. This must be greater than or equal to zero. -- * @param log2ExplicitCutoff the log-base-2 of the explicit cardinality cutoff, -- * if it is explicitly defined. (If explicitOff or -- * explicitAuto is true then this has no -- * meaning.) -- * @param explicitOff the flag for 'explicit off'-mode, where the -- * {@link HLLType#EXPLICIT} representation is not used. Both this and -- * explicitAuto cannot be true at the same -- * time. -- * @param explicitAuto the flag for 'explicit auto'-mode, where the -- * {@link HLLType#EXPLICIT} representation's promotion cutoff is -- * determined based on in-memory size automatically. Both this and -- * explicitOff cannot be true at the same -- * time. -- * @param sparseEnabled the flag for 'sparse-enabled'-mode, where the -- * {@link HLLType#SPARSE} representation is used. -- */ -- public HLLMetadata(final int schemaVersion, -- final HLLType type, -- final int registerCountLog2, -- final int registerWidth, -- final int log2ExplicitCutoff, -- final boolean explicitOff, -- final boolean explicitAuto, -- final boolean sparseEnabled) { -- this.schemaVersion = schemaVersion; -- this.type = type; -- this.registerCountLog2 = registerCountLog2; -- this.registerWidth = registerWidth; -- this.log2ExplicitCutoff = log2ExplicitCutoff; -- this.explicitOff = explicitOff; -- this.explicitAuto = explicitAuto; -- this.sparseEnabled = sparseEnabled; -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#schemaVersion() -- */ -- @Override -- public int schemaVersion() { return schemaVersion; } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#HLLType() -- */ -- @Override -- public HLLType HLLType() { return type; } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#registerCountLog2() -- */ -- @Override -- public int registerCountLog2() { return registerCountLog2; } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#registerWidth() -- */ -- @Override -- public int registerWidth() { return registerWidth; } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff() -- */ -- @Override -- public int log2ExplicitCutoff() { return log2ExplicitCutoff; } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#explicitOff() -- */ -- @Override -- public boolean explicitOff() { -- return explicitOff; -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#explicitAuto() -- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff() -- */ -- @Override -- public boolean explicitAuto() { -- return explicitAuto; -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.IHLLMetadata#sparseEnabled() -- */ -- @Override -- public boolean sparseEnabled() { return sparseEnabled; } -- -- /* (non-Javadoc) -- * @see java.lang.Object#toString() -- */ -- @Override -- public String toString() { -- return ""; -- } --} -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,29 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * The types of algorithm/data structure that {@link HLL} can utilize. For more -- * information, see the Javadoc for {@link HLL}. -- */ --public enum HLLType { -- EMPTY, -- EXPLICIT, -- SPARSE, -- FULL; --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,199 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * Static functions for computing constants and parameters used in the HLL -- * algorithm. -- */ --final class HLLUtil { -- /** -- * Precomputed pwMaxMask values indexed by registerSizeInBits. -- * Calculated with this formula: -- *
--     *     int maxRegisterValue = (1 << registerSizeInBits) - 1;
--     *     // Mask with all bits set except for (maxRegisterValue - 1) least significant bits (see #addRaw())
--     *     return ~((1L << (maxRegisterValue - 1)) - 1);
--     * 
-- * -- * @see #pwMaxMask(int) -- */ -- private static final long[] PW_MASK = { -- ~((1L << (((1 << 0) - 1) - 1)) - 1), -- ~((1L << (((1 << 1) - 1) - 1)) - 1), -- ~((1L << (((1 << 2) - 1) - 1)) - 1), -- ~((1L << (((1 << 3) - 1) - 1)) - 1), -- ~((1L << (((1 << 4) - 1) - 1)) - 1), -- ~((1L << (((1 << 5) - 1) - 1)) - 1), -- ~((1L << (((1 << 6) - 1) - 1)) - 1), -- ~((1L << (((1 << 7) - 1) - 1)) - 1), -- ~((1L << (((1 << 8) - 1) - 1)) - 1) -- }; -- -- /** -- * Precomputed twoToL values indexed by a linear combination of -- * regWidth and log2m. -- * -- * The array is one-dimensional and can be accessed by using index -- * (REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m -- * for regWidth and log2m between the specified -- * HLL.{MINIMUM,MAXIMUM}_{REGWIDTH,LOG2M}_PARAM constants. -- * -- * @see #largeEstimator(int, int, double) -- * @see #largeEstimatorCutoff(int, int) -- * @see "Blog post with section on 2^L" -- */ -- private static final double[] TWO_TO_L = new double[(HLL.MAXIMUM_REGWIDTH_PARAM + 1) * (HLL.MAXIMUM_LOG2M_PARAM + 1)]; -- -- /** -- * Spacing constant used to compute offsets into {@link #TWO_TO_L}. -- */ -- private static final int REG_WIDTH_INDEX_MULTIPLIER = HLL.MAXIMUM_LOG2M_PARAM + 1; -- -- static { -- for(int regWidth = HLL.MINIMUM_REGWIDTH_PARAM; regWidth <= HLL.MAXIMUM_REGWIDTH_PARAM; regWidth++) { -- for(int log2m = HLL.MINIMUM_LOG2M_PARAM ; log2m <= HLL.MAXIMUM_LOG2M_PARAM; log2m++) { -- int maxRegisterValue = (1 << regWidth) - 1; -- -- // Since 1 is added to p(w) in the insertion algorithm, only -- // (maxRegisterValue - 1) bits are inspected hence the hash -- // space is one power of two smaller. -- final int pwBits = (maxRegisterValue - 1); -- final int totalBits = (pwBits + log2m); -- final double twoToL = Math.pow(2, totalBits); -- TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m] = twoToL; -- } -- } -- } -- -- // ************************************************************************ -- /** -- * Computes the bit-width of HLL registers necessary to estimate a set of -- * the specified cardinality. -- * -- * @param expectedUniqueElements an upper bound on the number of unique -- * elements that are expected. This must be greater than zero. -- * @return a register size in bits (i.e. log2(log2(n))) -- */ -- public static int registerBitSize(final long expectedUniqueElements) { -- return Math.max(HLL.MINIMUM_REGWIDTH_PARAM, -- (int)Math.ceil(NumberUtil.log2(NumberUtil.log2(expectedUniqueElements)))); -- } -- -- // ======================================================================== -- /** -- * Computes the 'alpha-m-squared' constant used by the HyperLogLog algorithm. -- * -- * @param m this must be a power of two, cannot be less than -- * 16 (24), and cannot be greater than 65536 (216). -- * @return gamma times registerCount squared where gamma is -- * based on the value of registerCount. -- * @throws IllegalArgumentException if registerCount is less -- * than 16. -- */ -- public static double alphaMSquared(final int m) { -- switch(m) { -- case 1/*2^0*/: -- case 2/*2^1*/: -- case 4/*2^2*/: -- case 8/*2^3*/: -- throw new IllegalArgumentException("'m' cannot be less than 16 (" + m + " < 16)."); -- -- case 16/*2^4*/: -- return 0.673 * m * m; -- -- case 32/*2^5*/: -- return 0.697 * m * m; -- -- case 64/*2^6*/: -- return 0.709 * m * m; -- -- default/*>2^6*/: -- return (0.7213 / (1.0 + 1.079 / m)) * m * m; -- } -- } -- -- // ======================================================================== -- /** -- * Computes a mask that prevents overflow of HyperLogLog registers. -- * -- * @param registerSizeInBits the size of the HLL registers, in bits. -- * @return mask a long mask to prevent overflow of the registers -- * @see #registerBitSize(long) -- */ -- public static long pwMaxMask(final int registerSizeInBits) { -- return PW_MASK[registerSizeInBits]; -- } -- -- // ======================================================================== -- /** -- * The cutoff for using the "small range correction" formula, in the -- * HyperLogLog algorithm. -- * -- * @param m the number of registers in the HLL. m in the paper. -- * @return the cutoff for the small range correction. -- * @see #smallEstimator(int, int) -- */ -- public static double smallEstimatorCutoff(final int m) { -- return ((double)m * 5) / 2; -- } -- -- /** -- * The "small range correction" formula from the HyperLogLog algorithm. Only -- * appropriate if both the estimator is smaller than
(5/2) * m
and -- * there are still registers that have the zero value. -- * -- * @param m the number of registers in the HLL. m in the paper. -- * @param numberOfZeroes the number of registers with value zero. V -- * in the paper. -- * @return a corrected cardinality estimate. -- */ -- public static double smallEstimator(final int m, final int numberOfZeroes) { -- return m * Math.log((double)m / numberOfZeroes); -- } -- -- /** -- * The cutoff for using the "large range correction" formula, from the -- * HyperLogLog algorithm, adapted for 64 bit hashes. -- * -- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper. -- * @param registerSizeInBits the size of the HLL registers, in bits. -- * @return the cutoff for the large range correction. -- * @see #largeEstimator(int, int, double) -- * @see "Blog post with section on 64 bit hashes and 'large range correction' cutoff" -- */ -- public static double largeEstimatorCutoff(final int log2m, final int registerSizeInBits) { -- return (TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]) / 30.0; -- } -- -- /** -- * The "large range correction" formula from the HyperLogLog algorithm, adapted -- * for 64 bit hashes. Only appropriate for estimators whose value exceeds -- * the return of {@link #largeEstimatorCutoff(int, int)}. -- * -- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper. -- * @param registerSizeInBits the size of the HLL registers, in bits. -- * @param estimator the original estimator ("E" in the paper). -- * @return a corrected cardinality estimate. -- * @see "Blog post with section on 64 bit hashes and 'large range correction'" -- */ -- public static double largeEstimator(final int log2m, final int registerSizeInBits, final double estimator) { -- final double twoToL = TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]; -- return -1 * twoToL * Math.log(1.0 - (estimator/twoToL)); -- } --} -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,71 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * The metadata and parameters associated with a HLL. -- */ --interface IHLLMetadata { -- /** -- * @return the schema version of the HLL. This will never be null. -- */ -- int schemaVersion(); -- -- /** -- * @return the type of the HLL. This will never be null. -- */ -- HLLType HLLType(); -- -- /** -- * @return the log-base-2 of the register count parameter of the HLL. This -- * will always be greater than or equal to 4 and less than or equal -- * to 31. -- */ -- int registerCountLog2(); -- -- /** -- * @return the register width parameter of the HLL. This will always be -- * greater than or equal to 1 and less than or equal to 8. -- */ -- int registerWidth(); -- -- /** -- * @return the log-base-2 of the explicit cutoff cardinality. This will always -- * be greater than or equal to zero and less than 31, per the specification. -- */ -- int log2ExplicitCutoff(); -- -- /** -- * @return true if the {@link HLLType#EXPLICIT} representation -- * has been disabled. false otherwise. -- */ -- boolean explicitOff(); -- -- /** -- * @return true if the {@link HLLType#EXPLICIT} representation -- * cutoff cardinality is set to be automatically chosen, -- * false otherwise. -- */ -- boolean explicitAuto(); -- -- /** -- * @return true if the {@link HLLType#SPARSE} representation -- * is enabled. -- */ -- boolean sparseEnabled(); --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 2015-07-16 13:22:50.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,85 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A serialization schema for HLLs. Reads and writes HLL metadata to -- * and from byte[] representations. -- */ --interface ISchemaVersion { -- /** -- * The number of metadata bytes required for a serialized HLL of the -- * specified type. -- * -- * @param type the type of the serialized HLL -- * @return the number of padding bytes needed in order to fully accommodate -- * the needed metadata. -- */ -- int paddingBytes(HLLType type); -- -- /** -- * Writes metadata bytes to serialized HLL. -- * -- * @param bytes the padded data bytes of the HLL -- * @param metadata the metadata to write to the padding bytes -- */ -- void writeMetadata(byte[] bytes, IHLLMetadata metadata); -- -- /** -- * Reads the metadata bytes of the serialized HLL. -- * -- * @param bytes the serialized HLL -- * @return the HLL metadata -- */ -- IHLLMetadata readMetadata(byte[] bytes); -- -- /** -- * Builds an HLL serializer that matches this schema version. -- * -- * @param type the HLL type that will be serialized. This cannot be -- * null. -- * @param wordLength the length of the 'words' that comprise the data of the -- * HLL. Words must be at least 5 bits and at most 64 bits long. -- * @param wordCount the number of 'words' in the HLL's data. -- * @return a byte array serializer used to serialize a HLL according -- * to this schema version's specification. -- * @see #paddingBytes(HLLType) -- * @see IWordSerializer -- */ -- IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount); -- -- /** -- * Builds an HLL deserializer that matches this schema version. -- * -- * @param type the HLL type that will be deserialized. This cannot be -- * null. -- * @param wordLength the length of the 'words' that comprise the data of the -- * serialized HLL. Words must be at least 5 bits and at most 64 -- * bits long. -- * @param bytes the serialized HLL to deserialize. This cannot be -- * null. -- * @return a byte array deserializer used to deserialize a HLL serialized -- * according to this schema version's specification. -- */ -- IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes); -- -- /** -- * @return the schema version number. -- */ -- int schemaVersionNumber(); --} -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 2015-07-16 13:14:59.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,41 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * Reads 'words' of a fixed width, in sequence, from a byte array. -- */ --public interface IWordDeserializer { -- /** -- * @return the next word in the sequence. Should not be called more than -- * {@link #totalWordCount()} times. -- */ -- long readWord(); -- -- /** -- * Returns the number of words that could be encoded in the sequence. -- * -- * NOTE: the sequence that was encoded may be shorter than the value this -- * method returns due to padding issues within bytes. This guarantees -- * only an upper bound on the number of times {@link #readWord()} -- * can be called. -- * -- * @return the maximum number of words that could be read from the sequence. -- */ -- int totalWordCount(); --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,39 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * Writes 'words' of fixed width, in sequence, to a byte array. -- */ --interface IWordSerializer { -- -- /** -- * Writes the word to the backing array. -- * -- * @param word the word to write. -- */ -- void writeWord(final long word); -- -- /** -- * Returns the backing array of bytes that contain the serialized -- * words. -- * @return the serialized words as a byte[]. -- */ -- byte[] getBytes(); -- --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,35 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A long-based iterator. This is not is-a {@link java.util.Iterator} -- * to prevent autoboxing between Long and long. -- */ --interface LongIterator { -- /** -- * @return true if and only if there are more elements to -- * iterate over. false otherwise. -- */ -- boolean hasNext(); -- -- /** -- * @return the next long in the collection. -- */ -- long next(); --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,172 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A collection of utilities to work with numbers. -- */ --class NumberUtil { -- // loge(2) (log-base e of 2) -- public static final double LOGE_2 = 0.6931471805599453; -- -- // ************************************************************************ -- /** -- * Computes the log2 (log-base-two) of the specified value. -- * -- * @param value the double for which the log2 is -- * desired. -- * @return the log2 of the specified value -- */ -- public static double log2(final double value) { -- // REF: http://en.wikipedia.org/wiki/Logarithmic_scale (conversion of bases) -- return Math.log(value) / LOGE_2; -- } -- -- // ======================================================================== -- // the hex characters -- private static final char[] HEX = { '0', '1', '2', '3', '4', '5', '6', '7', -- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; -- -- // ------------------------------------------------------------------------ -- /** -- * Converts the specified array of bytes into a string of -- * hex characters (low byte first). -- * -- * @param bytes the array of bytes that are to be converted. -- * This cannot be null though it may be empty. -- * @param offset the offset in bytes at which the bytes will -- * be taken. This cannot be negative and must be less than -- * bytes.length - 1. -- * @param count the number of bytes to be retrieved from the specified array. -- * This cannot be negative. If greater than bytes.length - offset -- * then that value is used. -- * @return a string of at most count characters that represents -- * the specified byte array in hex. This will never be null -- * though it may be empty if bytes is empty or count -- * is zero. -- * @throws IllegalArgumentException if offset is greater than -- * or equal to bytes.length. -- * @see #fromHex(String, int, int) -- */ -- public static String toHex(final byte[] bytes, final int offset, final int count) { -- if(offset >= bytes.length) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + bytes.length + ").")/*by contract*/; -- final int byteCount = Math.min( (bytes.length - offset), count); -- final int upperBound = byteCount + offset; -- -- final char[] chars = new char[byteCount * 2/*two chars per byte*/]; -- int charIndex = 0; -- for(int i=offset; i>> 4) & 0x0F]; -- chars[charIndex++] = HEX[value & 0x0F]; -- } -- -- return new String(chars); -- } -- -- /** -- * Converts the specified array of hex characters into an array of bytes -- * (low byte first). -- * -- * @param string the string of hex characters to be converted into bytes. -- * This cannot be null though it may be blank. -- * @param offset the offset in the string at which the characters will be -- * taken. This cannot be negative and must be less than string.length() - 1. -- * @param count the number of characters to be retrieved from the specified -- * string. This cannot be negative and must be divisible by two -- * (since there are two characters per byte). -- * @return the array of bytes that were converted from the -- * specified string (in the specified range). This will never be -- * null though it may be empty if string -- * is empty or count is zero. -- * @throws IllegalArgumentException if offset is greater than -- * or equal to string.length() or if count -- * is not divisible by two. -- * @see #toHex(byte[], int, int) -- */ -- public static byte[] fromHex(final String string, final int offset, final int count) { -- if(offset >= string.length()) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + string.length() + ").")/*by contract*/; -- if( (count & 0x01) != 0) throw new IllegalArgumentException("Count is not divisible by two (" + count + ").")/*by contract*/; -- final int charCount = Math.min((string.length() - offset), count); -- final int upperBound = offset + charCount; -- -- final byte[] bytes = new byte[charCount >>> 1/*aka /2*/]; -- int byteIndex = 0/*beginning*/; -- for(int i=offset; ibyte. -- * This cannot be a character other than [a-fA-F0-9]. -- * @return the value of the specified character. This will be a value 0 -- * through 15. -- * @throws IllegalArgumentException if the specified character is not in -- * [a-fA-F0-9] -- */ -- private static final int digit(final char character) { -- switch(character) { -- case '0': -- return 0; -- case '1': -- return 1; -- case '2': -- return 2; -- case '3': -- return 3; -- case '4': -- return 4; -- case '5': -- return 5; -- case '6': -- return 6; -- case '7': -- return 7; -- case '8': -- return 8; -- case '9': -- return 9; -- case 'a': -- case 'A': -- return 10; -- case 'b': -- case 'B': -- return 11; -- case 'c': -- case 'C': -- return 12; -- case 'd': -- case 'D': -- return 13; -- case 'e': -- case 'E': -- return 14; -- case 'f': -- case 'F': -- return 15; -- -- default: -- throw new IllegalArgumentException("Character is not in [a-fA-F0-9] ('" + character + "')."); -- } -- } --} -\ Manca newline alla fine del file -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,24 +0,0 @@ --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A fork of Java-HyperLogLog package tweaked -- * not to depend on fastutil and with cleanups to make it lean and clean. -- */ --package org.apache.solr.util.hll; -- -- -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 2015-07-16 13:22:50.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,154 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A concrete {@link ISchemaVersion} representing schema version one. -- */ --class SchemaVersionOne implements ISchemaVersion { -- /** -- * The schema version number for this instance. -- */ -- public static final int SCHEMA_VERSION = 1; -- -- // ------------------------------------------------------------------------ -- // Version-specific ordinals (array position) for each of the HLL types -- private static final HLLType[] TYPE_ORDINALS = new HLLType[] { -- HLLType.EMPTY, -- HLLType.EXPLICIT, -- HLLType.SPARSE, -- HLLType.FULL -- }; -- -- // ------------------------------------------------------------------------ -- // number of header bytes for all HLL types -- private static final int HEADER_BYTE_COUNT = 3; -- -- // sentinel values from the spec for explicit off and auto -- private static final int EXPLICIT_OFF = 0; -- private static final int EXPLICIT_AUTO = 63; -- -- // ************************************************************************ -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.ISchemaVersion#paddingBytes(HLLType) -- */ -- @Override -- public int paddingBytes(final HLLType type) { -- return HEADER_BYTE_COUNT; -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.ISchemaVersion#writeMetadata(byte[], IHLLMetadata) -- */ -- @Override -- public void writeMetadata(final byte[] bytes, final IHLLMetadata metadata) { -- final HLLType type = metadata.HLLType(); -- final int typeOrdinal = getOrdinal(type); -- -- final int explicitCutoffValue; -- if(metadata.explicitOff()) { -- explicitCutoffValue = EXPLICIT_OFF; -- } else if(metadata.explicitAuto()) { -- explicitCutoffValue = EXPLICIT_AUTO; -- } else { -- explicitCutoffValue = metadata.log2ExplicitCutoff() + 1/*per spec*/; -- } -- -- bytes[0] = SerializationUtil.packVersionByte(SCHEMA_VERSION, typeOrdinal); -- bytes[1] = SerializationUtil.packParametersByte(metadata.registerWidth(), metadata.registerCountLog2()); -- bytes[2] = SerializationUtil.packCutoffByte(explicitCutoffValue, metadata.sparseEnabled()); -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.ISchemaVersion#readMetadata(byte[]) -- */ -- @Override -- public IHLLMetadata readMetadata(final byte[] bytes) { -- final byte versionByte = bytes[0]; -- final byte parametersByte = bytes[1]; -- final byte cutoffByte = bytes[2]; -- -- final int typeOrdinal = SerializationUtil.typeOrdinal(versionByte); -- final int explicitCutoffValue = SerializationUtil.explicitCutoff(cutoffByte); -- final boolean explicitOff = (explicitCutoffValue == EXPLICIT_OFF); -- final boolean explicitAuto = (explicitCutoffValue == EXPLICIT_AUTO); -- final int log2ExplicitCutoff = (explicitOff || explicitAuto) ? -1/*sentinel*/ : (explicitCutoffValue - 1/*per spec*/); -- -- return new HLLMetadata(SCHEMA_VERSION, -- getType(typeOrdinal), -- SerializationUtil.registerCountLog2(parametersByte), -- SerializationUtil.registerWidth(parametersByte), -- log2ExplicitCutoff, -- explicitOff, -- explicitAuto, -- SerializationUtil.sparseEnabled(cutoffByte)); -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.ISchemaVersion#getSerializer(HLLType, int, int) -- */ -- @Override -- public IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount) { -- return new BigEndianAscendingWordSerializer(wordLength, wordCount, paddingBytes(type)); -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.ISchemaVersion#getDeserializer(HLLType, int, byte[]) -- */ -- @Override -- public IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes) { -- return new BigEndianAscendingWordDeserializer(wordLength, paddingBytes(type), bytes); -- } -- -- /* (non-Javadoc) -- * @see net.agkn.hll.serialization.ISchemaVersion#schemaVersionNumber() -- */ -- @Override -- public int schemaVersionNumber() { -- return SCHEMA_VERSION; -- } -- -- // ======================================================================== -- // Type/Ordinal lookups -- /** -- * Gets the ordinal for the specified {@link HLLType}. -- * -- * @param type the type whose ordinal is desired -- * @return the ordinal for the specified type, to be used in the version byte. -- * This will always be non-negative. -- */ -- private static int getOrdinal(final HLLType type) { -- for(int i=0; inull. -- */ -- private static HLLType getType(final int ordinal) { -- if((ordinal < 0) || (ordinal >= TYPE_ORDINALS.length)) { -- throw new IllegalArgumentException("Invalid type ordinal '" + ordinal + "'. Only 0-" + (TYPE_ORDINALS.length - 1) + " inclusive allowed."); -- } -- return TYPE_ORDINALS[ordinal]; -- } --} -diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java ---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 2015-07-16 12:32:07.000000000 +0200 -+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 1970-01-01 01:00:00.000000000 +0100 -@@ -1,277 +0,0 @@ --package org.apache.solr.util.hll; -- --/* -- * Licensed to the Apache Software Foundation (ASF) under one or more -- * contributor license agreements. See the NOTICE file distributed with -- * this work for additional information regarding copyright ownership. -- * The ASF licenses this file to You under the Apache License, Version 2.0 -- * (the "License"); you may not use this file except in compliance with -- * the License. You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/** -- * A collection of constants and utilities for serializing and deserializing -- * HLLs. -- * -- * NOTE: 'package' visibility is used for many methods that only need to be -- * used by the {@link ISchemaVersion} implementations. The structure of -- * a serialized HLL's metadata should be opaque to the rest of the -- * library. -- */ --class SerializationUtil { -- /** -- * The number of bits (of the parameters byte) dedicated to encoding the -- * width of the registers. -- */ -- /*package*/ static int REGISTER_WIDTH_BITS = 3; -- -- /** -- * A mask to cap the maximum value of the register width. -- */ -- /*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1; -- -- /** -- * The number of bits (of the parameters byte) dedicated to encoding -- * log2(registerCount). -- */ -- /*package*/ static int LOG2_REGISTER_COUNT_BITS = 5; -- -- /** -- * A mask to cap the maximum value of log2(registerCount). -- */ -- /*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1; -- -- /** -- * The number of bits (of the cutoff byte) dedicated to encoding the -- * log-base-2 of the explicit cutoff or sentinel values for -- * 'explicit-disabled' or 'auto'. -- */ -- /*package*/ static int EXPLICIT_CUTOFF_BITS = 6; -- -- /** -- * A mask to cap the maximum value of the explicit cutoff choice. -- */ -- /*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1; -- -- /** -- * Number of bits in a nibble. -- */ -- private static int NIBBLE_BITS = 4; -- -- /** -- * A mask to cap the maximum value of a nibble. -- */ -- private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1; -- -- // ************************************************************************ -- // Serialization utilities -- -- /** -- * Schema version one (v1). -- */ -- public static ISchemaVersion VERSION_ONE = new SchemaVersionOne(); -- -- /** -- * The default schema version for serializing HLLs. -- */ -- public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE; -- -- /** -- * List of registered schema versions, indexed by their version numbers. If -- * an entry is null, then no such schema version is registered. -- * Similarly, registering a new schema version simply entails assigning an -- * {@link ISchemaVersion} instance to the appropriate index of this array.

-- * -- * By default, only {@link SchemaVersionOne} is registered. Note that version -- * zero will always be reserved for internal (e.g. proprietary, legacy) schema -- * specifications/implementations and will never be assigned to in by this -- * library. -- */ -- public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16]; -- -- static { -- REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE; -- } -- -- /** -- * @param schemaVersionNumber the version number of the {@link ISchemaVersion} -- * desired. This must be a registered schema version number. -- * @return The {@link ISchemaVersion} for the given number. This will never -- * be null. -- */ -- public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) { -- if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) { -- throw new RuntimeException("Invalid schema version number " + schemaVersionNumber); -- } -- final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber]; -- if(schemaVersion == null) { -- throw new RuntimeException("Unknown schema version number " + schemaVersionNumber); -- } -- return schemaVersion; -- } -- -- /** -- * Get the appropriate {@link ISchemaVersion schema version} for the specified -- * serialized HLL. -- * -- * @param bytes the serialized HLL whose schema version is desired. -- * @return the schema version for the specified HLL. This will never -- * be null. -- */ -- public static ISchemaVersion getSchemaVersion(final byte[] bytes) { -- final byte versionByte = bytes[0]; -- final int schemaVersionNumber = schemaVersion(versionByte); -- -- return getSchemaVersion(schemaVersionNumber); -- } -- -- // ************************************************************************ -- // Package-specific shared helpers -- -- /** -- * Generates a byte that encodes the schema version and the type ordinal -- * of the HLL. -- * -- * The top nibble is the schema version and the bottom nibble is the type -- * ordinal. -- * -- * @param schemaVersion the schema version to encode. -- * @param typeOrdinal the type ordinal of the HLL to encode. -- * @return the packed version byte -- */ -- public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) { -- return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal)); -- } -- /** -- * Generates a byte that encodes the log-base-2 of the explicit cutoff -- * or sentinel values for 'explicit-disabled' or 'auto', as well as the -- * boolean indicating whether to use {@link HLLType#SPARSE} -- * in the promotion hierarchy. -- * -- * The top bit is always padding, the second highest bit indicates the -- * 'sparse-enabled' boolean, and the lowest six bits encode the explicit -- * cutoff value. -- * -- * @param explicitCutoff the explicit cutoff value to encode. -- *

    -- *
  • -- * If 'explicit-disabled' is chosen, this value should be 0. -- *
  • -- *
  • -- * If 'auto' is chosen, this value should be 63. -- *
  • -- *
  • -- * If a cutoff of 2n is desired, for 0 <= n < 31, -- * this value should be n + 1. -- *
  • -- *
-- * @param sparseEnabled whether {@link HLLType#SPARSE} -- * should be used in the promotion hierarchy to improve HLL -- * storage. -- * -- * @return the packed cutoff byte -- */ -- public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) { -- final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0); -- return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff)); -- } -- -- /** -- * Generates a byte that encodes the parameters of a -- * {@link HLLType#FULL} or {@link HLLType#SPARSE} -- * HLL.

-- * -- * The top 3 bits are used to encode registerWidth - 1 -- * (range of registerWidth is thus 1-9) and the bottom 5 -- * bits are used to encode registerCountLog2 -- * (range of registerCountLog2 is thus 0-31). -- * -- * @param registerWidth the register width (must be at least 1 and at -- * most 9) -- * @param registerCountLog2 the log-base-2 of the register count (must -- * be at least 0 and at most 31) -- * @return the packed parameters byte -- */ -- public static byte packParametersByte(final int registerWidth, final int registerCountLog2) { -- final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK); -- final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK); -- return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits); -- } -- -- /** -- * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized -- * HLL. -- * -- * @param cutoffByte the cutoff byte of the serialized HLL -- * @return the 'sparse-enabled' boolean -- */ -- public static boolean sparseEnabled(final byte cutoffByte) { -- return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1; -- } -- -- /** -- * Extracts the explicit cutoff value from the cutoff byte of a serialized -- * HLL. -- * -- * @param cutoffByte the cutoff byte of the serialized HLL -- * @return the explicit cutoff value -- */ -- public static int explicitCutoff(final byte cutoffByte) { -- return (cutoffByte & EXPLICIT_CUTOFF_MASK); -- } -- -- /** -- * Extracts the schema version from the version byte of a serialized -- * HLL. -- * -- * @param versionByte the version byte of the serialized HLL -- * @return the schema version of the serialized HLL -- */ -- public static int schemaVersion(final byte versionByte) { -- return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS); -- } -- -- /** -- * Extracts the type ordinal from the version byte of a serialized HLL. -- * -- * @param versionByte the version byte of the serialized HLL -- * @return the type ordinal of the serialized HLL -- */ -- public static int typeOrdinal(final byte versionByte) { -- return (versionByte & NIBBLE_MASK); -- } -- -- /** -- * Extracts the register width from the parameters byte of a serialized -- * {@link HLLType#FULL} HLL. -- * -- * @param parametersByte the parameters byte of the serialized HLL -- * @return the register width of the serialized HLL -- * -- * @see #packParametersByte(int, int) -- */ -- public static int registerWidth(final byte parametersByte) { -- return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1; -- } -- -- /** -- * Extracts the log2(registerCount) from the parameters byte of a -- * serialized {@link HLLType#FULL} HLL. -- * -- * @param parametersByte the parameters byte of the serialized HLL -- * @return log2(registerCount) of the serialized HLL -- * -- * @see #packParametersByte(int, int) -- */ -- public static int registerCountLog2(final byte parametersByte) { -- return (parametersByte & LOG2_REGISTER_COUNT_MASK); -- } --} diff --git a/solr.spec b/solr.spec index de8c3e1..39de3ff 100644 --- a/solr.spec +++ b/solr.spec @@ -9,7 +9,7 @@ %endif Name: solr -Version: 5.3.1 +Version: 5.4.0 Release: 1%{?dist} Summary: Ultra-fast Lucene-based Search Server # MIT/X11 (BSD like) solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/AlphaNumericComparator.java @@ -18,8 +18,7 @@ URL: http://lucene.apache.org/solr/ # Use solr-repack.sh Source0: %{name}-%{version}-clean.tar.xz Source2: solr-repack.sh -Patch0: solr-5.3.0-use-system-hll.patch -Patch1: solr-5.3.0-jetty9.3.3.patch +Patch0: solr-5.3.0-jetty9.3.3.patch BuildRequires: maven-local BuildRequires: mvn(com.adobe.xmp:xmpcore) @@ -60,7 +59,7 @@ BuildRequires: mvn(log4j:log4j:1.2.17) BuildRequires: mvn(net.agkn:hll) BuildRequires: mvn(net.arnx:jsonic) BuildRequires: mvn(net.sourceforge.jmatio:jmatio) -BuildRequires: mvn(org.antlr:antlr-runtime) +BuildRequires: mvn(org.antlr:antlr4-runtime) BuildRequires: mvn(org.apache:apache:pom:) BuildRequires: mvn(org.apache.ant:ant) BuildRequires: mvn(org.apache.commons:commons-exec) @@ -279,9 +278,12 @@ find . -name "*.class" -print -delete find . -name "*.jar" -print -delete find . -name "*.js" -print -delete -%patch0 -p1 -rm -rf solr/core/src/java/org/apache/solr/util/hll +rm -r solr/core/src/java/org/apache/solr/util/hll %pom_add_dep net.agkn:hll:1.6.0 solr/core +find ./solr -name "*.java" -exec sed -i "s/org.apache.solr.util.hll/net.agkn.hll/g" {} + + +sed -i "s|return hasher.hashString(v).asLong();|return hasher.hashString(v, null).asLong();|" \ + solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java %if %{?fedora} >= 23 %patch1 -p1 @@ -447,6 +449,9 @@ sed -i "s|conf.addResource(TEST_CONF);||" \ %license LICENSE.txt NOTICE.txt %changelog +* Sat Jan 23 2016 gil cattaneo 5.4.0-1 +- update to 5.4.0 + * Tue Oct 06 2015 gil cattaneo 5.3.1-1 - update to 5.3.1 diff --git a/sources b/sources index b351065..a3ceefd 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -060bea064bd392557f7bf384f4a44480 solr-5.3.1-clean.tar.xz +37eadf14e17e2999832fb921783e9c5f solr-5.4.0-clean.tar.xz