>= (BITS_PER_BYTE - lastByteBitsToConsume);
-- value <<= lastByteBitsToConsume;
-- value |= lastByte;
-- return value;
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount()
-- */
-- @Override
-- public int totalWordCount() {
-- return wordCount;
-- }
--}
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,174 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A serializer that writes a sequence of fixed bit-width 'words' to a byte array.
-- * Bitwise OR is used to write words into bytes, so a low bit in a word is also
-- * a low bit in a byte. However, a high byte in a word is written at a lower index
-- * in the array than a low byte in a word. The first word is written at the lowest
-- * array index. Each serializer is one time use and returns its backing byte
-- * array.
-- *
-- * This encoding was chosen so that when reading bytes as octets in the typical
-- * first-octet-is-the-high-nibble fashion, an octet-to-binary conversion
-- * would yield a high-to-low, left-to-right view of the "short words".
-- *
-- * Example:
-- *
-- * Say short words are 5 bits wide. Our word sequence is the values
-- * <code>[31, 1, 5]</code>. In big-endian binary format, the values are
-- * <code>[0b11111, 0b00001, 0b00101]</code>. We use 15 of 16 bits in two bytes
-- * and pad the last (lowest) bit of the last byte with a zero:
-- *
-- * <code>
-- * [0b11111000, 0b01001010] = [0xF8, 0x4A]
-- * </code>.
-- */
--class BigEndianAscendingWordSerializer implements IWordSerializer {
-- // The number of bits per byte.
-- private static final int BITS_PER_BYTE = 8;
--
-- // ************************************************************************
-- // The length in bits of the words to be written.
-- private final int wordLength;
-- // The number of words to be written.
-- private final int wordCount;
--
-- // The byte array to which the words are serialized.
-- private final byte[] bytes;
--
-- // ------------------------------------------------------------------------
-- // Write state
-- // Number of bits that remain writable in the current byte.
-- private int bitsLeftInByte;
-- // Index of byte currently being written to.
-- private int byteIndex;
-- // Number of words written.
-- private int wordsWritten;
--
-- // ========================================================================
-- /**
-- * @param wordLength the length in bits of the words to be serialized. Must
-- * be greater than or equal to 1 and less than or equal to 64.
-- * @param wordCount the number of words to be serialized. Must be greater than
-- * or equal to zero.
-- * @param bytePadding the number of leading bytes that should pad the
-- * serialized words. Must be greater than or equal to zero.
-- */
-- public BigEndianAscendingWordSerializer(final int wordLength, final int wordCount, final int bytePadding) {
-- if((wordLength < 1) || (wordLength > 64)) {
-- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")");
-- }
-- if(wordCount < 0) {
-- throw new IllegalArgumentException("Word count must be >= 0. (was: " + wordCount + ")");
-- }
-- if(bytePadding < 0) {
-- throw new IllegalArgumentException("Byte padding must be must be >= 0. (was: " + bytePadding + ")");
-- }
--
-- this.wordLength = wordLength;
-- this.wordCount = wordCount;
--
-- final long bitsRequired = (wordLength * wordCount);
-- final boolean leftoverBits = ((bitsRequired % BITS_PER_BYTE) != 0);
-- final int bytesRequired = (int)(bitsRequired / BITS_PER_BYTE) + (leftoverBits ? 1 : 0) + bytePadding;
-- bytes = new byte[bytesRequired];
--
-- bitsLeftInByte = BITS_PER_BYTE;
-- byteIndex = bytePadding;
-- wordsWritten = 0;
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IWordSerializer#writeWord(long)
-- * @throws RuntimeException if the number of words written is greater than the
-- * <code>wordCount</code> parameter in the constructor.
-- */
-- @Override
-- public void writeWord(final long word) {
-- if(wordsWritten == wordCount) {
-- throw new RuntimeException("Cannot write more words, backing array full!");
-- }
--
-- int bitsLeftInWord = wordLength;
--
-- while(bitsLeftInWord > 0) {
-- // Move to the next byte if the current one is fully packed.
-- if(bitsLeftInByte == 0) {
-- byteIndex++;
-- bitsLeftInByte = BITS_PER_BYTE;
-- }
--
-- final long consumedMask;
-- if(bitsLeftInWord == 64) {
-- consumedMask = ~0L;
-- } else {
-- consumedMask = ((1L << bitsLeftInWord) - 1L);
-- }
--
-- // Fix how many bits will be written in this cycle. Choose the
-- // smaller of the remaining bits in the word or byte.
-- final int numberOfBitsToWrite = Math.min(bitsLeftInByte, bitsLeftInWord);
-- final int bitsInByteRemainingAfterWrite = (bitsLeftInByte - numberOfBitsToWrite);
--
-- // In general, we write the highest bits of the word first, so we
-- // strip the highest bits that were consumed in previous cycles.
-- final long remainingBitsOfWordToWrite = (word & consumedMask);
--
-- final long bitsThatTheByteCanAccept;
-- // If there is more left in the word than can be written to this
-- // byte, shift off the bits that can't be written off the bottom.
-- if(bitsLeftInWord > numberOfBitsToWrite) {
-- bitsThatTheByteCanAccept = (remainingBitsOfWordToWrite >>> (bitsLeftInWord - bitsLeftInByte));
-- } else {
-- // If the byte can accept all remaining bits, there is no need
-- // to shift off the bits that won't be written in this cycle.
-- bitsThatTheByteCanAccept = remainingBitsOfWordToWrite;
-- }
--
-- // Align the word bits to write up against the byte bits that have
-- // already been written. This shift may do nothing if the remainder
-- // of the byte is being consumed in this cycle.
-- final long alignedBits = (bitsThatTheByteCanAccept << bitsInByteRemainingAfterWrite);
--
-- // Update the byte with the alignedBits.
-- bytes[byteIndex] |= (byte)alignedBits;
--
-- // Update state with bit count written.
-- bitsLeftInWord -= numberOfBitsToWrite;
-- bitsLeftInByte = bitsInByteRemainingAfterWrite;
-- }
--
-- wordsWritten ++;
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IWordSerializer#getBytes()
-- * @throws RuntimeException if the number of words written is fewer than the
-- * <code>wordCount</code> parameter in the constructor.
-- */
-- @Override
-- public byte[] getBytes() {
-- if(wordsWritten < wordCount) {
-- throw new RuntimeException("Not all words have been written! (" + wordsWritten + "/" + wordCount + ")");
-- }
--
-- return bytes;
-- }
--}
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,71 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A collection of bit utilities.
-- */
--class BitUtil {
-- /**
-- * The set of least-significant bits for a given <code>byte</code>. <code>-1</code>
-- * is used if no bits are set (so as to not be confused with "index of zero"
-- * meaning that the least significant bit is the 0th (1st) bit).
-- *
-- * @see #leastSignificantBit(long)
-- */
-- private static final int[] LEAST_SIGNIFICANT_BIT = {
-- -1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-- };
--
-- /**
-- * Computes the least-significant bit of the specified <code>long</code>
-- * that is set to <code>1</code>. Zero-indexed.
-- *
-- * @param value the <code>long</code> whose least-significant bit is desired.
-- * @return the least-significant bit of the specified <code>long</code>.
-- * <code>-1</code> is returned if there are no bits set.
-- */
-- // REF: http://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
-- // REF: http://www-graphics.stanford.edu/~seander/bithacks.html
-- public static int leastSignificantBit(final long value) {
-- if(value == 0L) return -1/*by contract*/;
-- if((value & 0xFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 0) & 0xFF)] + 0;
-- if((value & 0xFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 8) & 0xFF)] + 8;
-- if((value & 0xFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 16) & 0xFF)] + 16;
-- if((value & 0xFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 24) & 0xFF)] + 24;
-- if((value & 0xFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 32) & 0xFF)] + 32;
-- if((value & 0xFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 40) & 0xFF)] + 40;
-- if((value & 0xFFFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 48) & 0xFF)] + 48;
-- return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 56) & 0xFFL)] + 56;
-- }
--}
-\ No newline at end of file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,259 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A vector (array) of bits that is accessed in units ("registers") of width
-- * bits which are stored as 64bit "words" (<code>long</code>s). In this context
-- * a register is at most 64bits.
-- */
--class BitVector implements Cloneable {
-- // NOTE: in this context, a word is 64bits
--
-- // rather than doing division to determine how a bit index fits into 64bit
-- // words (i.e. longs), bit shifting is used
-- private static final int LOG2_BITS_PER_WORD = 6/*=>64bits*/;
-- private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD;
-- private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1;
--
-- // ditto from above but for bytes (for output)
-- private static final int LOG2_BITS_PER_BYTE = 3/*=>8bits*/;
-- public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE;
--
-- // ========================================================================
-- public static final int BYTES_PER_WORD = 8/*8 bytes in a long*/;
--
-- // ************************************************************************
-- // 64bit words
-- private final long[] words;
-- public final long[] words() { return words; }
-- public final int wordCount() { return words.length; }
-- public final int byteCount() { return wordCount() * BYTES_PER_WORD; }
--
-- // the width of a register in bits (this cannot be more than 64 (the word size))
-- private final int registerWidth;
-- public final int registerWidth() { return registerWidth; }
--
-- private final long count;
--
-- // ------------------------------------------------------------------------
-- private final long registerMask;
--
-- // ========================================================================
-- /**
-- * @param width the width of each register. This cannot be negative or
-- * zero or greater than 63 (the signed word size).
-- * @param count the number of registers. This cannot be negative or zero
-- */
-- public BitVector(final int width, final long count) {
-- // ceil((width * count)/BITS_PER_WORD)
-- this.words = new long[(int)(((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)];
-- this.registerWidth = width;
-- this.count = count;
--
-- this.registerMask = (1L << width) - 1;
-- }
--
-- // ========================================================================
-- /**
-- * @param registerIndex the index of the register whose value is to be
-- * retrieved. This cannot be negative.
-- * @return the value at the specified register index
-- * @see #setRegister(long, long)
-- * @see #setMaxRegister(long, long)
-- */
-- // NOTE: if this changes then setMaxRegister() must change
-- public long getRegister(final long registerIndex) {
-- final long bitIndex = registerIndex * registerWidth;
-- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
-- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
-- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
--
-- if(firstWordIndex == secondWordIndex)
-- return ((words[firstWordIndex] >>> bitRemainder) & registerMask);
-- /* else -- register spans words */
-- return (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
-- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
-- }
--
-- /**
-- * @param registerIndex the index of the register whose value is to be set.
-- * This cannot be negative
-- * @param value the value to set in the register
-- * @see #getRegister(long)
-- * @see #setMaxRegister(long, long)
-- */
-- // NOTE: if this changes then setMaxRegister() must change
-- public void setRegister(final long registerIndex, final long value) {
-- final long bitIndex = registerIndex * registerWidth;
-- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
-- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
-- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
--
-- final long words[] = this.words/*for convenience/performance*/;
-- if(firstWordIndex == secondWordIndex) {
-- // clear then set
-- words[firstWordIndex] &= ~(registerMask << bitRemainder);
-- words[firstWordIndex] |= (value << bitRemainder);
-- } else {/*register spans words*/
-- // clear then set each partial word
-- words[firstWordIndex] &= (1L << bitRemainder) - 1;
-- words[firstWordIndex] |= (value << bitRemainder);
--
-- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
-- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
-- }
-- }
--
-- // ------------------------------------------------------------------------
-- /**
-- * @return a <code>LongIterator</code> for iterating starting at the register
-- * with index zero. This will never be <code>null</code>.
-- */
-- public LongIterator registerIterator() {
-- return new LongIterator() {
-- final int registerWidth = BitVector.this.registerWidth;
-- final long[] words = BitVector.this.words;
-- final long registerMask = BitVector.this.registerMask;
--
-- // register setup
-- long registerIndex = 0;
-- int wordIndex = 0;
-- int remainingWordBits = BITS_PER_WORD;
-- long word = words[wordIndex];
--
-- @Override public long next() {
-- long register;
-- if(remainingWordBits >= registerWidth) {
-- register = word & registerMask;
--
-- // shift to the next register
-- word >>>= registerWidth;
-- remainingWordBits -= registerWidth;
-- } else { /*insufficient bits remaining in current word*/
-- wordIndex++/*move to the next word*/;
--
-- register = (word | (words[wordIndex] << remainingWordBits)) & registerMask;
--
-- // shift to the next partial register (word)
-- word = words[wordIndex] >>> (registerWidth - remainingWordBits);
-- remainingWordBits += BITS_PER_WORD - registerWidth;
-- }
-- registerIndex++;
-- return register;
-- }
--
-- @Override public boolean hasNext() {
-- return registerIndex < count;
-- }
-- };
-- }
--
-- // ------------------------------------------------------------------------
-- // composite accessors
-- /**
-- * Sets the value of the specified index register if and only if the specified
-- * value is greater than the current value in the register. This is equivalent
-- * to but much more performant than:
-- *
-- * vector.setRegister(index, Math.max(vector.getRegister(index), value));
-- *
-- * @param registerIndex the index of the register whose value is to be set.
-- * This cannot be negative
-- * @param value the value to set in the register if and only if this value
-- * is greater than the current value in the register
-- * @return <code>true</code> if and only if the specified value is greater
-- * than or equal to the current register value. <code>false</code>
-- * otherwise.
-- * @see #getRegister(long)
-- * @see #setRegister(long, long)
-- * @see java.lang.Math#max(long, long)
-- */
-- // NOTE: if this changes then setRegister() must change
-- public boolean setMaxRegister(final long registerIndex, final long value) {
-- final long bitIndex = registerIndex * registerWidth;
-- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
-- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
-- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
--
-- // NOTE: matches getRegister()
-- final long registerValue;
-- final long words[] = this.words/*for convenience/performance*/;
-- if(firstWordIndex == secondWordIndex)
-- registerValue = ((words[firstWordIndex] >>> bitRemainder) & registerMask);
-- else /*register spans words*/
-- registerValue = (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
-- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
--
-- // determine which is the larger and update as necessary
-- if(value > registerValue) {
-- // NOTE: matches setRegister()
-- if(firstWordIndex == secondWordIndex) {
-- // clear then set
-- words[firstWordIndex] &= ~(registerMask << bitRemainder);
-- words[firstWordIndex] |= (value << bitRemainder);
-- } else {/*register spans words*/
-- // clear then set each partial word
-- words[firstWordIndex] &= (1L << bitRemainder) - 1;
-- words[firstWordIndex] |= (value << bitRemainder);
--
-- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
-- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
-- }
-- } /* else -- the register value is greater (or equal) so nothing needs to be done */
--
-- return (value >= registerValue);
-- }
--
-- // ========================================================================
-- /**
-- * Fills this bit vector with the specified bit value. This can be used to
-- * clear the vector by specifying <code>0</code>.
-- *
-- * @param value the value to set all bits to (only the lowest bit is used)
-- */
-- public void fill(final long value) {
-- for(long i=0; i<count; i++) {
-- setRegister(i, value);
-- }
-- }
--
-- /**
-- * Serializes the registers of the vector using the specified serializer.
-- *
-- * @param serializer the serializer to use. This cannot be <code>null</code>.
-- */
-- public void getRegisterContents(final IWordSerializer serializer) {
-- for(final LongIterator iter = registerIterator(); iter.hasNext();) {
-- serializer.writeWord(iter.next());
-- }
-- }
--
-- /**
-- * Creates a deep copy of this vector.
-- *
-- * @see java.lang.Object#clone()
-- */
-- @Override
-- public BitVector clone() {
-- final BitVector copy = new BitVector(registerWidth, count);
-- System.arraycopy(words, 0, copy.words, 0, words.length);
-- return copy;
-- }
--}
-\ No newline at end of file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java 2015-07-16 13:14:59.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,1071 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--import java.util.Arrays;
--
--import com.carrotsearch.hppc.IntByteOpenHashMap;
--import com.carrotsearch.hppc.LongOpenHashSet;
--import com.carrotsearch.hppc.cursors.IntByteCursor;
--import com.carrotsearch.hppc.cursors.LongCursor;
--
--/**
-- * A probabilistic set of hashed <code>long</code> elements. Useful for computing
-- * the approximate cardinality of a stream of data in very small storage.
-- *
-- * A modified version of the
-- * 'HyperLogLog' data structure and algorithm is used, which combines both
-- * probabilistic and non-probabilistic techniques to improve the accuracy and
-- * storage requirements of the original algorithm.
-- *
-- * More specifically, initializing and storing a new {@link HLL} will
-- * allocate a sentinel value symbolizing the empty set ({@link HLLType#EMPTY}).
-- * After adding the first few values, a sorted list of unique integers is
-- * stored in a {@link HLLType#EXPLICIT} hash set. When configured, accuracy can
-- * be sacrificed for memory footprint: the values in the sorted list are
-- * "promoted" to a "{@link HLLType#SPARSE}" map-based HyperLogLog structure.
-- * Finally, when enough registers are set, the map-based HLL will be converted
-- * to a bit-packed "{@link HLLType#FULL}" HyperLogLog structure.
-- *
-- * This data structure is interoperable with the implementations found at:
-- * <ul>
-- * <li><a href="https://github.com/aggregateknowledge/postgresql-hll">postgresql-hll</a>, and</li>
-- * <li><a href="https://github.com/aggregateknowledge/js-hll">js-hll</a></li>
-- * </ul>
-- * when properly serialized.
-- */
--public class HLL implements Cloneable {
-- // minimum and maximum values for the log-base-2 of the number of registers
-- // in the HLL
-- public static final int MINIMUM_LOG2M_PARAM = 4;
-- public static final int MAXIMUM_LOG2M_PARAM = 30;
--
-- // minimum and maximum values for the register width of the HLL
-- public static final int MINIMUM_REGWIDTH_PARAM = 1;
-- public static final int MAXIMUM_REGWIDTH_PARAM = 8;
--
-- // minimum and maximum values for the 'expthresh' parameter of the
-- // constructor that is meant to match the PostgreSQL implementation's
-- // constructor and parameter names
-- public static final int MINIMUM_EXPTHRESH_PARAM = -1;
-- public static final int MAXIMUM_EXPTHRESH_PARAM = 18;
-- public static final int MAXIMUM_EXPLICIT_THRESHOLD = (1 << (MAXIMUM_EXPTHRESH_PARAM - 1)/*per storage spec*/);
--
-- // ************************************************************************
-- // Storage
-- // storage used when #type is EXPLICIT, null otherwise
-- LongOpenHashSet explicitStorage;
-- // storage used when #type is SPARSE, null otherwise
-- IntByteOpenHashMap sparseProbabilisticStorage;
-- // storage used when #type is FULL, null otherwise
-- BitVector probabilisticStorage;
--
-- // current type of this HLL instance, if this changes then so should the
-- // storage used (see above)
-- private HLLType type;
--
-- // ------------------------------------------------------------------------
-- // Characteristic parameters
-- // NOTE: These members are named to match the PostgreSQL implementation's
-- // parameters.
-- // log2(the number of probabilistic HLL registers)
-- private final int log2m;
-- // the size (width) each register in bits
-- private final int regwidth;
--
-- // ------------------------------------------------------------------------
-- // Computed constants
-- // ........................................................................
-- // EXPLICIT-specific constants
-- // flag indicating if the EXPLICIT representation should NOT be used
-- private final boolean explicitOff;
-- // flag indicating that the promotion threshold from EXPLICIT should be
-- // computed automatically
-- // NOTE: this only has meaning when 'explicitOff' is false
-- private final boolean explicitAuto;
-- // threshold (in element count) at which a EXPLICIT HLL is converted to a
-- // SPARSE or FULL HLL, always greater than or equal to zero and always a
-- // power of two OR simply zero
-- // NOTE: this only has meaning when 'explicitOff' is false
-- private final int explicitThreshold;
--
-- // ........................................................................
-- // SPARSE-specific constants
-- // the computed width of the short words
-- private final int shortWordLength;
-- // flag indicating if the SPARSE representation should not be used
-- private final boolean sparseOff;
-- // threshold (in register count) at which a SPARSE HLL is converted to a
-- // FULL HLL, always greater than zero
-- private final int sparseThreshold;
--
-- // ........................................................................
-- // Probabilistic algorithm constants
-- // the number of registers, will always be a power of 2
-- private final int m;
-- // a mask of the log2m bits set to one and the rest to zero
-- private final int mBitsMask;
-- // a mask as wide as a register (see #fromBytes())
-- private final int valueMask;
-- // mask used to ensure that p(w) does not overflow register (see #Constructor() and #addRaw())
-- private final long pwMaxMask;
-- // alpha * m^2 (the constant in the "'raw' HyperLogLog estimator")
-- private final double alphaMSquared;
-- // the cutoff value of the estimator for using the "small" range cardinality
-- // correction formula
-- private final double smallEstimatorCutoff;
-- // the cutoff value of the estimator for using the "large" range cardinality
-- // correction formula
-- private final double largeEstimatorCutoff;
--
-- // ========================================================================
-- /**
-- * NOTE: Arguments here are named and structured identically to those in the
-- * PostgreSQL implementation, which can be found
-- * <a href="https://github.com/aggregateknowledge/postgresql-hll">here</a>.
-- *
-- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
-- * algorithm. Must be at least 4 and at most 30.
-- * @param regwidth number of bits used per register in the HyperLogLog
-- * algorithm. Must be at least 1 and at most 8.
-- * @param expthresh tunes when the {@link HLLType#EXPLICIT} to
-- * {@link HLLType#SPARSE} promotion occurs,
-- * based on the set's cardinality. Must be at least -1 and at most 18.
-- * @param sparseon Flag indicating if the {@link HLLType#SPARSE}
-- * representation should be used.
-- * @param type the type in the promotion hierarchy which this instance should
-- * start at. This cannot be <code>null</code>.
-- */
-- public HLL(final int log2m, final int regwidth, final int expthresh, final boolean sparseon, final HLLType type) {
-- this.log2m = log2m;
-- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) {
-- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")");
-- }
--
-- this.regwidth = regwidth;
-- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) {
-- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")");
-- }
--
-- this.m = (1 << log2m);
-- this.mBitsMask = m - 1;
-- this.valueMask = (1 << regwidth) - 1;
-- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth);
-- this.alphaMSquared = HLLUtil.alphaMSquared(m);
-- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m);
-- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth);
--
-- if(expthresh == -1) {
-- this.explicitAuto = true;
-- this.explicitOff = false;
--
-- // NOTE: This math matches the size calculation in the PostgreSQL impl.
-- final long fullRepresentationSize = (this.regwidth * (long)this.m + 7/*round up to next whole byte*/)/Byte.SIZE;
-- final int numLongs = (int)(fullRepresentationSize / 8/*integer division to round down*/);
--
-- if(numLongs > MAXIMUM_EXPLICIT_THRESHOLD) {
-- this.explicitThreshold = MAXIMUM_EXPLICIT_THRESHOLD;
-- } else {
-- this.explicitThreshold = numLongs;
-- }
-- } else if(expthresh == 0) {
-- this.explicitAuto = false;
-- this.explicitOff = true;
-- this.explicitThreshold = 0;
-- } else if((expthresh > 0) && (expthresh <= MAXIMUM_EXPTHRESH_PARAM)){
-- this.explicitAuto = false;
-- this.explicitOff = false;
-- this.explicitThreshold = (1 << (expthresh - 1));
-- } else {
-- throw new IllegalArgumentException("'expthresh' must be at least " + MINIMUM_EXPTHRESH_PARAM + " and at most " + MAXIMUM_EXPTHRESH_PARAM + " (was: " + expthresh + ")");
-- }
--
-- this.shortWordLength = (regwidth + log2m);
-- this.sparseOff = !sparseon;
-- if(this.sparseOff) {
-- this.sparseThreshold = 0;
-- } else {
-- // TODO improve this cutoff to include the cost overhead of Java
-- // members/objects
-- final int largestPow2LessThanCutoff =
-- (int)NumberUtil.log2((this.m * this.regwidth) / this.shortWordLength);
-- this.sparseThreshold = (1 << largestPow2LessThanCutoff);
-- }
--
-- initializeStorage(type);
-- }
--
-- /**
-- * Construct an empty HLL with the given {@code log2m} and {@code regwidth}.
-- *
-- * This is equivalent to calling HLL(log2m, regwidth, -1, true, HLLType.EMPTY)
.
-- *
-- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
-- * algorithm. Must be at least 4 and at most 30.
-- * @param regwidth number of bits used per register in the HyperLogLog
-- * algorithm. Must be at least 1 and at most 8.
-- *
-- * @see #HLL(int, int, int, boolean, HLLType)
-- */
-- public HLL(final int log2m, final int regwidth) {
-- this(log2m, regwidth, -1, true, HLLType.EMPTY);
-- }
--
-- // -------------------------------------------------------------------------
-- /**
-- * Convenience constructor for testing. Assumes that both {@link HLLType#EXPLICIT}
-- * and {@link HLLType#SPARSE} representations should be enabled.
-- *
-- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
-- * algorithm. Must be at least 4 and at most 30.
-- * @param regwidth number of bits used per register in the HyperLogLog
-- * algorithm. Must be at least 1 and at most 8.
-- * @param explicitThreshold cardinality threshold at which the {@link HLLType#EXPLICIT}
-- * representation should be promoted to {@link HLLType#SPARSE}.
-- * This must be greater than zero and less than or equal to {@value #MAXIMUM_EXPLICIT_THRESHOLD}.
-- * @param sparseThreshold register count threshold at which the {@link HLLType#SPARSE}
-- * representation should be promoted to {@link HLLType#FULL}.
-- * This must be greater than zero.
-- * @param type the type in the promotion hierarchy which this instance should
-- * start at. This cannot be null
.
-- */
-- /*package, for testing*/ HLL(final int log2m, final int regwidth, final int explicitThreshold, final int sparseThreshold, final HLLType type) {
-- this.log2m = log2m;
-- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) {
-- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")");
-- }
--
-- this.regwidth = regwidth;
-- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) {
-- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")");
-- }
--
-- this.m = (1 << log2m);
-- this.mBitsMask = m - 1;
-- this.valueMask = (1 << regwidth) - 1;
-- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth);
-- this.alphaMSquared = HLLUtil.alphaMSquared(m);
-- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m);
-- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth);
--
-- this.explicitAuto = false;
-- this.explicitOff = false;
-- this.explicitThreshold = explicitThreshold;
-- if((explicitThreshold < 1) || (explicitThreshold > MAXIMUM_EXPLICIT_THRESHOLD)) {
-- throw new IllegalArgumentException("'explicitThreshold' must be at least 1 and at most " + MAXIMUM_EXPLICIT_THRESHOLD + " (was: " + explicitThreshold + ")");
-- }
--
-- this.shortWordLength = (regwidth + log2m);
-- this.sparseOff = false;
-- this.sparseThreshold = sparseThreshold;
--
-- initializeStorage(type);
-- }
--
-- /**
-- * @return the type in the promotion hierarchy of this instance. This will
-- * never be null
.
-- */
-- public HLLType getType() { return type; }
--
-- // ========================================================================
-- // Add
-- /**
-- * Adds rawValue
directly to the HLL.
-- *
-- * @param rawValue the value to be added. It is very important that this
-- * value already be hashed with a strong (but not
-- * necessarily cryptographic) hash function. For instance, the
-- * Murmur3 implementation in
-- *
-- * Google's Guava library is an excellent hash function for this
-- * purpose and, for seeds greater than zero, matches the output
-- * of the hash provided in the PostgreSQL implementation.
-- */
-- public void addRaw(final long rawValue) {
-- switch(type) {
-- case EMPTY: {
-- // NOTE: EMPTY type is always promoted on #addRaw()
-- if(explicitThreshold > 0) {
-- initializeStorage(HLLType.EXPLICIT);
-- explicitStorage.add(rawValue);
-- } else if(!sparseOff) {
-- initializeStorage(HLLType.SPARSE);
-- addRawSparseProbabilistic(rawValue);
-- } else {
-- initializeStorage(HLLType.FULL);
-- addRawProbabilistic(rawValue);
-- }
-- return;
-- }
-- case EXPLICIT: {
-- explicitStorage.add(rawValue);
--
-- // promotion, if necessary
-- if(explicitStorage.size() > explicitThreshold) {
-- if(!sparseOff) {
-- initializeStorage(HLLType.SPARSE);
-- for (LongCursor c : explicitStorage) {
-- addRawSparseProbabilistic(c.value);
-- }
-- } else {
-- initializeStorage(HLLType.FULL);
-- for (LongCursor c : explicitStorage) {
-- addRawProbabilistic(c.value);
-- }
-- }
-- explicitStorage = null;
-- }
-- return;
-- }
-- case SPARSE: {
-- addRawSparseProbabilistic(rawValue);
--
-- // promotion, if necessary
-- if(sparseProbabilisticStorage.size() > sparseThreshold) {
-- initializeStorage(HLLType.FULL);
-- for(IntByteCursor c : sparseProbabilisticStorage) {
-- final int registerIndex = c.key;
-- final byte registerValue = c.value;
-- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
-- }
-- sparseProbabilisticStorage = null;
-- }
-- return;
-- }
-- case FULL:
-- addRawProbabilistic(rawValue);
-- return;
-- default:
-- throw new RuntimeException("Unsupported HLL type " + type);
-- }
-- }
--
-- // ------------------------------------------------------------------------
-- // #addRaw(..) helpers
-- /**
-- * Adds the raw value to the {@link #sparseProbabilisticStorage}.
-- * {@link #type} must be {@link HLLType#SPARSE}.
-- *
-- * @param rawValue the raw value to add to the sparse storage.
-- */
-- private void addRawSparseProbabilistic(final long rawValue) {
-- // p(w): position of the least significant set bit (one-indexed)
-- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value)
-- //
-- // By construction of pwMaxMask (see #Constructor()),
-- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2,
-- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2,
-- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1.
-- final long substreamValue = (rawValue >>> log2m);
-- final byte p_w;
--
-- if(substreamValue == 0L) {
-- // The paper does not cover p(0x0), so the special value 0 is used.
-- // 0 is the original initialization value of the registers, so by
-- // doing this the multiset simply ignores it. This is acceptable
-- // because the probability is 1/(2^(2^registerSizeInBits)).
-- p_w = 0;
-- } else {
-- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask));
-- }
--
-- // Short-circuit if the register is being set to zero, since algorithmically
-- // this corresponds to an "unset" register, and "unset" registers aren't
-- // stored to save memory. (The very reason this sparse implementation
-- // exists.) If a register is set to zero it will break the #algorithmCardinality
-- // code.
-- if(p_w == 0) {
-- return;
-- }
--
-- // NOTE: no +1 as in paper since 0-based indexing
-- final int j = (int)(rawValue & mBitsMask);
--
-- final byte currentValue;
-- if (sparseProbabilisticStorage.containsKey(j)) {
-- currentValue = sparseProbabilisticStorage.lget();
-- } else {
-- currentValue = 0;
-- }
--
-- if(p_w > currentValue) {
-- sparseProbabilisticStorage.put(j, p_w);
-- }
-- }
--
-- /**
-- * Adds the raw value to the {@link #probabilisticStorage}.
-- * {@link #type} must be {@link HLLType#FULL}.
-- *
-- * @param rawValue the raw value to add to the full probabilistic storage.
-- */
-- private void addRawProbabilistic(final long rawValue) {
-- // p(w): position of the least significant set bit (one-indexed)
-- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value)
-- //
-- // By construction of pwMaxMask (see #Constructor()),
-- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2,
-- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2,
-- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1.
-- final long substreamValue = (rawValue >>> log2m);
-- final byte p_w;
--
-- if (substreamValue == 0L) {
-- // The paper does not cover p(0x0), so the special value 0 is used.
-- // 0 is the original initialization value of the registers, so by
-- // doing this the multiset simply ignores it. This is acceptable
-- // because the probability is 1/(2^(2^registerSizeInBits)).
-- p_w = 0;
-- } else {
-- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask));
-- }
--
-- // Short-circuit if the register is being set to zero, since algorithmically
-- // this corresponds to an "unset" register, and "unset" registers aren't
-- // stored to save memory. (The very reason this sparse implementation
-- // exists.) If a register is set to zero it will break the #algorithmCardinality
-- // code.
-- if(p_w == 0) {
-- return;
-- }
--
-- // NOTE: no +1 as in paper since 0-based indexing
-- final int j = (int)(rawValue & mBitsMask);
--
-- probabilisticStorage.setMaxRegister(j, p_w);
-- }
--
-- // ------------------------------------------------------------------------
-- // Storage helper
-- /**
-- * Initializes storage for the specified {@link HLLType} and changes the
-- * instance's {@link #type}.
-- *
-- * @param type the {@link HLLType} to initialize storage for. This cannot be
-- * null
and must be an instantiable type.
-- */
-- private void initializeStorage(final HLLType type) {
-- this.type = type;
-- switch(type) {
-- case EMPTY:
-- // nothing to be done
-- break;
-- case EXPLICIT:
-- this.explicitStorage = new LongOpenHashSet();
-- break;
-- case SPARSE:
-- this.sparseProbabilisticStorage = new IntByteOpenHashMap();
-- break;
-- case FULL:
-- this.probabilisticStorage = new BitVector(regwidth, m);
-- break;
-- default:
-- throw new RuntimeException("Unsupported HLL type " + type);
-- }
-- }
--
-- // ========================================================================
-- // Cardinality
-- /**
-- * Computes the cardinality of the HLL.
-- *
-- * @return the cardinality of HLL. This will never be negative.
-- */
-- public long cardinality() {
-- switch(type) {
-- case EMPTY:
-- return 0/*by definition*/;
-- case EXPLICIT:
-- return explicitStorage.size();
-- case SPARSE:
-- return (long)Math.ceil(sparseProbabilisticAlgorithmCardinality());
-- case FULL:
-- return (long)Math.ceil(fullProbabilisticAlgorithmCardinality());
-- default:
-- throw new RuntimeException("Unsupported HLL type " + type);
-- }
-- }
--
-- // ------------------------------------------------------------------------
-- // Cardinality helpers
-- /**
-- * Computes the exact cardinality value returned by the HLL algorithm when
-- * represented as a {@link HLLType#SPARSE} HLL. Kept
-- * separate from {@link #cardinality()} for testing purposes. {@link #type}
-- * must be {@link HLLType#SPARSE}.
-- *
-- * @return the exact, unrounded cardinality given by the HLL algorithm
-- */
-- /*package, for testing*/ double sparseProbabilisticAlgorithmCardinality() {
-- final int m = this.m/*for performance*/;
--
-- // compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the
-- // 'j'th register value
-- double sum = 0;
-- int numberOfZeroes = 0/*"V" in the paper*/;
-- for(int j=0; jclear does NOT handle
-- * transitions between {@link HLLType}s - a probabilistic type will remain
-- * probabilistic after being cleared.
-- */
-- public void clear() {
-- switch(type) {
-- case EMPTY:
-- return /*do nothing*/;
-- case EXPLICIT:
-- explicitStorage.clear();
-- return;
-- case SPARSE:
-- sparseProbabilisticStorage.clear();
-- return;
-- case FULL:
-- probabilisticStorage.fill(0);
-- return;
-- default:
-- throw new RuntimeException("Unsupported HLL type " + type);
-- }
-- }
--
-- // ========================================================================
-- // Union
-- /**
-- * Computes the union of HLLs and stores the result in this instance.
-- *
-- * @param other the other {@link HLL} instance to union into this one. This
-- * cannot be null
.
-- */
-- public void union(final HLL other) {
-- // TODO: verify HLLs are compatible
-- final HLLType otherType = other.getType();
--
-- if(type.equals(otherType)) {
-- homogeneousUnion(other);
-- return;
-- } else {
-- heterogenousUnion(other);
-- return;
-- }
-- }
--
-- // ------------------------------------------------------------------------
-- // Union helpers
-- /**
-- * Computes the union of two HLLs, of different types, and stores the
-- * result in this instance.
-- *
-- * @param other the other {@link HLL} instance to union into this one. This
-- * cannot be null
.
-- */
-- /*package, for testing*/ void heterogenousUnion(final HLL other) {
-- /*
-- * The logic here is divided into two sections: unions with an EMPTY
-- * HLL, and unions between EXPLICIT/SPARSE/FULL
-- * HLL.
-- *
-- * Between those two sections, all possible heterogeneous unions are
-- * covered. Should another type be added to HLLType whose unions
-- * are not easily reduced (say, as EMPTY's are below) this may be more
-- * easily implemented as Strategies. However, that is unnecessary as it
-- * stands.
-- */
--
-- // ....................................................................
-- // Union with an EMPTY
-- if(HLLType.EMPTY.equals(type)) {
-- // NOTE: The union of empty with non-empty HLL is just a
-- // clone of the non-empty.
--
-- switch(other.getType()) {
-- case EXPLICIT: {
-- // src: EXPLICIT
-- // dest: EMPTY
--
-- if(other.explicitStorage.size() <= explicitThreshold) {
-- type = HLLType.EXPLICIT;
-- explicitStorage = other.explicitStorage.clone();
-- } else {
-- if(!sparseOff) {
-- initializeStorage(HLLType.SPARSE);
-- } else {
-- initializeStorage(HLLType.FULL);
-- }
-- for(LongCursor c : other.explicitStorage) {
-- addRaw(c.value);
-- }
-- }
-- return;
-- }
-- case SPARSE: {
-- // src: SPARSE
-- // dest: EMPTY
--
-- if(!sparseOff) {
-- type = HLLType.SPARSE;
-- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone();
-- } else {
-- initializeStorage(HLLType.FULL);
-- for(IntByteCursor c : other.sparseProbabilisticStorage) {
-- final int registerIndex = c.key;
-- final byte registerValue = c.value;
-- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
-- }
-- }
-- return;
-- }
-- default/*case FULL*/: {
-- // src: FULL
-- // dest: EMPTY
--
-- type = HLLType.FULL;
-- probabilisticStorage = other.probabilisticStorage.clone();
-- return;
-- }
-- }
-- } else if (HLLType.EMPTY.equals(other.getType())) {
-- // source is empty, so just return destination since it is unchanged
-- return;
-- } /* else -- both of the sets are not empty */
--
-- // ....................................................................
-- // NOTE: Since EMPTY is handled above, the HLLs are non-EMPTY below
-- switch(type) {
-- case EXPLICIT: {
-- // src: FULL/SPARSE
-- // dest: EXPLICIT
-- // "Storing into destination" cannot be done (since destination
-- // is by definition of smaller capacity than source), so a clone
-- // of source is made and values from destination are inserted
-- // into that.
--
-- // Determine source and destination storage.
-- // NOTE: destination storage may change through promotion if
-- // source is SPARSE.
-- if(HLLType.SPARSE.equals(other.getType())) {
-- if(!sparseOff) {
-- type = HLLType.SPARSE;
-- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone();
-- } else {
-- initializeStorage(HLLType.FULL);
-- for(IntByteCursor c : other.sparseProbabilisticStorage) {
-- final int registerIndex = c.key;
-- final byte registerValue = c.value;
-- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
-- }
-- }
-- } else /*source is HLLType.FULL*/ {
-- type = HLLType.FULL;
-- probabilisticStorage = other.probabilisticStorage.clone();
-- }
-- for(LongCursor c : explicitStorage) {
-- addRaw(c.value);
-- }
-- explicitStorage = null;
-- return;
-- }
-- case SPARSE: {
-- if(HLLType.EXPLICIT.equals(other.getType())) {
-- // src: EXPLICIT
-- // dest: SPARSE
-- // Add the raw values from the source to the destination.
--
-- for(LongCursor c : other.explicitStorage) {
-- addRaw(c.value);
-- }
-- // NOTE: addRaw will handle promotion cleanup
-- } else /*source is HLLType.FULL*/ {
-- // src: FULL
-- // dest: SPARSE
-- // "Storing into destination" cannot be done (since destination
-- // is by definition of smaller capacity than source), so a
-- // clone of source is made and registers from the destination
-- // are merged into the clone.
--
-- type = HLLType.FULL;
-- probabilisticStorage = other.probabilisticStorage.clone();
-- for(IntByteCursor c : sparseProbabilisticStorage) {
-- final int registerIndex = c.key;
-- final byte registerValue = c.value;
-- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
-- }
-- sparseProbabilisticStorage = null;
-- }
-- return;
-- }
-- default/*destination is HLLType.FULL*/: {
-- if(HLLType.EXPLICIT.equals(other.getType())) {
-- // src: EXPLICIT
-- // dest: FULL
-- // Add the raw values from the source to the destination.
-- // Promotion is not possible, so don't bother checking.
--
-- for(LongCursor c : other.explicitStorage) {
-- addRaw(c.value);
-- }
-- } else /*source is HLLType.SPARSE*/ {
-- // src: SPARSE
-- // dest: FULL
-- // Merge the registers from the source into the destination.
-- // Promotion is not possible, so don't bother checking.
--
-- for(IntByteCursor c : other.sparseProbabilisticStorage) {
-- final int registerIndex = c.key;
-- final byte registerValue = c.value;
-- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
-- }
-- }
-- }
-- }
-- }
--
-- /**
-- * Computes the union of two HLLs of the same type, and stores the
-- * result in this instance.
-- *
-- * @param other the other {@link HLL} instance to union into this one. This
-- * cannot be null
.
-- */
-- private void homogeneousUnion(final HLL other) {
-- switch(type) {
-- case EMPTY:
-- // union of empty and empty is empty
-- return;
-- case EXPLICIT:
-- for(LongCursor c : other.explicitStorage) {
-- addRaw(c.value);
-- }
-- // NOTE: #addRaw() will handle promotion, if necessary
-- return;
-- case SPARSE:
-- for(IntByteCursor c : other.sparseProbabilisticStorage) {
-- final int registerIndex = c.key;
-- final byte registerValue = c.value;
-- final byte currentRegisterValue = sparseProbabilisticStorage.get(registerIndex);
-- if(registerValue > currentRegisterValue) {
-- sparseProbabilisticStorage.put(registerIndex, registerValue);
-- }
-- }
--
-- // promotion, if necessary
-- if(sparseProbabilisticStorage.size() > sparseThreshold) {
-- initializeStorage(HLLType.FULL);
-- for(IntByteCursor c : sparseProbabilisticStorage) {
-- final int registerIndex = c.key;
-- final byte registerValue = c.value;
-- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
-- }
-- sparseProbabilisticStorage = null;
-- }
-- return;
-- case FULL:
-- for(int i=0; inull or empty.
-- */
-- public byte[] toBytes() {
-- return toBytes(SerializationUtil.DEFAULT_SCHEMA_VERSION);
-- }
--
-- /**
-- * Serializes the HLL to an array of bytes in correspondence with the format
-- * of the specified schema version.
-- *
-- * @param schemaVersion the schema version dictating the serialization format
-- * @return the array of bytes representing the HLL. This will never be
-- * null
or empty.
-- */
-- public byte[] toBytes(final ISchemaVersion schemaVersion) {
-- final byte[] bytes;
-- switch(type) {
-- case EMPTY:
-- bytes = new byte[schemaVersion.paddingBytes(type)];
-- break;
-- case EXPLICIT: {
-- final IWordSerializer serializer =
-- schemaVersion.getSerializer(type, Long.SIZE, explicitStorage.size());
--
-- final long[] values = explicitStorage.toArray();
-- Arrays.sort(values);
-- for(final long value : values) {
-- serializer.writeWord(value);
-- }
--
-- bytes = serializer.getBytes();
-- break;
-- }
-- case SPARSE: {
-- final IWordSerializer serializer =
-- schemaVersion.getSerializer(type, shortWordLength, sparseProbabilisticStorage.size());
--
-- final int[] indices = sparseProbabilisticStorage.keys().toArray();
-- Arrays.sort(indices);
-- for(final int registerIndex : indices) {
-- assert sparseProbabilisticStorage.containsKey(registerIndex);
-- final long registerValue = sparseProbabilisticStorage.get(registerIndex);
-- // pack index and value into "short word"
-- final long shortWord = ((registerIndex << regwidth) | registerValue);
-- serializer.writeWord(shortWord);
-- }
--
-- bytes = serializer.getBytes();
-- break;
-- }
-- case FULL: {
-- final IWordSerializer serializer = schemaVersion.getSerializer(type, regwidth, m);
-- probabilisticStorage.getRegisterContents(serializer);
--
-- bytes = serializer.getBytes();
-- break;
-- }
-- default:
-- throw new RuntimeException("Unsupported HLL type " + type);
-- }
--
-- final IHLLMetadata metadata = new HLLMetadata(schemaVersion.schemaVersionNumber(),
-- type,
-- log2m,
-- regwidth,
-- (int)NumberUtil.log2(explicitThreshold),
-- explicitOff,
-- explicitAuto,
-- !sparseOff);
-- schemaVersion.writeMetadata(bytes, metadata);
--
-- return bytes;
-- }
--
-- /**
-- * Deserializes the HLL (in {@link #toBytes(ISchemaVersion)} format) serialized
-- * into bytes
.
-- *
-- * @param bytes the serialized bytes of new HLL
-- * @return the deserialized HLL. This will never be null
.
-- *
-- * @see #toBytes(ISchemaVersion)
-- */
-- public static HLL fromBytes(final byte[] bytes) {
-- final ISchemaVersion schemaVersion = SerializationUtil.getSchemaVersion(bytes);
-- final IHLLMetadata metadata = schemaVersion.readMetadata(bytes);
--
-- final HLLType type = metadata.HLLType();
-- final int regwidth = metadata.registerWidth();
-- final int log2m = metadata.registerCountLog2();
-- final boolean sparseon = metadata.sparseEnabled();
--
-- final int expthresh;
-- if(metadata.explicitAuto()) {
-- expthresh = -1;
-- } else if(metadata.explicitOff()) {
-- expthresh = 0;
-- } else {
-- // NOTE: take into account that the postgres-compatible constructor
-- // subtracts one before taking a power of two.
-- expthresh = metadata.log2ExplicitCutoff() + 1;
-- }
--
-- final HLL hll = new HLL(log2m, regwidth, expthresh, sparseon, type);
--
-- // Short-circuit on empty, which needs no other deserialization.
-- if(HLLType.EMPTY.equals(type)) {
-- return hll;
-- }
--
-- final int wordLength;
-- switch(type) {
-- case EXPLICIT:
-- wordLength = Long.SIZE;
-- break;
-- case SPARSE:
-- wordLength = hll.shortWordLength;
-- break;
-- case FULL:
-- wordLength = hll.regwidth;
-- break;
-- default:
-- throw new RuntimeException("Unsupported HLL type " + type);
-- }
--
-- final IWordDeserializer deserializer =
-- schemaVersion.getDeserializer(type, wordLength, bytes);
-- switch(type) {
-- case EXPLICIT:
-- // NOTE: This should not exceed expthresh and this will always
-- // be exactly the number of words that were encoded,
-- // because the word length is at least a byte wide.
-- // SEE: IWordDeserializer#totalWordCount()
-- for(int i=0; i>> hll.regwidth), registerValue);
-- }
-- }
-- break;
-- case FULL:
-- // NOTE: Iteration is done using m (register count) and NOT
-- // deserializer#totalWordCount() because regwidth may be
-- // less than 8 and as such the padding on the 'last' byte
-- // may be larger than regwidth, causing an extra register
-- // to be read.
-- // SEE: IWordDeserializer#totalWordCount()
-- for(long i=0; inull.
-- * @param registerCountLog2 the log-base-2 register count parameter for
-- * probabilistic HLLs. This must be greater than or equal to zero.
-- * @param registerWidth the register width parameter for probabilistic
-- * HLLs. This must be greater than or equal to zero.
-- * @param log2ExplicitCutoff the log-base-2 of the explicit cardinality cutoff,
-- * if it is explicitly defined. (If explicitOff
or
-- * explicitAuto
is true
then this has no
-- * meaning.)
-- * @param explicitOff the flag for 'explicit off'-mode, where the
-- * {@link HLLType#EXPLICIT} representation is not used. Both this and
-- * explicitAuto
cannot be true
at the same
-- * time.
-- * @param explicitAuto the flag for 'explicit auto'-mode, where the
-- * {@link HLLType#EXPLICIT} representation's promotion cutoff is
-- * determined based on in-memory size automatically. Both this and
-- * explicitOff
cannot be true
at the same
-- * time.
-- * @param sparseEnabled the flag for 'sparse-enabled'-mode, where the
-- * {@link HLLType#SPARSE} representation is used.
-- */
-- public HLLMetadata(final int schemaVersion,
-- final HLLType type,
-- final int registerCountLog2,
-- final int registerWidth,
-- final int log2ExplicitCutoff,
-- final boolean explicitOff,
-- final boolean explicitAuto,
-- final boolean sparseEnabled) {
-- this.schemaVersion = schemaVersion;
-- this.type = type;
-- this.registerCountLog2 = registerCountLog2;
-- this.registerWidth = registerWidth;
-- this.log2ExplicitCutoff = log2ExplicitCutoff;
-- this.explicitOff = explicitOff;
-- this.explicitAuto = explicitAuto;
-- this.sparseEnabled = sparseEnabled;
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#schemaVersion()
-- */
-- @Override
-- public int schemaVersion() { return schemaVersion; }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#HLLType()
-- */
-- @Override
-- public HLLType HLLType() { return type; }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#registerCountLog2()
-- */
-- @Override
-- public int registerCountLog2() { return registerCountLog2; }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#registerWidth()
-- */
-- @Override
-- public int registerWidth() { return registerWidth; }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
-- */
-- @Override
-- public int log2ExplicitCutoff() { return log2ExplicitCutoff; }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#explicitOff()
-- */
-- @Override
-- public boolean explicitOff() {
-- return explicitOff;
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#explicitAuto()
-- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
-- */
-- @Override
-- public boolean explicitAuto() {
-- return explicitAuto;
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.IHLLMetadata#sparseEnabled()
-- */
-- @Override
-- public boolean sparseEnabled() { return sparseEnabled; }
--
-- /* (non-Javadoc)
-- * @see java.lang.Object#toString()
-- */
-- @Override
-- public String toString() {
-- return "";
-- }
--}
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,29 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * The types of algorithm/data structure that {@link HLL} can utilize. For more
-- * information, see the Javadoc for {@link HLL}.
-- */
--public enum HLLType {
-- EMPTY,
-- EXPLICIT,
-- SPARSE,
-- FULL;
--}
-\ Manca newline alla fine del file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,199 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * Static functions for computing constants and parameters used in the HLL
-- * algorithm.
-- */
--final class HLLUtil {
-- /**
-- * Precomputed pwMaxMask
values indexed by registerSizeInBits
.
-- * Calculated with this formula:
-- *
-- * int maxRegisterValue = (1 << registerSizeInBits) - 1;
-- * // Mask with all bits set except for (maxRegisterValue - 1) least significant bits (see #addRaw())
-- * return ~((1L << (maxRegisterValue - 1)) - 1);
-- *
-- *
-- * @see #pwMaxMask(int)
-- */
-- private static final long[] PW_MASK = {
-- ~((1L << (((1 << 0) - 1) - 1)) - 1),
-- ~((1L << (((1 << 1) - 1) - 1)) - 1),
-- ~((1L << (((1 << 2) - 1) - 1)) - 1),
-- ~((1L << (((1 << 3) - 1) - 1)) - 1),
-- ~((1L << (((1 << 4) - 1) - 1)) - 1),
-- ~((1L << (((1 << 5) - 1) - 1)) - 1),
-- ~((1L << (((1 << 6) - 1) - 1)) - 1),
-- ~((1L << (((1 << 7) - 1) - 1)) - 1),
-- ~((1L << (((1 << 8) - 1) - 1)) - 1)
-- };
--
-- /**
-- * Precomputed twoToL
values indexed by a linear combination of
-- * regWidth
and log2m
.
-- *
-- * The array is one-dimensional and can be accessed by using index
-- * (REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m
-- * for regWidth
and log2m
between the specified
-- * HLL.{MINIMUM,MAXIMUM}_{REGWIDTH,LOG2M}_PARAM
constants.
-- *
-- * @see #largeEstimator(int, int, double)
-- * @see #largeEstimatorCutoff(int, int)
-- * @see "Blog post with section on 2^L"
-- */
-- private static final double[] TWO_TO_L = new double[(HLL.MAXIMUM_REGWIDTH_PARAM + 1) * (HLL.MAXIMUM_LOG2M_PARAM + 1)];
--
-- /**
-- * Spacing constant used to compute offsets into {@link #TWO_TO_L}.
-- */
-- private static final int REG_WIDTH_INDEX_MULTIPLIER = HLL.MAXIMUM_LOG2M_PARAM + 1;
--
-- static {
-- for(int regWidth = HLL.MINIMUM_REGWIDTH_PARAM; regWidth <= HLL.MAXIMUM_REGWIDTH_PARAM; regWidth++) {
-- for(int log2m = HLL.MINIMUM_LOG2M_PARAM ; log2m <= HLL.MAXIMUM_LOG2M_PARAM; log2m++) {
-- int maxRegisterValue = (1 << regWidth) - 1;
--
-- // Since 1 is added to p(w) in the insertion algorithm, only
-- // (maxRegisterValue - 1) bits are inspected hence the hash
-- // space is one power of two smaller.
-- final int pwBits = (maxRegisterValue - 1);
-- final int totalBits = (pwBits + log2m);
-- final double twoToL = Math.pow(2, totalBits);
-- TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m] = twoToL;
-- }
-- }
-- }
--
-- // ************************************************************************
-- /**
-- * Computes the bit-width of HLL registers necessary to estimate a set of
-- * the specified cardinality.
-- *
-- * @param expectedUniqueElements an upper bound on the number of unique
-- * elements that are expected. This must be greater than zero.
-- * @return a register size in bits (i.e. log2(log2(n))
)
-- */
-- public static int registerBitSize(final long expectedUniqueElements) {
-- return Math.max(HLL.MINIMUM_REGWIDTH_PARAM,
-- (int)Math.ceil(NumberUtil.log2(NumberUtil.log2(expectedUniqueElements))));
-- }
--
-- // ========================================================================
-- /**
-- * Computes the 'alpha-m-squared' constant used by the HyperLogLog algorithm.
-- *
-- * @param m this must be a power of two, cannot be less than
-- * 16 (24), and cannot be greater than 65536 (216).
-- * @return gamma times registerCount
squared where gamma is
-- * based on the value of registerCount
.
-- * @throws IllegalArgumentException if registerCount
is less
-- * than 16.
-- */
-- public static double alphaMSquared(final int m) {
-- switch(m) {
-- case 1/*2^0*/:
-- case 2/*2^1*/:
-- case 4/*2^2*/:
-- case 8/*2^3*/:
-- throw new IllegalArgumentException("'m' cannot be less than 16 (" + m + " < 16).");
--
-- case 16/*2^4*/:
-- return 0.673 * m * m;
--
-- case 32/*2^5*/:
-- return 0.697 * m * m;
--
-- case 64/*2^6*/:
-- return 0.709 * m * m;
--
-- default/*>2^6*/:
-- return (0.7213 / (1.0 + 1.079 / m)) * m * m;
-- }
-- }
--
-- // ========================================================================
-- /**
-- * Computes a mask that prevents overflow of HyperLogLog registers.
-- *
-- * @param registerSizeInBits the size of the HLL registers, in bits.
-- * @return mask a long
mask to prevent overflow of the registers
-- * @see #registerBitSize(long)
-- */
-- public static long pwMaxMask(final int registerSizeInBits) {
-- return PW_MASK[registerSizeInBits];
-- }
--
-- // ========================================================================
-- /**
-- * The cutoff for using the "small range correction" formula, in the
-- * HyperLogLog algorithm.
-- *
-- * @param m the number of registers in the HLL. m in the paper.
-- * @return the cutoff for the small range correction.
-- * @see #smallEstimator(int, int)
-- */
-- public static double smallEstimatorCutoff(final int m) {
-- return ((double)m * 5) / 2;
-- }
--
-- /**
-- * The "small range correction" formula from the HyperLogLog algorithm. Only
-- * appropriate if both the estimator is smaller than (5/2) * m
and
-- * there are still registers that have the zero value.
-- *
-- * @param m the number of registers in the HLL. m in the paper.
-- * @param numberOfZeroes the number of registers with value zero. V
-- * in the paper.
-- * @return a corrected cardinality estimate.
-- */
-- public static double smallEstimator(final int m, final int numberOfZeroes) {
-- return m * Math.log((double)m / numberOfZeroes);
-- }
--
-- /**
-- * The cutoff for using the "large range correction" formula, from the
-- * HyperLogLog algorithm, adapted for 64 bit hashes.
-- *
-- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
-- * @param registerSizeInBits the size of the HLL registers, in bits.
-- * @return the cutoff for the large range correction.
-- * @see #largeEstimator(int, int, double)
-- * @see "Blog post with section on 64 bit hashes and 'large range correction' cutoff"
-- */
-- public static double largeEstimatorCutoff(final int log2m, final int registerSizeInBits) {
-- return (TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]) / 30.0;
-- }
--
-- /**
-- * The "large range correction" formula from the HyperLogLog algorithm, adapted
-- * for 64 bit hashes. Only appropriate for estimators whose value exceeds
-- * the return of {@link #largeEstimatorCutoff(int, int)}.
-- *
-- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
-- * @param registerSizeInBits the size of the HLL registers, in bits.
-- * @param estimator the original estimator ("E" in the paper).
-- * @return a corrected cardinality estimate.
-- * @see "Blog post with section on 64 bit hashes and 'large range correction'"
-- */
-- public static double largeEstimator(final int log2m, final int registerSizeInBits, final double estimator) {
-- final double twoToL = TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m];
-- return -1 * twoToL * Math.log(1.0 - (estimator/twoToL));
-- }
--}
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,71 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * The metadata and parameters associated with a HLL.
-- */
--interface IHLLMetadata {
-- /**
-- * @return the schema version of the HLL. This will never be null
.
-- */
-- int schemaVersion();
--
-- /**
-- * @return the type of the HLL. This will never be null
.
-- */
-- HLLType HLLType();
--
-- /**
-- * @return the log-base-2 of the register count parameter of the HLL. This
-- * will always be greater than or equal to 4 and less than or equal
-- * to 31.
-- */
-- int registerCountLog2();
--
-- /**
-- * @return the register width parameter of the HLL. This will always be
-- * greater than or equal to 1 and less than or equal to 8.
-- */
-- int registerWidth();
--
-- /**
-- * @return the log-base-2 of the explicit cutoff cardinality. This will always
-- * be greater than or equal to zero and less than 31, per the specification.
-- */
-- int log2ExplicitCutoff();
--
-- /**
-- * @return true
if the {@link HLLType#EXPLICIT} representation
-- * has been disabled. false
otherwise.
-- */
-- boolean explicitOff();
--
-- /**
-- * @return true
if the {@link HLLType#EXPLICIT} representation
-- * cutoff cardinality is set to be automatically chosen,
-- * false
otherwise.
-- */
-- boolean explicitAuto();
--
-- /**
-- * @return true
if the {@link HLLType#SPARSE} representation
-- * is enabled.
-- */
-- boolean sparseEnabled();
--}
-\ Manca newline alla fine del file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 2015-07-16 13:22:50.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,85 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A serialization schema for HLLs. Reads and writes HLL metadata to
-- * and from byte[]
representations.
-- */
--interface ISchemaVersion {
-- /**
-- * The number of metadata bytes required for a serialized HLL of the
-- * specified type.
-- *
-- * @param type the type of the serialized HLL
-- * @return the number of padding bytes needed in order to fully accommodate
-- * the needed metadata.
-- */
-- int paddingBytes(HLLType type);
--
-- /**
-- * Writes metadata bytes to serialized HLL.
-- *
-- * @param bytes the padded data bytes of the HLL
-- * @param metadata the metadata to write to the padding bytes
-- */
-- void writeMetadata(byte[] bytes, IHLLMetadata metadata);
--
-- /**
-- * Reads the metadata bytes of the serialized HLL.
-- *
-- * @param bytes the serialized HLL
-- * @return the HLL metadata
-- */
-- IHLLMetadata readMetadata(byte[] bytes);
--
-- /**
-- * Builds an HLL serializer that matches this schema version.
-- *
-- * @param type the HLL type that will be serialized. This cannot be
-- * null
.
-- * @param wordLength the length of the 'words' that comprise the data of the
-- * HLL. Words must be at least 5 bits and at most 64 bits long.
-- * @param wordCount the number of 'words' in the HLL's data.
-- * @return a byte array serializer used to serialize a HLL according
-- * to this schema version's specification.
-- * @see #paddingBytes(HLLType)
-- * @see IWordSerializer
-- */
-- IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount);
--
-- /**
-- * Builds an HLL deserializer that matches this schema version.
-- *
-- * @param type the HLL type that will be deserialized. This cannot be
-- * null
.
-- * @param wordLength the length of the 'words' that comprise the data of the
-- * serialized HLL. Words must be at least 5 bits and at most 64
-- * bits long.
-- * @param bytes the serialized HLL to deserialize. This cannot be
-- * null
.
-- * @return a byte array deserializer used to deserialize a HLL serialized
-- * according to this schema version's specification.
-- */
-- IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes);
--
-- /**
-- * @return the schema version number.
-- */
-- int schemaVersionNumber();
--}
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 2015-07-16 13:14:59.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,41 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * Reads 'words' of a fixed width, in sequence, from a byte array.
-- */
--public interface IWordDeserializer {
-- /**
-- * @return the next word in the sequence. Should not be called more than
-- * {@link #totalWordCount()} times.
-- */
-- long readWord();
--
-- /**
-- * Returns the number of words that could be encoded in the sequence.
-- *
-- * NOTE: the sequence that was encoded may be shorter than the value this
-- * method returns due to padding issues within bytes. This guarantees
-- * only an upper bound on the number of times {@link #readWord()}
-- * can be called.
-- *
-- * @return the maximum number of words that could be read from the sequence.
-- */
-- int totalWordCount();
--}
-\ Manca newline alla fine del file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,39 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * Writes 'words' of fixed width, in sequence, to a byte array.
-- */
--interface IWordSerializer {
--
-- /**
-- * Writes the word to the backing array.
-- *
-- * @param word the word to write.
-- */
-- void writeWord(final long word);
--
-- /**
-- * Returns the backing array of byte
s that contain the serialized
-- * words.
-- * @return the serialized words as a byte[]
.
-- */
-- byte[] getBytes();
--
--}
-\ Manca newline alla fine del file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,35 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A long
-based iterator. This is not is-a {@link java.util.Iterator}
-- * to prevent autoboxing between Long
and long
.
-- */
--interface LongIterator {
-- /**
-- * @return true
if and only if there are more elements to
-- * iterate over. false
otherwise.
-- */
-- boolean hasNext();
--
-- /**
-- * @return the next long
in the collection.
-- */
-- long next();
--}
-\ Manca newline alla fine del file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,172 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A collection of utilities to work with numbers.
-- */
--class NumberUtil {
-- // loge(2) (log-base e of 2)
-- public static final double LOGE_2 = 0.6931471805599453;
--
-- // ************************************************************************
-- /**
-- * Computes the log2
(log-base-two) of the specified value.
-- *
-- * @param value the double
for which the log2
is
-- * desired.
-- * @return the log2
of the specified value
-- */
-- public static double log2(final double value) {
-- // REF: http://en.wikipedia.org/wiki/Logarithmic_scale (conversion of bases)
-- return Math.log(value) / LOGE_2;
-- }
--
-- // ========================================================================
-- // the hex characters
-- private static final char[] HEX = { '0', '1', '2', '3', '4', '5', '6', '7',
-- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
--
-- // ------------------------------------------------------------------------
-- /**
-- * Converts the specified array of byte
s into a string of
-- * hex characters (low byte
first).
-- *
-- * @param bytes the array of byte
s that are to be converted.
-- * This cannot be null
though it may be empty.
-- * @param offset the offset in bytes
at which the bytes will
-- * be taken. This cannot be negative and must be less than
-- * bytes.length - 1
.
-- * @param count the number of bytes to be retrieved from the specified array.
-- * This cannot be negative. If greater than bytes.length - offset
-- * then that value is used.
-- * @return a string of at most count
characters that represents
-- * the specified byte array in hex. This will never be null
-- * though it may be empty if bytes
is empty or count
-- * is zero.
-- * @throws IllegalArgumentException if offset
is greater than
-- * or equal to bytes.length
.
-- * @see #fromHex(String, int, int)
-- */
-- public static String toHex(final byte[] bytes, final int offset, final int count) {
-- if(offset >= bytes.length) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + bytes.length + ").")/*by contract*/;
-- final int byteCount = Math.min( (bytes.length - offset), count);
-- final int upperBound = byteCount + offset;
--
-- final char[] chars = new char[byteCount * 2/*two chars per byte*/];
-- int charIndex = 0;
-- for(int i=offset; i>> 4) & 0x0F];
-- chars[charIndex++] = HEX[value & 0x0F];
-- }
--
-- return new String(chars);
-- }
--
-- /**
-- * Converts the specified array of hex characters into an array of byte
s
-- * (low byte
first).
-- *
-- * @param string the string of hex characters to be converted into byte
s.
-- * This cannot be null
though it may be blank.
-- * @param offset the offset in the string at which the characters will be
-- * taken. This cannot be negative and must be less than string.length() - 1
.
-- * @param count the number of characters to be retrieved from the specified
-- * string. This cannot be negative and must be divisible by two
-- * (since there are two characters per byte
).
-- * @return the array of byte
s that were converted from the
-- * specified string (in the specified range). This will never be
-- * null
though it may be empty if string
-- * is empty or count
is zero.
-- * @throws IllegalArgumentException if offset
is greater than
-- * or equal to string.length()
or if count
-- * is not divisible by two.
-- * @see #toHex(byte[], int, int)
-- */
-- public static byte[] fromHex(final String string, final int offset, final int count) {
-- if(offset >= string.length()) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + string.length() + ").")/*by contract*/;
-- if( (count & 0x01) != 0) throw new IllegalArgumentException("Count is not divisible by two (" + count + ").")/*by contract*/;
-- final int charCount = Math.min((string.length() - offset), count);
-- final int upperBound = offset + charCount;
--
-- final byte[] bytes = new byte[charCount >>> 1/*aka /2*/];
-- int byteIndex = 0/*beginning*/;
-- for(int i=offset; ibyte.
-- * This cannot be a character other than [a-fA-F0-9].
-- * @return the value of the specified character. This will be a value 0
-- * through 15
.
-- * @throws IllegalArgumentException if the specified character is not in
-- * [a-fA-F0-9]
-- */
-- private static final int digit(final char character) {
-- switch(character) {
-- case '0':
-- return 0;
-- case '1':
-- return 1;
-- case '2':
-- return 2;
-- case '3':
-- return 3;
-- case '4':
-- return 4;
-- case '5':
-- return 5;
-- case '6':
-- return 6;
-- case '7':
-- return 7;
-- case '8':
-- return 8;
-- case '9':
-- return 9;
-- case 'a':
-- case 'A':
-- return 10;
-- case 'b':
-- case 'B':
-- return 11;
-- case 'c':
-- case 'C':
-- return 12;
-- case 'd':
-- case 'D':
-- return 13;
-- case 'e':
-- case 'E':
-- return 14;
-- case 'f':
-- case 'F':
-- return 15;
--
-- default:
-- throw new IllegalArgumentException("Character is not in [a-fA-F0-9] ('" + character + "').");
-- }
-- }
--}
-\ Manca newline alla fine del file
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,24 +0,0 @@
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A fork of Java-HyperLogLog package tweaked
-- * not to depend on fastutil and with cleanups to make it lean and clean.
-- */
--package org.apache.solr.util.hll;
--
--
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 2015-07-16 13:22:50.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,154 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A concrete {@link ISchemaVersion} representing schema version one.
-- */
--class SchemaVersionOne implements ISchemaVersion {
-- /**
-- * The schema version number for this instance.
-- */
-- public static final int SCHEMA_VERSION = 1;
--
-- // ------------------------------------------------------------------------
-- // Version-specific ordinals (array position) for each of the HLL types
-- private static final HLLType[] TYPE_ORDINALS = new HLLType[] {
-- HLLType.EMPTY,
-- HLLType.EXPLICIT,
-- HLLType.SPARSE,
-- HLLType.FULL
-- };
--
-- // ------------------------------------------------------------------------
-- // number of header bytes for all HLL types
-- private static final int HEADER_BYTE_COUNT = 3;
--
-- // sentinel values from the spec for explicit off and auto
-- private static final int EXPLICIT_OFF = 0;
-- private static final int EXPLICIT_AUTO = 63;
--
-- // ************************************************************************
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.ISchemaVersion#paddingBytes(HLLType)
-- */
-- @Override
-- public int paddingBytes(final HLLType type) {
-- return HEADER_BYTE_COUNT;
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.ISchemaVersion#writeMetadata(byte[], IHLLMetadata)
-- */
-- @Override
-- public void writeMetadata(final byte[] bytes, final IHLLMetadata metadata) {
-- final HLLType type = metadata.HLLType();
-- final int typeOrdinal = getOrdinal(type);
--
-- final int explicitCutoffValue;
-- if(metadata.explicitOff()) {
-- explicitCutoffValue = EXPLICIT_OFF;
-- } else if(metadata.explicitAuto()) {
-- explicitCutoffValue = EXPLICIT_AUTO;
-- } else {
-- explicitCutoffValue = metadata.log2ExplicitCutoff() + 1/*per spec*/;
-- }
--
-- bytes[0] = SerializationUtil.packVersionByte(SCHEMA_VERSION, typeOrdinal);
-- bytes[1] = SerializationUtil.packParametersByte(metadata.registerWidth(), metadata.registerCountLog2());
-- bytes[2] = SerializationUtil.packCutoffByte(explicitCutoffValue, metadata.sparseEnabled());
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.ISchemaVersion#readMetadata(byte[])
-- */
-- @Override
-- public IHLLMetadata readMetadata(final byte[] bytes) {
-- final byte versionByte = bytes[0];
-- final byte parametersByte = bytes[1];
-- final byte cutoffByte = bytes[2];
--
-- final int typeOrdinal = SerializationUtil.typeOrdinal(versionByte);
-- final int explicitCutoffValue = SerializationUtil.explicitCutoff(cutoffByte);
-- final boolean explicitOff = (explicitCutoffValue == EXPLICIT_OFF);
-- final boolean explicitAuto = (explicitCutoffValue == EXPLICIT_AUTO);
-- final int log2ExplicitCutoff = (explicitOff || explicitAuto) ? -1/*sentinel*/ : (explicitCutoffValue - 1/*per spec*/);
--
-- return new HLLMetadata(SCHEMA_VERSION,
-- getType(typeOrdinal),
-- SerializationUtil.registerCountLog2(parametersByte),
-- SerializationUtil.registerWidth(parametersByte),
-- log2ExplicitCutoff,
-- explicitOff,
-- explicitAuto,
-- SerializationUtil.sparseEnabled(cutoffByte));
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.ISchemaVersion#getSerializer(HLLType, int, int)
-- */
-- @Override
-- public IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount) {
-- return new BigEndianAscendingWordSerializer(wordLength, wordCount, paddingBytes(type));
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.ISchemaVersion#getDeserializer(HLLType, int, byte[])
-- */
-- @Override
-- public IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes) {
-- return new BigEndianAscendingWordDeserializer(wordLength, paddingBytes(type), bytes);
-- }
--
-- /* (non-Javadoc)
-- * @see net.agkn.hll.serialization.ISchemaVersion#schemaVersionNumber()
-- */
-- @Override
-- public int schemaVersionNumber() {
-- return SCHEMA_VERSION;
-- }
--
-- // ========================================================================
-- // Type/Ordinal lookups
-- /**
-- * Gets the ordinal for the specified {@link HLLType}.
-- *
-- * @param type the type whose ordinal is desired
-- * @return the ordinal for the specified type, to be used in the version byte.
-- * This will always be non-negative.
-- */
-- private static int getOrdinal(final HLLType type) {
-- for(int i=0; inull.
-- */
-- private static HLLType getType(final int ordinal) {
-- if((ordinal < 0) || (ordinal >= TYPE_ORDINALS.length)) {
-- throw new IllegalArgumentException("Invalid type ordinal '" + ordinal + "'. Only 0-" + (TYPE_ORDINALS.length - 1) + " inclusive allowed.");
-- }
-- return TYPE_ORDINALS[ordinal];
-- }
--}
-diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java
---- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 2015-07-16 12:32:07.000000000 +0200
-+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 1970-01-01 01:00:00.000000000 +0100
-@@ -1,277 +0,0 @@
--package org.apache.solr.util.hll;
--
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--
--/**
-- * A collection of constants and utilities for serializing and deserializing
-- * HLLs.
-- *
-- * NOTE: 'package' visibility is used for many methods that only need to be
-- * used by the {@link ISchemaVersion} implementations. The structure of
-- * a serialized HLL's metadata should be opaque to the rest of the
-- * library.
-- */
--class SerializationUtil {
-- /**
-- * The number of bits (of the parameters byte) dedicated to encoding the
-- * width of the registers.
-- */
-- /*package*/ static int REGISTER_WIDTH_BITS = 3;
--
-- /**
-- * A mask to cap the maximum value of the register width.
-- */
-- /*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1;
--
-- /**
-- * The number of bits (of the parameters byte) dedicated to encoding
-- * log2(registerCount)
.
-- */
-- /*package*/ static int LOG2_REGISTER_COUNT_BITS = 5;
--
-- /**
-- * A mask to cap the maximum value of log2(registerCount)
.
-- */
-- /*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1;
--
-- /**
-- * The number of bits (of the cutoff byte) dedicated to encoding the
-- * log-base-2 of the explicit cutoff or sentinel values for
-- * 'explicit-disabled' or 'auto'.
-- */
-- /*package*/ static int EXPLICIT_CUTOFF_BITS = 6;
--
-- /**
-- * A mask to cap the maximum value of the explicit cutoff choice.
-- */
-- /*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1;
--
-- /**
-- * Number of bits in a nibble.
-- */
-- private static int NIBBLE_BITS = 4;
--
-- /**
-- * A mask to cap the maximum value of a nibble.
-- */
-- private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1;
--
-- // ************************************************************************
-- // Serialization utilities
--
-- /**
-- * Schema version one (v1).
-- */
-- public static ISchemaVersion VERSION_ONE = new SchemaVersionOne();
--
-- /**
-- * The default schema version for serializing HLLs.
-- */
-- public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE;
--
-- /**
-- * List of registered schema versions, indexed by their version numbers. If
-- * an entry is null
, then no such schema version is registered.
-- * Similarly, registering a new schema version simply entails assigning an
-- * {@link ISchemaVersion} instance to the appropriate index of this array.
-- *
-- * By default, only {@link SchemaVersionOne} is registered. Note that version
-- * zero will always be reserved for internal (e.g. proprietary, legacy) schema
-- * specifications/implementations and will never be assigned to in by this
-- * library.
-- */
-- public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16];
--
-- static {
-- REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE;
-- }
--
-- /**
-- * @param schemaVersionNumber the version number of the {@link ISchemaVersion}
-- * desired. This must be a registered schema version number.
-- * @return The {@link ISchemaVersion} for the given number. This will never
-- * be null
.
-- */
-- public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) {
-- if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) {
-- throw new RuntimeException("Invalid schema version number " + schemaVersionNumber);
-- }
-- final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber];
-- if(schemaVersion == null) {
-- throw new RuntimeException("Unknown schema version number " + schemaVersionNumber);
-- }
-- return schemaVersion;
-- }
--
-- /**
-- * Get the appropriate {@link ISchemaVersion schema version} for the specified
-- * serialized HLL.
-- *
-- * @param bytes the serialized HLL whose schema version is desired.
-- * @return the schema version for the specified HLL. This will never
-- * be null
.
-- */
-- public static ISchemaVersion getSchemaVersion(final byte[] bytes) {
-- final byte versionByte = bytes[0];
-- final int schemaVersionNumber = schemaVersion(versionByte);
--
-- return getSchemaVersion(schemaVersionNumber);
-- }
--
-- // ************************************************************************
-- // Package-specific shared helpers
--
-- /**
-- * Generates a byte that encodes the schema version and the type ordinal
-- * of the HLL.
-- *
-- * The top nibble is the schema version and the bottom nibble is the type
-- * ordinal.
-- *
-- * @param schemaVersion the schema version to encode.
-- * @param typeOrdinal the type ordinal of the HLL to encode.
-- * @return the packed version byte
-- */
-- public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) {
-- return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal));
-- }
-- /**
-- * Generates a byte that encodes the log-base-2 of the explicit cutoff
-- * or sentinel values for 'explicit-disabled' or 'auto', as well as the
-- * boolean indicating whether to use {@link HLLType#SPARSE}
-- * in the promotion hierarchy.
-- *
-- * The top bit is always padding, the second highest bit indicates the
-- * 'sparse-enabled' boolean, and the lowest six bits encode the explicit
-- * cutoff value.
-- *
-- * @param explicitCutoff the explicit cutoff value to encode.
-- *
-- * -
-- * If 'explicit-disabled' is chosen, this value should be
0
.
-- *
-- * -
-- * If 'auto' is chosen, this value should be
63
.
-- *
-- * -
-- * If a cutoff of 2n is desired, for
0 <= n < 31
,
-- * this value should be n + 1
.
-- *
-- *
-- * @param sparseEnabled whether {@link HLLType#SPARSE}
-- * should be used in the promotion hierarchy to improve HLL
-- * storage.
-- *
-- * @return the packed cutoff byte
-- */
-- public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) {
-- final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0);
-- return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff));
-- }
--
-- /**
-- * Generates a byte that encodes the parameters of a
-- * {@link HLLType#FULL} or {@link HLLType#SPARSE}
-- * HLL.
-- *
-- * The top 3 bits are used to encode registerWidth - 1
-- * (range of registerWidth
is thus 1-9) and the bottom 5
-- * bits are used to encode registerCountLog2
-- * (range of registerCountLog2
is thus 0-31).
-- *
-- * @param registerWidth the register width (must be at least 1 and at
-- * most 9)
-- * @param registerCountLog2 the log-base-2 of the register count (must
-- * be at least 0 and at most 31)
-- * @return the packed parameters byte
-- */
-- public static byte packParametersByte(final int registerWidth, final int registerCountLog2) {
-- final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK);
-- final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK);
-- return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits);
-- }
--
-- /**
-- * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized
-- * HLL.
-- *
-- * @param cutoffByte the cutoff byte of the serialized HLL
-- * @return the 'sparse-enabled' boolean
-- */
-- public static boolean sparseEnabled(final byte cutoffByte) {
-- return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1;
-- }
--
-- /**
-- * Extracts the explicit cutoff value from the cutoff byte of a serialized
-- * HLL.
-- *
-- * @param cutoffByte the cutoff byte of the serialized HLL
-- * @return the explicit cutoff value
-- */
-- public static int explicitCutoff(final byte cutoffByte) {
-- return (cutoffByte & EXPLICIT_CUTOFF_MASK);
-- }
--
-- /**
-- * Extracts the schema version from the version byte of a serialized
-- * HLL.
-- *
-- * @param versionByte the version byte of the serialized HLL
-- * @return the schema version of the serialized HLL
-- */
-- public static int schemaVersion(final byte versionByte) {
-- return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS);
-- }
--
-- /**
-- * Extracts the type ordinal from the version byte of a serialized HLL.
-- *
-- * @param versionByte the version byte of the serialized HLL
-- * @return the type ordinal of the serialized HLL
-- */
-- public static int typeOrdinal(final byte versionByte) {
-- return (versionByte & NIBBLE_MASK);
-- }
--
-- /**
-- * Extracts the register width from the parameters byte of a serialized
-- * {@link HLLType#FULL} HLL.
-- *
-- * @param parametersByte the parameters byte of the serialized HLL
-- * @return the register width of the serialized HLL
-- *
-- * @see #packParametersByte(int, int)
-- */
-- public static int registerWidth(final byte parametersByte) {
-- return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1;
-- }
--
-- /**
-- * Extracts the log2(registerCount) from the parameters byte of a
-- * serialized {@link HLLType#FULL} HLL.
-- *
-- * @param parametersByte the parameters byte of the serialized HLL
-- * @return log2(registerCount) of the serialized HLL
-- *
-- * @see #packParametersByte(int, int)
-- */
-- public static int registerCountLog2(final byte parametersByte) {
-- return (parametersByte & LOG2_REGISTER_COUNT_MASK);
-- }
--}
diff --git a/solr.spec b/solr.spec
index de8c3e1..39de3ff 100644
--- a/solr.spec
+++ b/solr.spec
@@ -9,7 +9,7 @@
%endif
Name: solr
-Version: 5.3.1
+Version: 5.4.0
Release: 1%{?dist}
Summary: Ultra-fast Lucene-based Search Server
# MIT/X11 (BSD like) solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/AlphaNumericComparator.java
@@ -18,8 +18,7 @@ URL: http://lucene.apache.org/solr/
# Use solr-repack.sh
Source0: %{name}-%{version}-clean.tar.xz
Source2: solr-repack.sh
-Patch0: solr-5.3.0-use-system-hll.patch
-Patch1: solr-5.3.0-jetty9.3.3.patch
+Patch0: solr-5.3.0-jetty9.3.3.patch
BuildRequires: maven-local
BuildRequires: mvn(com.adobe.xmp:xmpcore)
@@ -60,7 +59,7 @@ BuildRequires: mvn(log4j:log4j:1.2.17)
BuildRequires: mvn(net.agkn:hll)
BuildRequires: mvn(net.arnx:jsonic)
BuildRequires: mvn(net.sourceforge.jmatio:jmatio)
-BuildRequires: mvn(org.antlr:antlr-runtime)
+BuildRequires: mvn(org.antlr:antlr4-runtime)
BuildRequires: mvn(org.apache:apache:pom:)
BuildRequires: mvn(org.apache.ant:ant)
BuildRequires: mvn(org.apache.commons:commons-exec)
@@ -279,9 +278,12 @@ find . -name "*.class" -print -delete
find . -name "*.jar" -print -delete
find . -name "*.js" -print -delete
-%patch0 -p1
-rm -rf solr/core/src/java/org/apache/solr/util/hll
+rm -r solr/core/src/java/org/apache/solr/util/hll
%pom_add_dep net.agkn:hll:1.6.0 solr/core
+find ./solr -name "*.java" -exec sed -i "s/org.apache.solr.util.hll/net.agkn.hll/g" {} +
+
+sed -i "s|return hasher.hashString(v).asLong();|return hasher.hashString(v, null).asLong();|" \
+ solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
%if %{?fedora} >= 23
%patch1 -p1
@@ -447,6 +449,9 @@ sed -i "s|conf.addResource(TEST_CONF);||" \
%license LICENSE.txt NOTICE.txt
%changelog
+* Sat Jan 23 2016 gil cattaneo 5.4.0-1
+- update to 5.4.0
+
* Tue Oct 06 2015 gil cattaneo 5.3.1-1
- update to 5.3.1
diff --git a/sources b/sources
index b351065..a3ceefd 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-060bea064bd392557f7bf384f4a44480 solr-5.3.1-clean.tar.xz
+37eadf14e17e2999832fb921783e9c5f solr-5.4.0-clean.tar.xz