>= (BITS_PER_BYTE - lastByteBitsToConsume);
+- value <<= lastByteBitsToConsume;
+- value |= lastByte;
+- return value;
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount()
+- */
+- @Override
+- public int totalWordCount() {
+- return wordCount;
+- }
+-}
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,174 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A serializer that writes a sequence of fixed bit-width 'words' to a byte array.
+- * Bitwise OR is used to write words into bytes, so a low bit in a word is also
+- * a low bit in a byte. However, a high byte in a word is written at a lower index
+- * in the array than a low byte in a word. The first word is written at the lowest
+- * array index. Each serializer is one time use and returns its backing byte
+- * array.
+- *
+- * This encoding was chosen so that when reading bytes as octets in the typical
+- * first-octet-is-the-high-nibble fashion, an octet-to-binary conversion
+- * would yield a high-to-low, left-to-right view of the "short words".
+- *
+- * Example:
+- *
+- * Say short words are 5 bits wide. Our word sequence is the values
+- * [31, 1, 5]. In big-endian binary format, the values are
+- * [0b11111, 0b00001, 0b00101]. We use 15 of 16 bits in two bytes
+- * and pad the last (lowest) bit of the last byte with a zero:
+- *
+- *
+- * [0b11111000, 0b01001010] = [0xF8, 0x4A].
+- *
+- */
+-class BigEndianAscendingWordSerializer implements IWordSerializer {
+- // The number of bits per byte.
+- private static final int BITS_PER_BYTE = 8;
+-
+- // ************************************************************************
+- // The length in bits of the words to be written.
+- private final int wordLength;
+- // The number of words to be written.
+- private final int wordCount;
+-
+- // The byte array to which the words are serialized.
+- private final byte[] bytes;
+-
+- // ------------------------------------------------------------------------
+- // Write state
+- // Number of bits that remain writable in the current byte.
+- private int bitsLeftInByte;
+- // Index of byte currently being written to.
+- private int byteIndex;
+- // Number of words written.
+- private int wordsWritten;
+-
+- // ========================================================================
+- /**
+- * @param wordLength the length in bits of the words to be serialized. Must
+- * be greater than or equal to 1 and less than or equal to 64.
+- * @param wordCount the number of words to be serialized. Must be greater than
+- * or equal to zero.
+- * @param bytePadding the number of leading bytes that should pad the
+- * serialized words. Must be greater than or equal to zero.
+- */
+- public BigEndianAscendingWordSerializer(final int wordLength, final int wordCount, final int bytePadding) {
+- if((wordLength < 1) || (wordLength > 64)) {
+- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")");
+- }
+- if(wordCount < 0) {
+- throw new IllegalArgumentException("Word count must be >= 0. (was: " + wordCount + ")");
+- }
+- if(bytePadding < 0) {
+- throw new IllegalArgumentException("Byte padding must be must be >= 0. (was: " + bytePadding + ")");
+- }
+-
+- this.wordLength = wordLength;
+- this.wordCount = wordCount;
+-
+- final long bitsRequired = (wordLength * wordCount);
+- final boolean leftoverBits = ((bitsRequired % BITS_PER_BYTE) != 0);
+- final int bytesRequired = (int)(bitsRequired / BITS_PER_BYTE) + (leftoverBits ? 1 : 0) + bytePadding;
+- bytes = new byte[bytesRequired];
+-
+- bitsLeftInByte = BITS_PER_BYTE;
+- byteIndex = bytePadding;
+- wordsWritten = 0;
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IWordSerializer#writeWord(long)
+- * @throws RuntimeException if the number of words written is greater than the
+- * wordCount parameter in the constructor.
+- */
+- @Override
+- public void writeWord(final long word) {
+- if(wordsWritten == wordCount) {
+- throw new RuntimeException("Cannot write more words, backing array full!");
+- }
+-
+- int bitsLeftInWord = wordLength;
+-
+- while(bitsLeftInWord > 0) {
+- // Move to the next byte if the current one is fully packed.
+- if(bitsLeftInByte == 0) {
+- byteIndex++;
+- bitsLeftInByte = BITS_PER_BYTE;
+- }
+-
+- final long consumedMask;
+- if(bitsLeftInWord == 64) {
+- consumedMask = ~0L;
+- } else {
+- consumedMask = ((1L << bitsLeftInWord) - 1L);
+- }
+-
+- // Fix how many bits will be written in this cycle. Choose the
+- // smaller of the remaining bits in the word or byte.
+- final int numberOfBitsToWrite = Math.min(bitsLeftInByte, bitsLeftInWord);
+- final int bitsInByteRemainingAfterWrite = (bitsLeftInByte - numberOfBitsToWrite);
+-
+- // In general, we write the highest bits of the word first, so we
+- // strip the highest bits that were consumed in previous cycles.
+- final long remainingBitsOfWordToWrite = (word & consumedMask);
+-
+- final long bitsThatTheByteCanAccept;
+- // If there is more left in the word than can be written to this
+- // byte, shift off the bits that can't be written off the bottom.
+- if(bitsLeftInWord > numberOfBitsToWrite) {
+- bitsThatTheByteCanAccept = (remainingBitsOfWordToWrite >>> (bitsLeftInWord - bitsLeftInByte));
+- } else {
+- // If the byte can accept all remaining bits, there is no need
+- // to shift off the bits that won't be written in this cycle.
+- bitsThatTheByteCanAccept = remainingBitsOfWordToWrite;
+- }
+-
+- // Align the word bits to write up against the byte bits that have
+- // already been written. This shift may do nothing if the remainder
+- // of the byte is being consumed in this cycle.
+- final long alignedBits = (bitsThatTheByteCanAccept << bitsInByteRemainingAfterWrite);
+-
+- // Update the byte with the alignedBits.
+- bytes[byteIndex] |= (byte)alignedBits;
+-
+- // Update state with bit count written.
+- bitsLeftInWord -= numberOfBitsToWrite;
+- bitsLeftInByte = bitsInByteRemainingAfterWrite;
+- }
+-
+- wordsWritten ++;
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IWordSerializer#getBytes()
+- * @throws RuntimeException if the number of words written is fewer than the
+- * wordCount parameter in the constructor.
+- */
+- @Override
+- public byte[] getBytes() {
+- if(wordsWritten < wordCount) {
+- throw new RuntimeException("Not all words have been written! (" + wordsWritten + "/" + wordCount + ")");
+- }
+-
+- return bytes;
+- }
+-}
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,71 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A collection of bit utilities.
+- */
+-class BitUtil {
+- /**
+- * The set of least-significant bits for a given byte. -1
+- * is used if no bits are set (so as to not be confused with "index of zero"
+- * meaning that the least significant bit is the 0th (1st) bit).
+- *
+- * @see #leastSignificantBit(long)
+- */
+- private static final int[] LEAST_SIGNIFICANT_BIT = {
+- -1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+- };
+-
+- /**
+- * Computes the least-significant bit of the specified long
+- * that is set to 1. Zero-indexed.
+- *
+- * @param value the long whose least-significant bit is desired.
+- * @return the least-significant bit of the specified long.
+- * -1 is returned if there are no bits set.
+- */
+- // REF: http://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
+- // REF: http://www-graphics.stanford.edu/~seander/bithacks.html
+- public static int leastSignificantBit(final long value) {
+- if(value == 0L) return -1/*by contract*/;
+- if((value & 0xFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 0) & 0xFF)] + 0;
+- if((value & 0xFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 8) & 0xFF)] + 8;
+- if((value & 0xFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 16) & 0xFF)] + 16;
+- if((value & 0xFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 24) & 0xFF)] + 24;
+- if((value & 0xFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 32) & 0xFF)] + 32;
+- if((value & 0xFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 40) & 0xFF)] + 40;
+- if((value & 0xFFFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 48) & 0xFF)] + 48;
+- return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 56) & 0xFFL)] + 56;
+- }
+-}
+\ No newline at end of file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,259 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A vector (array) of bits that is accessed in units ("registers") of width
+- * bits which are stored as 64bit "words" (longs). In this context
+- * a register is at most 64bits.
+- */
+-class BitVector implements Cloneable {
+- // NOTE: in this context, a word is 64bits
+-
+- // rather than doing division to determine how a bit index fits into 64bit
+- // words (i.e. longs), bit shifting is used
+- private static final int LOG2_BITS_PER_WORD = 6/*=>64bits*/;
+- private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD;
+- private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1;
+-
+- // ditto from above but for bytes (for output)
+- private static final int LOG2_BITS_PER_BYTE = 3/*=>8bits*/;
+- public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE;
+-
+- // ========================================================================
+- public static final int BYTES_PER_WORD = 8/*8 bytes in a long*/;
+-
+- // ************************************************************************
+- // 64bit words
+- private final long[] words;
+- public final long[] words() { return words; }
+- public final int wordCount() { return words.length; }
+- public final int byteCount() { return wordCount() * BYTES_PER_WORD; }
+-
+- // the width of a register in bits (this cannot be more than 64 (the word size))
+- private final int registerWidth;
+- public final int registerWidth() { return registerWidth; }
+-
+- private final long count;
+-
+- // ------------------------------------------------------------------------
+- private final long registerMask;
+-
+- // ========================================================================
+- /**
+- * @param width the width of each register. This cannot be negative or
+- * zero or greater than 63 (the signed word size).
+- * @param count the number of registers. This cannot be negative or zero
+- */
+- public BitVector(final int width, final long count) {
+- // ceil((width * count)/BITS_PER_WORD)
+- this.words = new long[(int)(((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)];
+- this.registerWidth = width;
+- this.count = count;
+-
+- this.registerMask = (1L << width) - 1;
+- }
+-
+- // ========================================================================
+- /**
+- * @param registerIndex the index of the register whose value is to be
+- * retrieved. This cannot be negative.
+- * @return the value at the specified register index
+- * @see #setRegister(long, long)
+- * @see #setMaxRegister(long, long)
+- */
+- // NOTE: if this changes then setMaxRegister() must change
+- public long getRegister(final long registerIndex) {
+- final long bitIndex = registerIndex * registerWidth;
+- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
+- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
+- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
+-
+- if(firstWordIndex == secondWordIndex)
+- return ((words[firstWordIndex] >>> bitRemainder) & registerMask);
+- /* else -- register spans words */
+- return (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
+- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
+- }
+-
+- /**
+- * @param registerIndex the index of the register whose value is to be set.
+- * This cannot be negative
+- * @param value the value to set in the register
+- * @see #getRegister(long)
+- * @see #setMaxRegister(long, long)
+- */
+- // NOTE: if this changes then setMaxRegister() must change
+- public void setRegister(final long registerIndex, final long value) {
+- final long bitIndex = registerIndex * registerWidth;
+- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
+- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
+- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
+-
+- final long words[] = this.words/*for convenience/performance*/;
+- if(firstWordIndex == secondWordIndex) {
+- // clear then set
+- words[firstWordIndex] &= ~(registerMask << bitRemainder);
+- words[firstWordIndex] |= (value << bitRemainder);
+- } else {/*register spans words*/
+- // clear then set each partial word
+- words[firstWordIndex] &= (1L << bitRemainder) - 1;
+- words[firstWordIndex] |= (value << bitRemainder);
+-
+- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
+- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
+- }
+- }
+-
+- // ------------------------------------------------------------------------
+- /**
+- * @return a LongIterator for iterating starting at the register
+- * with index zero. This will never be null.
+- */
+- public LongIterator registerIterator() {
+- return new LongIterator() {
+- final int registerWidth = BitVector.this.registerWidth;
+- final long[] words = BitVector.this.words;
+- final long registerMask = BitVector.this.registerMask;
+-
+- // register setup
+- long registerIndex = 0;
+- int wordIndex = 0;
+- int remainingWordBits = BITS_PER_WORD;
+- long word = words[wordIndex];
+-
+- @Override public long next() {
+- long register;
+- if(remainingWordBits >= registerWidth) {
+- register = word & registerMask;
+-
+- // shift to the next register
+- word >>>= registerWidth;
+- remainingWordBits -= registerWidth;
+- } else { /*insufficient bits remaining in current word*/
+- wordIndex++/*move to the next word*/;
+-
+- register = (word | (words[wordIndex] << remainingWordBits)) & registerMask;
+-
+- // shift to the next partial register (word)
+- word = words[wordIndex] >>> (registerWidth - remainingWordBits);
+- remainingWordBits += BITS_PER_WORD - registerWidth;
+- }
+- registerIndex++;
+- return register;
+- }
+-
+- @Override public boolean hasNext() {
+- return registerIndex < count;
+- }
+- };
+- }
+-
+- // ------------------------------------------------------------------------
+- // composite accessors
+- /**
+- * Sets the value of the specified index register if and only if the specified
+- * value is greater than the current value in the register. This is equivalent
+- * to but much more performant than:
+- *
+- * vector.setRegister(index, Math.max(vector.getRegister(index), value));
+- *
+- * @param registerIndex the index of the register whose value is to be set.
+- * This cannot be negative
+- * @param value the value to set in the register if and only if this value
+- * is greater than the current value in the register
+- * @return true if and only if the specified value is greater
+- * than or equal to the current register value. false
+- * otherwise.
+- * @see #getRegister(long)
+- * @see #setRegister(long, long)
+- * @see java.lang.Math#max(long, long)
+- */
+- // NOTE: if this changes then setRegister() must change
+- public boolean setMaxRegister(final long registerIndex, final long value) {
+- final long bitIndex = registerIndex * registerWidth;
+- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
+- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
+- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
+-
+- // NOTE: matches getRegister()
+- final long registerValue;
+- final long words[] = this.words/*for convenience/performance*/;
+- if(firstWordIndex == secondWordIndex)
+- registerValue = ((words[firstWordIndex] >>> bitRemainder) & registerMask);
+- else /*register spans words*/
+- registerValue = (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
+- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
+-
+- // determine which is the larger and update as necessary
+- if(value > registerValue) {
+- // NOTE: matches setRegister()
+- if(firstWordIndex == secondWordIndex) {
+- // clear then set
+- words[firstWordIndex] &= ~(registerMask << bitRemainder);
+- words[firstWordIndex] |= (value << bitRemainder);
+- } else {/*register spans words*/
+- // clear then set each partial word
+- words[firstWordIndex] &= (1L << bitRemainder) - 1;
+- words[firstWordIndex] |= (value << bitRemainder);
+-
+- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
+- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
+- }
+- } /* else -- the register value is greater (or equal) so nothing needs to be done */
+-
+- return (value >= registerValue);
+- }
+-
+- // ========================================================================
+- /**
+- * Fills this bit vector with the specified bit value. This can be used to
+- * clear the vector by specifying 0.
+- *
+- * @param value the value to set all bits to (only the lowest bit is used)
+- */
+- public void fill(final long value) {
+- for(long i=0; i&lt;count; i++) {
+- setRegister(i, value);
+- }
+- }
+-
+- // ------------------------------------------------------------------------
+- /**
+- * Serializes the registers of the vector using the specified serializer.
+- *
+- * @param serializer the serializer to use. This cannot be null.
+- */
+- public void getRegisterContents(final IWordSerializer serializer) {
+- for(final LongIterator iter = registerIterator(); iter.hasNext();) {
+- serializer.writeWord(iter.next());
+- }
+- }
+-
+- /**
+- * Creates a deep copy of this vector.
+- *
+- * @see java.lang.Object#clone()
+- */
+- @Override
+- public BitVector clone() {
+- final BitVector copy = new BitVector(registerWidth, count);
+- System.arraycopy(words, 0, copy.words, 0, words.length);
+- return copy;
+- }
+-}
+\ No newline at end of file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java 2015-07-16 13:14:59.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,1071 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-import java.util.Arrays;
+-
+-import com.carrotsearch.hppc.IntByteOpenHashMap;
+-import com.carrotsearch.hppc.LongOpenHashSet;
+-import com.carrotsearch.hppc.cursors.IntByteCursor;
+-import com.carrotsearch.hppc.cursors.LongCursor;
+-
+-/**
+- * A probabilistic set of hashed long elements. Useful for computing
+- * the approximate cardinality of a stream of data in very small storage.
+- *
+- * A modified version of the
+- * 'HyperLogLog' data structure and algorithm is used, which combines both
+- * probabilistic and non-probabilistic techniques to improve the accuracy and
+- * storage requirements of the original algorithm.
+- *
+- * More specifically, initializing and storing a new {@link HLL} will
+- * allocate a sentinel value symbolizing the empty set ({@link HLLType#EMPTY}).
+- * After adding the first few values, a sorted list of unique integers is
+- * stored in a {@link HLLType#EXPLICIT} hash set. When configured, accuracy can
+- * be sacrificed for memory footprint: the values in the sorted list are
+- * "promoted" to a "{@link HLLType#SPARSE}" map-based HyperLogLog structure.
+- * Finally, when enough registers are set, the map-based HLL will be converted
+- * to a bit-packed "{@link HLLType#FULL}" HyperLogLog structure.
+- *
+- * This data structure is interoperable with the implementations found at:
+- *
+- * when properly serialized.
+- */
+-public class HLL implements Cloneable {
+- // minimum and maximum values for the log-base-2 of the number of registers
+- // in the HLL
+- public static final int MINIMUM_LOG2M_PARAM = 4;
+- public static final int MAXIMUM_LOG2M_PARAM = 30;
+-
+- // minimum and maximum values for the register width of the HLL
+- public static final int MINIMUM_REGWIDTH_PARAM = 1;
+- public static final int MAXIMUM_REGWIDTH_PARAM = 8;
+-
+- // minimum and maximum values for the 'expthresh' parameter of the
+- // constructor that is meant to match the PostgreSQL implementation's
+- // constructor and parameter names
+- public static final int MINIMUM_EXPTHRESH_PARAM = -1;
+- public static final int MAXIMUM_EXPTHRESH_PARAM = 18;
+- public static final int MAXIMUM_EXPLICIT_THRESHOLD = (1 << (MAXIMUM_EXPTHRESH_PARAM - 1)/*per storage spec*/);
+-
+- // ************************************************************************
+- // Storage
+- // storage used when #type is EXPLICIT, null otherwise
+- LongOpenHashSet explicitStorage;
+- // storage used when #type is SPARSE, null otherwise
+- IntByteOpenHashMap sparseProbabilisticStorage;
+- // storage used when #type is FULL, null otherwise
+- BitVector probabilisticStorage;
+-
+- // current type of this HLL instance, if this changes then so should the
+- // storage used (see above)
+- private HLLType type;
+-
+- // ------------------------------------------------------------------------
+- // Characteristic parameters
+- // NOTE: These members are named to match the PostgreSQL implementation's
+- // parameters.
+- // log2(the number of probabilistic HLL registers)
+- private final int log2m;
+- // the size (width) each register in bits
+- private final int regwidth;
+-
+- // ------------------------------------------------------------------------
+- // Computed constants
+- // ........................................................................
+- // EXPLICIT-specific constants
+- // flag indicating if the EXPLICIT representation should NOT be used
+- private final boolean explicitOff;
+- // flag indicating that the promotion threshold from EXPLICIT should be
+- // computed automatically
+- // NOTE: this only has meaning when 'explicitOff' is false
+- private final boolean explicitAuto;
+- // threshold (in element count) at which a EXPLICIT HLL is converted to a
+- // SPARSE or FULL HLL, always greater than or equal to zero and always a
+- // power of two OR simply zero
+- // NOTE: this only has meaning when 'explicitOff' is false
+- private final int explicitThreshold;
+-
+- // ........................................................................
+- // SPARSE-specific constants
+- // the computed width of the short words
+- private final int shortWordLength;
+- // flag indicating if the SPARSE representation should not be used
+- private final boolean sparseOff;
+- // threshold (in register count) at which a SPARSE HLL is converted to a
+- // FULL HLL, always greater than zero
+- private final int sparseThreshold;
+-
+- // ........................................................................
+- // Probabilistic algorithm constants
+- // the number of registers, will always be a power of 2
+- private final int m;
+- // a mask of the log2m bits set to one and the rest to zero
+- private final int mBitsMask;
+- // a mask as wide as a register (see #fromBytes())
+- private final int valueMask;
+- // mask used to ensure that p(w) does not overflow register (see #Constructor() and #addRaw())
+- private final long pwMaxMask;
+- // alpha * m^2 (the constant in the "'raw' HyperLogLog estimator")
+- private final double alphaMSquared;
+- // the cutoff value of the estimator for using the "small" range cardinality
+- // correction formula
+- private final double smallEstimatorCutoff;
+- // the cutoff value of the estimator for using the "large" range cardinality
+- // correction formula
+- private final double largeEstimatorCutoff;
+-
+- // ========================================================================
+- /**
+- * NOTE: Arguments here are named and structured identically to those in the
+- * PostgreSQL implementation, which can be found
+- * here.
+- *
+- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
+- * algorithm. Must be at least 4 and at most 30.
+- * @param regwidth number of bits used per register in the HyperLogLog
+- * algorithm. Must be at least 1 and at most 8.
+- * @param expthresh tunes when the {@link HLLType#EXPLICIT} to
+- * {@link HLLType#SPARSE} promotion occurs,
+- * based on the set's cardinality. Must be at least -1 and at most 18.
+- * @param sparseon Flag indicating if the {@link HLLType#SPARSE}
+- * representation should be used.
+- * @param type the type in the promotion hierarchy which this instance should
+- * start at. This cannot be null.
+- */
+- public HLL(final int log2m, final int regwidth, final int expthresh, final boolean sparseon, final HLLType type) {
+- this.log2m = log2m;
+- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) {
+- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")");
+- }
+-
+- this.regwidth = regwidth;
+- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) {
+- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")");
+- }
+-
+- this.m = (1 << log2m);
+- this.mBitsMask = m - 1;
+- this.valueMask = (1 << regwidth) - 1;
+- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth);
+- this.alphaMSquared = HLLUtil.alphaMSquared(m);
+- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m);
+- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth);
+-
+- if(expthresh == -1) {
+- this.explicitAuto = true;
+- this.explicitOff = false;
+-
+- // NOTE: This math matches the size calculation in the PostgreSQL impl.
+- final long fullRepresentationSize = (this.regwidth * (long)this.m + 7/*round up to next whole byte*/)/Byte.SIZE;
+- final int numLongs = (int)(fullRepresentationSize / 8/*integer division to round down*/);
+-
+- if(numLongs > MAXIMUM_EXPLICIT_THRESHOLD) {
+- this.explicitThreshold = MAXIMUM_EXPLICIT_THRESHOLD;
+- } else {
+- this.explicitThreshold = numLongs;
+- }
+- } else if(expthresh == 0) {
+- this.explicitAuto = false;
+- this.explicitOff = true;
+- this.explicitThreshold = 0;
+- } else if((expthresh > 0) && (expthresh <= MAXIMUM_EXPTHRESH_PARAM)){
+- this.explicitAuto = false;
+- this.explicitOff = false;
+- this.explicitThreshold = (1 << (expthresh - 1));
+- } else {
+- throw new IllegalArgumentException("'expthresh' must be at least " + MINIMUM_EXPTHRESH_PARAM + " and at most " + MAXIMUM_EXPTHRESH_PARAM + " (was: " + expthresh + ")");
+- }
+-
+- this.shortWordLength = (regwidth + log2m);
+- this.sparseOff = !sparseon;
+- if(this.sparseOff) {
+- this.sparseThreshold = 0;
+- } else {
+- // TODO improve this cutoff to include the cost overhead of Java
+- // members/objects
+- final int largestPow2LessThanCutoff =
+- (int)NumberUtil.log2((this.m * this.regwidth) / this.shortWordLength);
+- this.sparseThreshold = (1 << largestPow2LessThanCutoff);
+- }
+-
+- initializeStorage(type);
+- }
+-
+- /**
+- * Construct an empty HLL with the given {@code log2m} and {@code regwidth}.
+- *
+- * This is equivalent to calling HLL(log2m, regwidth, -1, true, HLLType.EMPTY)
.
+- *
+- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
+- * algorithm. Must be at least 4 and at most 30.
+- * @param regwidth number of bits used per register in the HyperLogLog
+- * algorithm. Must be at least 1 and at most 8.
+- *
+- * @see #HLL(int, int, int, boolean, HLLType)
+- */
+- public HLL(final int log2m, final int regwidth) {
+- this(log2m, regwidth, -1, true, HLLType.EMPTY);
+- }
+-
+- // -------------------------------------------------------------------------
+- /**
+- * Convenience constructor for testing. Assumes that both {@link HLLType#EXPLICIT}
+- * and {@link HLLType#SPARSE} representations should be enabled.
+- *
+- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
+- * algorithm. Must be at least 4 and at most 30.
+- * @param regwidth number of bits used per register in the HyperLogLog
+- * algorithm. Must be at least 1 and at most 8.
+- * @param explicitThreshold cardinality threshold at which the {@link HLLType#EXPLICIT}
+- * representation should be promoted to {@link HLLType#SPARSE}.
+- * This must be greater than zero and less than or equal to {@value #MAXIMUM_EXPLICIT_THRESHOLD}.
+- * @param sparseThreshold register count threshold at which the {@link HLLType#SPARSE}
+- * representation should be promoted to {@link HLLType#FULL}.
+- * This must be greater than zero.
+- * @param type the type in the promotion hierarchy which this instance should
+- * start at. This cannot be null
.
+- */
+- /*package, for testing*/ HLL(final int log2m, final int regwidth, final int explicitThreshold, final int sparseThreshold, final HLLType type) {
+- this.log2m = log2m;
+- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) {
+- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")");
+- }
+-
+- this.regwidth = regwidth;
+- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) {
+- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")");
+- }
+-
+- this.m = (1 << log2m);
+- this.mBitsMask = m - 1;
+- this.valueMask = (1 << regwidth) - 1;
+- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth);
+- this.alphaMSquared = HLLUtil.alphaMSquared(m);
+- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m);
+- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth);
+-
+- this.explicitAuto = false;
+- this.explicitOff = false;
+- this.explicitThreshold = explicitThreshold;
+- if((explicitThreshold < 1) || (explicitThreshold > MAXIMUM_EXPLICIT_THRESHOLD)) {
+- throw new IllegalArgumentException("'explicitThreshold' must be at least 1 and at most " + MAXIMUM_EXPLICIT_THRESHOLD + " (was: " + explicitThreshold + ")");
+- }
+-
+- this.shortWordLength = (regwidth + log2m);
+- this.sparseOff = false;
+- this.sparseThreshold = sparseThreshold;
+-
+- initializeStorage(type);
+- }
+-
+- /**
+- * @return the type in the promotion hierarchy of this instance. This will
+- * never be null
.
+- */
+- public HLLType getType() { return type; }
+-
+- // ========================================================================
+- // Add
+- /**
+- * Adds rawValue
directly to the HLL.
+- *
+- * @param rawValue the value to be added. It is very important that this
+- * value already be hashed with a strong (but not
+- * necessarily cryptographic) hash function. For instance, the
+- * Murmur3 implementation in
+- *
+- * Google's Guava library is an excellent hash function for this
+- * purpose and, for seeds greater than zero, matches the output
+- * of the hash provided in the PostgreSQL implementation.
+- */
+- public void addRaw(final long rawValue) {
+- switch(type) {
+- case EMPTY: {
+- // NOTE: EMPTY type is always promoted on #addRaw()
+- if(explicitThreshold > 0) {
+- initializeStorage(HLLType.EXPLICIT);
+- explicitStorage.add(rawValue);
+- } else if(!sparseOff) {
+- initializeStorage(HLLType.SPARSE);
+- addRawSparseProbabilistic(rawValue);
+- } else {
+- initializeStorage(HLLType.FULL);
+- addRawProbabilistic(rawValue);
+- }
+- return;
+- }
+- case EXPLICIT: {
+- explicitStorage.add(rawValue);
+-
+- // promotion, if necessary
+- if(explicitStorage.size() > explicitThreshold) {
+- if(!sparseOff) {
+- initializeStorage(HLLType.SPARSE);
+- for (LongCursor c : explicitStorage) {
+- addRawSparseProbabilistic(c.value);
+- }
+- } else {
+- initializeStorage(HLLType.FULL);
+- for (LongCursor c : explicitStorage) {
+- addRawProbabilistic(c.value);
+- }
+- }
+- explicitStorage = null;
+- }
+- return;
+- }
+- case SPARSE: {
+- addRawSparseProbabilistic(rawValue);
+-
+- // promotion, if necessary
+- if(sparseProbabilisticStorage.size() > sparseThreshold) {
+- initializeStorage(HLLType.FULL);
+- for(IntByteCursor c : sparseProbabilisticStorage) {
+- final int registerIndex = c.key;
+- final byte registerValue = c.value;
+- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
+- }
+- sparseProbabilisticStorage = null;
+- }
+- return;
+- }
+- case FULL:
+- addRawProbabilistic(rawValue);
+- return;
+- default:
+- throw new RuntimeException("Unsupported HLL type " + type);
+- }
+- }
+-
+- // ------------------------------------------------------------------------
+- // #addRaw(..) helpers
+- /**
+- * Adds the raw value to the {@link #sparseProbabilisticStorage}.
+- * {@link #type} must be {@link HLLType#SPARSE}.
+- *
+- * @param rawValue the raw value to add to the sparse storage.
+- */
+- private void addRawSparseProbabilistic(final long rawValue) {
+- // p(w): position of the least significant set bit (one-indexed)
+- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value)
+- //
+- // By construction of pwMaxMask (see #Constructor()),
+- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2,
+- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2,
+- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1.
+- final long substreamValue = (rawValue >>> log2m);
+- final byte p_w;
+-
+- if(substreamValue == 0L) {
+- // The paper does not cover p(0x0), so the special value 0 is used.
+- // 0 is the original initialization value of the registers, so by
+- // doing this the multiset simply ignores it. This is acceptable
+- // because the probability is 1/(2^(2^registerSizeInBits)).
+- p_w = 0;
+- } else {
+- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask));
+- }
+-
+- // Short-circuit if the register is being set to zero, since algorithmically
+- // this corresponds to an "unset" register, and "unset" registers aren't
+- // stored to save memory. (The very reason this sparse implementation
+- // exists.) If a register is set to zero it will break the #algorithmCardinality
+- // code.
+- if(p_w == 0) {
+- return;
+- }
+-
+- // NOTE: no +1 as in paper since 0-based indexing
+- final int j = (int)(rawValue & mBitsMask);
+-
+- final byte currentValue;
+- if (sparseProbabilisticStorage.containsKey(j)) {
+- currentValue = sparseProbabilisticStorage.lget();
+- } else {
+- currentValue = 0;
+- }
+-
+- if(p_w > currentValue) {
+- sparseProbabilisticStorage.put(j, p_w);
+- }
+- }
+-
+- /**
+- * Adds the raw value to the {@link #probabilisticStorage}.
+- * {@link #type} must be {@link HLLType#FULL}.
+- *
+- * @param rawValue the raw value to add to the full probabilistic storage.
+- */
+- private void addRawProbabilistic(final long rawValue) {
+- // p(w): position of the least significant set bit (one-indexed)
+- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value)
+- //
+- // By construction of pwMaxMask (see #Constructor()),
+- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2,
+- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2,
+- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1.
+- final long substreamValue = (rawValue >>> log2m);
+- final byte p_w;
+-
+- if (substreamValue == 0L) {
+- // The paper does not cover p(0x0), so the special value 0 is used.
+- // 0 is the original initialization value of the registers, so by
+- // doing this the multiset simply ignores it. This is acceptable
+- // because the probability is 1/(2^(2^registerSizeInBits)).
+- p_w = 0;
+- } else {
+- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask));
+- }
+-
+- // Short-circuit if the register is being set to zero, since algorithmically
+- // this corresponds to an "unset" register, and "unset" registers aren't
+- // stored to save memory. (The very reason this sparse implementation
+- // exists.) If a register is set to zero it will break the #algorithmCardinality
+- // code.
+- if(p_w == 0) {
+- return;
+- }
+-
+- // NOTE: no +1 as in paper since 0-based indexing
+- final int j = (int)(rawValue & mBitsMask);
+-
+- probabilisticStorage.setMaxRegister(j, p_w);
+- }
+-
+- // ------------------------------------------------------------------------
+- // Storage helper
+- /**
+- * Initializes storage for the specified {@link HLLType} and changes the
+- * instance's {@link #type}.
+- *
+- * @param type the {@link HLLType} to initialize storage for. This cannot be
+- * null
and must be an instantiable type.
+- */
+- private void initializeStorage(final HLLType type) {
+- this.type = type;
+- switch(type) {
+- case EMPTY:
+- // nothing to be done
+- break;
+- case EXPLICIT:
+- this.explicitStorage = new LongOpenHashSet();
+- break;
+- case SPARSE:
+- this.sparseProbabilisticStorage = new IntByteOpenHashMap();
+- break;
+- case FULL:
+- this.probabilisticStorage = new BitVector(regwidth, m);
+- break;
+- default:
+- throw new RuntimeException("Unsupported HLL type " + type);
+- }
+- }
+-
+- // ========================================================================
+- // Cardinality
+- /**
+- * Computes the cardinality of the HLL.
+- *
+- * @return the cardinality of HLL. This will never be negative.
+- */
+- public long cardinality() {
+- switch(type) {
+- case EMPTY:
+- return 0/*by definition*/;
+- case EXPLICIT:
+- return explicitStorage.size();
+- case SPARSE:
+- return (long)Math.ceil(sparseProbabilisticAlgorithmCardinality());
+- case FULL:
+- return (long)Math.ceil(fullProbabilisticAlgorithmCardinality());
+- default:
+- throw new RuntimeException("Unsupported HLL type " + type);
+- }
+- }
+-
+- // ------------------------------------------------------------------------
+- // Cardinality helpers
+- /**
+- * Computes the exact cardinality value returned by the HLL algorithm when
+- * represented as a {@link HLLType#SPARSE} HLL. Kept
+- * separate from {@link #cardinality()} for testing purposes. {@link #type}
+- * must be {@link HLLType#SPARSE}.
+- *
+- * @return the exact, unrounded cardinality given by the HLL algorithm
+- */
+- /*package, for testing*/ double sparseProbabilisticAlgorithmCardinality() {
+- final int m = this.m/*for performance*/;
+-
+- // compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the
+- // 'j'th register value
+- double sum = 0;
+- int numberOfZeroes = 0/*"V" in the paper*/;
+- for(int j=0; jclear does NOT handle
+- * transitions between {@link HLLType}s - a probabilistic type will remain
+- * probabilistic after being cleared.
+- */
+- public void clear() {
+- switch(type) {
+- case EMPTY:
+- return /*do nothing*/;
+- case EXPLICIT:
+- explicitStorage.clear();
+- return;
+- case SPARSE:
+- sparseProbabilisticStorage.clear();
+- return;
+- case FULL:
+- probabilisticStorage.fill(0);
+- return;
+- default:
+- throw new RuntimeException("Unsupported HLL type " + type);
+- }
+- }
+-
+- // ========================================================================
+- // Union
+- /**
+- * Computes the union of HLLs and stores the result in this instance.
+- *
+- * @param other the other {@link HLL} instance to union into this one. This
+- * cannot be null
.
+- */
+- public void union(final HLL other) {
+- // TODO: verify HLLs are compatible
+- final HLLType otherType = other.getType();
+-
+- if(type.equals(otherType)) {
+- homogeneousUnion(other);
+- return;
+- } else {
+- heterogenousUnion(other);
+- return;
+- }
+- }
+-
+- // ------------------------------------------------------------------------
+- // Union helpers
+- /**
+- * Computes the union of two HLLs, of different types, and stores the
+- * result in this instance.
+- *
+- * @param other the other {@link HLL} instance to union into this one. This
+- * cannot be null
.
+- */
+- /*package, for testing*/ void heterogenousUnion(final HLL other) {
+- /*
+- * The logic here is divided into two sections: unions with an EMPTY
+- * HLL, and unions between EXPLICIT/SPARSE/FULL
+- * HLL.
+- *
+- * Between those two sections, all possible heterogeneous unions are
+- * covered. Should another type be added to HLLType whose unions
+- * are not easily reduced (say, as EMPTY's are below) this may be more
+- * easily implemented as Strategies. However, that is unnecessary as it
+- * stands.
+- */
+-
+- // ....................................................................
+- // Union with an EMPTY
+- if(HLLType.EMPTY.equals(type)) {
+- // NOTE: The union of empty with non-empty HLL is just a
+- // clone of the non-empty.
+-
+- switch(other.getType()) {
+- case EXPLICIT: {
+- // src: EXPLICIT
+- // dest: EMPTY
+-
+- if(other.explicitStorage.size() <= explicitThreshold) {
+- type = HLLType.EXPLICIT;
+- explicitStorage = other.explicitStorage.clone();
+- } else {
+- if(!sparseOff) {
+- initializeStorage(HLLType.SPARSE);
+- } else {
+- initializeStorage(HLLType.FULL);
+- }
+- for(LongCursor c : other.explicitStorage) {
+- addRaw(c.value);
+- }
+- }
+- return;
+- }
+- case SPARSE: {
+- // src: SPARSE
+- // dest: EMPTY
+-
+- if(!sparseOff) {
+- type = HLLType.SPARSE;
+- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone();
+- } else {
+- initializeStorage(HLLType.FULL);
+- for(IntByteCursor c : other.sparseProbabilisticStorage) {
+- final int registerIndex = c.key;
+- final byte registerValue = c.value;
+- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
+- }
+- }
+- return;
+- }
+- default/*case FULL*/: {
+- // src: FULL
+- // dest: EMPTY
+-
+- type = HLLType.FULL;
+- probabilisticStorage = other.probabilisticStorage.clone();
+- return;
+- }
+- }
+- } else if (HLLType.EMPTY.equals(other.getType())) {
+- // source is empty, so just return destination since it is unchanged
+- return;
+- } /* else -- both of the sets are not empty */
+-
+- // ....................................................................
+- // NOTE: Since EMPTY is handled above, the HLLs are non-EMPTY below
+- switch(type) {
+- case EXPLICIT: {
+- // src: FULL/SPARSE
+- // dest: EXPLICIT
+- // "Storing into destination" cannot be done (since destination
+- // is by definition of smaller capacity than source), so a clone
+- // of source is made and values from destination are inserted
+- // into that.
+-
+- // Determine source and destination storage.
+- // NOTE: destination storage may change through promotion if
+- // source is SPARSE.
+- if(HLLType.SPARSE.equals(other.getType())) {
+- if(!sparseOff) {
+- type = HLLType.SPARSE;
+- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone();
+- } else {
+- initializeStorage(HLLType.FULL);
+- for(IntByteCursor c : other.sparseProbabilisticStorage) {
+- final int registerIndex = c.key;
+- final byte registerValue = c.value;
+- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
+- }
+- }
+- } else /*source is HLLType.FULL*/ {
+- type = HLLType.FULL;
+- probabilisticStorage = other.probabilisticStorage.clone();
+- }
+- for(LongCursor c : explicitStorage) {
+- addRaw(c.value);
+- }
+- explicitStorage = null;
+- return;
+- }
+- case SPARSE: {
+- if(HLLType.EXPLICIT.equals(other.getType())) {
+- // src: EXPLICIT
+- // dest: SPARSE
+- // Add the raw values from the source to the destination.
+-
+- for(LongCursor c : other.explicitStorage) {
+- addRaw(c.value);
+- }
+- // NOTE: addRaw will handle promotion cleanup
+- } else /*source is HLLType.FULL*/ {
+- // src: FULL
+- // dest: SPARSE
+- // "Storing into destination" cannot be done (since destination
+- // is by definition of smaller capacity than source), so a
+- // clone of source is made and registers from the destination
+- // are merged into the clone.
+-
+- type = HLLType.FULL;
+- probabilisticStorage = other.probabilisticStorage.clone();
+- for(IntByteCursor c : sparseProbabilisticStorage) {
+- final int registerIndex = c.key;
+- final byte registerValue = c.value;
+- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
+- }
+- sparseProbabilisticStorage = null;
+- }
+- return;
+- }
+- default/*destination is HLLType.FULL*/: {
+- if(HLLType.EXPLICIT.equals(other.getType())) {
+- // src: EXPLICIT
+- // dest: FULL
+- // Add the raw values from the source to the destination.
+- // Promotion is not possible, so don't bother checking.
+-
+- for(LongCursor c : other.explicitStorage) {
+- addRaw(c.value);
+- }
+- } else /*source is HLLType.SPARSE*/ {
+- // src: SPARSE
+- // dest: FULL
+- // Merge the registers from the source into the destination.
+- // Promotion is not possible, so don't bother checking.
+-
+- for(IntByteCursor c : other.sparseProbabilisticStorage) {
+- final int registerIndex = c.key;
+- final byte registerValue = c.value;
+- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
+- }
+- }
+- }
+- }
+- }
+-
+- /**
+- * Computes the union of two HLLs of the same type, and stores the
+- * result in this instance.
+- *
+- * @param other the other {@link HLL} instance to union into this one. This
+- * cannot be null
.
+- */
+- private void homogeneousUnion(final HLL other) {
+- switch(type) {
+- case EMPTY:
+- // union of empty and empty is empty
+- return;
+- case EXPLICIT:
+- for(LongCursor c : other.explicitStorage) {
+- addRaw(c.value);
+- }
+- // NOTE: #addRaw() will handle promotion, if necessary
+- return;
+- case SPARSE:
+- for(IntByteCursor c : other.sparseProbabilisticStorage) {
+- final int registerIndex = c.key;
+- final byte registerValue = c.value;
+- final byte currentRegisterValue = sparseProbabilisticStorage.get(registerIndex);
+- if(registerValue > currentRegisterValue) {
+- sparseProbabilisticStorage.put(registerIndex, registerValue);
+- }
+- }
+-
+- // promotion, if necessary
+- if(sparseProbabilisticStorage.size() > sparseThreshold) {
+- initializeStorage(HLLType.FULL);
+- for(IntByteCursor c : sparseProbabilisticStorage) {
+- final int registerIndex = c.key;
+- final byte registerValue = c.value;
+- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
+- }
+- sparseProbabilisticStorage = null;
+- }
+- return;
+- case FULL:
+- for(int i=0; inull or empty.
+- */
+- public byte[] toBytes() {
+- return toBytes(SerializationUtil.DEFAULT_SCHEMA_VERSION);
+- }
+-
+- /**
+- * Serializes the HLL to an array of bytes in correspondence with the format
+- * of the specified schema version.
+- *
+- * @param schemaVersion the schema version dictating the serialization format
+- * @return the array of bytes representing the HLL. This will never be
+- * null
or empty.
+- */
+- public byte[] toBytes(final ISchemaVersion schemaVersion) {
+- final byte[] bytes;
+- switch(type) {
+- case EMPTY:
+- bytes = new byte[schemaVersion.paddingBytes(type)];
+- break;
+- case EXPLICIT: {
+- final IWordSerializer serializer =
+- schemaVersion.getSerializer(type, Long.SIZE, explicitStorage.size());
+-
+- final long[] values = explicitStorage.toArray();
+- Arrays.sort(values);
+- for(final long value : values) {
+- serializer.writeWord(value);
+- }
+-
+- bytes = serializer.getBytes();
+- break;
+- }
+- case SPARSE: {
+- final IWordSerializer serializer =
+- schemaVersion.getSerializer(type, shortWordLength, sparseProbabilisticStorage.size());
+-
+- final int[] indices = sparseProbabilisticStorage.keys().toArray();
+- Arrays.sort(indices);
+- for(final int registerIndex : indices) {
+- assert sparseProbabilisticStorage.containsKey(registerIndex);
+- final long registerValue = sparseProbabilisticStorage.get(registerIndex);
+- // pack index and value into "short word"
+- final long shortWord = ((registerIndex << regwidth) | registerValue);
+- serializer.writeWord(shortWord);
+- }
+-
+- bytes = serializer.getBytes();
+- break;
+- }
+- case FULL: {
+- final IWordSerializer serializer = schemaVersion.getSerializer(type, regwidth, m);
+- probabilisticStorage.getRegisterContents(serializer);
+-
+- bytes = serializer.getBytes();
+- break;
+- }
+- default:
+- throw new RuntimeException("Unsupported HLL type " + type);
+- }
+-
+- final IHLLMetadata metadata = new HLLMetadata(schemaVersion.schemaVersionNumber(),
+- type,
+- log2m,
+- regwidth,
+- (int)NumberUtil.log2(explicitThreshold),
+- explicitOff,
+- explicitAuto,
+- !sparseOff);
+- schemaVersion.writeMetadata(bytes, metadata);
+-
+- return bytes;
+- }
+-
+- /**
+- * Deserializes the HLL (in {@link #toBytes(ISchemaVersion)} format) serialized
+- * into bytes
.
+- *
+- * @param bytes the serialized bytes of new HLL
+- * @return the deserialized HLL. This will never be null
.
+- *
+- * @see #toBytes(ISchemaVersion)
+- */
+- public static HLL fromBytes(final byte[] bytes) {
+- final ISchemaVersion schemaVersion = SerializationUtil.getSchemaVersion(bytes);
+- final IHLLMetadata metadata = schemaVersion.readMetadata(bytes);
+-
+- final HLLType type = metadata.HLLType();
+- final int regwidth = metadata.registerWidth();
+- final int log2m = metadata.registerCountLog2();
+- final boolean sparseon = metadata.sparseEnabled();
+-
+- final int expthresh;
+- if(metadata.explicitAuto()) {
+- expthresh = -1;
+- } else if(metadata.explicitOff()) {
+- expthresh = 0;
+- } else {
+- // NOTE: take into account that the postgres-compatible constructor
+- // subtracts one before taking a power of two.
+- expthresh = metadata.log2ExplicitCutoff() + 1;
+- }
+-
+- final HLL hll = new HLL(log2m, regwidth, expthresh, sparseon, type);
+-
+- // Short-circuit on empty, which needs no other deserialization.
+- if(HLLType.EMPTY.equals(type)) {
+- return hll;
+- }
+-
+- final int wordLength;
+- switch(type) {
+- case EXPLICIT:
+- wordLength = Long.SIZE;
+- break;
+- case SPARSE:
+- wordLength = hll.shortWordLength;
+- break;
+- case FULL:
+- wordLength = hll.regwidth;
+- break;
+- default:
+- throw new RuntimeException("Unsupported HLL type " + type);
+- }
+-
+- final IWordDeserializer deserializer =
+- schemaVersion.getDeserializer(type, wordLength, bytes);
+- switch(type) {
+- case EXPLICIT:
+- // NOTE: This should not exceed expthresh and this will always
+- // be exactly the number of words that were encoded,
+- // because the word length is at least a byte wide.
+- // SEE: IWordDeserializer#totalWordCount()
+- for(int i=0; i>> hll.regwidth), registerValue);
+- }
+- }
+- break;
+- case FULL:
+- // NOTE: Iteration is done using m (register count) and NOT
+- // deserializer#totalWordCount() because regwidth may be
+- // less than 8 and as such the padding on the 'last' byte
+- // may be larger than regwidth, causing an extra register
+- // to be read.
+- // SEE: IWordDeserializer#totalWordCount()
+- for(long i=0; inull.
+- * @param registerCountLog2 the log-base-2 register count parameter for
+- * probabilistic HLLs. This must be greater than or equal to zero.
+- * @param registerWidth the register width parameter for probabilistic
+- * HLLs. This must be greater than or equal to zero.
+- * @param log2ExplicitCutoff the log-base-2 of the explicit cardinality cutoff,
+- * if it is explicitly defined. (If explicitOff
or
+- * explicitAuto
is true
then this has no
+- * meaning.)
+- * @param explicitOff the flag for 'explicit off'-mode, where the
+- * {@link HLLType#EXPLICIT} representation is not used. Both this and
+- * explicitAuto
cannot be true
at the same
+- * time.
+- * @param explicitAuto the flag for 'explicit auto'-mode, where the
+- * {@link HLLType#EXPLICIT} representation's promotion cutoff is
+- * determined based on in-memory size automatically. Both this and
+- * explicitOff
cannot be true
at the same
+- * time.
+- * @param sparseEnabled the flag for 'sparse-enabled'-mode, where the
+- * {@link HLLType#SPARSE} representation is used.
+- */
+- public HLLMetadata(final int schemaVersion,
+- final HLLType type,
+- final int registerCountLog2,
+- final int registerWidth,
+- final int log2ExplicitCutoff,
+- final boolean explicitOff,
+- final boolean explicitAuto,
+- final boolean sparseEnabled) {
+- this.schemaVersion = schemaVersion;
+- this.type = type;
+- this.registerCountLog2 = registerCountLog2;
+- this.registerWidth = registerWidth;
+- this.log2ExplicitCutoff = log2ExplicitCutoff;
+- this.explicitOff = explicitOff;
+- this.explicitAuto = explicitAuto;
+- this.sparseEnabled = sparseEnabled;
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#schemaVersion()
+- */
+- @Override
+- public int schemaVersion() { return schemaVersion; }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#HLLType()
+- */
+- @Override
+- public HLLType HLLType() { return type; }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#registerCountLog2()
+- */
+- @Override
+- public int registerCountLog2() { return registerCountLog2; }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#registerWidth()
+- */
+- @Override
+- public int registerWidth() { return registerWidth; }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
+- */
+- @Override
+- public int log2ExplicitCutoff() { return log2ExplicitCutoff; }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#explicitOff()
+- */
+- @Override
+- public boolean explicitOff() {
+- return explicitOff;
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#explicitAuto()
+- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
+- */
+- @Override
+- public boolean explicitAuto() {
+- return explicitAuto;
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.IHLLMetadata#sparseEnabled()
+- */
+- @Override
+- public boolean sparseEnabled() { return sparseEnabled; }
+-
+- /* (non-Javadoc)
+- * @see java.lang.Object#toString()
+- */
+- @Override
+- public String toString() {
+- return "";
+- }
+-}
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,29 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * The types of algorithm/data structure that {@link HLL} can utilize. For more
+- * information, see the Javadoc for {@link HLL}.
+- */
+-public enum HLLType {
+- EMPTY,
+- EXPLICIT,
+- SPARSE,
+- FULL;
+-}
+\ Manca newline alla fine del file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,199 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * Static functions for computing constants and parameters used in the HLL
+- * algorithm.
+- */
+-final class HLLUtil {
+- /**
+- * Precomputed pwMaxMask
values indexed by registerSizeInBits
.
+- * Calculated with this formula:
+- *
+- * int maxRegisterValue = (1 << registerSizeInBits) - 1;
+- * // Mask with all bits set except for (maxRegisterValue - 1) least significant bits (see #addRaw())
+- * return ~((1L << (maxRegisterValue - 1)) - 1);
+- *
+- *
+- * @see #pwMaxMask(int)
+- */
+- private static final long[] PW_MASK = {
+- ~((1L << (((1 << 0) - 1) - 1)) - 1),
+- ~((1L << (((1 << 1) - 1) - 1)) - 1),
+- ~((1L << (((1 << 2) - 1) - 1)) - 1),
+- ~((1L << (((1 << 3) - 1) - 1)) - 1),
+- ~((1L << (((1 << 4) - 1) - 1)) - 1),
+- ~((1L << (((1 << 5) - 1) - 1)) - 1),
+- ~((1L << (((1 << 6) - 1) - 1)) - 1),
+- ~((1L << (((1 << 7) - 1) - 1)) - 1),
+- ~((1L << (((1 << 8) - 1) - 1)) - 1)
+- };
+-
+- /**
+- * Precomputed twoToL
values indexed by a linear combination of
+- * regWidth
and log2m
.
+- *
+- * The array is one-dimensional and can be accessed by using index
+- * (REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m
+- * for regWidth
and log2m
between the specified
+- * HLL.{MINIMUM,MAXIMUM}_{REGWIDTH,LOG2M}_PARAM
constants.
+- *
+- * @see #largeEstimator(int, int, double)
+- * @see #largeEstimatorCutoff(int, int)
+- * @see "Blog post with section on 2^L"
+- */
+- private static final double[] TWO_TO_L = new double[(HLL.MAXIMUM_REGWIDTH_PARAM + 1) * (HLL.MAXIMUM_LOG2M_PARAM + 1)];
+-
+- /**
+- * Spacing constant used to compute offsets into {@link #TWO_TO_L}.
+- */
+- private static final int REG_WIDTH_INDEX_MULTIPLIER = HLL.MAXIMUM_LOG2M_PARAM + 1;
+-
+- static {
+- for(int regWidth = HLL.MINIMUM_REGWIDTH_PARAM; regWidth <= HLL.MAXIMUM_REGWIDTH_PARAM; regWidth++) {
+- for(int log2m = HLL.MINIMUM_LOG2M_PARAM ; log2m <= HLL.MAXIMUM_LOG2M_PARAM; log2m++) {
+- int maxRegisterValue = (1 << regWidth) - 1;
+-
+- // Since 1 is added to p(w) in the insertion algorithm, only
+- // (maxRegisterValue - 1) bits are inspected hence the hash
+- // space is one power of two smaller.
+- final int pwBits = (maxRegisterValue - 1);
+- final int totalBits = (pwBits + log2m);
+- final double twoToL = Math.pow(2, totalBits);
+- TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m] = twoToL;
+- }
+- }
+- }
+-
+- // ************************************************************************
+- /**
+- * Computes the bit-width of HLL registers necessary to estimate a set of
+- * the specified cardinality.
+- *
+- * @param expectedUniqueElements an upper bound on the number of unique
+- * elements that are expected. This must be greater than zero.
+- * @return a register size in bits (i.e. log2(log2(n))
)
+- */
+- public static int registerBitSize(final long expectedUniqueElements) {
+- return Math.max(HLL.MINIMUM_REGWIDTH_PARAM,
+- (int)Math.ceil(NumberUtil.log2(NumberUtil.log2(expectedUniqueElements))));
+- }
+-
+- // ========================================================================
+- /**
+- * Computes the 'alpha-m-squared' constant used by the HyperLogLog algorithm.
+- *
+- * @param m this must be a power of two, cannot be less than
+- * 16 (24), and cannot be greater than 65536 (216).
+- * @return gamma times registerCount
squared where gamma is
+- * based on the value of registerCount
.
+- * @throws IllegalArgumentException if registerCount
is less
+- * than 16.
+- */
+- public static double alphaMSquared(final int m) {
+- switch(m) {
+- case 1/*2^0*/:
+- case 2/*2^1*/:
+- case 4/*2^2*/:
+- case 8/*2^3*/:
+- throw new IllegalArgumentException("'m' cannot be less than 16 (" + m + " < 16).");
+-
+- case 16/*2^4*/:
+- return 0.673 * m * m;
+-
+- case 32/*2^5*/:
+- return 0.697 * m * m;
+-
+- case 64/*2^6*/:
+- return 0.709 * m * m;
+-
+- default/*>2^6*/:
+- return (0.7213 / (1.0 + 1.079 / m)) * m * m;
+- }
+- }
+-
+- // ========================================================================
+- /**
+- * Computes a mask that prevents overflow of HyperLogLog registers.
+- *
+- * @param registerSizeInBits the size of the HLL registers, in bits.
+- * @return mask a long
mask to prevent overflow of the registers
+- * @see #registerBitSize(long)
+- */
+- public static long pwMaxMask(final int registerSizeInBits) {
+- return PW_MASK[registerSizeInBits];
+- }
+-
+- // ========================================================================
+- /**
+- * The cutoff for using the "small range correction" formula, in the
+- * HyperLogLog algorithm.
+- *
+- * @param m the number of registers in the HLL. m in the paper.
+- * @return the cutoff for the small range correction.
+- * @see #smallEstimator(int, int)
+- */
+- public static double smallEstimatorCutoff(final int m) {
+- return ((double)m * 5) / 2;
+- }
+-
+- /**
+- * The "small range correction" formula from the HyperLogLog algorithm. Only
+- * appropriate if both the estimator is smaller than (5/2) * m
and
+- * there are still registers that have the zero value.
+- *
+- * @param m the number of registers in the HLL. m in the paper.
+- * @param numberOfZeroes the number of registers with value zero. V
+- * in the paper.
+- * @return a corrected cardinality estimate.
+- */
+- public static double smallEstimator(final int m, final int numberOfZeroes) {
+- return m * Math.log((double)m / numberOfZeroes);
+- }
+-
+- /**
+- * The cutoff for using the "large range correction" formula, from the
+- * HyperLogLog algorithm, adapted for 64 bit hashes.
+- *
+- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
+- * @param registerSizeInBits the size of the HLL registers, in bits.
+- * @return the cutoff for the large range correction.
+- * @see #largeEstimator(int, int, double)
+- * @see "Blog post with section on 64 bit hashes and 'large range correction' cutoff"
+- */
+- public static double largeEstimatorCutoff(final int log2m, final int registerSizeInBits) {
+- return (TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]) / 30.0;
+- }
+-
+- /**
+- * The "large range correction" formula from the HyperLogLog algorithm, adapted
+- * for 64 bit hashes. Only appropriate for estimators whose value exceeds
+- * the return of {@link #largeEstimatorCutoff(int, int)}.
+- *
+- * @param log2m log-base-2 of the number of registers in the HLL. b in the paper.
+- * @param registerSizeInBits the size of the HLL registers, in bits.
+- * @param estimator the original estimator ("E" in the paper).
+- * @return a corrected cardinality estimate.
+- * @see "Blog post with section on 64 bit hashes and 'large range correction'"
+- */
+- public static double largeEstimator(final int log2m, final int registerSizeInBits, final double estimator) {
+- final double twoToL = TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m];
+- return -1 * twoToL * Math.log(1.0 - (estimator/twoToL));
+- }
+-}
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,71 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * The metadata and parameters associated with a HLL.
+- */
+-interface IHLLMetadata {
+- /**
+- * @return the schema version of the HLL. This will never be null
.
+- */
+- int schemaVersion();
+-
+- /**
+- * @return the type of the HLL. This will never be null
.
+- */
+- HLLType HLLType();
+-
+- /**
+- * @return the log-base-2 of the register count parameter of the HLL. This
+- * will always be greater than or equal to 4 and less than or equal
+- * to 31.
+- */
+- int registerCountLog2();
+-
+- /**
+- * @return the register width parameter of the HLL. This will always be
+- * greater than or equal to 1 and less than or equal to 8.
+- */
+- int registerWidth();
+-
+- /**
+- * @return the log-base-2 of the explicit cutoff cardinality. This will always
+- * be greater than or equal to zero and less than 31, per the specification.
+- */
+- int log2ExplicitCutoff();
+-
+- /**
+- * @return true
if the {@link HLLType#EXPLICIT} representation
+- * has been disabled. false
otherwise.
+- */
+- boolean explicitOff();
+-
+- /**
+- * @return true
if the {@link HLLType#EXPLICIT} representation
+- * cutoff cardinality is set to be automatically chosen,
+- * false
otherwise.
+- */
+- boolean explicitAuto();
+-
+- /**
+- * @return true
if the {@link HLLType#SPARSE} representation
+- * is enabled.
+- */
+- boolean sparseEnabled();
+-}
+\ Manca newline alla fine del file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 2015-07-16 13:22:50.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,85 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A serialization schema for HLLs. Reads and writes HLL metadata to
+- * and from byte[]
representations.
+- */
+-interface ISchemaVersion {
+- /**
+- * The number of metadata bytes required for a serialized HLL of the
+- * specified type.
+- *
+- * @param type the type of the serialized HLL
+- * @return the number of padding bytes needed in order to fully accommodate
+- * the needed metadata.
+- */
+- int paddingBytes(HLLType type);
+-
+- /**
+- * Writes metadata bytes to serialized HLL.
+- *
+- * @param bytes the padded data bytes of the HLL
+- * @param metadata the metadata to write to the padding bytes
+- */
+- void writeMetadata(byte[] bytes, IHLLMetadata metadata);
+-
+- /**
+- * Reads the metadata bytes of the serialized HLL.
+- *
+- * @param bytes the serialized HLL
+- * @return the HLL metadata
+- */
+- IHLLMetadata readMetadata(byte[] bytes);
+-
+- /**
+- * Builds an HLL serializer that matches this schema version.
+- *
+- * @param type the HLL type that will be serialized. This cannot be
+- * null
.
+- * @param wordLength the length of the 'words' that comprise the data of the
+- * HLL. Words must be at least 5 bits and at most 64 bits long.
+- * @param wordCount the number of 'words' in the HLL's data.
+- * @return a byte array serializer used to serialize a HLL according
+- * to this schema version's specification.
+- * @see #paddingBytes(HLLType)
+- * @see IWordSerializer
+- */
+- IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount);
+-
+- /**
+- * Builds an HLL deserializer that matches this schema version.
+- *
+- * @param type the HLL type that will be deserialized. This cannot be
+- * null
.
+- * @param wordLength the length of the 'words' that comprise the data of the
+- * serialized HLL. Words must be at least 5 bits and at most 64
+- * bits long.
+- * @param bytes the serialized HLL to deserialize. This cannot be
+- * null
.
+- * @return a byte array deserializer used to deserialize a HLL serialized
+- * according to this schema version's specification.
+- */
+- IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes);
+-
+- /**
+- * @return the schema version number.
+- */
+- int schemaVersionNumber();
+-}
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 2015-07-16 13:14:59.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,41 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * Reads 'words' of a fixed width, in sequence, from a byte array.
+- */
+-public interface IWordDeserializer {
+- /**
+- * @return the next word in the sequence. Should not be called more than
+- * {@link #totalWordCount()} times.
+- */
+- long readWord();
+-
+- /**
+- * Returns the number of words that could be encoded in the sequence.
+- *
+- * NOTE: the sequence that was encoded may be shorter than the value this
+- * method returns due to padding issues within bytes. This guarantees
+- * only an upper bound on the number of times {@link #readWord()}
+- * can be called.
+- *
+- * @return the maximum number of words that could be read from the sequence.
+- */
+- int totalWordCount();
+-}
+\ Manca newline alla fine del file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,39 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * Writes 'words' of fixed width, in sequence, to a byte array.
+- */
+-interface IWordSerializer {
+-
+- /**
+- * Writes the word to the backing array.
+- *
+- * @param word the word to write.
+- */
+- void writeWord(final long word);
+-
+- /**
+- * Returns the backing array of byte
s that contain the serialized
+- * words.
+- * @return the serialized words as a byte[]
.
+- */
+- byte[] getBytes();
+-
+-}
+\ Manca newline alla fine del file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,35 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A long
-based iterator. This is not is-a {@link java.util.Iterator}
+- * to prevent autoboxing between Long
and long
.
+- */
+-interface LongIterator {
+- /**
+- * @return true
if and only if there are more elements to
+- * iterate over. false
otherwise.
+- */
+- boolean hasNext();
+-
+- /**
+- * @return the next long
in the collection.
+- */
+- long next();
+-}
+\ Manca newline alla fine del file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,172 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A collection of utilities to work with numbers.
+- */
+-class NumberUtil {
+- // loge(2) (log-base e of 2)
+- public static final double LOGE_2 = 0.6931471805599453;
+-
+- // ************************************************************************
+- /**
+- * Computes the log2
(log-base-two) of the specified value.
+- *
+- * @param value the double
for which the log2
is
+- * desired.
+- * @return the log2
of the specified value
+- */
+- public static double log2(final double value) {
+- // REF: http://en.wikipedia.org/wiki/Logarithmic_scale (conversion of bases)
+- return Math.log(value) / LOGE_2;
+- }
+-
+- // ========================================================================
+- // the hex characters
+- private static final char[] HEX = { '0', '1', '2', '3', '4', '5', '6', '7',
+- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+-
+- // ------------------------------------------------------------------------
+- /**
+- * Converts the specified array of byte
s into a string of
+- * hex characters (low byte
first).
+- *
+- * @param bytes the array of byte
s that are to be converted.
+- * This cannot be null
though it may be empty.
+- * @param offset the offset in bytes
at which the bytes will
+- * be taken. This cannot be negative and must be less than
+- * bytes.length - 1
.
+- * @param count the number of bytes to be retrieved from the specified array.
+- * This cannot be negative. If greater than bytes.length - offset
+- * then that value is used.
+- * @return a string of at most count
characters that represents
+- * the specified byte array in hex. This will never be null
+- * though it may be empty if bytes
is empty or count
+- * is zero.
+- * @throws IllegalArgumentException if offset
is greater than
+- * or equal to bytes.length
.
+- * @see #fromHex(String, int, int)
+- */
+- public static String toHex(final byte[] bytes, final int offset, final int count) {
+- if(offset >= bytes.length) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + bytes.length + ").")/*by contract*/;
+- final int byteCount = Math.min( (bytes.length - offset), count);
+- final int upperBound = byteCount + offset;
+-
+- final char[] chars = new char[byteCount * 2/*two chars per byte*/];
+- int charIndex = 0;
+- for(int i=offset; i>> 4) & 0x0F];
+- chars[charIndex++] = HEX[value & 0x0F];
+- }
+-
+- return new String(chars);
+- }
+-
+- /**
+- * Converts the specified array of hex characters into an array of byte
s
+- * (low byte
first).
+- *
+- * @param string the string of hex characters to be converted into byte
s.
+- * This cannot be null
though it may be blank.
+- * @param offset the offset in the string at which the characters will be
+- * taken. This cannot be negative and must be less than string.length() - 1
.
+- * @param count the number of characters to be retrieved from the specified
+- * string. This cannot be negative and must be divisible by two
+- * (since there are two characters per byte
).
+- * @return the array of byte
s that were converted from the
+- * specified string (in the specified range). This will never be
+- * null
though it may be empty if string
+- * is empty or count
is zero.
+- * @throws IllegalArgumentException if offset
is greater than
+- * or equal to string.length()
or if count
+- * is not divisible by two.
+- * @see #toHex(byte[], int, int)
+- */
+- public static byte[] fromHex(final String string, final int offset, final int count) {
+- if(offset >= string.length()) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + string.length() + ").")/*by contract*/;
+- if( (count & 0x01) != 0) throw new IllegalArgumentException("Count is not divisible by two (" + count + ").")/*by contract*/;
+- final int charCount = Math.min((string.length() - offset), count);
+- final int upperBound = offset + charCount;
+-
+- final byte[] bytes = new byte[charCount >>> 1/*aka /2*/];
+- int byteIndex = 0/*beginning*/;
+- for(int i=offset; ibyte.
+- * This cannot be a character other than [a-fA-F0-9].
+- * @return the value of the specified character. This will be a value 0
+- * through 15
.
+- * @throws IllegalArgumentException if the specified character is not in
+- * [a-fA-F0-9]
+- */
+- private static final int digit(final char character) {
+- switch(character) {
+- case '0':
+- return 0;
+- case '1':
+- return 1;
+- case '2':
+- return 2;
+- case '3':
+- return 3;
+- case '4':
+- return 4;
+- case '5':
+- return 5;
+- case '6':
+- return 6;
+- case '7':
+- return 7;
+- case '8':
+- return 8;
+- case '9':
+- return 9;
+- case 'a':
+- case 'A':
+- return 10;
+- case 'b':
+- case 'B':
+- return 11;
+- case 'c':
+- case 'C':
+- return 12;
+- case 'd':
+- case 'D':
+- return 13;
+- case 'e':
+- case 'E':
+- return 14;
+- case 'f':
+- case 'F':
+- return 15;
+-
+- default:
+- throw new IllegalArgumentException("Character is not in [a-fA-F0-9] ('" + character + "').");
+- }
+- }
+-}
+\ Manca newline alla fine del file
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,24 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A fork of Java-HyperLogLog package tweaked
+- * not to depend on fastutil and with cleanups to make it lean and clean.
+- */
+-package org.apache.solr.util.hll;
+-
+-
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 2015-07-16 13:22:50.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,154 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A concrete {@link ISchemaVersion} representing schema version one.
+- */
+-class SchemaVersionOne implements ISchemaVersion {
+- /**
+- * The schema version number for this instance.
+- */
+- public static final int SCHEMA_VERSION = 1;
+-
+- // ------------------------------------------------------------------------
+- // Version-specific ordinals (array position) for each of the HLL types
+- private static final HLLType[] TYPE_ORDINALS = new HLLType[] {
+- HLLType.EMPTY,
+- HLLType.EXPLICIT,
+- HLLType.SPARSE,
+- HLLType.FULL
+- };
+-
+- // ------------------------------------------------------------------------
+- // number of header bytes for all HLL types
+- private static final int HEADER_BYTE_COUNT = 3;
+-
+- // sentinel values from the spec for explicit off and auto
+- private static final int EXPLICIT_OFF = 0;
+- private static final int EXPLICIT_AUTO = 63;
+-
+- // ************************************************************************
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.ISchemaVersion#paddingBytes(HLLType)
+- */
+- @Override
+- public int paddingBytes(final HLLType type) {
+- return HEADER_BYTE_COUNT;
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.ISchemaVersion#writeMetadata(byte[], IHLLMetadata)
+- */
+- @Override
+- public void writeMetadata(final byte[] bytes, final IHLLMetadata metadata) {
+- final HLLType type = metadata.HLLType();
+- final int typeOrdinal = getOrdinal(type);
+-
+- final int explicitCutoffValue;
+- if(metadata.explicitOff()) {
+- explicitCutoffValue = EXPLICIT_OFF;
+- } else if(metadata.explicitAuto()) {
+- explicitCutoffValue = EXPLICIT_AUTO;
+- } else {
+- explicitCutoffValue = metadata.log2ExplicitCutoff() + 1/*per spec*/;
+- }
+-
+- bytes[0] = SerializationUtil.packVersionByte(SCHEMA_VERSION, typeOrdinal);
+- bytes[1] = SerializationUtil.packParametersByte(metadata.registerWidth(), metadata.registerCountLog2());
+- bytes[2] = SerializationUtil.packCutoffByte(explicitCutoffValue, metadata.sparseEnabled());
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.ISchemaVersion#readMetadata(byte[])
+- */
+- @Override
+- public IHLLMetadata readMetadata(final byte[] bytes) {
+- final byte versionByte = bytes[0];
+- final byte parametersByte = bytes[1];
+- final byte cutoffByte = bytes[2];
+-
+- final int typeOrdinal = SerializationUtil.typeOrdinal(versionByte);
+- final int explicitCutoffValue = SerializationUtil.explicitCutoff(cutoffByte);
+- final boolean explicitOff = (explicitCutoffValue == EXPLICIT_OFF);
+- final boolean explicitAuto = (explicitCutoffValue == EXPLICIT_AUTO);
+- final int log2ExplicitCutoff = (explicitOff || explicitAuto) ? -1/*sentinel*/ : (explicitCutoffValue - 1/*per spec*/);
+-
+- return new HLLMetadata(SCHEMA_VERSION,
+- getType(typeOrdinal),
+- SerializationUtil.registerCountLog2(parametersByte),
+- SerializationUtil.registerWidth(parametersByte),
+- log2ExplicitCutoff,
+- explicitOff,
+- explicitAuto,
+- SerializationUtil.sparseEnabled(cutoffByte));
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.ISchemaVersion#getSerializer(HLLType, int, int)
+- */
+- @Override
+- public IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount) {
+- return new BigEndianAscendingWordSerializer(wordLength, wordCount, paddingBytes(type));
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.ISchemaVersion#getDeserializer(HLLType, int, byte[])
+- */
+- @Override
+- public IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes) {
+- return new BigEndianAscendingWordDeserializer(wordLength, paddingBytes(type), bytes);
+- }
+-
+- /* (non-Javadoc)
+- * @see net.agkn.hll.serialization.ISchemaVersion#schemaVersionNumber()
+- */
+- @Override
+- public int schemaVersionNumber() {
+- return SCHEMA_VERSION;
+- }
+-
+- // ========================================================================
+- // Type/Ordinal lookups
+- /**
+- * Gets the ordinal for the specified {@link HLLType}.
+- *
+- * @param type the type whose ordinal is desired
+- * @return the ordinal for the specified type, to be used in the version byte.
+- * This will always be non-negative.
+- */
+- private static int getOrdinal(final HLLType type) {
+- for(int i=0; inull.
+- */
+- private static HLLType getType(final int ordinal) {
+- if((ordinal < 0) || (ordinal >= TYPE_ORDINALS.length)) {
+- throw new IllegalArgumentException("Invalid type ordinal '" + ordinal + "'. Only 0-" + (TYPE_ORDINALS.length - 1) + " inclusive allowed.");
+- }
+- return TYPE_ORDINALS[ordinal];
+- }
+-}
+diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java
+--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 2015-07-16 12:32:07.000000000 +0200
++++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 1970-01-01 01:00:00.000000000 +0100
+@@ -1,277 +0,0 @@
+-package org.apache.solr.util.hll;
+-
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-/**
+- * A collection of constants and utilities for serializing and deserializing
+- * HLLs.
+- *
+- * NOTE: 'package' visibility is used for many methods that only need to be
+- * used by the {@link ISchemaVersion} implementations. The structure of
+- * a serialized HLL's metadata should be opaque to the rest of the
+- * library.
+- */
+-class SerializationUtil {
+- /**
+- * The number of bits (of the parameters byte) dedicated to encoding the
+- * width of the registers.
+- */
+- /*package*/ static int REGISTER_WIDTH_BITS = 3;
+-
+- /**
+- * A mask to cap the maximum value of the register width.
+- */
+- /*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1;
+-
+- /**
+- * The number of bits (of the parameters byte) dedicated to encoding
+- * log2(registerCount)
.
+- */
+- /*package*/ static int LOG2_REGISTER_COUNT_BITS = 5;
+-
+- /**
+- * A mask to cap the maximum value of log2(registerCount)
.
+- */
+- /*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1;
+-
+- /**
+- * The number of bits (of the cutoff byte) dedicated to encoding the
+- * log-base-2 of the explicit cutoff or sentinel values for
+- * 'explicit-disabled' or 'auto'.
+- */
+- /*package*/ static int EXPLICIT_CUTOFF_BITS = 6;
+-
+- /**
+- * A mask to cap the maximum value of the explicit cutoff choice.
+- */
+- /*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1;
+-
+- /**
+- * Number of bits in a nibble.
+- */
+- private static int NIBBLE_BITS = 4;
+-
+- /**
+- * A mask to cap the maximum value of a nibble.
+- */
+- private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1;
+-
+- // ************************************************************************
+- // Serialization utilities
+-
+- /**
+- * Schema version one (v1).
+- */
+- public static ISchemaVersion VERSION_ONE = new SchemaVersionOne();
+-
+- /**
+- * The default schema version for serializing HLLs.
+- */
+- public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE;
+-
+- /**
+- * List of registered schema versions, indexed by their version numbers. If
+- * an entry is null, then no such schema version is registered.
+- * Similarly, registering a new schema version simply entails assigning an
+- * {@link ISchemaVersion} instance to the appropriate index of this array.
+- *
+- * By default, only {@link SchemaVersionOne} is registered. Note that version
+- * zero will always be reserved for internal (e.g. proprietary, legacy) schema
+- * specifications/implementations and will never be assigned to in by this
+- * library.
+- */
+- public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16];
+-
+- static {
+- REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE;
+- }
+-
+- /**
+- * @param schemaVersionNumber the version number of the {@link ISchemaVersion}
+- * desired. This must be a registered schema version number.
+- * @return The {@link ISchemaVersion} for the given number. This will never
+- * be null.
+- */
+- public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) {
+- if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) {
+- throw new RuntimeException("Invalid schema version number " + schemaVersionNumber);
+- }
+- final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber];
+- if(schemaVersion == null) {
+- throw new RuntimeException("Unknown schema version number " + schemaVersionNumber);
+- }
+- return schemaVersion;
+- }
+-
+- /**
+- * Get the appropriate {@link ISchemaVersion schema version} for the specified
+- * serialized HLL.
+- *
+- * @param bytes the serialized HLL whose schema version is desired.
+- * @return the schema version for the specified HLL. This will never
+- * be null.
+- */
+- public static ISchemaVersion getSchemaVersion(final byte[] bytes) {
+- final byte versionByte = bytes[0];
+- final int schemaVersionNumber = schemaVersion(versionByte);
+-
+- return getSchemaVersion(schemaVersionNumber);
+- }
+-
+- // ************************************************************************
+- // Package-specific shared helpers
+-
+- /**
+- * Generates a byte that encodes the schema version and the type ordinal
+- * of the HLL.
+- *
+- * The top nibble is the schema version and the bottom nibble is the type
+- * ordinal.
+- *
+- * @param schemaVersion the schema version to encode.
+- * @param typeOrdinal the type ordinal of the HLL to encode.
+- * @return the packed version byte
+- */
+- public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) {
+- return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal));
+- }
+- /**
+- * Generates a byte that encodes the log-base-2 of the explicit cutoff
+- * or sentinel values for 'explicit-disabled' or 'auto', as well as the
+- * boolean indicating whether to use {@link HLLType#SPARSE}
+- * in the promotion hierarchy.
+- *
+- * The top bit is always padding, the second highest bit indicates the
+- * 'sparse-enabled' boolean, and the lowest six bits encode the explicit
+- * cutoff value.
+- *
+- * @param explicitCutoff the explicit cutoff value to encode.
+- *
+- * -
+- * If 'explicit-disabled' is chosen, this value should be 0.
+- *
+- * -
+- * If 'auto' is chosen, this value should be 63.
+- *
+- * -
+- * If a cutoff of 2^n is desired, for 0 <= n < 31,
+- * this value should be n + 1.
+- *
+- *
+- * @param sparseEnabled whether {@link HLLType#SPARSE}
+- * should be used in the promotion hierarchy to improve HLL
+- * storage.
+- *
+- * @return the packed cutoff byte
+- */
+- public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) {
+- final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0);
+- return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff));
+- }
+-
+- /**
+- * Generates a byte that encodes the parameters of a
+- * {@link HLLType#FULL} or {@link HLLType#SPARSE}
+- * HLL.
+- *
+- * The top 3 bits are used to encode registerWidth - 1
+- * (range of registerWidth is thus 1-9) and the bottom 5
+- * bits are used to encode registerCountLog2
+- * (range of registerCountLog2 is thus 0-31).
+- *
+- * @param registerWidth the register width (must be at least 1 and at
+- * most 9)
+- * @param registerCountLog2 the log-base-2 of the register count (must
+- * be at least 0 and at most 31)
+- * @return the packed parameters byte
+- */
+- public static byte packParametersByte(final int registerWidth, final int registerCountLog2) {
+- final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK);
+- final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK);
+- return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits);
+- }
+-
+- /**
+- * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized
+- * HLL.
+- *
+- * @param cutoffByte the cutoff byte of the serialized HLL
+- * @return the 'sparse-enabled' boolean
+- */
+- public static boolean sparseEnabled(final byte cutoffByte) {
+- return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1;
+- }
+-
+- /**
+- * Extracts the explicit cutoff value from the cutoff byte of a serialized
+- * HLL.
+- *
+- * @param cutoffByte the cutoff byte of the serialized HLL
+- * @return the explicit cutoff value
+- */
+- public static int explicitCutoff(final byte cutoffByte) {
+- return (cutoffByte & EXPLICIT_CUTOFF_MASK);
+- }
+-
+- /**
+- * Extracts the schema version from the version byte of a serialized
+- * HLL.
+- *
+- * @param versionByte the version byte of the serialized HLL
+- * @return the schema version of the serialized HLL
+- */
+- public static int schemaVersion(final byte versionByte) {
+- return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS);
+- }
+-
+- /**
+- * Extracts the type ordinal from the version byte of a serialized HLL.
+- *
+- * @param versionByte the version byte of the serialized HLL
+- * @return the type ordinal of the serialized HLL
+- */
+- public static int typeOrdinal(final byte versionByte) {
+- return (versionByte & NIBBLE_MASK);
+- }
+-
+- /**
+- * Extracts the register width from the parameters byte of a serialized
+- * {@link HLLType#FULL} HLL.
+- *
+- * @param parametersByte the parameters byte of the serialized HLL
+- * @return the register width of the serialized HLL
+- *
+- * @see #packParametersByte(int, int)
+- */
+- public static int registerWidth(final byte parametersByte) {
+- return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1;
+- }
+-
+- /**
+- * Extracts the log2(registerCount) from the parameters byte of a
+- * serialized {@link HLLType#FULL} HLL.
+- *
+- * @param parametersByte the parameters byte of the serialized HLL
+- * @return log2(registerCount) of the serialized HLL
+- *
+- * @see #packParametersByte(int, int)
+- */
+- public static int registerCountLog2(final byte parametersByte) {
+- return (parametersByte & LOG2_REGISTER_COUNT_MASK);
+- }
+-}
diff --git a/solr-repack.sh b/solr-repack.sh
index b50c285..1df1a0b 100644
--- a/solr-repack.sh
+++ b/solr-repack.sh
@@ -6,7 +6,7 @@ if [ $# -ne 1 ] ; then
fi
VERSION=$1
rm -Rf solr-$VERSION-clean.tar.xz
-#wget http://www.apache.org/dist/lucene/solr/$VERSION/solr-$VERSION-src.tgz
+wget http://www.apache.org/dist/lucene/solr/$VERSION/solr-$VERSION-src.tgz
tar -xf solr-$VERSION-src.tgz
find solr-$VERSION -name "*.class" -print -delete
@@ -23,8 +23,8 @@ pushd solr-$VERSION
wget -O pom.xml http://central.maven.org/maven2/org/apache/solr/solr-${p}/${VERSION}/solr-${p}-${VERSION}.pom
popd
done
-# analytics
- for p in analysis-extras clustering dataimporthandler dataimporthandler-extras \
+
+ for p in analysis-extras analytics clustering dataimporthandler dataimporthandler-extras \
langid map-reduce morphlines-cell morphlines-core uima velocity; do
mkdir -p solr/contrib/${p}
pushd solr/contrib/${p}
@@ -40,4 +40,4 @@ pushd solr-$VERSION
popd
tar -cJf solr-$VERSION-clean.tar.xz solr-$VERSION
-#rm -rf solr-$VERSION
+rm -rf solr-$VERSION
diff --git a/solr.spec b/solr.spec
index 2c27dc3..b270ca9 100644
--- a/solr.spec
+++ b/solr.spec
@@ -4,14 +4,13 @@
# Unavailable deps
%bcond_with randomizedtesting
%bcond_with uima
-%bcond_with webapp
%bcond_with kite
%endif
Name: solr
-Version: 4.10.4
-Release: 2%{?dist}
+Version: 5.3.0
+Release: 1%{?dist}
Summary: Ultra-fast Lucene-based Search Server
# MIT/X11 (BSD like) solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/AlphaNumericComparator.java
License: ASL 2.0 and BSD
@@ -19,50 +18,131 @@ URL: http://lucene.apache.org/solr/
# Use solr-repack.sh
Source0: %{name}-%{version}-clean.tar.xz
Source2: solr-repack.sh
-# https://issues.apache.org/jira/browse/SOLR-4839
-Patch0: solr-4.10.4-SOLR-4839.patch
+Patch0: solr-5.3.0-use-system-hll.patch
+Patch1: solr-5.3.0-jetty9.3.3.patch
+BuildRequires: maven-local
+BuildRequires: mvn(com.adobe.xmp:xmpcore)
+BuildRequires: mvn(com.carrotsearch:hppc)
BuildRequires: mvn(com.cybozu.labs:langdetect)
+BuildRequires: mvn(com.drewnoakes:metadata-extractor:2)
+BuildRequires: mvn(com.fasterxml.jackson.core:jackson-core)
+BuildRequires: mvn(com.fasterxml.jackson.dataformat:jackson-dataformat-smile)
BuildRequires: mvn(com.google.guava:guava)
+BuildRequires: mvn(com.google.protobuf:protobuf-java)
BuildRequires: mvn(com.googlecode.concurrentlinkedhashmap:concurrentlinkedhashmap-lru)
+BuildRequires: mvn(com.googlecode.juniversalchardet:juniversalchardet)
BuildRequires: mvn(com.ibm.icu:icu4j)
+BuildRequires: mvn(com.pff:java-libpst)
BuildRequires: mvn(com.spatial4j:spatial4j)
BuildRequires: mvn(com.sun.mail:gimap)
BuildRequires: mvn(com.sun.mail:javax.mail)
+# https://bugzilla.redhat.com/show_bug.cgi?id=1242405
+BuildRequires: mvn(com.tdunning:t-digest)
+BuildRequires: mvn(com.thoughtworks.paranamer:paranamer)
+BuildRequires: mvn(de.l3s.boilerpipe:boilerpipe)
+BuildRequires: mvn(dom4j:dom4j)
BuildRequires: mvn(org.aspectj:aspectjrt)
-BuildRequires: mvn(org.codelibs:jhighlight)
BuildRequires: mvn(commons-cli:commons-cli)
BuildRequires: mvn(commons-codec:commons-codec)
+BuildRequires: mvn(commons-collections:commons-collections)
+BuildRequires: mvn(commons-configuration:commons-configuration)
BuildRequires: mvn(commons-fileupload:commons-fileupload)
BuildRequires: mvn(commons-io:commons-io)
BuildRequires: mvn(commons-lang:commons-lang)
+BuildRequires: mvn(dom4j:dom4j)
+BuildRequires: mvn(io.netty:netty:3)
BuildRequires: mvn(jakarta-regexp:jakarta-regexp)
-BuildRequires: mvn(javax.servlet:servlet-api)
+BuildRequires: mvn(javax.servlet:javax.servlet-api)
+BuildRequires: mvn(jdom:jdom)
BuildRequires: mvn(joda-time:joda-time)
BuildRequires: mvn(log4j:log4j:1.2.17)
+BuildRequires: mvn(net.agkn:hll)
BuildRequires: mvn(net.arnx:jsonic)
-BuildRequires: mvn(net.sourceforge.nekohtml:nekohtml)
+BuildRequires: mvn(net.sourceforge.jmatio:jmatio)
BuildRequires: mvn(org.antlr:antlr-runtime)
+BuildRequires: mvn(org.apache:apache:pom:)
+BuildRequires: mvn(org.apache.ant:ant)
+BuildRequires: mvn(org.apache.commons:commons-exec)
BuildRequires: mvn(org.apache.commons:commons-compress)
+BuildRequires: mvn(org.apache.felix:maven-bundle-plugin)
+# https://bugzilla.redhat.com/show_bug.cgi?id=1235420
BuildRequires: mvn(org.apache.hadoop:hadoop-annotations)
BuildRequires: mvn(org.apache.hadoop:hadoop-auth)
BuildRequires: mvn(org.apache.hadoop:hadoop-common)
BuildRequires: mvn(org.apache.hadoop:hadoop-hdfs)
BuildRequires: mvn(org.apache.httpcomponents:httpclient)
BuildRequires: mvn(org.apache.httpcomponents:httpmime)
+BuildRequires: mvn(org.apache.lucene:lucene-analysis-modules-aggregator:pom:)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-common)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-icu)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-kuromoji)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-morfologik)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-phonetic)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-smartcn)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-stempel)
+BuildRequires: mvn(org.apache.lucene:lucene-analyzers-uima)
+BuildRequires: mvn(org.apache.lucene:lucene-backward-codecs)
+BuildRequires: mvn(org.apache.lucene:lucene-codecs)
+BuildRequires: mvn(org.apache.lucene:lucene-core)
+BuildRequires: mvn(org.apache.lucene:lucene-expressions)
+BuildRequires: mvn(org.apache.lucene:lucene-grouping)
+BuildRequires: mvn(org.apache.lucene:lucene-highlighter)
+BuildRequires: mvn(org.apache.lucene:lucene-join)
+BuildRequires: mvn(org.apache.lucene:lucene-memory)
+BuildRequires: mvn(org.apache.lucene:lucene-misc)
+BuildRequires: mvn(org.apache.lucene:lucene-parent:pom:)
+BuildRequires: mvn(org.apache.lucene:lucene-queries)
+BuildRequires: mvn(org.apache.lucene:lucene-queryparser)
+BuildRequires: mvn(org.apache.lucene:lucene-replicator)
+BuildRequires: mvn(org.apache.lucene:lucene-sandbox)
+BuildRequires: mvn(org.apache.lucene:lucene-spatial)
+BuildRequires: mvn(org.apache.lucene:lucene-suggest)
+BuildRequires: mvn(org.apache.lucene:lucene-test-framework)
+BuildRequires: mvn(org.apache.maven.plugins:maven-enforcer-plugin)
+BuildRequires: mvn(org.apache.maven.plugins:maven-remote-resources-plugin)
+BuildRequires: mvn(org.apache.maven.plugins:maven-site-plugin)
+BuildRequires: mvn(org.apache.james:apache-mime4j-core)
+BuildRequires: mvn(org.apache.james:apache-mime4j-dom)
+BuildRequires: mvn(org.apache.james:james-project:pom:)
+BuildRequires: mvn(org.apache.pdfbox:fontbox)
+BuildRequires: mvn(org.apache.pdfbox:jempbox)
+BuildRequires: mvn(org.apache.pdfbox:pdfbox)
+BuildRequires: mvn(org.apache.poi:poi)
+BuildRequires: mvn(org.apache.poi:poi-ooxml)
+BuildRequires: mvn(org.apache.poi:poi-ooxml-schemas)
+BuildRequires: mvn(org.apache.poi:poi-scratchpad)
+BuildRequires: mvn(org.apache.tika:tika-core)
BuildRequires: mvn(org.apache.tika:tika-parsers)
BuildRequires: mvn(org.apache.velocity:velocity)
BuildRequires: mvn(org.apache.velocity:velocity-tools)
BuildRequires: mvn(org.apache.zookeeper:zookeeper)
+BuildRequires: mvn(org.apache.xmlbeans:xmlbeans)
+BuildRequires: mvn(org.aspectj:aspectjrt)
+BuildRequires: mvn(org.carrot2:morfologik-fsa)
BuildRequires: mvn(org.carrot2:morfologik-polish)
+BuildRequires: mvn(org.carrot2:morfologik-stemming)
+BuildRequires: mvn(org.ccil.cowan.tagsoup:tagsoup)
+# https://bugzilla.redhat.com/show_bug.cgi?id=1237324
+BuildRequires: mvn(org.cloudera.htrace:htrace-core)
+BuildRequires: mvn(org.codehaus.mojo:buildnumber-maven-plugin)
+BuildRequires: mvn(org.codehaus.woodstox:stax2-api)
BuildRequires: mvn(org.codehaus.woodstox:woodstox-core-asl)
+BuildRequires: mvn(org.eclipse.jetty:jetty-continuation)
BuildRequires: mvn(org.eclipse.jetty:jetty-deploy)
+BuildRequires: mvn(org.eclipse.jetty:jetty-http)
+BuildRequires: mvn(org.eclipse.jetty:jetty-io)
BuildRequires: mvn(org.eclipse.jetty:jetty-jmx)
+BuildRequires: mvn(org.eclipse.jetty:jetty-rewrite)
+BuildRequires: mvn(org.eclipse.jetty:jetty-security)
BuildRequires: mvn(org.eclipse.jetty:jetty-server)
BuildRequires: mvn(org.eclipse.jetty:jetty-servlet)
BuildRequires: mvn(org.eclipse.jetty:jetty-servlets)
BuildRequires: mvn(org.eclipse.jetty:jetty-util)
BuildRequires: mvn(org.eclipse.jetty:jetty-webapp)
+BuildRequires: mvn(org.eclipse.jetty:jetty-xml)
+BuildRequires: mvn(org.gagravarr:vorbis-java-tika)
+BuildRequires: mvn(org.hamcrest:hamcrest-core)
BuildRequires: mvn(org.noggit:noggit)
BuildRequires: mvn(org.ow2.asm:asm)
BuildRequires: mvn(org.ow2.asm:asm-commons)
@@ -72,13 +152,10 @@ BuildRequires: mvn(org.slf4j:jcl-over-slf4j)
BuildRequires: mvn(org.slf4j:jul-to-slf4j)
BuildRequires: mvn(org.slf4j:slf4j-api)
BuildRequires: mvn(org.slf4j:slf4j-log4j12)
+BuildRequires: mvn(org.tukaani:xz)
+BuildRequires: mvn(rome:rome)
BuildRequires: mvn(xerces:xercesImpl)
-%if %{?fedora} > 20
-BuildRequires: mvn(io.netty:netty:3)
-%else
-BuildRequires: mvn(io.netty:netty)
-%endif
# Optional?
%if %{with carrot2}
@@ -86,12 +163,14 @@ BuildRequires: mvn(io.netty:netty)
# work in progress ... circular deps
BuildRequires: mvn(org.carrot2:carrot2-mini:3.8.0)
%endif
+
%if %{without randomizedtesting}
# {lucene,solr}/test-framework
BuildRequires: mvn(com.carrotsearch.randomizedtesting:junit4-ant)
BuildRequires: mvn(com.carrotsearch.randomizedtesting:randomizedtesting-runner)
BuildRequires: mvn(junit:junit)
%endif
+
%if %{without uima}
# {lucene/analysis,solr/contrib}/uima
BuildRequires: mvn(org.apache.uima:uimaj-core)
@@ -102,10 +181,7 @@ BuildRequires: mvn(org.apache.uima:WhitespaceTokenizer)
BuildRequires: mvn(org.apache.uima:parent-pom:pom:)
BuildRequires: mvn(org.apache.lucene:lucene-analyzers-uima)
%endif
-%if %{with webapp}
-# solr/webapp
-BuildRequires: mvn(org.eclipse.jetty.orbit:javax.servlet)
-%endif
+
%if %{with kite}
# solr/contrib/{map-reduce,morphlines-cell,morphlines-core}
BuildRequires: mvn(com.codahale.metrics:metrics-core)
@@ -129,52 +205,57 @@ BuildRequires: mvn(org.kitesdk:kite-morphlines-saxon)
%endif
%endif
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-common)
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-icu)
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-kuromoji)
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-morfologik)
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-phonetic)
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-smartcn)
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-stempel)
-BuildRequires: mvn(org.apache.lucene:lucene-analyzers-uima)
-BuildRequires: mvn(org.apache.lucene:lucene-codecs)
-BuildRequires: mvn(org.apache.lucene:lucene-core)
-BuildRequires: mvn(org.apache.lucene:lucene-expressions)
-BuildRequires: mvn(org.apache.lucene:lucene-grouping)
-BuildRequires: mvn(org.apache.lucene:lucene-highlighter)
-BuildRequires: mvn(org.apache.lucene:lucene-join)
-BuildRequires: mvn(org.apache.lucene:lucene-memory)
-BuildRequires: mvn(org.apache.lucene:lucene-misc)
-BuildRequires: mvn(org.apache.lucene:lucene-queries)
-BuildRequires: mvn(org.apache.lucene:lucene-queryparser)
-BuildRequires: mvn(org.apache.lucene:lucene-replicator)
-BuildRequires: mvn(org.apache.lucene:lucene-spatial)
-BuildRequires: mvn(org.apache.lucene:lucene-suggest)
-
-BuildRequires: mvn(org.apache.lucene:lucene-analysis-modules-aggregator:pom:)
-BuildRequires: mvn(org.apache.lucene:lucene-parent:pom:)
-BuildRequires: mvn(org.apache.lucene:lucene-test-framework)
-
%if 0
# test deps
-BuildRequires: mvn(dom4j:dom4j)
+BuildRequires: mvn(aopalliance:aopalliance)
+BuildRequires: mvn(com.fasterxml.jackson.core:jackson-annotations)
+BuildRequires: mvn(com.fasterxml.jackson.core:jackson-databind)
+BuildRequires: mvn(com.google.inject:guice)
+BuildRequires: mvn(com.google.inject.extensions:guice-servlet)
+BuildRequires: mvn(com.sun.jersey:jersey-bundle:1)
+BuildRequires: mvn(com.sun.jersey:jersey-core:1)
+BuildRequires: mvn(com.sun.jersey:jersey-json:1)
+BuildRequires: mvn(com.sun.jersey:jersey-server:1)
+BuildRequires: mvn(com.sun.jersey.contribs:jersey-guice:1)
+BuildRequires: mvn(com.sun.xml.bind:jaxb-impl)
BuildRequires: mvn(hsqldb:hsqldb:1.8.0.10)
-BuildRequires: mvn(org.apache.ant:ant:1.8.2)
+BuildRequires: mvn(org.apache.avro:avro)
+BuildRequires: mvn(org.apache.curator:curator-client)
+BuildRequires: mvn(org.apache.curator:curator-framework)
BuildRequires: mvn(org.apache.derby:derby:10.9.1.0)
-BuildRequires: mvn(org.apache.hadoop:hadoop-common:tests:2.0.5-alpha)
-BuildRequires: mvn(org.apache.hadoop:hadoop-hdfs:tests:2.0.5-alpha)
+BuildRequires: mvn(org.apache.hadoop:hadoop-common:2.6.0:tests:)
+BuildRequires: mvn(org.apache.hadoop:hadoop-hdfs:2.6.0:tests:)
+BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-app)
+BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-common)
+BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-hs)
+BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-jobclient)
+BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.6.0:tests:)
+BuildRequires: mvn(org.apache.hadoop:hadoop-mapreduce-client-shuffle)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-api)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-client)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-common)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-applicationhistoryservice)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-common)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-nodemanager)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-resourcemanager)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-tests:2.6.0:tests:)
+BuildRequires: mvn(org.apache.hadoop:hadoop-yarn-server-web-proxy)
+BuildRequires: mvn(org.apache.mrunit:mrunit::hadoop2:)
+BuildRequires: mvn(org.codehaus.jackson:jackson-core-asl)
+BuildRequires: mvn(org.codehaus.jackson:jackson-jaxrs)
+BuildRequires: mvn(org.codehaus.jackson:jackson-mapper-asl)
BuildRequires: mvn(org.easymock:easymock:3.0)
+BuildRequires: mvn(org.fusesource.leveldbjni:leveldbjni)
+BuildRequires: mvn(org.iq80.leveldb:leveldb)
+BuildRequires: mvn(org.iq80.leveldb:leveldb-api)
+BuildRequires: mvn(org.kitesdk:kite-morphlines-core::tests:)
+BuildRequires: mvn(org.mockito:mockito-core)
BuildRequires: mvn(org.mortbay.jetty:jetty:6.1.26)
BuildRequires: mvn(org.mortbay.jetty:jetty-util:6.1.26)
-%endif
-BuildRequires: mvn(org.hamcrest:hamcrest-core)
+BuildRequires: mvn(org.objenesis:objenesis)
+BuildRequires: mvn(org.xerial.snappy:snappy-java)
-BuildRequires: buildnumber-maven-plugin
-BuildRequires: maven-local
-BuildRequires: maven-enforcer-plugin
-BuildRequires: maven-plugin-bundle
-BuildRequires: maven-remote-resources-plugin
-BuildRequires: maven-site-plugin
+%endif
BuildArch: noarch
@@ -198,10 +279,17 @@ find . -name "*.class" -print -delete
find . -name "*.jar" -print -delete
find . -name "*.js" -print -delete
+%patch0 -p1
+rm -rf solr/core/src/java/org/apache/solr/util/hll
+%pom_add_dep net.agkn:hll:1.6.0 solr/core
+
+%if %{?fedora} > 23
+%patch1 -p1
+%endif
+
cp -p dev-tools/maven/solr/pom.xml.template solr/pom.xml
cp -p dev-tools/maven/solr/contrib/pom.xml.template solr/contrib/pom.xml
-cp -p dev-tools/maven/solr/webapp/pom.xml.template solr/webapp/pom.xml
-sed -i "s/@version@/%{version}/g" solr/pom.xml solr/contrib/pom.xml solr/webapp/pom.xml
+sed -i "s/@version@/%{version}/g" solr/pom.xml solr/contrib/pom.xml
# Fix parent pom
sed -i "s|../../../pom.xml|../pom.xml|" $(find solr -name "pom.xml")
@@ -210,7 +298,7 @@ sed -i "s|../../../pom.xml|../pom.xml
sed -i 's|${module-path}|${basedir}/src/java|' \
solr/core/pom.xml solr/solrj/pom.xml
-for p in solr/test-framework/pom.xml solr/contrib/analysis-extras/pom.xml \
+for p in solr/test-framework/pom.xml solr/contrib/analytics/pom.xml solr/contrib/analysis-extras/pom.xml \
solr/contrib/clustering/pom.xml solr/contrib/dataimporthandler/pom.xml solr/contrib/dataimporthandler-extras/pom.xml \
solr/contrib/extraction/pom.xml solr/contrib/langid/pom.xml solr/contrib/uima/pom.xml solr/contrib/velocity/pom.xml \
solr/contrib/morphlines-core/pom.xml solr/contrib/morphlines-cell/pom.xml solr/contrib/map-reduce/pom.xml; do
@@ -220,7 +308,7 @@ done
sed -i 's|${module-path}/src/resources|${basedir}/src/resources|' \
solr/contrib/uima/pom.xml \
- solr/contrib/clustering/pom.xml \
+ solr/contrib/analytics/pom.xml solr/contrib/clustering/pom.xml \
solr/contrib/morphlines-core/pom.xml solr/contrib/morphlines-cell/pom.xml solr/contrib/map-reduce/pom.xml
sed -i 's|${module-path}/src/test-files|${basedir}/src/test-files|' \
@@ -234,16 +322,15 @@ sed -i 's|${module-path}|${basedir}/src/java
sed -i 's|${module-path}|${basedir}/src/test|' \
solr/solrj/pom.xml
-# Remove unavailable plugins
-%pom_remove_plugin org.codehaus.gmaven:gmaven-plugin
-%pom_remove_plugin de.thetaphi:forbiddenapis
-for m in solr solr/core solr/solrj solr/test-framework \
- solr/contrib/extraction solr/contrib/velocity solr/contrib/uima solr/contrib/langid solr/webapp \
- solr/contrib/morphlines-core/pom.xml solr/contrib/morphlines-cell/pom.xml solr/contrib/map-reduce/pom.xml; do
+sed -i 's|now.timestamp|maven.build.timestamp|' pom.xml
+sed -i 's|@spec.version@|${project.version}|' pom.xml
-%pom_remove_plugin de.thetaphi:forbiddenapis ${m}
+# Disable lucene
+%pom_disable_module lucene
-done
+# Remove unavailable plugins
+%pom_remove_plugin org.codehaus.gmaven:gmaven-plugin
+%pom_remove_plugin -r de.thetaphi:forbiddenapis
%pom_xpath_inject "pom:build/pom:pluginManagement/pom:plugins/pom:plugin[pom:artifactId = 'maven-javadoc-plugin' ]" '
@@ -274,39 +361,18 @@ done
%if %{with uima}
%pom_disable_module uima solr/contrib
%endif
-%if %{without webapp}
-# Require JQuery.js and other js libraries https://bugzilla.redhat.com/show_bug.cgi?id=857992
-%pom_disable_module webapp solr
-%endif
+
%if %{without kite}
%pom_disable_module map-reduce solr/contrib
%pom_disable_module morphlines-cell solr/contrib
%pom_disable_module morphlines-core solr/contrib
%endif
-# Fix aId
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/core
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/solrj
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/test-framework
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/analysis-extras
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/langid
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/dataimporthandler
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/dataimporthandler-extras
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/extraction
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/uima
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/velocity
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/morphlines-core
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/morphlines-cell
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:groupId = 'org.codehaus.woodstox']/pom:artifactId" woodstox-core-asl solr/contrib/map-reduce
-
# Use system jvm apis
-#%%pom_remove_dep javax.activation:activation solr/contrib/dataimporthandler
%pom_remove_dep javax.activation:activation solr/contrib/dataimporthandler-extras
+
# Remove fake BR
-%pom_remove_dep com.googlecode.mp4parser:isoparser
-%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/extraction
-%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/langid
-%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/dataimporthandler-extras
+%pom_remove_dep -r com.googlecode.mp4parser:isoparser
%pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/dataimporthandler-extras
%pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/extraction
%pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/langid
@@ -321,76 +387,69 @@ done
%pom_remove_dep org.bouncycastle:bcprov-jdk15 solr/contrib/langid
%pom_remove_dep org.bouncycastle: solr/contrib/morphlines-core
%pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/morphlines-core
-%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/morphlines-core
%pom_remove_dep org.bouncycastle: solr/contrib/morphlines-cell
%pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/morphlines-cell
-%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/morphlines-cell
%pom_remove_dep org.bouncycastle: solr/contrib/map-reduce
%pom_remove_dep com.drewnoakes:metadata-extractor solr/contrib/map-reduce
-%pom_remove_dep com.googlecode.mp4parser:isoparser solr/contrib/map-reduce
%pom_remove_dep net.sf.saxon:Saxon-HE solr/contrib/map-reduce
%pom_remove_dep org.kitesdk:kite-morphlines-saxon solr/contrib/map-reduce
%pom_remove_dep org.apache.tika:tika-xmp solr/contrib/map-reduce
%pom_remove_dep org.apache.tika:tika-xmp solr/contrib/morphlines-cell
%pom_remove_dep org.apache.tika:tika-xmp solr/contrib/morphlines-core
+%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/dataimporthandler-extras
+%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/extraction
+%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/langid
+%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/map-reduce
+%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/morphlines-cell
+%pom_remove_dep org.apache.tika:tika-java7 solr/contrib/morphlines-core
-%if %{?fedora} > 20
-%pom_xpath_set "pom:dependencyManagement/pom:dependencies/pom:dependency[pom:groupId = 'io.netty']/pom:version" 3
-%endif
-
-# Disable lucene
-%pom_disable_module lucene
-
-# Add jetty9 support
-%patch0 -p0
-%pom_xpath_set "pom:properties/pom:jetty.version" 9.0.5.v20130815
-%pom_add_dep org.eclipse.jetty:jetty-servlets solr/core
-%pom_remove_dep org.eclipse.jetty.orbit:javax.servlet solr/core
-%pom_add_dep javax.servlet:javax.servlet-api solr/core
-
+%pom_xpath_set "pom:dependency[pom:groupId = 'io.netty']/pom:version" 3
# fix log4j version
-sed -i "s|1.2.16|1.2.17|" pom.xml
-for p in solr/core \
- solr/test-framework \
- solr/contrib/analysis-extras \
- solr/contrib/dataimporthandler \
- solr/contrib/dataimporthandler-extras \
- solr/contrib/extraction \
- solr/contrib/langid \
- solr/contrib/uima \
- solr/contrib/velocity \
- solr/contrib/morphlines-core \
- solr/contrib/morphlines-cell \
- solr/contrib/map-reduce;do
-%pom_xpath_inject "pom:dependencies/pom:dependency[pom:artifactId = 'log4j']" "1.2.17" ${p}
+for p in core \
+ test-framework \
+ contrib/analysis-extras \
+ contrib/analytics \
+ contrib/clustering \
+ contrib/dataimporthandler \
+ contrib/dataimporthandler-extras \
+ contrib/extraction \
+ contrib/langid \
+ contrib/uima \
+ contrib/velocity \
+ contrib/morphlines-core \
+ contrib/morphlines-cell \
+ contrib/map-reduce;do
+%pom_xpath_inject "pom:dependency[pom:artifactId = 'log4j']" "1.2.17" solr/${p}
done
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/dataimporthandler-extras
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/extraction
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/langid
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/map-reduce
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/morphlines-cell
-%pom_xpath_set "pom:dependencies/pom:dependency[pom:artifactId = 'jhighlight']/pom:groupId" org.codelibs solr/contrib/morphlines-core
+# Use htrace >= 3.0.4
+%pom_xpath_set -r "pom:dependency[pom:artifactId = 'htrace-core']/pom:groupId" org.cloudera.htrace
+# Use hadoop >= 2.6.0
+rm -r solr/core/src/java/org/apache/solr/security/KerberosFilter.java \
+ solr/core/src/java/org/apache/solr/security/KerberosPlugin.java
+sed -i "s|conf.addResource(TEST_CONF);||" \
+ solr/core/src/java/org/apache/solr/util/HdfsUtil.java
%build
-# Test skipped for unavailable test deps {lucene,solr}/test-framework
+# Test skipped for unavailable test deps
%mvn_build -f
%install
%mvn_install
%files -f .mfiles
-%dir %{_javadir}/%{name}
-%doc solr/CHANGES.txt README.txt
+%doc solr/CHANGES.txt solr/README.txt
%license LICENSE.txt NOTICE.txt
%files javadoc -f .mfiles-javadoc
%license LICENSE.txt NOTICE.txt
%changelog
+* Wed Sep 16 2015 gil cattaneo 5.3.0-1
+- update to 5.3.0 (rhbz#1240013,1235424)
+
* Fri Jun 19 2015 Fedora Release Engineering - 4.10.4-2
- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild
diff --git a/sources b/sources
index bae9599..8e88dee 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-4376764b3f78579766ee7518b7f21e18 solr-4.10.4-clean.tar.xz
+436cc205b1e58a4b6afb236042a355a5 solr-5.3.0-clean.tar.xz