diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsField.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsField.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsField.java 2015-07-17 14:10:40.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsField.java 2015-09-14 18:11:56.330769125 +0200
@@ -55,8 +55,8 @@
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
-import org.apache.solr.util.hll.HLL;
-import org.apache.solr.util.hll.HLLType;
+import net.agkn.hll.HLL;
+import net.agkn.hll.HLLType;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java 2015-07-17 14:10:40.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java 2015-09-14 18:12:12.835927178 +0200
@@ -33,12 +33,12 @@
import org.apache.solr.schema.*;
import com.tdunning.math.stats.AVLTreeDigest;
+
+import net.agkn.hll.HLL;
+import net.agkn.hll.HLLType;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
-import org.apache.solr.util.hll.HLL;
-import org.apache.solr.util.hll.HLLType;
-
/**
* Factory class for creating instance of
* {@link org.apache.solr.handler.component.StatsValues}
@@ -824,7 +824,7 @@
@Override
public long hash(String v) {
// NOTE: renamed hashUnencodedChars starting with guava 15
- return hasher.hashString(v).asLong();
+ return hasher.hashUnencodedChars(v).asLong();
}
@Override
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java 2015-07-17 14:10:40.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java 2015-09-15 07:26:56.977497188 +0200
@@ -23,8 +23,8 @@
import java.util.List;
import java.util.Set;
-import org.apache.solr.util.hll.HLL;
-import org.apache.solr.util.hll.HLLType;
+import net.agkn.hll.HLL;
+import net.agkn.hll.HLLType;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java 2015-07-17 14:10:40.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java 2015-09-16 09:31:52.773252909 +0200
@@ -21,7 +21,7 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.solr.util.hll.HLL;
+import net.agkn.hll.HLL;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,173 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A corresponding deserializer for {@link BigEndianAscendingWordSerializer}.
- */
-class BigEndianAscendingWordDeserializer implements IWordDeserializer {
- // The number of bits per byte.
- private static final int BITS_PER_BYTE = 8;
-
- // long mask for the maximum value stored in a byte
- private static final long BYTE_MASK = (1L << BITS_PER_BYTE) - 1L;
-
- // ************************************************************************
- // The length in bits of the words to be read.
- private final int wordLength;
-
- // The byte array to which the words are serialized.
- private final byte[] bytes;
-
- // The number of leading padding bytes in 'bytes' to be ignored.
- private final int bytePadding;
-
- // The number of words that the byte array contains.
- private final int wordCount;
-
- // The current read state.
- private int currentWordIndex;
-
- // ========================================================================
- /**
- * @param wordLength the length in bits of the words to be deserialized. Must
- * be less than or equal to 64 and greater than or equal to 1.
- * @param bytePadding the number of leading bytes that pad the serialized words.
- * Must be greater than or equal to zero.
- * @param bytes the byte array containing the serialized words. Cannot be
- * <code>null</code>.
- */
- public BigEndianAscendingWordDeserializer(final int wordLength, final int bytePadding, final byte[] bytes) {
- if((wordLength < 1) || (wordLength > 64)) {
- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")");
- }
-
- if(bytePadding < 0) {
- throw new IllegalArgumentException("Byte padding must be >= zero. (was: " + bytePadding + ")");
- }
-
- this.wordLength = wordLength;
- this.bytes = bytes;
- this.bytePadding = bytePadding;
-
- final int dataBytes = (bytes.length - bytePadding);
- final long dataBits = (dataBytes * BITS_PER_BYTE);
-
- this.wordCount = (int)(dataBits/wordLength);
-
- currentWordIndex = 0;
- }
-
- // ========================================================================
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IWordDeserializer#readWord()
- */
- @Override
- public long readWord() {
- final long word = readWord(currentWordIndex);
- currentWordIndex++;
-
- return word;
- }
-
- // ------------------------------------------------------------------------
- /**
- * Reads the word at the specified sequence position (zero-indexed).
- *
- * @param position the zero-indexed position of the word to be read. This
- * must be greater than or equal to zero.
- * @return the value of the serialized word at the specified position.
- */
- private long readWord(final int position) {
- if(position < 0) {
- throw new ArrayIndexOutOfBoundsException(position);
- }
-
- // First bit of the word
- final long firstBitIndex = (position * wordLength);
- final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE));
- final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE);
-
- // Last bit of the word
- final long lastBitIndex = (firstBitIndex + wordLength - 1);
- final int lastByteIndex = (bytePadding + (int)(lastBitIndex / BITS_PER_BYTE));
- final int lastByteBitsToConsume;
-
- final int bitsAfterByteBoundary = (int)((lastBitIndex + 1) % BITS_PER_BYTE);
- // If the word terminates at the end of the last byte, consume the whole
- // last byte.
- if(bitsAfterByteBoundary == 0) {
- lastByteBitsToConsume = BITS_PER_BYTE;
- } else {
- // Otherwise, only consume what is necessary.
- lastByteBitsToConsume = bitsAfterByteBoundary;
- }
-
- if(lastByteIndex >= bytes.length) {
- throw new ArrayIndexOutOfBoundsException("Word out of bounds of backing array.");
- }
-
- // Accumulator
- long value = 0;
-
- // --------------------------------------------------------------------
- // First byte
- final int bitsRemainingInFirstByte = (BITS_PER_BYTE - firstByteSkipBits);
- final int bitsToConsumeInFirstByte = Math.min(bitsRemainingInFirstByte, wordLength);
- long firstByte = (long)bytes[firstByteIndex];
-
- // Mask off the bits to skip in the first byte.
- final long firstByteMask = ((1L << bitsRemainingInFirstByte) - 1L);
- firstByte &= firstByteMask;
- // Right-align relevant bits of first byte.
- firstByte >>>= (bitsRemainingInFirstByte - bitsToConsumeInFirstByte);
-
- value |= firstByte;
-
- // If the first byte contains the whole word, short-circuit.
- if(firstByteIndex == lastByteIndex) {
- return value;
- }
-
- // --------------------------------------------------------------------
- // Middle bytes
- final int middleByteCount = (lastByteIndex - firstByteIndex - 1);
- for(int i=0; i<middleByteCount; i++) {
- final long middleByte = (bytes[firstByteIndex + i + 1] & BYTE_MASK);
- // Push middle byte onto accumulator.
- value <<= BITS_PER_BYTE;
- value |= middleByte;
- }
-
- // --------------------------------------------------------------------
- // Last byte
- long lastByte = (bytes[lastByteIndex] & BYTE_MASK);
- lastByte >>= (BITS_PER_BYTE - lastByteBitsToConsume);
- value <<= lastByteBitsToConsume;
- value |= lastByte;
- return value;
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IWordDeserializer#totalWordCount()
- */
- @Override
- public int totalWordCount() {
- return wordCount;
- }
-}
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,174 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A serializer that writes a sequence of fixed bit-width 'words' to a byte array.
- * Bitwise OR is used to write words into bytes, so a low bit in a word is also
- * a low bit in a byte. However, a high byte in a word is written at a lower index
- * in the array than a low byte in a word. The first word is written at the lowest
- * array index. Each serializer is one time use and returns its backing byte
- * array.<p/>
- *
- * This encoding was chosen so that when reading bytes as octets in the typical
- * first-octet-is-the-high-nibble fashion, an octet-to-binary conversion
- * would yield a high-to-low, left-to-right view of the "short words".<p/>
- *
- * Example:<p/>
- *
- * Say short words are 5 bits wide. Our word sequence is the values
- * <code>[31, 1, 5]</code>. In big-endian binary format, the values are
- * <code>[0b11111, 0b00001, 0b00101]</code>. We use 15 of 16 bits in two bytes
- * and pad the last (lowest) bit of the last byte with a zero:
- *
- * <code>
- * [0b11111000, 0b01001010] = [0xF8, 0x4A]
- * </code>.
- */
-class BigEndianAscendingWordSerializer implements IWordSerializer {
- // The number of bits per byte.
- private static final int BITS_PER_BYTE = 8;
-
- // ************************************************************************
- // The length in bits of the words to be written.
- private final int wordLength;
- // The number of words to be written.
- private final int wordCount;
-
- // The byte array to which the words are serialized.
- private final byte[] bytes;
-
- // ------------------------------------------------------------------------
- // Write state
- // Number of bits that remain writable in the current byte.
- private int bitsLeftInByte;
- // Index of byte currently being written to.
- private int byteIndex;
- // Number of words written.
- private int wordsWritten;
-
- // ========================================================================
- /**
- * @param wordLength the length in bits of the words to be serialized. Must
- * be greater than or equal to 1 and less than or equal to 64.
- * @param wordCount the number of words to be serialized. Must be greater than
- * or equal to zero.
- * @param bytePadding the number of leading bytes that should pad the
- * serialized words. Must be greater than or equal to zero.
- */
- public BigEndianAscendingWordSerializer(final int wordLength, final int wordCount, final int bytePadding) {
- if((wordLength < 1) || (wordLength > 64)) {
- throw new IllegalArgumentException("Word length must be >= 1 and <= 64. (was: " + wordLength + ")");
- }
- if(wordCount < 0) {
- throw new IllegalArgumentException("Word count must be >= 0. (was: " + wordCount + ")");
- }
- if(bytePadding < 0) {
- throw new IllegalArgumentException("Byte padding must be must be >= 0. (was: " + bytePadding + ")");
- }
-
- this.wordLength = wordLength;
- this.wordCount = wordCount;
-
- final long bitsRequired = (wordLength * wordCount);
- final boolean leftoverBits = ((bitsRequired % BITS_PER_BYTE) != 0);
- final int bytesRequired = (int)(bitsRequired / BITS_PER_BYTE) + (leftoverBits ? 1 : 0) + bytePadding;
- bytes = new byte[bytesRequired];
-
- bitsLeftInByte = BITS_PER_BYTE;
- byteIndex = bytePadding;
- wordsWritten = 0;
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IWordSerializer#writeWord(long)
- * @throws RuntimeException if the number of words written is greater than the
- * <code>wordCount</code> parameter in the constructor.
- */
- @Override
- public void writeWord(final long word) {
- if(wordsWritten == wordCount) {
- throw new RuntimeException("Cannot write more words, backing array full!");
- }
-
- int bitsLeftInWord = wordLength;
-
- while(bitsLeftInWord > 0) {
- // Move to the next byte if the current one is fully packed.
- if(bitsLeftInByte == 0) {
- byteIndex++;
- bitsLeftInByte = BITS_PER_BYTE;
- }
-
- final long consumedMask;
- if(bitsLeftInWord == 64) {
- consumedMask = ~0L;
- } else {
- consumedMask = ((1L << bitsLeftInWord) - 1L);
- }
-
- // Fix how many bits will be written in this cycle. Choose the
- // smaller of the remaining bits in the word or byte.
- final int numberOfBitsToWrite = Math.min(bitsLeftInByte, bitsLeftInWord);
- final int bitsInByteRemainingAfterWrite = (bitsLeftInByte - numberOfBitsToWrite);
-
- // In general, we write the highest bits of the word first, so we
- // strip the highest bits that were consumed in previous cycles.
- final long remainingBitsOfWordToWrite = (word & consumedMask);
-
- final long bitsThatTheByteCanAccept;
- // If there is more left in the word than can be written to this
- // byte, shift off the bits that can't be written off the bottom.
- if(bitsLeftInWord > numberOfBitsToWrite) {
- bitsThatTheByteCanAccept = (remainingBitsOfWordToWrite >>> (bitsLeftInWord - bitsLeftInByte));
- } else {
- // If the byte can accept all remaining bits, there is no need
- // to shift off the bits that won't be written in this cycle.
- bitsThatTheByteCanAccept = remainingBitsOfWordToWrite;
- }
-
- // Align the word bits to write up against the byte bits that have
- // already been written. This shift may do nothing if the remainder
- // of the byte is being consumed in this cycle.
- final long alignedBits = (bitsThatTheByteCanAccept << bitsInByteRemainingAfterWrite);
-
- // Update the byte with the alignedBits.
- bytes[byteIndex] |= (byte)alignedBits;
-
- // Update state with bit count written.
- bitsLeftInWord -= numberOfBitsToWrite;
- bitsLeftInByte = bitsInByteRemainingAfterWrite;
- }
-
- wordsWritten ++;
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IWordSerializer#getBytes()
- * @throws RuntimeException if the number of words written is fewer than the
- * <code>wordCount</code> parameter in the constructor.
- */
- @Override
- public byte[] getBytes() {
- if(wordsWritten < wordCount) {
- throw new RuntimeException("Not all words have been written! (" + wordsWritten + "/" + wordCount + ")");
- }
-
- return bytes;
- }
-}
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitUtil.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,71 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A collection of bit utilities.
- */
-class BitUtil {
- /**
- * The set of least-significant bits for a given <code>byte</code>. <code>-1</code>
- * is used if no bits are set (so as to not be confused with "index of zero"
- * meaning that the least significant bit is the 0th (1st) bit).
- *
- * @see #leastSignificantBit(long)
- */
- private static final int[] LEAST_SIGNIFICANT_BIT = {
- -1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
- };
-
- /**
- * Computes the least-significant bit of the specified <code>long</code>
- * that is set to <code>1</code>. Zero-indexed.
- *
- * @param value the <code>long</code> whose least-significant bit is desired.
- * @return the least-significant bit of the specified <code>long</code>.
- * <code>-1</code> is returned if there are no bits set.
- */
- // REF: http://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
- // REF: http://www-graphics.stanford.edu/~seander/bithacks.html
- public static int leastSignificantBit(final long value) {
- if(value == 0L) return -1/*by contract*/;
- if((value & 0xFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 0) & 0xFF)] + 0;
- if((value & 0xFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 8) & 0xFF)] + 8;
- if((value & 0xFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 16) & 0xFF)] + 16;
- if((value & 0xFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 24) & 0xFF)] + 24;
- if((value & 0xFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 32) & 0xFF)] + 32;
- if((value & 0xFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 40) & 0xFF)] + 40;
- if((value & 0xFFFFFFFFFFFFFFL) != 0) return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 48) & 0xFF)] + 48;
- return LEAST_SIGNIFICANT_BIT[(int)( (value >>> 56) & 0xFFL)] + 56;
- }
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/BitVector.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,259 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A vector (array) of bits that is accessed in units ("registers") of <code>width</code>
- * bits which are stored as 64bit "words" (<code>long</code>s). In this context
- * a register is at most 64bits.
- */
-class BitVector implements Cloneable {
- // NOTE: in this context, a word is 64bits
-
- // rather than doing division to determine how a bit index fits into 64bit
- // words (i.e. longs), bit shifting is used
- private static final int LOG2_BITS_PER_WORD = 6/*=>64bits*/;
- private static final int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD;
- private static final int BITS_PER_WORD_MASK = BITS_PER_WORD - 1;
-
- // ditto from above but for bytes (for output)
- private static final int LOG2_BITS_PER_BYTE = 3/*=>8bits*/;
- public static final int BITS_PER_BYTE = 1 << LOG2_BITS_PER_BYTE;
-
- // ========================================================================
- public static final int BYTES_PER_WORD = 8/*8 bytes in a long*/;
-
- // ************************************************************************
- // 64bit words
- private final long[] words;
- public final long[] words() { return words; }
- public final int wordCount() { return words.length; }
- public final int byteCount() { return wordCount() * BYTES_PER_WORD; }
-
- // the width of a register in bits (this cannot be more than 64 (the word size))
- private final int registerWidth;
- public final int registerWidth() { return registerWidth; }
-
- private final long count;
-
- // ------------------------------------------------------------------------
- private final long registerMask;
-
- // ========================================================================
- /**
- * @param width the width of each register. This cannot be negative or
- * zero or greater than 63 (the signed word size).
- * @param count the number of registers. This cannot be negative or zero
- */
- public BitVector(final int width, final long count) {
- // ceil((width * count)/BITS_PER_WORD)
- this.words = new long[(int)(((width * count) + BITS_PER_WORD_MASK) >>> LOG2_BITS_PER_WORD)];
- this.registerWidth = width;
- this.count = count;
-
- this.registerMask = (1L << width) - 1;
- }
-
- // ========================================================================
- /**
- * @param registerIndex the index of the register whose value is to be
- * retrieved. This cannot be negative.
- * @return the value at the specified register index
- * @see #setRegister(long, long)
- * @see #setMaxRegister(long, long)
- */
- // NOTE: if this changes then setMaxRegister() must change
- public long getRegister(final long registerIndex) {
- final long bitIndex = registerIndex * registerWidth;
- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
-
- if(firstWordIndex == secondWordIndex)
- return ((words[firstWordIndex] >>> bitRemainder) & registerMask);
- /* else -- register spans words */
- return (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
- }
-
- /**
- * @param registerIndex the index of the register whose value is to be set.
- * This cannot be negative
- * @param value the value to set in the register
- * @see #getRegister(long)
- * @see #setMaxRegister(long, long)
- */
- // NOTE: if this changes then setMaxRegister() must change
- public void setRegister(final long registerIndex, final long value) {
- final long bitIndex = registerIndex * registerWidth;
- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
-
- final long words[] = this.words/*for convenience/performance*/;
- if(firstWordIndex == secondWordIndex) {
- // clear then set
- words[firstWordIndex] &= ~(registerMask << bitRemainder);
- words[firstWordIndex] |= (value << bitRemainder);
- } else {/*register spans words*/
- // clear then set each partial word
- words[firstWordIndex] &= (1L << bitRemainder) - 1;
- words[firstWordIndex] |= (value << bitRemainder);
-
- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
- }
- }
-
- // ------------------------------------------------------------------------
- /**
- * @return a <code>LongIterator</code> for iterating starting at the register
- * with index zero. This will never be <code>null</code>.
- */
- public LongIterator registerIterator() {
- return new LongIterator() {
- final int registerWidth = BitVector.this.registerWidth;
- final long[] words = BitVector.this.words;
- final long registerMask = BitVector.this.registerMask;
-
- // register setup
- long registerIndex = 0;
- int wordIndex = 0;
- int remainingWordBits = BITS_PER_WORD;
- long word = words[wordIndex];
-
- @Override public long next() {
- long register;
- if(remainingWordBits >= registerWidth) {
- register = word & registerMask;
-
- // shift to the next register
- word >>>= registerWidth;
- remainingWordBits -= registerWidth;
- } else { /*insufficient bits remaining in current word*/
- wordIndex++/*move to the next word*/;
-
- register = (word | (words[wordIndex] << remainingWordBits)) & registerMask;
-
- // shift to the next partial register (word)
- word = words[wordIndex] >>> (registerWidth - remainingWordBits);
- remainingWordBits += BITS_PER_WORD - registerWidth;
- }
- registerIndex++;
- return register;
- }
-
- @Override public boolean hasNext() {
- return registerIndex < count;
- }
- };
- }
-
- // ------------------------------------------------------------------------
- // composite accessors
- /**
- * Sets the value of the specified index register if and only if the specified
- * value is greater than the current value in the register. This is equivalent
- * to but much more performant than:<p/>
- *
- * <pre>vector.setRegister(index, Math.max(vector.getRegister(index), value));</pre>
- *
- * @param registerIndex the index of the register whose value is to be set.
- * This cannot be negative
- * @param value the value to set in the register if and only if this value
- * is greater than the current value in the register
- * @return <code>true</code> if and only if the specified value is greater
- * than or equal to the current register value. <code>false</code>
- * otherwise.
- * @see #getRegister(long)
- * @see #setRegister(long, long)
- * @see java.lang.Math#max(long, long)
- */
- // NOTE: if this changes then setRegister() must change
- public boolean setMaxRegister(final long registerIndex, final long value) {
- final long bitIndex = registerIndex * registerWidth;
- final int firstWordIndex = (int)(bitIndex >>> LOG2_BITS_PER_WORD)/*aka (bitIndex / BITS_PER_WORD)*/;
- final int secondWordIndex = (int)((bitIndex + registerWidth - 1) >>> LOG2_BITS_PER_WORD)/*see above*/;
- final int bitRemainder = (int)(bitIndex & BITS_PER_WORD_MASK)/*aka (bitIndex % BITS_PER_WORD)*/;
-
- // NOTE: matches getRegister()
- final long registerValue;
- final long words[] = this.words/*for convenience/performance*/;
- if(firstWordIndex == secondWordIndex)
- registerValue = ((words[firstWordIndex] >>> bitRemainder) & registerMask);
- else /*register spans words*/
- registerValue = (words[firstWordIndex] >>> bitRemainder)/*no need to mask since at top of word*/
- | (words[secondWordIndex] << (BITS_PER_WORD - bitRemainder)) & registerMask;
-
- // determine which is the larger and update as necessary
- if(value > registerValue) {
- // NOTE: matches setRegister()
- if(firstWordIndex == secondWordIndex) {
- // clear then set
- words[firstWordIndex] &= ~(registerMask << bitRemainder);
- words[firstWordIndex] |= (value << bitRemainder);
- } else {/*register spans words*/
- // clear then set each partial word
- words[firstWordIndex] &= (1L << bitRemainder) - 1;
- words[firstWordIndex] |= (value << bitRemainder);
-
- words[secondWordIndex] &= ~(registerMask >>> (BITS_PER_WORD - bitRemainder));
- words[secondWordIndex] |= (value >>> (BITS_PER_WORD - bitRemainder));
- }
- } /* else -- the register value is greater (or equal) so nothing needs to be done */
-
- return (value >= registerValue);
- }
-
- // ========================================================================
- /**
- * Fills this bit vector with the specified bit value. This can be used to
- * clear the vector by specifying <code>0</code>.
- *
- * @param value the value to set all bits to (only the lowest bit is used)
- */
- public void fill(final long value) {
- for(long i=0; i<count; i++) {
- setRegister(i, value);
- }
- }
-
- // ------------------------------------------------------------------------
- /**
- * Serializes the registers of the vector using the specified serializer.
- *
- * @param serializer the serializer to use. This cannot be <code>null</code>.
- */
- public void getRegisterContents(final IWordSerializer serializer) {
- for(final LongIterator iter = registerIterator(); iter.hasNext();) {
- serializer.writeWord(iter.next());
- }
- }
-
- /**
- * Creates a deep copy of this vector.
- *
- * @see java.lang.Object#clone()
- */
- @Override
- public BitVector clone() {
- final BitVector copy = new BitVector(registerWidth, count);
- System.arraycopy(words, 0, copy.words, 0, words.length);
- return copy;
- }
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLL.java 2015-07-16 13:14:59.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLL.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,1071 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Arrays;
-
-import com.carrotsearch.hppc.IntByteOpenHashMap;
-import com.carrotsearch.hppc.LongOpenHashSet;
-import com.carrotsearch.hppc.cursors.IntByteCursor;
-import com.carrotsearch.hppc.cursors.LongCursor;
-
-/**
- * A probabilistic set of hashed <code>long</code> elements. Useful for computing
- * the approximate cardinality of a stream of data in very small storage.
- *
- * A modified version of the <a href="http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf">
- * 'HyperLogLog' data structure and algorithm</a> is used, which combines both
- * probabilistic and non-probabilistic techniques to improve the accuracy and
- * storage requirements of the original algorithm.
- *
- * More specifically, initializing and storing a new {@link HLL} will
- * allocate a sentinel value symbolizing the empty set ({@link HLLType#EMPTY}).
- * After adding the first few values, a sorted list of unique integers is
- * stored in a {@link HLLType#EXPLICIT} hash set. When configured, accuracy can
- * be sacrificed for memory footprint: the values in the sorted list are
- * "promoted" to a "{@link HLLType#SPARSE}" map-based HyperLogLog structure.
- * Finally, when enough registers are set, the map-based HLL will be converted
- * to a bit-packed "{@link HLLType#FULL}" HyperLogLog structure.
- *
- * This data structure is interoperable with the implementations found at:
- * <ul>
- * <li><a href="https://github.com/aggregateknowledge/postgresql-hll">postgresql-hll</a>, and</li>
- * <li><a href="https://github.com/aggregateknowledge/js-hll">js-hll</a></li>
- * </ul>
- * when <a href="https://github.com/aggregateknowledge/postgresql-hll/blob/master/STORAGE.markdown">properly serialized</a>.
- */
-public class HLL implements Cloneable {
- // minimum and maximum values for the log-base-2 of the number of registers
- // in the HLL
- public static final int MINIMUM_LOG2M_PARAM = 4;
- public static final int MAXIMUM_LOG2M_PARAM = 30;
-
- // minimum and maximum values for the register width of the HLL
- public static final int MINIMUM_REGWIDTH_PARAM = 1;
- public static final int MAXIMUM_REGWIDTH_PARAM = 8;
-
- // minimum and maximum values for the 'expthresh' parameter of the
- // constructor that is meant to match the PostgreSQL implementation's
- // constructor and parameter names
- public static final int MINIMUM_EXPTHRESH_PARAM = -1;
- public static final int MAXIMUM_EXPTHRESH_PARAM = 18;
- public static final int MAXIMUM_EXPLICIT_THRESHOLD = (1 << (MAXIMUM_EXPTHRESH_PARAM - 1)/*per storage spec*/);
-
- // ************************************************************************
- // Storage
- // storage used when #type is EXPLICIT, null otherwise
- LongOpenHashSet explicitStorage;
- // storage used when #type is SPARSE, null otherwise
- IntByteOpenHashMap sparseProbabilisticStorage;
- // storage used when #type is FULL, null otherwise
- BitVector probabilisticStorage;
-
- // current type of this HLL instance, if this changes then so should the
- // storage used (see above)
- private HLLType type;
-
- // ------------------------------------------------------------------------
- // Characteristic parameters
- // NOTE: These members are named to match the PostgreSQL implementation's
- // parameters.
- // log2(the number of probabilistic HLL registers)
- private final int log2m;
- // the size (width) each register in bits
- private final int regwidth;
-
- // ------------------------------------------------------------------------
- // Computed constants
- // ........................................................................
- // EXPLICIT-specific constants
- // flag indicating if the EXPLICIT representation should NOT be used
- private final boolean explicitOff;
- // flag indicating that the promotion threshold from EXPLICIT should be
- // computed automatically
- // NOTE: this only has meaning when 'explicitOff' is false
- private final boolean explicitAuto;
- // threshold (in element count) at which a EXPLICIT HLL is converted to a
- // SPARSE or FULL HLL, always greater than or equal to zero and always a
- // power of two OR simply zero
- // NOTE: this only has meaning when 'explicitOff' is false
- private final int explicitThreshold;
-
- // ........................................................................
- // SPARSE-specific constants
- // the computed width of the short words
- private final int shortWordLength;
- // flag indicating if the SPARSE representation should not be used
- private final boolean sparseOff;
- // threshold (in register count) at which a SPARSE HLL is converted to a
- // FULL HLL, always greater than zero
- private final int sparseThreshold;
-
- // ........................................................................
- // Probabilistic algorithm constants
- // the number of registers, will always be a power of 2
- private final int m;
- // a mask of the log2m bits set to one and the rest to zero
- private final int mBitsMask;
- // a mask as wide as a register (see #fromBytes())
- private final int valueMask;
- // mask used to ensure that p(w) does not overflow register (see #Constructor() and #addRaw())
- private final long pwMaxMask;
- // alpha * m^2 (the constant in the "'raw' HyperLogLog estimator")
- private final double alphaMSquared;
- // the cutoff value of the estimator for using the "small" range cardinality
- // correction formula
- private final double smallEstimatorCutoff;
- // the cutoff value of the estimator for using the "large" range cardinality
- // correction formula
- private final double largeEstimatorCutoff;
-
- // ========================================================================
- /**
- * NOTE: Arguments here are named and structured identically to those in the
- * PostgreSQL implementation, which can be found
- * <a href="https://github.com/aggregateknowledge/postgresql-hll/blob/master/README.markdown#explanation-of-parameters-and-tuning">here</a>.
- *
- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
- * algorithm. Must be at least 4 and at most 30.
- * @param regwidth number of bits used per register in the HyperLogLog
- * algorithm. Must be at least 1 and at most 8.
- * @param expthresh tunes when the {@link HLLType#EXPLICIT} to
- * {@link HLLType#SPARSE} promotion occurs,
- * based on the set's cardinality. Must be at least -1 and at most 18.
- * @param sparseon Flag indicating if the {@link HLLType#SPARSE}
- * representation should be used.
- * @param type the type in the promotion hierarchy which this instance should
- * start at. This cannot be <code>null</code>.
- */
- public HLL(final int log2m, final int regwidth, final int expthresh, final boolean sparseon, final HLLType type) {
- this.log2m = log2m;
- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) {
- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")");
- }
-
- this.regwidth = regwidth;
- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) {
- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")");
- }
-
- this.m = (1 << log2m);
- this.mBitsMask = m - 1;
- this.valueMask = (1 << regwidth) - 1;
- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth);
- this.alphaMSquared = HLLUtil.alphaMSquared(m);
- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m);
- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth);
-
- if(expthresh == -1) {
- this.explicitAuto = true;
- this.explicitOff = false;
-
- // NOTE: This math matches the size calculation in the PostgreSQL impl.
- final long fullRepresentationSize = (this.regwidth * (long)this.m + 7/*round up to next whole byte*/)/Byte.SIZE;
- final int numLongs = (int)(fullRepresentationSize / 8/*integer division to round down*/);
-
- if(numLongs > MAXIMUM_EXPLICIT_THRESHOLD) {
- this.explicitThreshold = MAXIMUM_EXPLICIT_THRESHOLD;
- } else {
- this.explicitThreshold = numLongs;
- }
- } else if(expthresh == 0) {
- this.explicitAuto = false;
- this.explicitOff = true;
- this.explicitThreshold = 0;
- } else if((expthresh > 0) && (expthresh <= MAXIMUM_EXPTHRESH_PARAM)){
- this.explicitAuto = false;
- this.explicitOff = false;
- this.explicitThreshold = (1 << (expthresh - 1));
- } else {
- throw new IllegalArgumentException("'expthresh' must be at least " + MINIMUM_EXPTHRESH_PARAM + " and at most " + MAXIMUM_EXPTHRESH_PARAM + " (was: " + expthresh + ")");
- }
-
- this.shortWordLength = (regwidth + log2m);
- this.sparseOff = !sparseon;
- if(this.sparseOff) {
- this.sparseThreshold = 0;
- } else {
- // TODO improve this cutoff to include the cost overhead of Java
- // members/objects
- final int largestPow2LessThanCutoff =
- (int)NumberUtil.log2((this.m * this.regwidth) / this.shortWordLength);
- this.sparseThreshold = (1 << largestPow2LessThanCutoff);
- }
-
- initializeStorage(type);
- }
-
- /**
- * Construct an empty HLL with the given {@code log2m} and {@code regwidth}.
- *
- * This is equivalent to calling <code>HLL(log2m, regwidth, -1, true, HLLType.EMPTY)</code>.
- *
- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
- * algorithm. Must be at least 4 and at most 30.
- * @param regwidth number of bits used per register in the HyperLogLog
- * algorithm. Must be at least 1 and at most 8.
- *
- * @see #HLL(int, int, int, boolean, HLLType)
- */
- public HLL(final int log2m, final int regwidth) {
- this(log2m, regwidth, -1, true, HLLType.EMPTY);
- }
-
- // -------------------------------------------------------------------------
- /**
- * Convenience constructor for testing. Assumes that both {@link HLLType#EXPLICIT}
- * and {@link HLLType#SPARSE} representations should be enabled.
- *
- * @param log2m log-base-2 of the number of registers used in the HyperLogLog
- * algorithm. Must be at least 4 and at most 30.
- * @param regwidth number of bits used per register in the HyperLogLog
- * algorithm. Must be at least 1 and at most 8.
- * @param explicitThreshold cardinality threshold at which the {@link HLLType#EXPLICIT}
- * representation should be promoted to {@link HLLType#SPARSE}.
- * This must be greater than zero and less than or equal to {@value #MAXIMUM_EXPLICIT_THRESHOLD}.
- * @param sparseThreshold register count threshold at which the {@link HLLType#SPARSE}
- * representation should be promoted to {@link HLLType#FULL}.
- * This must be greater than zero.
- * @param type the type in the promotion hierarchy which this instance should
- * start at. This cannot be <code>null</code>.
- */
- /*package, for testing*/ HLL(final int log2m, final int regwidth, final int explicitThreshold, final int sparseThreshold, final HLLType type) {
- this.log2m = log2m;
- if((log2m < MINIMUM_LOG2M_PARAM) || (log2m > MAXIMUM_LOG2M_PARAM)) {
- throw new IllegalArgumentException("'log2m' must be at least " + MINIMUM_LOG2M_PARAM + " and at most " + MAXIMUM_LOG2M_PARAM + " (was: " + log2m + ")");
- }
-
- this.regwidth = regwidth;
- if((regwidth < MINIMUM_REGWIDTH_PARAM) || (regwidth > MAXIMUM_REGWIDTH_PARAM)) {
- throw new IllegalArgumentException("'regwidth' must be at least " + MINIMUM_REGWIDTH_PARAM + " and at most " + MAXIMUM_REGWIDTH_PARAM + " (was: " + regwidth + ")");
- }
-
- this.m = (1 << log2m);
- this.mBitsMask = m - 1;
- this.valueMask = (1 << regwidth) - 1;
- this.pwMaxMask = HLLUtil.pwMaxMask(regwidth);
- this.alphaMSquared = HLLUtil.alphaMSquared(m);
- this.smallEstimatorCutoff = HLLUtil.smallEstimatorCutoff(m);
- this.largeEstimatorCutoff = HLLUtil.largeEstimatorCutoff(log2m, regwidth);
-
- this.explicitAuto = false;
- this.explicitOff = false;
- this.explicitThreshold = explicitThreshold;
- if((explicitThreshold < 1) || (explicitThreshold > MAXIMUM_EXPLICIT_THRESHOLD)) {
- throw new IllegalArgumentException("'explicitThreshold' must be at least 1 and at most " + MAXIMUM_EXPLICIT_THRESHOLD + " (was: " + explicitThreshold + ")");
- }
-
- this.shortWordLength = (regwidth + log2m);
- this.sparseOff = false;
- this.sparseThreshold = sparseThreshold;
-
- initializeStorage(type);
- }
-
- /**
- * @return the type in the promotion hierarchy of this instance. This will
- * never be <code>null</code>.
- */
- public HLLType getType() { return type; }
-
- // ========================================================================
- // Add
- /**
- * Adds <code>rawValue</code> directly to the HLL.
- *
- * @param rawValue the value to be added. It is very important that this
- * value <em>already be hashed</em> with a strong (but not
- * necessarily cryptographic) hash function. For instance, the
- * Murmur3 implementation in
- * <a href="http://guava-libraries.googlecode.com/git/guava/src/com/google/common/hash/Murmur3_128HashFunction.java">
- * Google's Guava</a> library is an excellent hash function for this
- * purpose and, for seeds greater than zero, matches the output
- * of the hash provided in the PostgreSQL implementation.
- */
- public void addRaw(final long rawValue) {
- switch(type) {
- case EMPTY: {
- // NOTE: EMPTY type is always promoted on #addRaw()
- if(explicitThreshold > 0) {
- initializeStorage(HLLType.EXPLICIT);
- explicitStorage.add(rawValue);
- } else if(!sparseOff) {
- initializeStorage(HLLType.SPARSE);
- addRawSparseProbabilistic(rawValue);
- } else {
- initializeStorage(HLLType.FULL);
- addRawProbabilistic(rawValue);
- }
- return;
- }
- case EXPLICIT: {
- explicitStorage.add(rawValue);
-
- // promotion, if necessary
- if(explicitStorage.size() > explicitThreshold) {
- if(!sparseOff) {
- initializeStorage(HLLType.SPARSE);
- for (LongCursor c : explicitStorage) {
- addRawSparseProbabilistic(c.value);
- }
- } else {
- initializeStorage(HLLType.FULL);
- for (LongCursor c : explicitStorage) {
- addRawProbabilistic(c.value);
- }
- }
- explicitStorage = null;
- }
- return;
- }
- case SPARSE: {
- addRawSparseProbabilistic(rawValue);
-
- // promotion, if necessary
- if(sparseProbabilisticStorage.size() > sparseThreshold) {
- initializeStorage(HLLType.FULL);
- for(IntByteCursor c : sparseProbabilisticStorage) {
- final int registerIndex = c.key;
- final byte registerValue = c.value;
- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
- }
- sparseProbabilisticStorage = null;
- }
- return;
- }
- case FULL:
- addRawProbabilistic(rawValue);
- return;
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
- }
-
- // ------------------------------------------------------------------------
- // #addRaw(..) helpers
- /**
- * Adds the raw value to the {@link #sparseProbabilisticStorage}.
- * {@link #type} must be {@link HLLType#SPARSE}.
- *
- * @param rawValue the raw value to add to the sparse storage.
- */
- private void addRawSparseProbabilistic(final long rawValue) {
- // p(w): position of the least significant set bit (one-indexed)
- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value)
- //
- // By construction of pwMaxMask (see #Constructor()),
- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2,
- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2,
- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1.
- final long substreamValue = (rawValue >>> log2m);
- final byte p_w;
-
- if(substreamValue == 0L) {
- // The paper does not cover p(0x0), so the special value 0 is used.
- // 0 is the original initialization value of the registers, so by
- // doing this the multiset simply ignores it. This is acceptable
- // because the probability is 1/(2^(2^registerSizeInBits)).
- p_w = 0;
- } else {
- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask));
- }
-
- // Short-circuit if the register is being set to zero, since algorithmically
- // this corresponds to an "unset" register, and "unset" registers aren't
- // stored to save memory. (The very reason this sparse implementation
- // exists.) If a register is set to zero it will break the #algorithmCardinality
- // code.
- if(p_w == 0) {
- return;
- }
-
- // NOTE: no +1 as in paper since 0-based indexing
- final int j = (int)(rawValue & mBitsMask);
-
- final byte currentValue;
- if (sparseProbabilisticStorage.containsKey(j)) {
- currentValue = sparseProbabilisticStorage.lget();
- } else {
- currentValue = 0;
- }
-
- if(p_w > currentValue) {
- sparseProbabilisticStorage.put(j, p_w);
- }
- }
-
- /**
- * Adds the raw value to the {@link #probabilisticStorage}.
- * {@link #type} must be {@link HLLType#FULL}.
- *
- * @param rawValue the raw value to add to the full probabilistic storage.
- */
- private void addRawProbabilistic(final long rawValue) {
- // p(w): position of the least significant set bit (one-indexed)
- // By contract: p(w) <= 2^(registerValueInBits) - 1 (the max register value)
- //
- // By construction of pwMaxMask (see #Constructor()),
- // lsb(pwMaxMask) = 2^(registerValueInBits) - 2,
- // thus lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) - 2,
- // thus 1 + lsb(any_long | pwMaxMask) <= 2^(registerValueInBits) -1.
- final long substreamValue = (rawValue >>> log2m);
- final byte p_w;
-
- if (substreamValue == 0L) {
- // The paper does not cover p(0x0), so the special value 0 is used.
- // 0 is the original initialization value of the registers, so by
- // doing this the multiset simply ignores it. This is acceptable
- // because the probability is 1/(2^(2^registerSizeInBits)).
- p_w = 0;
- } else {
- p_w = (byte)(1 + BitUtil.leastSignificantBit(substreamValue| pwMaxMask));
- }
-
- // Short-circuit if the register is being set to zero, since algorithmically
- // this corresponds to an "unset" register, and "unset" registers aren't
- // stored to save memory. (The very reason this sparse implementation
- // exists.) If a register is set to zero it will break the #algorithmCardinality
- // code.
- if(p_w == 0) {
- return;
- }
-
- // NOTE: no +1 as in paper since 0-based indexing
- final int j = (int)(rawValue & mBitsMask);
-
- probabilisticStorage.setMaxRegister(j, p_w);
- }
-
- // ------------------------------------------------------------------------
- // Storage helper
- /**
- * Initializes storage for the specified {@link HLLType} and changes the
- * instance's {@link #type}.
- *
- * @param type the {@link HLLType} to initialize storage for. This cannot be
- * <code>null</code> and must be an instantiable type.
- */
- private void initializeStorage(final HLLType type) {
- this.type = type;
- switch(type) {
- case EMPTY:
- // nothing to be done
- break;
- case EXPLICIT:
- this.explicitStorage = new LongOpenHashSet();
- break;
- case SPARSE:
- this.sparseProbabilisticStorage = new IntByteOpenHashMap();
- break;
- case FULL:
- this.probabilisticStorage = new BitVector(regwidth, m);
- break;
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
- }
-
- // ========================================================================
- // Cardinality
- /**
- * Computes the cardinality of the HLL.
- *
- * @return the cardinality of HLL. This will never be negative.
- */
- public long cardinality() {
- switch(type) {
- case EMPTY:
- return 0/*by definition*/;
- case EXPLICIT:
- return explicitStorage.size();
- case SPARSE:
- return (long)Math.ceil(sparseProbabilisticAlgorithmCardinality());
- case FULL:
- return (long)Math.ceil(fullProbabilisticAlgorithmCardinality());
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
- }
-
- // ------------------------------------------------------------------------
- // Cardinality helpers
- /**
- * Computes the exact cardinality value returned by the HLL algorithm when
- * represented as a {@link HLLType#SPARSE} HLL. Kept
- * separate from {@link #cardinality()} for testing purposes. {@link #type}
- * must be {@link HLLType#SPARSE}.
- *
- * @return the exact, unrounded cardinality given by the HLL algorithm
- */
- /*package, for testing*/ double sparseProbabilisticAlgorithmCardinality() {
- final int m = this.m/*for performance*/;
-
- // compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the
- // 'j'th register value
- double sum = 0;
- int numberOfZeroes = 0/*"V" in the paper*/;
- for(int j=0; j<m; j++) {
- final long register;
- if (sparseProbabilisticStorage.containsKey(j)) {
- register = sparseProbabilisticStorage.lget();
- } else {
- register = 0;
- }
-
- sum += 1.0 / (1L << register);
- if(register == 0L) numberOfZeroes++;
- }
-
- // apply the estimate and correction to the indicator function
- final double estimator = alphaMSquared / sum;
- if((numberOfZeroes != 0) && (estimator < smallEstimatorCutoff)) {
- return HLLUtil.smallEstimator(m, numberOfZeroes);
- } else if(estimator <= largeEstimatorCutoff) {
- return estimator;
- } else {
- return HLLUtil.largeEstimator(log2m, regwidth, estimator);
- }
- }
-
- /**
- * Computes the exact cardinality value returned by the HLL algorithm when
- * represented as a {@link HLLType#FULL} HLL. Kept
- * separate from {@link #cardinality()} for testing purposes. {@link #type}
- * must be {@link HLLType#FULL}.
- *
- * @return the exact, unrounded cardinality given by the HLL algorithm
- */
- /*package, for testing*/ double fullProbabilisticAlgorithmCardinality() {
- final int m = this.m/*for performance*/;
-
- // compute the "indicator function" -- sum(2^(-M[j])) where M[j] is the
- // 'j'th register value
- double sum = 0;
- int numberOfZeroes = 0/*"V" in the paper*/;
- final LongIterator iterator = probabilisticStorage.registerIterator();
- while(iterator.hasNext()) {
- final long register = iterator.next();
-
- sum += 1.0 / (1L << register);
- if(register == 0L) numberOfZeroes++;
- }
-
- // apply the estimate and correction to the indicator function
- final double estimator = alphaMSquared / sum;
- if((numberOfZeroes != 0) && (estimator < smallEstimatorCutoff)) {
- return HLLUtil.smallEstimator(m, numberOfZeroes);
- } else if(estimator <= largeEstimatorCutoff) {
- return estimator;
- } else {
- return HLLUtil.largeEstimator(log2m, regwidth, estimator);
- }
- }
-
- // ========================================================================
- // Clear
- /**
- * Clears the HLL. The HLL will have cardinality zero and will act as if no
- * elements have been added.
- *
- * NOTE: Unlike {@link #addRaw(long)}, <code>clear</code> does NOT handle
- * transitions between {@link HLLType}s - a probabilistic type will remain
- * probabilistic after being cleared.
- */
- public void clear() {
- switch(type) {
- case EMPTY:
- return /*do nothing*/;
- case EXPLICIT:
- explicitStorage.clear();
- return;
- case SPARSE:
- sparseProbabilisticStorage.clear();
- return;
- case FULL:
- probabilisticStorage.fill(0);
- return;
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
- }
-
- // ========================================================================
- // Union
- /**
- * Computes the union of HLLs and stores the result in this instance.
- *
- * @param other the other {@link HLL} instance to union into this one. This
- * cannot be <code>null</code>.
- */
- public void union(final HLL other) {
- // TODO: verify HLLs are compatible
- final HLLType otherType = other.getType();
-
- if(type.equals(otherType)) {
- homogeneousUnion(other);
- return;
- } else {
- heterogenousUnion(other);
- return;
- }
- }
-
- // ------------------------------------------------------------------------
- // Union helpers
- /**
- * Computes the union of two HLLs, of different types, and stores the
- * result in this instance.
- *
- * @param other the other {@link HLL} instance to union into this one. This
- * cannot be <code>null</code>.
- */
- /*package, for testing*/ void heterogenousUnion(final HLL other) {
- /*
- * The logic here is divided into two sections: unions with an EMPTY
- * HLL, and unions between EXPLICIT/SPARSE/FULL
- * HLL.
- *
- * Between those two sections, all possible heterogeneous unions are
- * covered. Should another type be added to HLLType whose unions
- * are not easily reduced (say, as EMPTY's are below) this may be more
- * easily implemented as Strategies. However, that is unnecessary as it
- * stands.
- */
-
- // ....................................................................
- // Union with an EMPTY
- if(HLLType.EMPTY.equals(type)) {
- // NOTE: The union of empty with non-empty HLL is just a
- // clone of the non-empty.
-
- switch(other.getType()) {
- case EXPLICIT: {
- // src: EXPLICIT
- // dest: EMPTY
-
- if(other.explicitStorage.size() <= explicitThreshold) {
- type = HLLType.EXPLICIT;
- explicitStorage = other.explicitStorage.clone();
- } else {
- if(!sparseOff) {
- initializeStorage(HLLType.SPARSE);
- } else {
- initializeStorage(HLLType.FULL);
- }
- for(LongCursor c : other.explicitStorage) {
- addRaw(c.value);
- }
- }
- return;
- }
- case SPARSE: {
- // src: SPARSE
- // dest: EMPTY
-
- if(!sparseOff) {
- type = HLLType.SPARSE;
- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone();
- } else {
- initializeStorage(HLLType.FULL);
- for(IntByteCursor c : other.sparseProbabilisticStorage) {
- final int registerIndex = c.key;
- final byte registerValue = c.value;
- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
- }
- }
- return;
- }
- default/*case FULL*/: {
- // src: FULL
- // dest: EMPTY
-
- type = HLLType.FULL;
- probabilisticStorage = other.probabilisticStorage.clone();
- return;
- }
- }
- } else if (HLLType.EMPTY.equals(other.getType())) {
- // source is empty, so just return destination since it is unchanged
- return;
- } /* else -- both of the sets are not empty */
-
- // ....................................................................
- // NOTE: Since EMPTY is handled above, the HLLs are non-EMPTY below
- switch(type) {
- case EXPLICIT: {
- // src: FULL/SPARSE
- // dest: EXPLICIT
- // "Storing into destination" cannot be done (since destination
- // is by definition of smaller capacity than source), so a clone
- // of source is made and values from destination are inserted
- // into that.
-
- // Determine source and destination storage.
- // NOTE: destination storage may change through promotion if
- // source is SPARSE.
- if(HLLType.SPARSE.equals(other.getType())) {
- if(!sparseOff) {
- type = HLLType.SPARSE;
- sparseProbabilisticStorage = other.sparseProbabilisticStorage.clone();
- } else {
- initializeStorage(HLLType.FULL);
- for(IntByteCursor c : other.sparseProbabilisticStorage) {
- final int registerIndex = c.key;
- final byte registerValue = c.value;
- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
- }
- }
- } else /*source is HLLType.FULL*/ {
- type = HLLType.FULL;
- probabilisticStorage = other.probabilisticStorage.clone();
- }
- for(LongCursor c : explicitStorage) {
- addRaw(c.value);
- }
- explicitStorage = null;
- return;
- }
- case SPARSE: {
- if(HLLType.EXPLICIT.equals(other.getType())) {
- // src: EXPLICIT
- // dest: SPARSE
- // Add the raw values from the source to the destination.
-
- for(LongCursor c : other.explicitStorage) {
- addRaw(c.value);
- }
- // NOTE: addRaw will handle promotion cleanup
- } else /*source is HLLType.FULL*/ {
- // src: FULL
- // dest: SPARSE
- // "Storing into destination" cannot be done (since destination
- // is by definition of smaller capacity than source), so a
- // clone of source is made and registers from the destination
- // are merged into the clone.
-
- type = HLLType.FULL;
- probabilisticStorage = other.probabilisticStorage.clone();
- for(IntByteCursor c : sparseProbabilisticStorage) {
- final int registerIndex = c.key;
- final byte registerValue = c.value;
- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
- }
- sparseProbabilisticStorage = null;
- }
- return;
- }
- default/*destination is HLLType.FULL*/: {
- if(HLLType.EXPLICIT.equals(other.getType())) {
- // src: EXPLICIT
- // dest: FULL
- // Add the raw values from the source to the destination.
- // Promotion is not possible, so don't bother checking.
-
- for(LongCursor c : other.explicitStorage) {
- addRaw(c.value);
- }
- } else /*source is HLLType.SPARSE*/ {
- // src: SPARSE
- // dest: FULL
- // Merge the registers from the source into the destination.
- // Promotion is not possible, so don't bother checking.
-
- for(IntByteCursor c : other.sparseProbabilisticStorage) {
- final int registerIndex = c.key;
- final byte registerValue = c.value;
- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
- }
- }
- }
- }
- }
-
- /**
- * Computes the union of two HLLs of the same type, and stores the
- * result in this instance.
- *
- * @param other the other {@link HLL} instance to union into this one. This
- * cannot be <code>null</code>.
- */
- private void homogeneousUnion(final HLL other) {
- switch(type) {
- case EMPTY:
- // union of empty and empty is empty
- return;
- case EXPLICIT:
- for(LongCursor c : other.explicitStorage) {
- addRaw(c.value);
- }
- // NOTE: #addRaw() will handle promotion, if necessary
- return;
- case SPARSE:
- for(IntByteCursor c : other.sparseProbabilisticStorage) {
- final int registerIndex = c.key;
- final byte registerValue = c.value;
- final byte currentRegisterValue = sparseProbabilisticStorage.get(registerIndex);
- if(registerValue > currentRegisterValue) {
- sparseProbabilisticStorage.put(registerIndex, registerValue);
- }
- }
-
- // promotion, if necessary
- if(sparseProbabilisticStorage.size() > sparseThreshold) {
- initializeStorage(HLLType.FULL);
- for(IntByteCursor c : sparseProbabilisticStorage) {
- final int registerIndex = c.key;
- final byte registerValue = c.value;
- probabilisticStorage.setMaxRegister(registerIndex, registerValue);
- }
- sparseProbabilisticStorage = null;
- }
- return;
- case FULL:
- for(int i=0; i<m; i++) {
- final long registerValue = other.probabilisticStorage.getRegister(i);
- probabilisticStorage.setMaxRegister(i, registerValue);
- }
- return;
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
- }
-
- // ========================================================================
- // Serialization
- /**
- * Serializes the HLL to an array of bytes in correspondence with the format
- * of the default schema version, {@link SerializationUtil#DEFAULT_SCHEMA_VERSION}.
- *
- * @return the array of bytes representing the HLL. This will never be
- * <code>null</code> or empty.
- */
- public byte[] toBytes() {
- return toBytes(SerializationUtil.DEFAULT_SCHEMA_VERSION);
- }
-
- /**
- * Serializes the HLL to an array of bytes in correspondence with the format
- * of the specified schema version.
- *
- * @param schemaVersion the schema version dictating the serialization format
- * @return the array of bytes representing the HLL. This will never be
- * <code>null</code> or empty.
- */
- public byte[] toBytes(final ISchemaVersion schemaVersion) {
- final byte[] bytes;
- switch(type) {
- case EMPTY:
- bytes = new byte[schemaVersion.paddingBytes(type)];
- break;
- case EXPLICIT: {
- final IWordSerializer serializer =
- schemaVersion.getSerializer(type, Long.SIZE, explicitStorage.size());
-
- final long[] values = explicitStorage.toArray();
- Arrays.sort(values);
- for(final long value : values) {
- serializer.writeWord(value);
- }
-
- bytes = serializer.getBytes();
- break;
- }
- case SPARSE: {
- final IWordSerializer serializer =
- schemaVersion.getSerializer(type, shortWordLength, sparseProbabilisticStorage.size());
-
- final int[] indices = sparseProbabilisticStorage.keys().toArray();
- Arrays.sort(indices);
- for(final int registerIndex : indices) {
- assert sparseProbabilisticStorage.containsKey(registerIndex);
- final long registerValue = sparseProbabilisticStorage.get(registerIndex);
- // pack index and value into "short word"
- final long shortWord = ((registerIndex << regwidth) | registerValue);
- serializer.writeWord(shortWord);
- }
-
- bytes = serializer.getBytes();
- break;
- }
- case FULL: {
- final IWordSerializer serializer = schemaVersion.getSerializer(type, regwidth, m);
- probabilisticStorage.getRegisterContents(serializer);
-
- bytes = serializer.getBytes();
- break;
- }
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
-
- final IHLLMetadata metadata = new HLLMetadata(schemaVersion.schemaVersionNumber(),
- type,
- log2m,
- regwidth,
- (int)NumberUtil.log2(explicitThreshold),
- explicitOff,
- explicitAuto,
- !sparseOff);
- schemaVersion.writeMetadata(bytes, metadata);
-
- return bytes;
- }
-
- /**
- * Deserializes the HLL (in {@link #toBytes(ISchemaVersion)} format) serialized
- * into <code>bytes</code>.
- *
- * @param bytes the serialized bytes of new HLL
- * @return the deserialized HLL. This will never be <code>null</code>.
- *
- * @see #toBytes(ISchemaVersion)
- */
- public static HLL fromBytes(final byte[] bytes) {
- final ISchemaVersion schemaVersion = SerializationUtil.getSchemaVersion(bytes);
- final IHLLMetadata metadata = schemaVersion.readMetadata(bytes);
-
- final HLLType type = metadata.HLLType();
- final int regwidth = metadata.registerWidth();
- final int log2m = metadata.registerCountLog2();
- final boolean sparseon = metadata.sparseEnabled();
-
- final int expthresh;
- if(metadata.explicitAuto()) {
- expthresh = -1;
- } else if(metadata.explicitOff()) {
- expthresh = 0;
- } else {
- // NOTE: take into account that the postgres-compatible constructor
- // subtracts one before taking a power of two.
- expthresh = metadata.log2ExplicitCutoff() + 1;
- }
-
- final HLL hll = new HLL(log2m, regwidth, expthresh, sparseon, type);
-
- // Short-circuit on empty, which needs no other deserialization.
- if(HLLType.EMPTY.equals(type)) {
- return hll;
- }
-
- final int wordLength;
- switch(type) {
- case EXPLICIT:
- wordLength = Long.SIZE;
- break;
- case SPARSE:
- wordLength = hll.shortWordLength;
- break;
- case FULL:
- wordLength = hll.regwidth;
- break;
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
-
- final IWordDeserializer deserializer =
- schemaVersion.getDeserializer(type, wordLength, bytes);
- switch(type) {
- case EXPLICIT:
- // NOTE: This should not exceed expthresh and this will always
- // be exactly the number of words that were encoded,
- // because the word length is at least a byte wide.
- // SEE: IWordDeserializer#totalWordCount()
- for(int i=0; i<deserializer.totalWordCount(); i++) {
- hll.explicitStorage.add(deserializer.readWord());
- }
- break;
- case SPARSE:
- // NOTE: If the shortWordLength were smaller than 8 bits
- // (1 byte) there would be a possibility (because of
- // padding arithmetic) of having one or more extra
- // registers read. However, this is not relevant as the
- // extra registers will be all zeroes, which are ignored
- // in the sparse representation.
- for(int i=0; i<deserializer.totalWordCount(); i++) {
- final long shortWord = deserializer.readWord();
- final byte registerValue = (byte)(shortWord & hll.valueMask);
- // Only set non-zero registers.
- if (registerValue != 0) {
- hll.sparseProbabilisticStorage.put((int)(shortWord >>> hll.regwidth), registerValue);
- }
- }
- break;
- case FULL:
- // NOTE: Iteration is done using m (register count) and NOT
- // deserializer#totalWordCount() because regwidth may be
- // less than 8 and as such the padding on the 'last' byte
- // may be larger than regwidth, causing an extra register
- // to be read.
- // SEE: IWordDeserializer#totalWordCount()
- for(long i=0; i<hll.m; i++) {
- hll.probabilisticStorage.setRegister(i, deserializer.readWord());
- }
- break;
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
-
- return hll;
- }
-
- /**
- * Create a deep copy of this HLL.
- *
- * @see java.lang.Object#clone()
- */
- @Override
- public HLL clone() throws CloneNotSupportedException {
- // NOTE: Since the package-only constructor assumes both explicit and
- // sparse are enabled, the easiest thing to do here is to re-derive
- // the expthresh parameter and create a new HLL with the public
- // constructor.
- // TODO: add a more sensible constructor to make this less obfuscated
- final int copyExpthresh;
- if(explicitAuto) {
- copyExpthresh = -1;
- } else if(explicitOff) {
- copyExpthresh = 0;
- } else {
- // explicitThreshold is defined as:
- //
- // this.explicitThreshold = (1 << (expthresh - 1));
- //
- // Since explicitThreshold is a power of two and only has a single
- // bit set, finding the LSB is the same as finding the inverse
- copyExpthresh = BitUtil.leastSignificantBit(explicitThreshold) + 1;
- }
- final HLL copy = new HLL(log2m, regwidth, copyExpthresh, !sparseOff/*sparseOn*/, type);
- switch(type) {
- case EMPTY:
- // nothing to be done
- break;
- case EXPLICIT:
- copy.explicitStorage = this.explicitStorage.clone();
- break;
- case SPARSE:
- copy.sparseProbabilisticStorage = this.sparseProbabilisticStorage.clone();
- break;
- case FULL:
- copy.probabilisticStorage = this.probabilisticStorage.clone();
- break;
- default:
- throw new RuntimeException("Unsupported HLL type " + type);
- }
- return copy;
- }
-}
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLMetadata.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLMetadata.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLMetadata.java 2015-07-16 13:22:50.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLMetadata.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,136 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A concrete {@link IHLLMetadata} implemented as a simple struct.
- */
-class HLLMetadata implements IHLLMetadata {
- private final int schemaVersion;
- private final HLLType type;
- private final int registerCountLog2;
- private final int registerWidth;
- private final int log2ExplicitCutoff;
- private final boolean explicitOff;
- private final boolean explicitAuto;
- private final boolean sparseEnabled;
-
- /**
- * @param schemaVersion the schema version number of the HLL. This must
- * be greater than or equal to zero.
- * @param type the {@link HLLType type} of the HLL. This cannot
- * be <code>null</code>.
- * @param registerCountLog2 the log-base-2 register count parameter for
- * probabilistic HLLs. This must be greater than or equal to zero.
- * @param registerWidth the register width parameter for probabilistic
- * HLLs. This must be greater than or equal to zero.
- * @param log2ExplicitCutoff the log-base-2 of the explicit cardinality cutoff,
- * if it is explicitly defined. (If <code>explicitOff</code> or
- * <code>explicitAuto</code> is <code>true</code> then this has no
- * meaning.)
- * @param explicitOff the flag for 'explicit off'-mode, where the
- * {@link HLLType#EXPLICIT} representation is not used. Both this and
- * <code>explicitAuto</code> cannot be <code>true</code> at the same
- * time.
- * @param explicitAuto the flag for 'explicit auto'-mode, where the
- * {@link HLLType#EXPLICIT} representation's promotion cutoff is
- * determined based on in-memory size automatically. Both this and
- * <code>explicitOff</code> cannot be <code>true</code> at the same
- * time.
- * @param sparseEnabled the flag for 'sparse-enabled'-mode, where the
- * {@link HLLType#SPARSE} representation is used.
- */
- public HLLMetadata(final int schemaVersion,
- final HLLType type,
- final int registerCountLog2,
- final int registerWidth,
- final int log2ExplicitCutoff,
- final boolean explicitOff,
- final boolean explicitAuto,
- final boolean sparseEnabled) {
- this.schemaVersion = schemaVersion;
- this.type = type;
- this.registerCountLog2 = registerCountLog2;
- this.registerWidth = registerWidth;
- this.log2ExplicitCutoff = log2ExplicitCutoff;
- this.explicitOff = explicitOff;
- this.explicitAuto = explicitAuto;
- this.sparseEnabled = sparseEnabled;
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#schemaVersion()
- */
- @Override
- public int schemaVersion() { return schemaVersion; }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#HLLType()
- */
- @Override
- public HLLType HLLType() { return type; }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#registerCountLog2()
- */
- @Override
- public int registerCountLog2() { return registerCountLog2; }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#registerWidth()
- */
- @Override
- public int registerWidth() { return registerWidth; }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
- */
- @Override
- public int log2ExplicitCutoff() { return log2ExplicitCutoff; }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#explicitOff()
- */
- @Override
- public boolean explicitOff() {
- return explicitOff;
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#explicitAuto()
- * @see net.agkn.hll.serialization.IHLLMetadata#log2ExplicitCutoff()
- */
- @Override
- public boolean explicitAuto() {
- return explicitAuto;
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.IHLLMetadata#sparseEnabled()
- */
- @Override
- public boolean sparseEnabled() { return sparseEnabled; }
-
- /* (non-Javadoc)
- * @see java.lang.Object#toString()
- */
- @Override
- public String toString() {
- return "<HLLMetadata schemaVersion: " + this.schemaVersion + ", type: " + this.type.toString() + ", registerCountLog2: " + this.registerCountLog2 + ", registerWidth: " + this.registerWidth + ", log2ExplicitCutoff: " + this.log2ExplicitCutoff + ", explicitOff: " + this.explicitOff + ", explicitAuto: " +this.explicitAuto + ">";
- }
-}
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLType.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,29 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * The types of algorithm/data structure that {@link HLL} can utilize. For more
- * information, see the Javadoc for {@link HLL}.
- */
-public enum HLLType {
- EMPTY,
- EXPLICIT,
- SPARSE,
- FULL;
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/HLLUtil.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,199 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Static functions for computing constants and parameters used in the HLL
- * algorithm.
- */
-final class HLLUtil {
- /**
- * Precomputed <code>pwMaxMask</code> values indexed by <code>registerSizeInBits</code>.
- * Calculated with this formula:
- * <pre>
- * int maxRegisterValue = (1 << registerSizeInBits) - 1;
- * // Mask with all bits set except for (maxRegisterValue - 1) least significant bits (see #addRaw())
- * return ~((1L << (maxRegisterValue - 1)) - 1);
- * </pre>
- *
- * @see #pwMaxMask(int)
- */
- private static final long[] PW_MASK = {
- ~((1L << (((1 << 0) - 1) - 1)) - 1),
- ~((1L << (((1 << 1) - 1) - 1)) - 1),
- ~((1L << (((1 << 2) - 1) - 1)) - 1),
- ~((1L << (((1 << 3) - 1) - 1)) - 1),
- ~((1L << (((1 << 4) - 1) - 1)) - 1),
- ~((1L << (((1 << 5) - 1) - 1)) - 1),
- ~((1L << (((1 << 6) - 1) - 1)) - 1),
- ~((1L << (((1 << 7) - 1) - 1)) - 1),
- ~((1L << (((1 << 8) - 1) - 1)) - 1)
- };
-
- /**
- * Precomputed <code>twoToL</code> values indexed by a linear combination of
- * <code>regWidth</code> and <code>log2m</code>.
- *
- * The array is one-dimensional and can be accessed by using index
- * <code>(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m</code>
- * for <code>regWidth</code> and <code>log2m</code> between the specified
- * <code>HLL.{MINIMUM,MAXIMUM}_{REGWIDTH,LOG2M}_PARAM</code> constants.
- *
- * @see #largeEstimator(int, int, double)
- * @see #largeEstimatorCutoff(int, int)
- * @see "<a href='http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/'>Blog post with section on 2^L</a>"
- */
- private static final double[] TWO_TO_L = new double[(HLL.MAXIMUM_REGWIDTH_PARAM + 1) * (HLL.MAXIMUM_LOG2M_PARAM + 1)];
-
- /**
- * Spacing constant used to compute offsets into {@link #TWO_TO_L}.
- */
- private static final int REG_WIDTH_INDEX_MULTIPLIER = HLL.MAXIMUM_LOG2M_PARAM + 1;
-
- static {
- for(int regWidth = HLL.MINIMUM_REGWIDTH_PARAM; regWidth <= HLL.MAXIMUM_REGWIDTH_PARAM; regWidth++) {
- for(int log2m = HLL.MINIMUM_LOG2M_PARAM ; log2m <= HLL.MAXIMUM_LOG2M_PARAM; log2m++) {
- int maxRegisterValue = (1 << regWidth) - 1;
-
- // Since 1 is added to p(w) in the insertion algorithm, only
- // (maxRegisterValue - 1) bits are inspected hence the hash
- // space is one power of two smaller.
- final int pwBits = (maxRegisterValue - 1);
- final int totalBits = (pwBits + log2m);
- final double twoToL = Math.pow(2, totalBits);
- TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * regWidth) + log2m] = twoToL;
- }
- }
- }
-
- // ************************************************************************
- /**
- * Computes the bit-width of HLL registers necessary to estimate a set of
- * the specified cardinality.
- *
- * @param expectedUniqueElements an upper bound on the number of unique
- * elements that are expected. This must be greater than zero.
- * @return a register size in bits (i.e. <code>log2(log2(n))</code>)
- */
- public static int registerBitSize(final long expectedUniqueElements) {
- return Math.max(HLL.MINIMUM_REGWIDTH_PARAM,
- (int)Math.ceil(NumberUtil.log2(NumberUtil.log2(expectedUniqueElements))));
- }
-
- // ========================================================================
- /**
- * Computes the 'alpha-m-squared' constant used by the HyperLogLog algorithm.
- *
- * @param m this must be a power of two, cannot be less than
- * 16 (2<sup>4</sup>), and cannot be greater than 65536 (2<sup>16</sup>).
- * @return gamma times <code>registerCount</code> squared where gamma is
- * based on the value of <code>registerCount</code>.
- * @throws IllegalArgumentException if <code>registerCount</code> is less
- * than 16.
- */
- public static double alphaMSquared(final int m) {
- switch(m) {
- case 1/*2^0*/:
- case 2/*2^1*/:
- case 4/*2^2*/:
- case 8/*2^3*/:
- throw new IllegalArgumentException("'m' cannot be less than 16 (" + m + " < 16).");
-
- case 16/*2^4*/:
- return 0.673 * m * m;
-
- case 32/*2^5*/:
- return 0.697 * m * m;
-
- case 64/*2^6*/:
- return 0.709 * m * m;
-
- default/*>2^6*/:
- return (0.7213 / (1.0 + 1.079 / m)) * m * m;
- }
- }
-
- // ========================================================================
- /**
- * Computes a mask that prevents overflow of HyperLogLog registers.
- *
- * @param registerSizeInBits the size of the HLL registers, in bits.
- * @return mask a <code>long</code> mask to prevent overflow of the registers
- * @see #registerBitSize(long)
- */
- public static long pwMaxMask(final int registerSizeInBits) {
- return PW_MASK[registerSizeInBits];
- }
-
- // ========================================================================
- /**
- * The cutoff for using the "small range correction" formula, in the
- * HyperLogLog algorithm.
- *
- * @param m the number of registers in the HLL. <em>m<em> in the paper.
- * @return the cutoff for the small range correction.
- * @see #smallEstimator(int, int)
- */
- public static double smallEstimatorCutoff(final int m) {
- return ((double)m * 5) / 2;
- }
-
- /**
- * The "small range correction" formula from the HyperLogLog algorithm. Only
- * appropriate if both the estimator is smaller than <pre>(5/2) * m</pre> and
- * there are still registers that have the zero value.
- *
- * @param m the number of registers in the HLL. <em>m<em> in the paper.
- * @param numberOfZeroes the number of registers with value zero. <em>V</em>
- * in the paper.
- * @return a corrected cardinality estimate.
- */
- public static double smallEstimator(final int m, final int numberOfZeroes) {
- return m * Math.log((double)m / numberOfZeroes);
- }
-
- /**
- * The cutoff for using the "large range correction" formula, from the
- * HyperLogLog algorithm, adapted for 64 bit hashes.
- *
- * @param log2m log-base-2 of the number of registers in the HLL. <em>b<em> in the paper.
- * @param registerSizeInBits the size of the HLL registers, in bits.
- * @return the cutoff for the large range correction.
- * @see #largeEstimator(int, int, double)
- * @see "<a href='http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/'>Blog post with section on 64 bit hashes and 'large range correction' cutoff</a>"
- */
- public static double largeEstimatorCutoff(final int log2m, final int registerSizeInBits) {
- return (TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m]) / 30.0;
- }
-
- /**
- * The "large range correction" formula from the HyperLogLog algorithm, adapted
- * for 64 bit hashes. Only appropriate for estimators whose value exceeds
- * the return of {@link #largeEstimatorCutoff(int, int)}.
- *
- * @param log2m log-base-2 of the number of registers in the HLL. <em>b<em> in the paper.
- * @param registerSizeInBits the size of the HLL registers, in bits.
- * @param estimator the original estimator ("E" in the paper).
- * @return a corrected cardinality estimate.
- * @see "<a href='http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/'>Blog post with section on 64 bit hashes and 'large range correction'</a>"
- */
- public static double largeEstimator(final int log2m, final int registerSizeInBits, final double estimator) {
- final double twoToL = TWO_TO_L[(REG_WIDTH_INDEX_MULTIPLIER * registerSizeInBits) + log2m];
- return -1 * twoToL * Math.log(1.0 - (estimator/twoToL));
- }
-}
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IHLLMetadata.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,71 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * The metadata and parameters associated with a HLL.
- */
-interface IHLLMetadata {
- /**
- * @return the schema version of the HLL. This will never be <code>null</code>.
- */
- int schemaVersion();
-
- /**
- * @return the type of the HLL. This will never be <code>null</code>.
- */
- HLLType HLLType();
-
- /**
- * @return the log-base-2 of the register count parameter of the HLL. This
- * will always be greater than or equal to 4 and less than or equal
- * to 31.
- */
- int registerCountLog2();
-
- /**
- * @return the register width parameter of the HLL. This will always be
- * greater than or equal to 1 and less than or equal to 8.
- */
- int registerWidth();
-
- /**
- * @return the log-base-2 of the explicit cutoff cardinality. This will always
- * be greater than or equal to zero and less than 31, per the specification.
- */
- int log2ExplicitCutoff();
-
- /**
- * @return <code>true</code> if the {@link HLLType#EXPLICIT} representation
- * has been disabled. <code>false</code> otherwise.
- */
- boolean explicitOff();
-
- /**
- * @return <code>true</code> if the {@link HLLType#EXPLICIT} representation
- * cutoff cardinality is set to be automatically chosen,
- * <code>false</code> otherwise.
- */
- boolean explicitAuto();
-
- /**
- * @return <code>true</code> if the {@link HLLType#SPARSE} representation
- * is enabled.
- */
- boolean sparseEnabled();
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 2015-07-16 13:22:50.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/ISchemaVersion.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,85 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A serialization schema for HLLs. Reads and writes HLL metadata to
- * and from <code>byte[]</code> representations.
- */
-interface ISchemaVersion {
- /**
- * The number of metadata bytes required for a serialized HLL of the
- * specified type.
- *
- * @param type the type of the serialized HLL
- * @return the number of padding bytes needed in order to fully accommodate
- * the needed metadata.
- */
- int paddingBytes(HLLType type);
-
- /**
- * Writes metadata bytes to serialized HLL.
- *
- * @param bytes the padded data bytes of the HLL
- * @param metadata the metadata to write to the padding bytes
- */
- void writeMetadata(byte[] bytes, IHLLMetadata metadata);
-
- /**
- * Reads the metadata bytes of the serialized HLL.
- *
- * @param bytes the serialized HLL
- * @return the HLL metadata
- */
- IHLLMetadata readMetadata(byte[] bytes);
-
- /**
- * Builds an HLL serializer that matches this schema version.
- *
- * @param type the HLL type that will be serialized. This cannot be
- * <code>null</code>.
- * @param wordLength the length of the 'words' that comprise the data of the
- * HLL. Words must be at least 5 bits and at most 64 bits long.
- * @param wordCount the number of 'words' in the HLL's data.
- * @return a byte array serializer used to serialize a HLL according
- * to this schema version's specification.
- * @see #paddingBytes(HLLType)
- * @see IWordSerializer
- */
- IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount);
-
- /**
- * Builds an HLL deserializer that matches this schema version.
- *
- * @param type the HLL type that will be deserialized. This cannot be
- * <code>null</code>.
- * @param wordLength the length of the 'words' that comprise the data of the
- * serialized HLL. Words must be at least 5 bits and at most 64
- * bits long.
- * @param bytes the serialized HLL to deserialize. This cannot be
- * <code>null</code>.
- * @return a byte array deserializer used to deserialize a HLL serialized
- * according to this schema version's specification.
- */
- IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes);
-
- /**
- * @return the schema version number.
- */
- int schemaVersionNumber();
-}
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 2015-07-16 13:14:59.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordDeserializer.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,41 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Reads 'words' of a fixed width, in sequence, from a byte array.
- */
-public interface IWordDeserializer {
- /**
- * @return the next word in the sequence. Should not be called more than
- * {@link #totalWordCount()} times.
- */
- long readWord();
-
- /**
- * Returns the number of words that could be encoded in the sequence.
- *
- * NOTE: the sequence that was encoded may be shorter than the value this
- * method returns due to padding issues within bytes. This guarantees
- * only an upper bound on the number of times {@link #readWord()}
- * can be called.
- *
- * @return the maximum number of words that could be read from the sequence.
- */
- int totalWordCount();
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/IWordSerializer.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,39 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Writes 'words' of fixed width, in sequence, to a byte array.
- */
-interface IWordSerializer {
-
- /**
- * Writes the word to the backing array.
- *
- * @param word the word to write.
- */
- void writeWord(final long word);
-
- /**
- * Returns the backing array of <code>byte</code>s that contain the serialized
- * words.
- * @return the serialized words as a <code>byte[]</code>.
- */
- byte[] getBytes();
-
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,35 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A <code>long</code>-based iterator. This is not <i>is-a</i> {@link java.util.Iterator}
- * to prevent autoboxing between <code>Long</code> and <code>long</code>.
- */
-interface LongIterator {
- /**
- * @return <code>true</code> if and only if there are more elements to
- * iterate over. <code>false</code> otherwise.
- */
- boolean hasNext();
-
- /**
- * @return the next <code>long</code> in the collection.
- */
- long next();
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/NumberUtil.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,172 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A collection of utilities to work with numbers.
- */
-class NumberUtil {
- // loge(2) (log-base e of 2)
- public static final double LOGE_2 = 0.6931471805599453;
-
- // ************************************************************************
- /**
- * Computes the <code>log2</code> (log-base-two) of the specified value.
- *
- * @param value the <code>double</code> for which the <code>log2</code> is
- * desired.
- * @return the <code>log2</code> of the specified value
- */
- public static double log2(final double value) {
- // REF: http://en.wikipedia.org/wiki/Logarithmic_scale (conversion of bases)
- return Math.log(value) / LOGE_2;
- }
-
- // ========================================================================
- // the hex characters
- private static final char[] HEX = { '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
-
- // ------------------------------------------------------------------------
- /**
- * Converts the specified array of <code>byte</code>s into a string of
- * hex characters (low <code>byte</code> first).
- *
- * @param bytes the array of <code>byte</code>s that are to be converted.
- * This cannot be <code>null</code> though it may be empty.
- * @param offset the offset in <code>bytes</code> at which the bytes will
- * be taken. This cannot be negative and must be less than
- * <code>bytes.length - 1</code>.
- * @param count the number of bytes to be retrieved from the specified array.
- * This cannot be negative. If greater than <code>bytes.length - offset</code>
- * then that value is used.
- * @return a string of at most <code>count</code> characters that represents
- * the specified byte array in hex. This will never be <code>null</code>
- * though it may be empty if <code>bytes</code> is empty or <code>count</code>
- * is zero.
- * @throws IllegalArgumentException if <code>offset</code> is greater than
- * or equal to <code>bytes.length</code>.
- * @see #fromHex(String, int, int)
- */
- public static String toHex(final byte[] bytes, final int offset, final int count) {
- if(offset >= bytes.length) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + bytes.length + ").")/*by contract*/;
- final int byteCount = Math.min( (bytes.length - offset), count);
- final int upperBound = byteCount + offset;
-
- final char[] chars = new char[byteCount * 2/*two chars per byte*/];
- int charIndex = 0;
- for(int i=offset; i<upperBound; i++) {
- final byte value = bytes[i];
- chars[charIndex++] = HEX[(value >>> 4) & 0x0F];
- chars[charIndex++] = HEX[value & 0x0F];
- }
-
- return new String(chars);
- }
-
- /**
- * Converts the specified array of hex characters into an array of <code>byte</code>s
- * (low <code>byte</code> first).
- *
- * @param string the string of hex characters to be converted into <code>byte</code>s.
- * This cannot be <code>null</code> though it may be blank.
- * @param offset the offset in the string at which the characters will be
- * taken. This cannot be negative and must be less than <code>string.length() - 1</code>.
- * @param count the number of characters to be retrieved from the specified
- * string. This cannot be negative and must be divisible by two
- * (since there are two characters per <code>byte</code>).
- * @return the array of <code>byte</code>s that were converted from the
- * specified string (in the specified range). This will never be
- * <code>null</code> though it may be empty if <code>string</code>
- * is empty or <code>count</code> is zero.
- * @throws IllegalArgumentException if <code>offset</code> is greater than
- * or equal to <code>string.length()</code> or if <code>count</code>
- * is not divisible by two.
- * @see #toHex(byte[], int, int)
- */
- public static byte[] fromHex(final String string, final int offset, final int count) {
- if(offset >= string.length()) throw new IllegalArgumentException("Offset is greater than the length (" + offset + " >= " + string.length() + ").")/*by contract*/;
- if( (count & 0x01) != 0) throw new IllegalArgumentException("Count is not divisible by two (" + count + ").")/*by contract*/;
- final int charCount = Math.min((string.length() - offset), count);
- final int upperBound = offset + charCount;
-
- final byte[] bytes = new byte[charCount >>> 1/*aka /2*/];
- int byteIndex = 0/*beginning*/;
- for(int i=offset; i<upperBound; i+=2) {
- bytes[byteIndex++] = (byte)(( (digit(string.charAt(i)) << 4)
- | digit(string.charAt(i + 1))) & 0xFF);
- }
-
- return bytes;
- }
-
- // ------------------------------------------------------------------------
- /**
- * @param character a hex character to be converted to a <code>byte</code>.
- * This cannot be a character other than [a-fA-F0-9].
- * @return the value of the specified character. This will be a value <code>0</code>
- * through <code>15</code>.
- * @throws IllegalArgumentException if the specified character is not in
- * [a-fA-F0-9]
- */
- private static final int digit(final char character) {
- switch(character) {
- case '0':
- return 0;
- case '1':
- return 1;
- case '2':
- return 2;
- case '3':
- return 3;
- case '4':
- return 4;
- case '5':
- return 5;
- case '6':
- return 6;
- case '7':
- return 7;
- case '8':
- return 8;
- case '9':
- return 9;
- case 'a':
- case 'A':
- return 10;
- case 'b':
- case 'B':
- return 11;
- case 'c':
- case 'C':
- return 12;
- case 'd':
- case 'D':
- return 13;
- case 'e':
- case 'E':
- return 14;
- case 'f':
- case 'F':
- return 15;
-
- default:
- throw new IllegalArgumentException("Character is not in [a-fA-F0-9] ('" + character + "').");
- }
- }
-}
\ No newline at end of file
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/package-info.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/package-info.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,24 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A fork of <a href="https://github.com/aggregateknowledge/java-hll/">Java-HyperLogLog</a> package tweaked
- * not to depend on fastutil and with cleanups to make it lean and clean.
- */
-package org.apache.solr.util.hll;
-
-
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 2015-07-16 13:22:50.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,154 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A concrete {@link ISchemaVersion} representing schema version one.
- */
-class SchemaVersionOne implements ISchemaVersion {
- /**
- * The schema version number for this instance.
- */
- public static final int SCHEMA_VERSION = 1;
-
- // ------------------------------------------------------------------------
- // Version-specific ordinals (array position) for each of the HLL types
- private static final HLLType[] TYPE_ORDINALS = new HLLType[] {
- HLLType.EMPTY,
- HLLType.EXPLICIT,
- HLLType.SPARSE,
- HLLType.FULL
- };
-
- // ------------------------------------------------------------------------
- // number of header bytes for all HLL types
- private static final int HEADER_BYTE_COUNT = 3;
-
- // sentinel values from the spec for explicit off and auto
- private static final int EXPLICIT_OFF = 0;
- private static final int EXPLICIT_AUTO = 63;
-
- // ************************************************************************
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.ISchemaVersion#paddingBytes(HLLType)
- */
- @Override
- public int paddingBytes(final HLLType type) {
- return HEADER_BYTE_COUNT;
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.ISchemaVersion#writeMetadata(byte[], IHLLMetadata)
- */
- @Override
- public void writeMetadata(final byte[] bytes, final IHLLMetadata metadata) {
- final HLLType type = metadata.HLLType();
- final int typeOrdinal = getOrdinal(type);
-
- final int explicitCutoffValue;
- if(metadata.explicitOff()) {
- explicitCutoffValue = EXPLICIT_OFF;
- } else if(metadata.explicitAuto()) {
- explicitCutoffValue = EXPLICIT_AUTO;
- } else {
- explicitCutoffValue = metadata.log2ExplicitCutoff() + 1/*per spec*/;
- }
-
- bytes[0] = SerializationUtil.packVersionByte(SCHEMA_VERSION, typeOrdinal);
- bytes[1] = SerializationUtil.packParametersByte(metadata.registerWidth(), metadata.registerCountLog2());
- bytes[2] = SerializationUtil.packCutoffByte(explicitCutoffValue, metadata.sparseEnabled());
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.ISchemaVersion#readMetadata(byte[])
- */
- @Override
- public IHLLMetadata readMetadata(final byte[] bytes) {
- final byte versionByte = bytes[0];
- final byte parametersByte = bytes[1];
- final byte cutoffByte = bytes[2];
-
- final int typeOrdinal = SerializationUtil.typeOrdinal(versionByte);
- final int explicitCutoffValue = SerializationUtil.explicitCutoff(cutoffByte);
- final boolean explicitOff = (explicitCutoffValue == EXPLICIT_OFF);
- final boolean explicitAuto = (explicitCutoffValue == EXPLICIT_AUTO);
- final int log2ExplicitCutoff = (explicitOff || explicitAuto) ? -1/*sentinel*/ : (explicitCutoffValue - 1/*per spec*/);
-
- return new HLLMetadata(SCHEMA_VERSION,
- getType(typeOrdinal),
- SerializationUtil.registerCountLog2(parametersByte),
- SerializationUtil.registerWidth(parametersByte),
- log2ExplicitCutoff,
- explicitOff,
- explicitAuto,
- SerializationUtil.sparseEnabled(cutoffByte));
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.ISchemaVersion#getSerializer(HLLType, int, int)
- */
- @Override
- public IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount) {
- return new BigEndianAscendingWordSerializer(wordLength, wordCount, paddingBytes(type));
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.ISchemaVersion#getDeserializer(HLLType, int, byte[])
- */
- @Override
- public IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes) {
- return new BigEndianAscendingWordDeserializer(wordLength, paddingBytes(type), bytes);
- }
-
- /* (non-Javadoc)
- * @see net.agkn.hll.serialization.ISchemaVersion#schemaVersionNumber()
- */
- @Override
- public int schemaVersionNumber() {
- return SCHEMA_VERSION;
- }
-
- // ========================================================================
- // Type/Ordinal lookups
- /**
- * Gets the ordinal for the specified {@link HLLType}.
- *
- * @param type the type whose ordinal is desired
- * @return the ordinal for the specified type, to be used in the version byte.
- * This will always be non-negative.
- */
- private static int getOrdinal(final HLLType type) {
- for(int i=0; i<TYPE_ORDINALS.length; i++) {
- if(TYPE_ORDINALS[i].equals(type)) return i;
- }
- throw new RuntimeException("Unknown HLL type " + type);
- }
-
- /**
- * Gets the {@link HLLType} for the specified ordinal.
- *
- * @param ordinal the ordinal whose type is desired
- * @return the type for the specified ordinal. This will never be <code>null</code>.
- */
- private static HLLType getType(final int ordinal) {
- if((ordinal < 0) || (ordinal >= TYPE_ORDINALS.length)) {
- throw new IllegalArgumentException("Invalid type ordinal '" + ordinal + "'. Only 0-" + (TYPE_ORDINALS.length - 1) + " inclusive allowed.");
- }
- return TYPE_ORDINALS[ordinal];
- }
-}
diff -Nru solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java
--- solr-5.3.0/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 2015-07-16 12:32:07.000000000 +0200
+++ solr-5.3.0.hll/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,277 +0,0 @@
-package org.apache.solr.util.hll;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A collection of constants and utilities for serializing and deserializing
- * HLLs.
- *
- * NOTE: 'package' visibility is used for many methods that only need to be
- * used by the {@link ISchemaVersion} implementations. The structure of
- * a serialized HLL's metadata should be opaque to the rest of the
- * library.
- */
-class SerializationUtil {
- /**
- * The number of bits (of the parameters byte) dedicated to encoding the
- * width of the registers.
- */
- /*package*/ static int REGISTER_WIDTH_BITS = 3;
-
- /**
- * A mask to cap the maximum value of the register width.
- */
- /*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1;
-
- /**
- * The number of bits (of the parameters byte) dedicated to encoding
- * <code>log2(registerCount)</code>.
- */
- /*package*/ static int LOG2_REGISTER_COUNT_BITS = 5;
-
- /**
- * A mask to cap the maximum value of <code>log2(registerCount)</code>.
- */
- /*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1;
-
- /**
- * The number of bits (of the cutoff byte) dedicated to encoding the
- * log-base-2 of the explicit cutoff or sentinel values for
- * 'explicit-disabled' or 'auto'.
- */
- /*package*/ static int EXPLICIT_CUTOFF_BITS = 6;
-
- /**
- * A mask to cap the maximum value of the explicit cutoff choice.
- */
- /*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1;
-
- /**
- * Number of bits in a nibble.
- */
- private static int NIBBLE_BITS = 4;
-
- /**
- * A mask to cap the maximum value of a nibble.
- */
- private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1;
-
- // ************************************************************************
- // Serialization utilities
-
- /**
- * Schema version one (v1).
- */
- public static ISchemaVersion VERSION_ONE = new SchemaVersionOne();
-
- /**
- * The default schema version for serializing HLLs.
- */
- public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE;
-
- /**
- * List of registered schema versions, indexed by their version numbers. If
- * an entry is <code>null</code>, then no such schema version is registered.
- * Similarly, registering a new schema version simply entails assigning an
- * {@link ISchemaVersion} instance to the appropriate index of this array.<p/>
- *
- * By default, only {@link SchemaVersionOne} is registered. Note that version
- * zero will always be reserved for internal (e.g. proprietary, legacy) schema
- * specifications/implementations and will never be assigned to in by this
- * library.
- */
- public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16];
-
- static {
- REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE;
- }
-
- /**
- * @param schemaVersionNumber the version number of the {@link ISchemaVersion}
- * desired. This must be a registered schema version number.
- * @return The {@link ISchemaVersion} for the given number. This will never
- * be <code>null</code>.
- */
- public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) {
- if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) {
- throw new RuntimeException("Invalid schema version number " + schemaVersionNumber);
- }
- final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber];
- if(schemaVersion == null) {
- throw new RuntimeException("Unknown schema version number " + schemaVersionNumber);
- }
- return schemaVersion;
- }
-
- /**
- * Get the appropriate {@link ISchemaVersion schema version} for the specified
- * serialized HLL.
- *
- * @param bytes the serialized HLL whose schema version is desired.
- * @return the schema version for the specified HLL. This will never
- * be <code>null</code>.
- */
- public static ISchemaVersion getSchemaVersion(final byte[] bytes) {
- final byte versionByte = bytes[0];
- final int schemaVersionNumber = schemaVersion(versionByte);
-
- return getSchemaVersion(schemaVersionNumber);
- }
-
- // ************************************************************************
- // Package-specific shared helpers
-
- /**
- * Generates a byte that encodes the schema version and the type ordinal
- * of the HLL.
- *
- * The top nibble is the schema version and the bottom nibble is the type
- * ordinal.
- *
- * @param schemaVersion the schema version to encode.
- * @param typeOrdinal the type ordinal of the HLL to encode.
- * @return the packed version byte
- */
- public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) {
- return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal));
- }
- /**
- * Generates a byte that encodes the log-base-2 of the explicit cutoff
- * or sentinel values for 'explicit-disabled' or 'auto', as well as the
- * boolean indicating whether to use {@link HLLType#SPARSE}
- * in the promotion hierarchy.
- *
- * The top bit is always padding, the second highest bit indicates the
- * 'sparse-enabled' boolean, and the lowest six bits encode the explicit
- * cutoff value.
- *
- * @param explicitCutoff the explicit cutoff value to encode.
- * <ul>
- * <li>
- * If 'explicit-disabled' is chosen, this value should be <code>0</code>.
- * </li>
- * <li>
- * If 'auto' is chosen, this value should be <code>63</code>.
- * </li>
- * <li>
- * If a cutoff of 2<sup>n</sup> is desired, for <code>0 <= n < 31</code>,
- * this value should be <code>n + 1</code>.
- * </li>
- * </ul>
- * @param sparseEnabled whether {@link HLLType#SPARSE}
- * should be used in the promotion hierarchy to improve HLL
- * storage.
- *
- * @return the packed cutoff byte
- */
- public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) {
- final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0);
- return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff));
- }
-
- /**
- * Generates a byte that encodes the parameters of a
- * {@link HLLType#FULL} or {@link HLLType#SPARSE}
- * HLL.<p/>
- *
- * The top 3 bits are used to encode <code>registerWidth - 1</code>
- * (range of <code>registerWidth</code> is thus 1-9) and the bottom 5
- * bits are used to encode <code>registerCountLog2</code>
- * (range of <code>registerCountLog2</code> is thus 0-31).
- *
- * @param registerWidth the register width (must be at least 1 and at
- * most 9)
- * @param registerCountLog2 the log-base-2 of the register count (must
- * be at least 0 and at most 31)
- * @return the packed parameters byte
- */
- public static byte packParametersByte(final int registerWidth, final int registerCountLog2) {
- final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK);
- final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK);
- return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits);
- }
-
- /**
- * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized
- * HLL.
- *
- * @param cutoffByte the cutoff byte of the serialized HLL
- * @return the 'sparse-enabled' boolean
- */
- public static boolean sparseEnabled(final byte cutoffByte) {
- return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1;
- }
-
- /**
- * Extracts the explicit cutoff value from the cutoff byte of a serialized
- * HLL.
- *
- * @param cutoffByte the cutoff byte of the serialized HLL
- * @return the explicit cutoff value
- */
- public static int explicitCutoff(final byte cutoffByte) {
- return (cutoffByte & EXPLICIT_CUTOFF_MASK);
- }
-
- /**
- * Extracts the schema version from the version byte of a serialized
- * HLL.
- *
- * @param versionByte the version byte of the serialized HLL
- * @return the schema version of the serialized HLL
- */
- public static int schemaVersion(final byte versionByte) {
- return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS);
- }
-
- /**
- * Extracts the type ordinal from the version byte of a serialized HLL.
- *
- * @param versionByte the version byte of the serialized HLL
- * @return the type ordinal of the serialized HLL
- */
- public static int typeOrdinal(final byte versionByte) {
- return (versionByte & NIBBLE_MASK);
- }
-
- /**
- * Extracts the register width from the parameters byte of a serialized
- * {@link HLLType#FULL} HLL.
- *
- * @param parametersByte the parameters byte of the serialized HLL
- * @return the register width of the serialized HLL
- *
- * @see #packParametersByte(int, int)
- */
- public static int registerWidth(final byte parametersByte) {
- return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1;
- }
-
- /**
- * Extracts the log2(registerCount) from the parameters byte of a
- * serialized {@link HLLType#FULL} HLL.
- *
- * @param parametersByte the parameters byte of the serialized HLL
- * @return log2(registerCount) of the serialized HLL
- *
- * @see #packParametersByte(int, int)
- */
- public static int registerCountLog2(final byte parametersByte) {
- return (parametersByte & LOG2_REGISTER_COUNT_MASK);
- }
-}