diff --git a/nip45/helpers.go b/nip45/helpers.go index 9c82cf1..208d521 100644 --- a/nip45/helpers.go +++ b/nip45/helpers.go @@ -1,23 +1,19 @@ package nip45 -import "math" +import ( + "math" +) const two32 = 1 << 32 -// Extract bits from uint64 using LSB 0 numbering, including lo. -func eb(bits uint64, hi uint8, lo uint8) uint64 { - m := uint64(((1 << (hi - lo)) - 1) << lo) - return (bits & m) >> lo -} - func linearCounting(m uint32, v uint32) float64 { fm := float64(m) return fm * math.Log(fm/float64(v)) } -func clz64(x uint64) uint8 { +func clz56(x uint64) uint8 { var c uint8 - for m := uint64(1 << 63); m&x == 0 && m != 0; m >>= 1 { + for m := uint64(1 << 55); m&x == 0 && m != 0; m >>= 1 { c++ } return c @@ -32,25 +28,3 @@ func countZeros(s []uint8) uint32 { } return c } - -func calculateEstimate(s []uint8) float64 { - sum := 0.0 - for _, val := range s { - sum += 1.0 / float64(uint64(1)< 16 || precision < 4 { - return nil, fmt.Errorf("precision must be between 4 and 16") - } - +func New() *HyperLogLog { + // precision is always 8 + // the number of registers is always 256 (1<<8) hll := &HyperLogLog{} - hll.precision = precision - hll.registers = make([]uint8, 1< hll.registers[i] { - hll.registers[i] = zeroBits + if zeroBits > hll.registers[j] { + hll.registers[j] = zeroBits } } func (hll *HyperLogLog) Merge(other *HyperLogLog) error { - if hll.precision != other.precision { - return fmt.Errorf("precisions must be equal") - } - for i, v := range other.registers { if v > hll.registers[i] { hll.registers[i] = v } } - return nil } func (hll *HyperLogLog) Count() uint64 { - m := uint32(len(hll.registers)) + v := countZeros(hll.registers) - if v := countZeros(hll.registers); v != 0 { - lc := linearCounting(m, v) - if lc <= float64(threshold[hll.precision-4]) { + if v != 0 { + lc := linearCounting(256 /* nregisters */, v) + + if lc <= 220 /* threshold */ { return uint64(lc) } } - est := calculateEstimate(hll.registers) - if est <= float64(len(hll.registers))*5.0 { - if v := countZeros(hll.registers); v != 0 { - return uint64(linearCounting(m, v)) + est := hll.calculateEstimate() + if est <= 256 /* nregisters */ *3 { + if v != 0 { + return uint64(linearCounting(256 /* nregisters */, v)) } } return uint64(est) } -func (hll *HyperLogLog) estimateBias(est float64) float64 { - estTable, biasTable := rawEstimateData[hll.precision-4], biasData[hll.precision-4] - - if estTable[0] > est { - return biasTable[0] - } - - lastEstimate := estTable[len(estTable)-1] - if lastEstimate < est { - return biasTable[len(biasTable)-1] +func (hll HyperLogLog) calculateEstimate() float64 { + sum := 0.0 + for _, val := range hll.registers { + sum += 1.0 / float64(uint64(1)< %d)", hll.Count(), count) + c := hll.Count() + res100 := int(c * 100) + require.Greater(t, res100, count*85, "result too low (actual %d < %d)", c, count) + require.Less(t, res100, count*115, "result too high (actual %d > %d)", c, count) } } @@ -45,8 +46,8 @@ func TestHyperLogLogMerge(t *testing.T) { 777, 922, 1000, 1500, 2222, 9999, 13600, 80000, 133333, 200000, } { - hllA, _ := New(8) - hllB, _ := New(8) + hllA := New() + hllB := New() for range count / 2 { b := make([]byte, 32) @@ -65,7 +66,7 @@ func TestHyperLogLogMerge(t *testing.T) { hllB.Add(id) } - hll, _ := New(8) + hll := New() hll.Merge(hllA) hll.Merge(hllB) @@ -76,7 +77,7 @@ func TestHyperLogLogMerge(t *testing.T) { } func TestHyperLogLogMergeComplex(t *testing.T) { - rand := rand.New(rand.NewPCG(2, 0)) + rand := rand.New(rand.NewPCG(4, 0)) for _, count := range []int{ 3, 6, 9, 12, 15, 22, 36, 46, 57, @@ -85,9 +86,9 @@ func TestHyperLogLogMergeComplex(t *testing.T) { 777, 922, 1000, 1500, 2222, 9999, 13600, 80000, 133333, 200000, } { - hllA, _ := New(8) - hllB, _ := New(8) - hllC, _ := New(8) + hllA := New() + hllB := New() + hllC := New() for range count / 3 { b := make([]byte, 32) @@ -117,7 +118,7 @@ func TestHyperLogLogMergeComplex(t *testing.T) { hllA.Add(id) } - hll, _ := New(8) + hll := New() hll.Merge(hllA) hll.Merge(hllB) hll.Merge(hllC)