forked from Yawning/bloom
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom_test.go
152 lines (132 loc) · 3.98 KB
/
bloom_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// bloom_test.go - Bloom filter tests.
// Written in 2017 by Yawning Angel
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along
// with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
package bloom
import (
"bytes"
"compress/zlib"
"crypto/rand"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFilter(t *testing.T) {
const (
entryLength = 32
filterSize = 15 // 2^15 bits = 4 KiB
expectedEntries = 1024
)
assert := assert.New(t)
require := require.New(t)
// 4 KiB filter, 1/2^5 (32 bit) load
f, err := New(rand.Reader, filterSize, .03125)
require.NoError(err, "New()")
assert.Equal(0, f.Entries(), "Entries(), empty filter")
// Assert that the bloom filter math is correct.
assert.Equal(expectedEntries, f.MaxEntries(), "Max entries")
// Generate enough entries to fully saturate the filter.
max := f.MaxEntries()
entries := make(map[[entryLength]byte]bool)
for count := 0; count < max; {
var ent [entryLength]byte
rand.Read(ent[:])
// This needs to ignore false positives.
if !f.TestAndSet(ent[:]) {
entries[ent] = true
count++
}
}
assert.Equal(max, f.Entries(), "After populating")
// Ensure that all the entries are present in the filter.
idx := 0
for ent := range entries {
assert.True(f.Test(ent[:]), "Test(ent #: %v)", idx)
assert.True(f.TestAndSet(ent[:]), "TestAndSet(ent #: %v)", idx)
idx++
}
// Test the false positive rate, by generating another set of entries
// NOT in the filter, and counting the false positives.
//
// This may have suprious failures once in a blue moon because the
// algorithm is probabalistic, but that's *exceedingly* unlikely with
// the chosen delta.
randomEntries := make(map[[entryLength]byte]bool)
for count := 0; count < max; {
var ent [entryLength]byte
rand.Read(ent[:])
if !entries[ent] && !randomEntries[ent] {
randomEntries[ent] = true
count++
}
}
falsePositives := 0
for ent := range randomEntries {
if f.Test(ent[:]) {
falsePositives++
}
}
observedP := float64(falsePositives) / float64(max)
t.Logf("Observed False Positive Rate: %v", observedP)
//assert.Lessf(observedP, 0.02, "False positive rate")
assert.Equal(max, f.Entries(), "After tests") // Should still be = max.
}
func TestFilterCompression(t *testing.T) {
const (
entryLength = 32
entries = 20 // 2^20 bits = 1m bits
clientcnt = 1000
)
assert := assert.New(t)
require := require.New(t)
for _, epochs := range []int{1, 5, 10, 20} {
for i, load := range []int{1,2,4,8,16,32} {
l := float64(1) / float64(load)
f, err := New(rand.Reader, entries + i, l)
require.NoError(err, "New()")
assert.Equal(0, f.Entries(), "Entries(), empty filter")
fmt.Printf("%d layer; bits/el %d: ", epochs*clientcnt, load)
entries := make(map[[entryLength]byte]bool)
for count := 0; count < epochs * clientcnt; {
var ent [entryLength]byte
rand.Read(ent[:])
// This needs to ignore false positives.
if !f.TestAndSet(ent[:]) {
entries[ent] = true
count++
}
}
layer := f.Delta()
var b bytes.Buffer
w := zlib.NewWriter(&b)
w.Write(layer)
w.Close()
// we now have a compressed size.
fmt.Printf("%d bytes. ", len(b.Bytes()))
// Now try a bunch of requests to understand the false positive rate.
randomEntries := make(map[[entryLength]byte]bool)
for count := 0; count < clientcnt * 100; {
var ent [entryLength]byte
rand.Read(ent[:])
if !entries[ent] && !randomEntries[ent] {
randomEntries[ent] = true
count++
}
}
falsePositives := 0
for ent := range randomEntries {
if f.Test(ent[:]) {
falsePositives++
}
}
observedP := float64(falsePositives) / float64(clientcnt * 100)
fmt.Printf("fp: %f\n", observedP)
}
}
}