From 753eb9e72f227e06921960a68f6ef932d1435a6d Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Fri, 15 Mar 2019 15:40:55 -0400 Subject: [PATCH] Fix postings list cache causing incorrect results (#1461) --- src/dbnode/storage/index/block_prop_test.go | 173 ++++++++++++++++++ .../storage/index/postings_list_cache.go | 15 +- .../storage/index/postings_list_cache_lru.go | 55 +++--- .../index/postings_list_cache_lru_test.go | 6 +- .../storage/index/postings_list_cache_test.go | 103 ++++++----- .../storage/index/read_through_segment.go | 18 +- .../index/read_through_segment_test.go | 2 +- src/m3ninx/idx/query.go | 7 + src/m3ninx/search/proptest/concurrent_test.go | 2 +- src/m3ninx/search/proptest/prop_test.go | 4 +- src/m3ninx/search/proptest/query_gen.go | 92 ++++++---- 11 files changed, 342 insertions(+), 135 deletions(-) create mode 100644 src/dbnode/storage/index/block_prop_test.go diff --git a/src/dbnode/storage/index/block_prop_test.go b/src/dbnode/storage/index/block_prop_test.go new file mode 100644 index 0000000000..3244ea4bf5 --- /dev/null +++ b/src/dbnode/storage/index/block_prop_test.go @@ -0,0 +1,173 @@ +// +build big +// +// Copyright (c) 2019 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package index + +import ( + "errors" + "fmt" + "math/rand" + "os" + "testing" + "time" + + "github.com/m3db/m3/src/dbnode/storage/bootstrap/result" + "github.com/m3db/m3/src/dbnode/storage/namespace" + "github.com/m3db/m3/src/m3ninx/idx" + "github.com/m3db/m3/src/m3ninx/index/segment" + "github.com/m3db/m3/src/m3ninx/index/segment/fst" + "github.com/m3db/m3/src/m3ninx/search" + "github.com/m3db/m3/src/m3ninx/search/proptest" + "github.com/m3db/m3/src/m3ninx/util" + "github.com/m3db/m3x/instrument" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + "github.com/stretchr/testify/require" +) + +var ( + testFstOptions = fst.NewOptions() + testBlockSize = time.Hour + lotsTestDocuments = util.MustReadDocs("../../../m3ninx/util/testdata/node_exporter.json", 2000) +) + +// TestPostingsListCacheDoesNotAffectBlockQueryResults verifies that the postings list +// cache does not affect the results of querying a block by creating two blocks, one with +// the postings list cache enabled and one without. It then generates a bunch of queries +// and executes them against both blocks, ensuring that both blocks return the exact same +// results. 
It was added as a regression test after we encountered a bug in which the
+// postings list cache caused the block to return incorrect results.
+//
+// It also generates term and regexp queries where the field and pattern are the same to
+// ensure that the postings list cache correctly handles caching the results of these
+// different types of queries (despite having the same field and "pattern") separately.
+func TestPostingsListCacheDoesNotAffectBlockQueryResults(t *testing.T) {
+	parameters := gopter.DefaultTestParameters()
+	seed := time.Now().UnixNano()
+	parameters.MinSuccessfulTests = 500
+	parameters.MaxSize = 20
+	parameters.Rng = rand.New(rand.NewSource(seed))
+	properties := gopter.NewProperties(parameters)
+
+	testMD := newTestNSMetadata(t)
+	blockSize := time.Hour
+
+	now := time.Now()
+	blockStart := now.Truncate(blockSize)
+
+	uncachedBlock, err := newPropTestBlock(
+		t, blockStart, testMD, testOpts.SetPostingsListCache(nil))
+	require.NoError(t, err)
+
+	plCache, stopReporting, err := NewPostingsListCache(1000, PostingsListCacheOptions{
+		InstrumentOptions: instrument.NewOptions(),
+	})
+	require.NoError(t, err)
+	defer stopReporting()
+
+	cachedOptions := testOpts.
+		SetPostingsListCache(plCache).
+		SetReadThroughSegmentOptions(ReadThroughSegmentOptions{
+			CacheRegexp: true,
+			CacheTerms: true,
+		})
+	cachedBlock, err := newPropTestBlock(t, blockStart, testMD, cachedOptions)
+	require.NoError(t, err)
+
+	properties.Property("Index blocks with and without the postings list cache always return the same results", prop.ForAll(
+		func(q search.Query, identicalTermAndRegexp []search.Query) (bool, error) {
+			queries := []search.Query{
+				q,
+				identicalTermAndRegexp[0],
+				identicalTermAndRegexp[1],
+			}
+
+			for _, q := range queries {
+				indexQuery := Query{
+					idx.NewQueryFromSearchQuery(q),
+				}
+
+				uncachedResults := NewResults(testOpts)
+				exhaustive, err := uncachedBlock.Query(indexQuery, QueryOptions{StartInclusive: blockStart, EndExclusive: blockStart.Add(blockSize)}, uncachedResults)
+				if err != nil {
+					return false, fmt.Errorf("error querying uncached block: %v", err)
+				}
+				if !exhaustive {
+					return false, errors.New("querying uncached block was not exhaustive")
+				}
+
+				cachedResults := NewResults(testOpts)
+				exhaustive, err = cachedBlock.Query(indexQuery, QueryOptions{StartInclusive: blockStart, EndExclusive: blockStart.Add(blockSize)}, cachedResults)
+				if err != nil {
+					return false, fmt.Errorf("error querying cached block: %v", err)
+				}
+				if !exhaustive {
+					return false, errors.New("querying cached block was not exhaustive")
+				}
+
+				uncachedMap := uncachedResults.Map()
+				cachedMap := cachedResults.Map()
+				if uncachedMap.Len() != cachedMap.Len() {
+					return false, fmt.Errorf(
+						"uncached map size was: %d, but cached map size was: %d",
+						uncachedMap.Len(), cachedMap.Len())
+				}
+
+				for _, entry := range uncachedMap.Iter() {
+					key := entry.Key()
+					_, ok := cachedMap.Get(key)
+					if !ok {
+						return false, fmt.Errorf("cached map did not contain: %v", key)
+					}
+				}
+			}
+
+			return true, nil
+		},
+		proptest.GenQuery(lotsTestDocuments),
+		proptest.GenIdenticalTermAndRegexpQuery(lotsTestDocuments),
+	))
+
+	reporter := gopter.NewFormatedReporter(true, 160, os.Stdout)
+	if !properties.Run(reporter) {
+		t.Errorf("failed with initial seed: %d", seed)
+	}
+}
+
+func newPropTestBlock(t *testing.T, blockStart time.Time, nsMeta namespace.Metadata, opts Options) (Block, error) {
+	blk, err := NewBlock(blockStart, nsMeta, opts)
+	require.NoError(t, err)
+
+	var (
+		memSeg = testSegment(t, lotsTestDocuments...).(segment.MutableSegment)
+		fstSeg = fst.ToTestSegment(t, memSeg, testFstOptions)
+		// Need at least one shard to look fulfilled.
+		fulfilled = result.NewShardTimeRanges(blockStart, blockStart.Add(testBlockSize), uint32(1))
+		indexBlock = result.NewIndexBlock(blockStart, []segment.Segment{fstSeg}, fulfilled)
+	)
+	// Use the AddResults API because that's the only scenario in which we'll wrap a segment
+	// in a ReadThroughSegment to use the postings list cache.
+	err = blk.AddResults(indexBlock)
+	require.NoError(t, err)
+	return blk, nil
+}
diff --git a/src/dbnode/storage/index/postings_list_cache.go b/src/dbnode/storage/index/postings_list_cache.go
index 1dae6f28ff..c478bf3f08 100644
--- a/src/dbnode/storage/index/postings_list_cache.go
+++ b/src/dbnode/storage/index/postings_list_cache.go
@@ -89,10 +89,12 @@ func NewPostingsListCache(size int, opts PostingsListCacheOptions) (*PostingsLis
 // GetRegexp returns the cached results for the provided regexp query, if any.
 func (q *PostingsListCache) GetRegexp(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 ) (postings.List, bool) {
 	return q.get(
 		segmentUUID,
+		field,
 		pattern,
 		PatternTypeRegexp)
 }
@@ -100,22 +102,25 @@ func (q *PostingsListCache) GetRegexp(
 // GetTerm returns the cached results for the provided term query, if any.
 func (q *PostingsListCache) GetTerm(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 ) (postings.List, bool) {
 	return q.get(
 		segmentUUID,
+		field,
 		pattern,
 		PatternTypeTerm)
 }
 
 func (q *PostingsListCache) get(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	patternType PatternType,
 ) (postings.List, bool) {
 	// No RLock because a Get() operation mutates the LRU.
 	q.Lock()
-	p, ok := q.lru.Get(segmentUUID, pattern, patternType)
+	p, ok := q.lru.Get(segmentUUID, field, pattern, patternType)
 	q.Unlock()
 
 	q.emitCacheGetMetrics(patternType, ok)
@@ -130,23 +135,26 @@ func (q *PostingsListCache) get(
 // PutRegexp updates the LRU with the result of the regexp query.
 func (q *PostingsListCache) PutRegexp(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	pl postings.List,
 ) {
-	q.put(segmentUUID, pattern, PatternTypeRegexp, pl)
+	q.put(segmentUUID, field, pattern, PatternTypeRegexp, pl)
 }
 
 // PutTerm updates the LRU with the result of the term query.
 func (q *PostingsListCache) PutTerm(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	pl postings.List,
 ) {
-	q.put(segmentUUID, pattern, PatternTypeTerm, pl)
+	q.put(segmentUUID, field, pattern, PatternTypeTerm, pl)
 }
 
 func (q *PostingsListCache) put(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	patternType PatternType,
 	pl postings.List,
@@ -154,6 +162,7 @@ func (q *PostingsListCache) put(
 	q.Lock()
 	q.lru.Add(
 		segmentUUID,
+		field,
 		pattern,
 		patternType,
 		pl,
diff --git a/src/dbnode/storage/index/postings_list_cache_lru.go b/src/dbnode/storage/index/postings_list_cache_lru.go
index 6a687da5eb..b7c9d9be4f 100644
--- a/src/dbnode/storage/index/postings_list_cache_lru.go
+++ b/src/dbnode/storage/index/postings_list_cache_lru.go
@@ -30,11 +30,12 @@ import (
 )
 
 // PostingsListLRU implements a non-thread safe fixed size LRU cache of postings lists
-// that were resolved by running a given query against a particular segment. Normally
-// an key in the LRU would look like:
+// that were resolved by running a given query against a particular segment for a given
+// field and pattern type (term vs regexp). 
Normally a key in the LRU would look like: // // type key struct { // segmentUUID uuid.UUID +// field string // pattern string // patternType PatternType // } @@ -51,7 +52,7 @@ import ( // Instead of adding additional tracking on-top of an existing generic LRU, we've created a // specialized LRU that instead of having a single top-level map pointing into the linked-list, // has a two-level map where the top level map is keyed by segment UUID and the second level map -// is keyed by pattern and pattern type. +// is keyed by the field/pattern/patternType. // // As a result, when a segment is ready to be closed, they can call into the cache with their // UUID and we can efficiently remove all the entries corresponding to that segment from the @@ -60,24 +61,18 @@ import ( type postingsListLRU struct { size int evictList *list.List - items map[uuid.Array]map[patternAndPatternType]*list.Element + items map[uuid.Array]map[key]*list.Element } // entry is used to hold a value in the evictList. type entry struct { uuid uuid.UUID - pattern string - patternType PatternType + key key postingsList postings.List } type key struct { - uuid uuid.UUID - pattern string - patternType PatternType -} - -type patternAndPatternType struct { + field string pattern string patternType PatternType } @@ -91,22 +86,23 @@ func newPostingsListLRU(size int) (*postingsListLRU, error) { return &postingsListLRU{ size: size, evictList: list.New(), - items: make(map[uuid.Array]map[patternAndPatternType]*list.Element), + items: make(map[uuid.Array]map[key]*list.Element), }, nil } // Add adds a value to the cache. Returns true if an eviction occurred. func (c *postingsListLRU) Add( segmentUUID uuid.UUID, + field string, pattern string, patternType PatternType, pl postings.List, ) (evicted bool) { + newKey := newKey(field, pattern, patternType) // Check for existing item. uuidArray := segmentUUID.Array() if uuidEntries, ok := c.items[uuidArray]; ok { - key := newPatternAndPatternType(pattern, patternType) - if ent, ok := uuidEntries[key]; ok { + if ent, ok := uuidEntries[newKey]; ok { // If it already exists, just move it to the front. This avoids storing // the same item in the LRU twice which is important because the maps // can only point to one entry at a time and we use them for purges. Also, @@ -121,18 +117,16 @@ func (c *postingsListLRU) Add( var ( ent = &entry{ uuid: segmentUUID, - pattern: pattern, - patternType: patternType, + key: newKey, postingsList: pl, } entry = c.evictList.PushFront(ent) - key = newPatternAndPatternType(pattern, patternType) ) - if patterns, ok := c.items[uuidArray]; ok { - patterns[key] = entry + if queries, ok := c.items[uuidArray]; ok { + queries[newKey] = entry } else { - c.items[uuidArray] = map[patternAndPatternType]*list.Element{ - key: entry, + c.items[uuidArray] = map[key]*list.Element{ + newKey: entry, } } @@ -147,13 +141,14 @@ func (c *postingsListLRU) Add( // Get looks up a key's value from the cache. func (c *postingsListLRU) Get( segmentUUID uuid.UUID, + field string, pattern string, patternType PatternType, ) (postings.List, bool) { + newKey := newKey(field, pattern, patternType) uuidArray := segmentUUID.Array() if uuidEntries, ok := c.items[uuidArray]; ok { - key := newPatternAndPatternType(pattern, patternType) - if ent, ok := uuidEntries[key]; ok { + if ent, ok := uuidEntries[newKey]; ok { c.evictList.MoveToFront(ent) return ent.Value.(*entry).postingsList, true } @@ -166,13 +161,14 @@ func (c *postingsListLRU) Get( // key was contained. 
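+//
+// For illustration only, a hedged sketch of the field-aware key in use (lru,
+// segUUID, pl, and the "city"/"new_.*" values are hypothetical, not part of
+// this change):
+//
+//	evicted := lru.Add(segUUID, "city", "new_.*", PatternTypeRegexp, pl)
+//	cached, ok := lru.Get(segUUID, "city", "new_.*", PatternTypeRegexp)  // ok == true
+//	removed := lru.Remove(segUUID, "city", "new_.*", PatternTypeRegexp)  // removed == true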
func (c *postingsListLRU) Remove( segmentUUID uuid.UUID, + field string, pattern string, patternType PatternType, ) bool { + newKey := newKey(field, pattern, patternType) uuidArray := segmentUUID.Array() if uuidEntries, ok := c.items[uuidArray]; ok { - key := newPatternAndPatternType(pattern, patternType) - if ent, ok := uuidEntries[key]; ok { + if ent, ok := uuidEntries[newKey]; ok { c.removeElement(ent) return true } @@ -208,14 +204,13 @@ func (c *postingsListLRU) removeElement(e *list.Element) { entry := e.Value.(*entry) if patterns, ok := c.items[entry.uuid.Array()]; ok { - key := newPatternAndPatternType(entry.pattern, entry.patternType) - delete(patterns, key) + delete(patterns, entry.key) if len(patterns) == 0 { delete(c.items, entry.uuid.Array()) } } } -func newPatternAndPatternType(pattern string, patternType PatternType) patternAndPatternType { - return patternAndPatternType{pattern: pattern, patternType: patternType} +func newKey(field, pattern string, patternType PatternType) key { + return key{field: field, pattern: pattern, patternType: patternType} } diff --git a/src/dbnode/storage/index/postings_list_cache_lru_test.go b/src/dbnode/storage/index/postings_list_cache_lru_test.go index 558020527f..2fb384e767 100644 --- a/src/dbnode/storage/index/postings_list_cache_lru_test.go +++ b/src/dbnode/storage/index/postings_list_cache_lru_test.go @@ -26,11 +26,7 @@ func (c *postingsListLRU) keys() []key { keys := make([]key, 0, len(c.items)) for ent := c.evictList.Back(); ent != nil; ent = ent.Prev() { entry := ent.Value.(*entry) - keys = append(keys, key{ - uuid: entry.uuid, - pattern: entry.pattern, - patternType: entry.patternType, - }) + keys = append(keys, entry.key) } return keys } diff --git a/src/dbnode/storage/index/postings_list_cache_test.go b/src/dbnode/storage/index/postings_list_cache_test.go index 753ebb9106..0bbe8b50a8 100644 --- a/src/dbnode/storage/index/postings_list_cache_test.go +++ b/src/dbnode/storage/index/postings_list_cache_test.go @@ -50,10 +50,14 @@ var ( func init() { // Generate test data. 
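+	// Each entry gets its own segment UUID, field, and pattern so that a cache
+	// hit can only come from an exact (uuid, field, pattern, patternType) match;
+	// pattern types alternate between term (even i) and regexp (odd i).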
for i := 0; i < numTestPlEntries; i++ { - segmentUUID := uuid.Parse( - fmt.Sprintf("00000000-0000-0000-0000-000000000%03d", i)) - pattern := fmt.Sprintf("%d", i) - pl := roaring.NewPostingsList() + var ( + segmentUUID = uuid.Parse( + fmt.Sprintf("00000000-0000-0000-0000-000000000%03d", i)) + + field = fmt.Sprintf("field_%d", i) + pattern = fmt.Sprintf("pattern_%d", i) + pl = roaring.NewPostingsList() + ) pl.Insert(postings.ID(i)) patternType := PatternTypeRegexp @@ -62,8 +66,7 @@ func init() { } testPlEntries = append(testPlEntries, testEntry{ segmentUUID: segmentUUID, - pattern: pattern, - patternType: patternType, + key: newKey(field, pattern, patternType), postingsList: pl, }) } @@ -71,8 +74,7 @@ func init() { type testEntry struct { segmentUUID uuid.UUID - pattern string - patternType PatternType + key key postingsList postings.List } @@ -93,17 +95,10 @@ func TestSimpleLRUBehavior(t *testing.T) { putEntry(plCache, 0) putEntry(plCache, 1) putEntry(plCache, 2) - - expectedOrder := []string{e0.pattern, e1.pattern, e2.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e0, e1, e2}) putEntry(plCache, 3) - expectedOrder = []string{e1.pattern, e2.pattern, e3.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e1, e2, e3}) putEntry(plCache, 4) putEntry(plCache, 4) @@ -111,34 +106,22 @@ func TestSimpleLRUBehavior(t *testing.T) { putEntry(plCache, 5) putEntry(plCache, 0) putEntry(plCache, 0) - - expectedOrder = []string{e4.pattern, e5.pattern, e0.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e4, e5, e0}) // Miss, no expected change. getEntry(plCache, 100) - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e4, e5, e0}) // Hit. getEntry(plCache, 4) - expectedOrder = []string{e5.pattern, e0.pattern, e4.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e5, e0, e4}) // Multiple hits. getEntry(plCache, 4) getEntry(plCache, 0) getEntry(plCache, 5) getEntry(plCache, 5) - expectedOrder = []string{e4.pattern, e0.pattern, e5.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e4, e0, e5}) } func TestPurgeSegment(t *testing.T) { @@ -149,16 +132,18 @@ func TestPurgeSegment(t *testing.T) { // Write many entries with the same segment UUID. for i := 0; i < 100; i++ { - if testPlEntries[i].patternType == PatternTypeRegexp { + if testPlEntries[i].key.patternType == PatternTypeRegexp { plCache.PutRegexp( testPlEntries[0].segmentUUID, - testPlEntries[i].pattern, + testPlEntries[i].key.field, + testPlEntries[i].key.pattern, testPlEntries[i].postingsList, ) } else { plCache.PutTerm( testPlEntries[0].segmentUUID, - testPlEntries[i].pattern, + testPlEntries[i].key.field, + testPlEntries[i].key.pattern, testPlEntries[i].postingsList, ) } @@ -175,16 +160,18 @@ func TestPurgeSegment(t *testing.T) { // All entries related to the purged segment should be gone. 
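+	// (All 100 writes above used testPlEntries[0]'s segment UUID, so purging
+	// that one segment should shrink the LRU by exactly 100 entries.)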
	require.Equal(t, size-100, plCache.lru.Len())
 	for i := 0; i < 100; i++ {
-		if testPlEntries[i].patternType == PatternTypeRegexp {
+		if testPlEntries[i].key.patternType == PatternTypeRegexp {
 			_, ok := plCache.GetRegexp(
 				testPlEntries[0].segmentUUID,
-				testPlEntries[i].pattern,
+				testPlEntries[i].key.field,
+				testPlEntries[i].key.pattern,
 			)
 			require.False(t, ok)
 		} else {
 			_, ok := plCache.GetTerm(
 				testPlEntries[0].segmentUUID,
-				testPlEntries[i].pattern,
+				testPlEntries[i].key.field,
+				testPlEntries[i].key.pattern,
 			)
 			require.False(t, ok)
 		}
@@ -284,62 +271,74 @@ func testConcurrency(t *testing.T, size int, purge bool, verify bool) {
 func putEntry(cache *PostingsListCache, i int) {
 	// Do each put twice to test the logic that avoids storing
 	// multiple entries for the same value.
-	if testPlEntries[i].patternType == PatternTypeRegexp {
+	if testPlEntries[i].key.patternType == PatternTypeRegexp {
 		cache.PutRegexp(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 		cache.PutRegexp(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 	} else {
 		cache.PutTerm(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 		cache.PutTerm(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 	}
 }
 
 func getEntry(cache *PostingsListCache, i int) (postings.List, bool) {
-	if testPlEntries[i].patternType == PatternTypeRegexp {
+	if testPlEntries[i].key.patternType == PatternTypeRegexp {
 		return cache.GetRegexp(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 		)
 	}
 
 	return cache.GetTerm(
 		testPlEntries[i].segmentUUID,
-		testPlEntries[i].pattern,
+		testPlEntries[i].key.field,
+		testPlEntries[i].key.pattern,
 	)
 }
 
+func requireExpectedOrder(t *testing.T, plCache *PostingsListCache, expectedOrder []testEntry) {
+	for i, key := range plCache.lru.keys() {
+		require.Equal(t, expectedOrder[i].key, key)
+	}
+}
+
 func printSortedKeys(t *testing.T, cache *PostingsListCache) {
 	keys := cache.lru.keys()
 	sort.Slice(keys, func(i, j int) bool {
-		iIdx, err := strconv.ParseInt(keys[i].pattern, 10, 64)
+		// Fields are generated as "field_%d", so sort by the numeric suffix.
+		iIdx, err := strconv.ParseInt(keys[i].field[len("field_"):], 10, 64)
 		if err != nil {
-			t.Fatalf("unable to parse: %s into int", keys[i].pattern)
+			t.Fatalf("unable to parse: %s into int", keys[i].field)
 		}
 
-		jIdx, err := strconv.ParseInt(keys[j].pattern, 10, 64)
+		jIdx, err := strconv.ParseInt(keys[j].field[len("field_"):], 10, 64)
 		if err != nil {
-			t.Fatalf("unable to parse: %s into int", keys[i].pattern)
+			t.Fatalf("unable to parse: %s into int", keys[j].field)
 		}
 
 		return iIdx < jIdx
 	})
 
 	for _, key := range keys {
-		fmt.Println("key: ", key.pattern)
+		fmt.Println("key: ", key)
 	}
 }
diff --git a/src/dbnode/storage/index/read_through_segment.go b/src/dbnode/storage/index/read_through_segment.go
index 1ef6cfc0a4..8a3b1c833c 100644
--- a/src/dbnode/storage/index/read_through_segment.go
+++ b/src/dbnode/storage/index/read_through_segment.go
@@ -148,16 +148,17 @@ func (s *readThroughSegmentReader) MatchRegexp(
 		return s.Reader.MatchRegexp(field, c)
 	}
 
-	// TODO(rartoul): Would be nice not to allocate a string here.
- pattern := c.FSTSyntax.String() - pl, ok := s.postingsListCache.GetRegexp(s.uuid, pattern) + // TODO(rartoul): Would be nice to not allocate strings here. + fieldStr := string(field) + patternStr := c.FSTSyntax.String() + pl, ok := s.postingsListCache.GetRegexp(s.uuid, fieldStr, patternStr) if ok { return pl, nil } pl, err := s.Reader.MatchRegexp(field, c) if err == nil { - s.postingsListCache.PutRegexp(s.uuid, pattern, pl) + s.postingsListCache.PutRegexp(s.uuid, fieldStr, patternStr, pl) } return pl, err } @@ -171,16 +172,17 @@ func (s *readThroughSegmentReader) MatchTerm( return s.Reader.MatchTerm(field, term) } - // TODO(rartoul): Would be nice to not allocate a string here. - termString := string(term) - pl, ok := s.postingsListCache.GetTerm(s.uuid, termString) + // TODO(rartoul): Would be nice to not allocate strings here. + fieldStr := string(field) + patternStr := string(term) + pl, ok := s.postingsListCache.GetTerm(s.uuid, fieldStr, patternStr) if ok { return pl, nil } pl, err := s.Reader.MatchTerm(field, term) if err == nil { - s.postingsListCache.PutTerm(s.uuid, termString, pl) + s.postingsListCache.PutTerm(s.uuid, fieldStr, patternStr, pl) } return pl, err } diff --git a/src/dbnode/storage/index/read_through_segment_test.go b/src/dbnode/storage/index/read_through_segment_test.go index 7acf5dae53..ee7e0ae72c 100644 --- a/src/dbnode/storage/index/read_through_segment_test.go +++ b/src/dbnode/storage/index/read_through_segment_test.go @@ -277,7 +277,7 @@ func TestClose(t *testing.T) { // Store an entry for the segment in the cache so we can check if it // gets purged after. - cache.PutRegexp(segmentUUID, "some-regexp", roaring.NewPostingsList()) + cache.PutRegexp(segmentUUID, "some-field", "some-pattern", roaring.NewPostingsList()) segment.EXPECT().Close().Return(nil) err = readThroughSeg.Close() diff --git a/src/m3ninx/idx/query.go b/src/m3ninx/idx/query.go index 659e1641a0..feecd85383 100644 --- a/src/m3ninx/idx/query.go +++ b/src/m3ninx/idx/query.go @@ -34,6 +34,13 @@ func (q Query) String() string { return q.query.String() } +// NewQueryFromSearchQuery creates a new Query from a search.Query. +func NewQueryFromSearchQuery(q search.Query) Query { + return Query{ + query: q, + } +} + // NewTermQuery returns a new query for finding documents which match a term exactly. 
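+//
+// For example (a hypothetical field and term, shown only as a sketch):
+//
+//	q := NewTermQuery([]byte("city"), []byte("new_york"))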
func NewTermQuery(field, term []byte) Query { return Query{ diff --git a/src/m3ninx/search/proptest/concurrent_test.go b/src/m3ninx/search/proptest/concurrent_test.go index 19b091c8d3..9d728ab3a1 100644 --- a/src/m3ninx/search/proptest/concurrent_test.go +++ b/src/m3ninx/search/proptest/concurrent_test.go @@ -93,7 +93,7 @@ func TestConcurrentQueries(t *testing.T) { return true, nil }, - genQuery(lotsTestDocuments), + GenQuery(lotsTestDocuments), )) reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) diff --git a/src/m3ninx/search/proptest/prop_test.go b/src/m3ninx/search/proptest/prop_test.go index 31dbbe8de5..0ecc89440e 100644 --- a/src/m3ninx/search/proptest/prop_test.go +++ b/src/m3ninx/search/proptest/prop_test.go @@ -84,7 +84,7 @@ func TestSegmentDistributionDoesNotAffectQuery(t *testing.T) { return true, nil }, genPropTestInput(len(lotsTestDocuments)), - genQuery(lotsTestDocuments), + GenQuery(lotsTestDocuments), )) reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) @@ -132,7 +132,7 @@ func TestFSTSimpleSegmentsQueryTheSame(t *testing.T) { return true, nil }, - genQuery(lotsTestDocuments), + GenQuery(lotsTestDocuments), )) reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) diff --git a/src/m3ninx/search/proptest/query_gen.go b/src/m3ninx/search/proptest/query_gen.go index 103b5076ae..bbe6987e85 100644 --- a/src/m3ninx/search/proptest/query_gen.go +++ b/src/m3ninx/search/proptest/query_gen.go @@ -32,29 +32,49 @@ import ( "github.com/leanovate/gopter/gen" ) -func genTermQuery(docs []doc.Document) gopter.Gen { +// GenTermQuery generates a term query. +func GenTermQuery(docs []doc.Document) gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { - docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() - if !ok { - panic("unable to generate term query") // should never happen - } - docID := docIDRes.(int) + fieldName, fieldValue := fieldNameAndValue(genParams, docs) + q := query.NewTermQuery(fieldName, fieldValue) + return gopter.NewGenResult(q, gopter.NoShrinker) + } +} - doc := docs[docID] - fieldRes, ok := gen.IntRange(0, len(doc.Fields)-1)(genParams).Retrieve() - if !ok { - panic("unable to generate term query fields") // should never happen - } +func fieldNameAndValue(genParams *gopter.GenParameters, docs []doc.Document) ([]byte, []byte) { + docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate term query") // should never happen + } + docID := docIDRes.(int) - fieldID := fieldRes.(int) - field := doc.Fields[fieldID] + doc := docs[docID] + fieldRes, ok := gen.IntRange(0, len(doc.Fields)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate term query fields") // should never happen + } - q := query.NewTermQuery(field.Name, field.Value) - return gopter.NewGenResult(q, gopter.NoShrinker) + fieldID := fieldRes.(int) + field := doc.Fields[fieldID] + return field.Name, field.Value +} + +// GenIdenticalTermAndRegexpQuery generates a term query and regexp query with +// the exact same underlying field and pattern. 
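+// Both queries in the returned pair share the same field and pattern bytes, so
+// a correct cache must key term and regexp results separately for them to stay
+// distinct; the block property test relies on this generator for exactly that.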
+func GenIdenticalTermAndRegexpQuery(docs []doc.Document) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + fieldName, fieldValue := fieldNameAndValue(genParams, docs) + termQ := query.NewTermQuery(fieldName, fieldValue) + regexpQ, err := query.NewRegexpQuery(fieldName, fieldValue) + if err != nil { + panic(err) + } + return gopter.NewGenResult([]search.Query{termQ, regexpQ}, gopter.NoShrinker) } } -func genRegexpQuery(docs []doc.Document) gopter.Gen { +// GenRegexpQuery generates a regexp query. +func GenRegexpQuery(docs []doc.Document) gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() if !ok { @@ -102,45 +122,51 @@ func genRegexpQuery(docs []doc.Document) gopter.Gen { } } -func genNegationQuery(docs []doc.Document) gopter.Gen { +// GenNegationQuery generates a negation query. +func GenNegationQuery(docs []doc.Document) gopter.Gen { return gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), + GenTermQuery(docs), + GenRegexpQuery(docs), ). Map(func(q search.Query) search.Query { return query.NewNegationQuery(q) }) } -func genConjuctionQuery(docs []doc.Document) gopter.Gen { +// GenConjunctionQuery generates a conjunction query. +func GenConjunctionQuery(docs []doc.Document) gopter.Gen { return gen.SliceOf( gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), - genNegationQuery(docs)), + GenTermQuery(docs), + GenRegexpQuery(docs), + GenNegationQuery(docs)), reflect.TypeOf((*search.Query)(nil)).Elem()). Map(func(qs []search.Query) search.Query { return query.NewConjunctionQuery(qs) }) } -func genDisjunctionQuery(docs []doc.Document) gopter.Gen { +// GenDisjunctionQuery generates a disjunction query. +func GenDisjunctionQuery(docs []doc.Document) gopter.Gen { return gen.SliceOf( gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), - genNegationQuery(docs)), + GenTermQuery(docs), + GenRegexpQuery(docs), + GenNegationQuery(docs)), reflect.TypeOf((*search.Query)(nil)).Elem()). Map(func(qs []search.Query) search.Query { return query.NewDisjunctionQuery(qs) }) } -func genQuery(docs []doc.Document) gopter.Gen { +// GenQuery generates a query. +func GenQuery(docs []doc.Document) gopter.Gen { return gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), - genNegationQuery(docs), - genConjuctionQuery(docs), - genDisjunctionQuery(docs)) + GenTermQuery(docs), + GenRegexpQuery(docs), + GenNegationQuery(docs), + GenConjunctionQuery(docs), + GenDisjunctionQuery(docs)) } + +// Ge