From 753eb9e72f227e06921960a68f6ef932d1435a6d Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Fri, 15 Mar 2019 15:40:55 -0400 Subject: [PATCH] Fix postings list cache causing incorrect results (#1461) --- src/dbnode/storage/index/block_prop_test.go | 173 ++++++++++++++++++ .../storage/index/postings_list_cache.go | 15 +- .../storage/index/postings_list_cache_lru.go | 55 +++--- .../index/postings_list_cache_lru_test.go | 6 +- .../storage/index/postings_list_cache_test.go | 103 ++++++----- .../storage/index/read_through_segment.go | 18 +- .../index/read_through_segment_test.go | 2 +- src/m3ninx/idx/query.go | 7 + src/m3ninx/search/proptest/concurrent_test.go | 2 +- src/m3ninx/search/proptest/prop_test.go | 4 +- src/m3ninx/search/proptest/query_gen.go | 92 ++++++---- 11 files changed, 342 insertions(+), 135 deletions(-) create mode 100644 src/dbnode/storage/index/block_prop_test.go diff --git a/src/dbnode/storage/index/block_prop_test.go b/src/dbnode/storage/index/block_prop_test.go new file mode 100644 index 0000000000..3244ea4bf5 --- /dev/null +++ b/src/dbnode/storage/index/block_prop_test.go @@ -0,0 +1,173 @@ +// +build big +// +// Copyright (c) 2019 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package index + +import ( + "errors" + "fmt" + "math/rand" + "os" + "testing" + "time" + + "github.com/m3db/m3/src/dbnode/storage/bootstrap/result" + "github.com/m3db/m3/src/dbnode/storage/namespace" + "github.com/m3db/m3/src/m3ninx/idx" + "github.com/m3db/m3/src/m3ninx/index/segment" + "github.com/m3db/m3/src/m3ninx/index/segment/fst" + "github.com/m3db/m3/src/m3ninx/search" + "github.com/m3db/m3/src/m3ninx/search/proptest" + "github.com/m3db/m3/src/m3ninx/util" + "github.com/m3db/m3x/instrument" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + "github.com/stretchr/testify/require" +) + +var ( + testFstOptions = fst.NewOptions() + testBlockSize = time.Hour + lotsTestDocuments = util.MustReadDocs("../../../m3ninx/util/testdata/node_exporter.json", 2000) +) + +// TestPostingsListCacheDoesNotAffectBlockQueryResults verifies that the postings list +// cache does not affect the results of querying a block by creating two blocks, one with +// the postings list cache enabled and one without. It then generates a bunch of queries +// and executes them against both blocks, ensuring that both blocks return the exact same +// results. 
It was added as a regression test after we encountered a bug in which the
+// postings list cache caused the block to return incorrect results.
+//
+// It also generates term and regexp queries where the field and pattern are the same to
+// ensure that the postings list cache correctly handles caching the results of these
+// different types of queries (despite having the same field and "pattern") separately.
+func TestPostingsListCacheDoesNotAffectBlockQueryResults(t *testing.T) {
+	parameters := gopter.DefaultTestParameters()
+	seed := time.Now().UnixNano()
+	parameters.MinSuccessfulTests = 500
+	parameters.MaxSize = 20
+	parameters.Rng = rand.New(rand.NewSource(seed))
+	properties := gopter.NewProperties(parameters)
+
+	testMD := newTestNSMetadata(t)
+	blockSize := time.Hour
+
+	now := time.Now()
+	blockStart := now.Truncate(blockSize)
+
+	uncachedBlock, err := newPropTestBlock(
+		t, blockStart, testMD, testOpts.SetPostingsListCache(nil))
+	require.NoError(t, err)
+
+	plCache, stopReporting, err := NewPostingsListCache(1000, PostingsListCacheOptions{
+		InstrumentOptions: instrument.NewOptions(),
+	})
+	require.NoError(t, err)
+	defer stopReporting()
+
+	cachedOptions := testOpts.
+		SetPostingsListCache(plCache).
+		SetReadThroughSegmentOptions(ReadThroughSegmentOptions{
+			CacheRegexp: true,
+			CacheTerms: true,
+		})
+	cachedBlock, err := newPropTestBlock(t, blockStart, testMD, cachedOptions)
+	require.NoError(t, err)
+
+	properties.Property("Index blocks with and without the postings list cache always return the same results", prop.ForAll(
+		func(q search.Query, identicalTermAndRegexp []search.Query) (bool, error) {
+			queries := []search.Query{
+				q,
+				identicalTermAndRegexp[0],
+				identicalTermAndRegexp[1],
+			}
+
+			for _, q := range queries {
+				indexQuery := Query{
+					idx.NewQueryFromSearchQuery(q),
+				}
+
+				uncachedResults := NewResults(testOpts)
+				exhaustive, err := uncachedBlock.Query(indexQuery, QueryOptions{StartInclusive: blockStart, EndExclusive: blockStart.Add(blockSize)}, uncachedResults)
+				if err != nil {
+					return false, fmt.Errorf("error querying uncached block: %v", err)
+				}
+				if !exhaustive {
+					return false, errors.New("querying uncached block was not exhaustive")
+				}
+
+				cachedResults := NewResults(testOpts)
+				exhaustive, err = cachedBlock.Query(indexQuery, QueryOptions{StartInclusive: blockStart, EndExclusive: blockStart.Add(blockSize)}, cachedResults)
+				if err != nil {
+					return false, fmt.Errorf("error querying cached block: %v", err)
+				}
+				if !exhaustive {
+					return false, errors.New("querying cached block was not exhaustive")
+				}
+
+				uncachedMap := uncachedResults.Map()
+				cachedMap := cachedResults.Map()
+				if uncachedMap.Len() != cachedMap.Len() {
+					return false, fmt.Errorf(
+						"uncached map size was: %d, but cached map size was: %d",
+						uncachedMap.Len(), cachedMap.Len())
+				}
+
+				for _, entry := range uncachedMap.Iter() {
+					key := entry.Key()
+					_, ok := cachedMap.Get(key)
+					if !ok {
+						return false, fmt.Errorf("cached map did not contain: %v", key)
+					}
+				}
+			}
+
+			return true, nil
+		},
+		proptest.GenQuery(lotsTestDocuments),
+		proptest.GenIdenticalTermAndRegexpQuery(lotsTestDocuments),
+	))
+
+	reporter := gopter.NewFormatedReporter(true, 160, os.Stdout)
+	if !properties.Run(reporter) {
+		t.Errorf("failed with initial seed: %d", seed)
+	}
+}
+
+func newPropTestBlock(t *testing.T, blockStart time.Time, nsMeta namespace.Metadata, opts Options) (Block, error) {
+	blk, err := NewBlock(blockStart, nsMeta, opts)
+	require.NoError(t, err)
+
+	var (
+		memSeg = testSegment(t, lotsTestDocuments...).(segment.MutableSegment)
+		fstSeg = fst.ToTestSegment(t, memSeg, testFstOptions)
+		// Need at least one shard to look fulfilled.
+		fulfilled = result.NewShardTimeRanges(blockStart, blockStart.Add(testBlockSize), uint32(1))
+		indexBlock = result.NewIndexBlock(blockStart, []segment.Segment{fstSeg}, fulfilled)
+	)
+	// Use the AddResults API because that's the only scenario in which we'll wrap a segment
+	// in a ReadThroughSegment to use the postings list cache.
+	err = blk.AddResults(indexBlock)
+	require.NoError(t, err)
+	return blk, nil
+}
diff --git a/src/dbnode/storage/index/postings_list_cache.go b/src/dbnode/storage/index/postings_list_cache.go
index 1dae6f28ff..c478bf3f08 100644
--- a/src/dbnode/storage/index/postings_list_cache.go
+++ b/src/dbnode/storage/index/postings_list_cache.go
@@ -89,10 +89,12 @@ func NewPostingsListCache(size int, opts PostingsListCacheOptions) (*PostingsLis
 // GetRegexp returns the cached results for the provided regexp query, if any.
 func (q *PostingsListCache) GetRegexp(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 ) (postings.List, bool) {
 	return q.get(
 		segmentUUID,
+		field,
 		pattern,
 		PatternTypeRegexp)
 }
@@ -100,22 +102,25 @@ func (q *PostingsListCache) GetRegexp(
 // GetTerm returns the cached results for the provided term query, if any.
 func (q *PostingsListCache) GetTerm(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 ) (postings.List, bool) {
 	return q.get(
 		segmentUUID,
+		field,
 		pattern,
 		PatternTypeTerm)
 }
 
 func (q *PostingsListCache) get(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	patternType PatternType,
 ) (postings.List, bool) {
 	// No RLock because a Get() operation mutates the LRU.
 	q.Lock()
-	p, ok := q.lru.Get(segmentUUID, pattern, patternType)
+	p, ok := q.lru.Get(segmentUUID, field, pattern, patternType)
 	q.Unlock()
 
 	q.emitCacheGetMetrics(patternType, ok)
@@ -130,23 +135,26 @@ func (q *PostingsListCache) get(
 // PutRegexp updates the LRU with the result of the regexp query.
 func (q *PostingsListCache) PutRegexp(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	pl postings.List,
 ) {
-	q.put(segmentUUID, pattern, PatternTypeRegexp, pl)
+	q.put(segmentUUID, field, pattern, PatternTypeRegexp, pl)
 }
 
 // PutTerm updates the LRU with the result of the term query.
 func (q *PostingsListCache) PutTerm(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	pl postings.List,
 ) {
-	q.put(segmentUUID, pattern, PatternTypeTerm, pl)
+	q.put(segmentUUID, field, pattern, PatternTypeTerm, pl)
 }
 
 func (q *PostingsListCache) put(
 	segmentUUID uuid.UUID,
+	field string,
 	pattern string,
 	patternType PatternType,
 	pl postings.List,
@@ -154,6 +162,7 @@ func (q *PostingsListCache) put(
 	q.Lock()
 	q.lru.Add(
 		segmentUUID,
+		field,
 		pattern,
 		patternType,
 		pl,
diff --git a/src/dbnode/storage/index/postings_list_cache_lru.go b/src/dbnode/storage/index/postings_list_cache_lru.go
index 6a687da5eb..b7c9d9be4f 100644
--- a/src/dbnode/storage/index/postings_list_cache_lru.go
+++ b/src/dbnode/storage/index/postings_list_cache_lru.go
@@ -30,11 +30,12 @@ import (
 )
 
 // PostingsListLRU implements a non-thread safe fixed size LRU cache of postings lists
-// that were resolved by running a given query against a particular segment. Normally
-// an key in the LRU would look like:
+// that were resolved by running a given query against a particular segment for a given
+// field and pattern type (term vs regexp). 
Normally a key in the LRU would look like: // // type key struct { // segmentUUID uuid.UUID +// field string // pattern string // patternType PatternType // } @@ -51,7 +52,7 @@ import ( // Instead of adding additional tracking on-top of an existing generic LRU, we've created a // specialized LRU that instead of having a single top-level map pointing into the linked-list, // has a two-level map where the top level map is keyed by segment UUID and the second level map -// is keyed by pattern and pattern type. +// is keyed by the field/pattern/patternType. // // As a result, when a segment is ready to be closed, they can call into the cache with their // UUID and we can efficiently remove all the entries corresponding to that segment from the @@ -60,24 +61,18 @@ import ( type postingsListLRU struct { size int evictList *list.List - items map[uuid.Array]map[patternAndPatternType]*list.Element + items map[uuid.Array]map[key]*list.Element } // entry is used to hold a value in the evictList. type entry struct { uuid uuid.UUID - pattern string - patternType PatternType + key key postingsList postings.List } type key struct { - uuid uuid.UUID - pattern string - patternType PatternType -} - -type patternAndPatternType struct { + field string pattern string patternType PatternType } @@ -91,22 +86,23 @@ func newPostingsListLRU(size int) (*postingsListLRU, error) { return &postingsListLRU{ size: size, evictList: list.New(), - items: make(map[uuid.Array]map[patternAndPatternType]*list.Element), + items: make(map[uuid.Array]map[key]*list.Element), }, nil } // Add adds a value to the cache. Returns true if an eviction occurred. func (c *postingsListLRU) Add( segmentUUID uuid.UUID, + field string, pattern string, patternType PatternType, pl postings.List, ) (evicted bool) { + newKey := newKey(field, pattern, patternType) // Check for existing item. uuidArray := segmentUUID.Array() if uuidEntries, ok := c.items[uuidArray]; ok { - key := newPatternAndPatternType(pattern, patternType) - if ent, ok := uuidEntries[key]; ok { + if ent, ok := uuidEntries[newKey]; ok { // If it already exists, just move it to the front. This avoids storing // the same item in the LRU twice which is important because the maps // can only point to one entry at a time and we use them for purges. Also, @@ -121,18 +117,16 @@ func (c *postingsListLRU) Add( var ( ent = &entry{ uuid: segmentUUID, - pattern: pattern, - patternType: patternType, + key: newKey, postingsList: pl, } entry = c.evictList.PushFront(ent) - key = newPatternAndPatternType(pattern, patternType) ) - if patterns, ok := c.items[uuidArray]; ok { - patterns[key] = entry + if queries, ok := c.items[uuidArray]; ok { + queries[newKey] = entry } else { - c.items[uuidArray] = map[patternAndPatternType]*list.Element{ - key: entry, + c.items[uuidArray] = map[key]*list.Element{ + newKey: entry, } } @@ -147,13 +141,14 @@ func (c *postingsListLRU) Add( // Get looks up a key's value from the cache. func (c *postingsListLRU) Get( segmentUUID uuid.UUID, + field string, pattern string, patternType PatternType, ) (postings.List, bool) { + newKey := newKey(field, pattern, patternType) uuidArray := segmentUUID.Array() if uuidEntries, ok := c.items[uuidArray]; ok { - key := newPatternAndPatternType(pattern, patternType) - if ent, ok := uuidEntries[key]; ok { + if ent, ok := uuidEntries[newKey]; ok { c.evictList.MoveToFront(ent) return ent.Value.(*entry).postingsList, true } @@ -166,13 +161,14 @@ func (c *postingsListLRU) Get( // key was contained. 
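+//
+// For illustration only, a hedged sketch of the field-aware key in use (lru,
+// segUUID, pl, and the "city"/"new_.*" values are hypothetical, not part of
+// this change):
+//
+//	evicted := lru.Add(segUUID, "city", "new_.*", PatternTypeRegexp, pl)
+//	cached, ok := lru.Get(segUUID, "city", "new_.*", PatternTypeRegexp)  // ok == true
+//	removed := lru.Remove(segUUID, "city", "new_.*", PatternTypeRegexp)  // removed == true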
func (c *postingsListLRU) Remove( segmentUUID uuid.UUID, + field string, pattern string, patternType PatternType, ) bool { + newKey := newKey(field, pattern, patternType) uuidArray := segmentUUID.Array() if uuidEntries, ok := c.items[uuidArray]; ok { - key := newPatternAndPatternType(pattern, patternType) - if ent, ok := uuidEntries[key]; ok { + if ent, ok := uuidEntries[newKey]; ok { c.removeElement(ent) return true } @@ -208,14 +204,13 @@ func (c *postingsListLRU) removeElement(e *list.Element) { entry := e.Value.(*entry) if patterns, ok := c.items[entry.uuid.Array()]; ok { - key := newPatternAndPatternType(entry.pattern, entry.patternType) - delete(patterns, key) + delete(patterns, entry.key) if len(patterns) == 0 { delete(c.items, entry.uuid.Array()) } } } -func newPatternAndPatternType(pattern string, patternType PatternType) patternAndPatternType { - return patternAndPatternType{pattern: pattern, patternType: patternType} +func newKey(field, pattern string, patternType PatternType) key { + return key{field: field, pattern: pattern, patternType: patternType} } diff --git a/src/dbnode/storage/index/postings_list_cache_lru_test.go b/src/dbnode/storage/index/postings_list_cache_lru_test.go index 558020527f..2fb384e767 100644 --- a/src/dbnode/storage/index/postings_list_cache_lru_test.go +++ b/src/dbnode/storage/index/postings_list_cache_lru_test.go @@ -26,11 +26,7 @@ func (c *postingsListLRU) keys() []key { keys := make([]key, 0, len(c.items)) for ent := c.evictList.Back(); ent != nil; ent = ent.Prev() { entry := ent.Value.(*entry) - keys = append(keys, key{ - uuid: entry.uuid, - pattern: entry.pattern, - patternType: entry.patternType, - }) + keys = append(keys, entry.key) } return keys } diff --git a/src/dbnode/storage/index/postings_list_cache_test.go b/src/dbnode/storage/index/postings_list_cache_test.go index 753ebb9106..0bbe8b50a8 100644 --- a/src/dbnode/storage/index/postings_list_cache_test.go +++ b/src/dbnode/storage/index/postings_list_cache_test.go @@ -50,10 +50,14 @@ var ( func init() { // Generate test data. 
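+	// Each entry gets its own segment UUID, field, and pattern so that a cache
+	// hit can only come from an exact (uuid, field, pattern, patternType) match;
+	// pattern types alternate between term (even i) and regexp (odd i).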
for i := 0; i < numTestPlEntries; i++ { - segmentUUID := uuid.Parse( - fmt.Sprintf("00000000-0000-0000-0000-000000000%03d", i)) - pattern := fmt.Sprintf("%d", i) - pl := roaring.NewPostingsList() + var ( + segmentUUID = uuid.Parse( + fmt.Sprintf("00000000-0000-0000-0000-000000000%03d", i)) + + field = fmt.Sprintf("field_%d", i) + pattern = fmt.Sprintf("pattern_%d", i) + pl = roaring.NewPostingsList() + ) pl.Insert(postings.ID(i)) patternType := PatternTypeRegexp @@ -62,8 +66,7 @@ func init() { } testPlEntries = append(testPlEntries, testEntry{ segmentUUID: segmentUUID, - pattern: pattern, - patternType: patternType, + key: newKey(field, pattern, patternType), postingsList: pl, }) } @@ -71,8 +74,7 @@ func init() { type testEntry struct { segmentUUID uuid.UUID - pattern string - patternType PatternType + key key postingsList postings.List } @@ -93,17 +95,10 @@ func TestSimpleLRUBehavior(t *testing.T) { putEntry(plCache, 0) putEntry(plCache, 1) putEntry(plCache, 2) - - expectedOrder := []string{e0.pattern, e1.pattern, e2.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e0, e1, e2}) putEntry(plCache, 3) - expectedOrder = []string{e1.pattern, e2.pattern, e3.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e1, e2, e3}) putEntry(plCache, 4) putEntry(plCache, 4) @@ -111,34 +106,22 @@ func TestSimpleLRUBehavior(t *testing.T) { putEntry(plCache, 5) putEntry(plCache, 0) putEntry(plCache, 0) - - expectedOrder = []string{e4.pattern, e5.pattern, e0.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e4, e5, e0}) // Miss, no expected change. getEntry(plCache, 100) - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e4, e5, e0}) // Hit. getEntry(plCache, 4) - expectedOrder = []string{e5.pattern, e0.pattern, e4.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e5, e0, e4}) // Multiple hits. getEntry(plCache, 4) getEntry(plCache, 0) getEntry(plCache, 5) getEntry(plCache, 5) - expectedOrder = []string{e4.pattern, e0.pattern, e5.pattern} - for i, key := range plCache.lru.keys() { - require.Equal(t, expectedOrder[i], key.pattern) - } + requireExpectedOrder(t, plCache, []testEntry{e4, e0, e5}) } func TestPurgeSegment(t *testing.T) { @@ -149,16 +132,18 @@ func TestPurgeSegment(t *testing.T) { // Write many entries with the same segment UUID. for i := 0; i < 100; i++ { - if testPlEntries[i].patternType == PatternTypeRegexp { + if testPlEntries[i].key.patternType == PatternTypeRegexp { plCache.PutRegexp( testPlEntries[0].segmentUUID, - testPlEntries[i].pattern, + testPlEntries[i].key.field, + testPlEntries[i].key.pattern, testPlEntries[i].postingsList, ) } else { plCache.PutTerm( testPlEntries[0].segmentUUID, - testPlEntries[i].pattern, + testPlEntries[i].key.field, + testPlEntries[i].key.pattern, testPlEntries[i].postingsList, ) } @@ -175,16 +160,18 @@ func TestPurgeSegment(t *testing.T) { // All entries related to the purged segment should be gone. 
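+	// (All 100 writes above used testPlEntries[0]'s segment UUID, so purging
+	// that one segment should shrink the LRU by exactly 100 entries.)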
	require.Equal(t, size-100, plCache.lru.Len())
 	for i := 0; i < 100; i++ {
-		if testPlEntries[i].patternType == PatternTypeRegexp {
+		if testPlEntries[i].key.patternType == PatternTypeRegexp {
 			_, ok := plCache.GetRegexp(
 				testPlEntries[0].segmentUUID,
-				testPlEntries[i].pattern,
+				testPlEntries[i].key.field,
+				testPlEntries[i].key.pattern,
 			)
 			require.False(t, ok)
 		} else {
 			_, ok := plCache.GetTerm(
 				testPlEntries[0].segmentUUID,
-				testPlEntries[i].pattern,
+				testPlEntries[i].key.field,
+				testPlEntries[i].key.pattern,
 			)
 			require.False(t, ok)
 		}
@@ -284,62 +271,74 @@ func testConcurrency(t *testing.T, size int, purge bool, verify bool) {
 func putEntry(cache *PostingsListCache, i int) {
 	// Do each put twice to test the logic that avoids storing
 	// multiple entries for the same value.
-	if testPlEntries[i].patternType == PatternTypeRegexp {
+	if testPlEntries[i].key.patternType == PatternTypeRegexp {
 		cache.PutRegexp(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 		cache.PutRegexp(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 	} else {
 		cache.PutTerm(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 		cache.PutTerm(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 			testPlEntries[i].postingsList,
 		)
 	}
 }
 
 func getEntry(cache *PostingsListCache, i int) (postings.List, bool) {
-	if testPlEntries[i].patternType == PatternTypeRegexp {
+	if testPlEntries[i].key.patternType == PatternTypeRegexp {
 		return cache.GetRegexp(
 			testPlEntries[i].segmentUUID,
-			testPlEntries[i].pattern,
+			testPlEntries[i].key.field,
+			testPlEntries[i].key.pattern,
 		)
 	}
 
 	return cache.GetTerm(
 		testPlEntries[i].segmentUUID,
-		testPlEntries[i].pattern,
+		testPlEntries[i].key.field,
+		testPlEntries[i].key.pattern,
 	)
 }
 
+func requireExpectedOrder(t *testing.T, plCache *PostingsListCache, expectedOrder []testEntry) {
+	for i, key := range plCache.lru.keys() {
+		require.Equal(t, expectedOrder[i].key, key)
+	}
+}
+
 func printSortedKeys(t *testing.T, cache *PostingsListCache) {
 	keys := cache.lru.keys()
 	sort.Slice(keys, func(i, j int) bool {
-		iIdx, err := strconv.ParseInt(keys[i].pattern, 10, 64)
+		// Fields are generated as "field_%d", so sort by the numeric suffix.
+		iIdx, err := strconv.ParseInt(keys[i].field[len("field_"):], 10, 64)
 		if err != nil {
-			t.Fatalf("unable to parse: %s into int", keys[i].pattern)
+			t.Fatalf("unable to parse: %s into int", keys[i].field)
 		}
 
-		jIdx, err := strconv.ParseInt(keys[j].pattern, 10, 64)
+		jIdx, err := strconv.ParseInt(keys[j].field[len("field_"):], 10, 64)
 		if err != nil {
-			t.Fatalf("unable to parse: %s into int", keys[i].pattern)
+			t.Fatalf("unable to parse: %s into int", keys[j].field)
 		}
 
 		return iIdx < jIdx
 	})
 
 	for _, key := range keys {
-		fmt.Println("key: ", key.pattern)
+		fmt.Println("key: ", key)
 	}
 }
diff --git a/src/dbnode/storage/index/read_through_segment.go b/src/dbnode/storage/index/read_through_segment.go
index 1ef6cfc0a4..8a3b1c833c 100644
--- a/src/dbnode/storage/index/read_through_segment.go
+++ b/src/dbnode/storage/index/read_through_segment.go
@@ -148,16 +148,17 @@ func (s *readThroughSegmentReader) MatchRegexp(
 		return s.Reader.MatchRegexp(field, c)
 	}
 
-	// TODO(rartoul): Would be nice not to allocate a string here.
- pattern := c.FSTSyntax.String() - pl, ok := s.postingsListCache.GetRegexp(s.uuid, pattern) + // TODO(rartoul): Would be nice to not allocate strings here. + fieldStr := string(field) + patternStr := c.FSTSyntax.String() + pl, ok := s.postingsListCache.GetRegexp(s.uuid, fieldStr, patternStr) if ok { return pl, nil } pl, err := s.Reader.MatchRegexp(field, c) if err == nil { - s.postingsListCache.PutRegexp(s.uuid, pattern, pl) + s.postingsListCache.PutRegexp(s.uuid, fieldStr, patternStr, pl) } return pl, err } @@ -171,16 +172,17 @@ func (s *readThroughSegmentReader) MatchTerm( return s.Reader.MatchTerm(field, term) } - // TODO(rartoul): Would be nice to not allocate a string here. - termString := string(term) - pl, ok := s.postingsListCache.GetTerm(s.uuid, termString) + // TODO(rartoul): Would be nice to not allocate strings here. + fieldStr := string(field) + patternStr := string(term) + pl, ok := s.postingsListCache.GetTerm(s.uuid, fieldStr, patternStr) if ok { return pl, nil } pl, err := s.Reader.MatchTerm(field, term) if err == nil { - s.postingsListCache.PutTerm(s.uuid, termString, pl) + s.postingsListCache.PutTerm(s.uuid, fieldStr, patternStr, pl) } return pl, err } diff --git a/src/dbnode/storage/index/read_through_segment_test.go b/src/dbnode/storage/index/read_through_segment_test.go index 7acf5dae53..ee7e0ae72c 100644 --- a/src/dbnode/storage/index/read_through_segment_test.go +++ b/src/dbnode/storage/index/read_through_segment_test.go @@ -277,7 +277,7 @@ func TestClose(t *testing.T) { // Store an entry for the segment in the cache so we can check if it // gets purged after. - cache.PutRegexp(segmentUUID, "some-regexp", roaring.NewPostingsList()) + cache.PutRegexp(segmentUUID, "some-field", "some-pattern", roaring.NewPostingsList()) segment.EXPECT().Close().Return(nil) err = readThroughSeg.Close() diff --git a/src/m3ninx/idx/query.go b/src/m3ninx/idx/query.go index 659e1641a0..feecd85383 100644 --- a/src/m3ninx/idx/query.go +++ b/src/m3ninx/idx/query.go @@ -34,6 +34,13 @@ func (q Query) String() string { return q.query.String() } +// NewQueryFromSearchQuery creates a new Query from a search.Query. +func NewQueryFromSearchQuery(q search.Query) Query { + return Query{ + query: q, + } +} + // NewTermQuery returns a new query for finding documents which match a term exactly. 
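+//
+// For example (a hypothetical field and term, shown only as a sketch):
+//
+//	q := NewTermQuery([]byte("city"), []byte("new_york"))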
func NewTermQuery(field, term []byte) Query { return Query{ diff --git a/src/m3ninx/search/proptest/concurrent_test.go b/src/m3ninx/search/proptest/concurrent_test.go index 19b091c8d3..9d728ab3a1 100644 --- a/src/m3ninx/search/proptest/concurrent_test.go +++ b/src/m3ninx/search/proptest/concurrent_test.go @@ -93,7 +93,7 @@ func TestConcurrentQueries(t *testing.T) { return true, nil }, - genQuery(lotsTestDocuments), + GenQuery(lotsTestDocuments), )) reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) diff --git a/src/m3ninx/search/proptest/prop_test.go b/src/m3ninx/search/proptest/prop_test.go index 31dbbe8de5..0ecc89440e 100644 --- a/src/m3ninx/search/proptest/prop_test.go +++ b/src/m3ninx/search/proptest/prop_test.go @@ -84,7 +84,7 @@ func TestSegmentDistributionDoesNotAffectQuery(t *testing.T) { return true, nil }, genPropTestInput(len(lotsTestDocuments)), - genQuery(lotsTestDocuments), + GenQuery(lotsTestDocuments), )) reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) @@ -132,7 +132,7 @@ func TestFSTSimpleSegmentsQueryTheSame(t *testing.T) { return true, nil }, - genQuery(lotsTestDocuments), + GenQuery(lotsTestDocuments), )) reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) diff --git a/src/m3ninx/search/proptest/query_gen.go b/src/m3ninx/search/proptest/query_gen.go index 103b5076ae..bbe6987e85 100644 --- a/src/m3ninx/search/proptest/query_gen.go +++ b/src/m3ninx/search/proptest/query_gen.go @@ -32,29 +32,49 @@ import ( "github.com/leanovate/gopter/gen" ) -func genTermQuery(docs []doc.Document) gopter.Gen { +// GenTermQuery generates a term query. +func GenTermQuery(docs []doc.Document) gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { - docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() - if !ok { - panic("unable to generate term query") // should never happen - } - docID := docIDRes.(int) + fieldName, fieldValue := fieldNameAndValue(genParams, docs) + q := query.NewTermQuery(fieldName, fieldValue) + return gopter.NewGenResult(q, gopter.NoShrinker) + } +} - doc := docs[docID] - fieldRes, ok := gen.IntRange(0, len(doc.Fields)-1)(genParams).Retrieve() - if !ok { - panic("unable to generate term query fields") // should never happen - } +func fieldNameAndValue(genParams *gopter.GenParameters, docs []doc.Document) ([]byte, []byte) { + docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate term query") // should never happen + } + docID := docIDRes.(int) - fieldID := fieldRes.(int) - field := doc.Fields[fieldID] + doc := docs[docID] + fieldRes, ok := gen.IntRange(0, len(doc.Fields)-1)(genParams).Retrieve() + if !ok { + panic("unable to generate term query fields") // should never happen + } - q := query.NewTermQuery(field.Name, field.Value) - return gopter.NewGenResult(q, gopter.NoShrinker) + fieldID := fieldRes.(int) + field := doc.Fields[fieldID] + return field.Name, field.Value +} + +// GenIdenticalTermAndRegexpQuery generates a term query and regexp query with +// the exact same underlying field and pattern. 
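+// Both queries in the returned pair share the same field and pattern bytes, so
+// a correct cache must key term and regexp results separately for them to stay
+// distinct; the block property test relies on this generator for exactly that.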
+func GenIdenticalTermAndRegexpQuery(docs []doc.Document) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + fieldName, fieldValue := fieldNameAndValue(genParams, docs) + termQ := query.NewTermQuery(fieldName, fieldValue) + regexpQ, err := query.NewRegexpQuery(fieldName, fieldValue) + if err != nil { + panic(err) + } + return gopter.NewGenResult([]search.Query{termQ, regexpQ}, gopter.NoShrinker) } } -func genRegexpQuery(docs []doc.Document) gopter.Gen { +// GenRegexpQuery generates a regexp query. +func GenRegexpQuery(docs []doc.Document) gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { docIDRes, ok := gen.IntRange(0, len(docs)-1)(genParams).Retrieve() if !ok { @@ -102,45 +122,51 @@ func genRegexpQuery(docs []doc.Document) gopter.Gen { } } -func genNegationQuery(docs []doc.Document) gopter.Gen { +// GenNegationQuery generates a negation query. +func GenNegationQuery(docs []doc.Document) gopter.Gen { return gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), + GenTermQuery(docs), + GenRegexpQuery(docs), ). Map(func(q search.Query) search.Query { return query.NewNegationQuery(q) }) } -func genConjuctionQuery(docs []doc.Document) gopter.Gen { +// GenConjunctionQuery generates a conjunction query. +func GenConjunctionQuery(docs []doc.Document) gopter.Gen { return gen.SliceOf( gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), - genNegationQuery(docs)), + GenTermQuery(docs), + GenRegexpQuery(docs), + GenNegationQuery(docs)), reflect.TypeOf((*search.Query)(nil)).Elem()). Map(func(qs []search.Query) search.Query { return query.NewConjunctionQuery(qs) }) } -func genDisjunctionQuery(docs []doc.Document) gopter.Gen { +// GenDisjunctionQuery generates a disjunction query. +func GenDisjunctionQuery(docs []doc.Document) gopter.Gen { return gen.SliceOf( gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), - genNegationQuery(docs)), + GenTermQuery(docs), + GenRegexpQuery(docs), + GenNegationQuery(docs)), reflect.TypeOf((*search.Query)(nil)).Elem()). Map(func(qs []search.Query) search.Query { return query.NewDisjunctionQuery(qs) }) } -func genQuery(docs []doc.Document) gopter.Gen { +// GenQuery generates a query. +func GenQuery(docs []doc.Document) gopter.Gen { return gen.OneGenOf( - genTermQuery(docs), - genRegexpQuery(docs), - genNegationQuery(docs), - genConjuctionQuery(docs), - genDisjunctionQuery(docs)) + GenTermQuery(docs), + GenRegexpQuery(docs), + GenNegationQuery(docs), + GenConjunctionQuery(docs), + GenDisjunctionQuery(docs)) } + +// Ge