Skip to content
This repository has been archived by the owner on Oct 16, 2018. It is now read-only.

[index] Add Batch method for inserting multiple documents at a time #34

Merged
merged 8 commits into from
Apr 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .excludemetalint
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ vendor/
generated/
_mock.go
.pb.go
index/segment/mem/idsgen/map_gen.go
39 changes: 19 additions & 20 deletions doc/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ import (
)

var (
errMultipleIDs = errors.New("document cannot contain multiple IDs")
errZeroLengthID = errors.New("document ID cannot be of length zero")
errEmptyDocument = errors.New("document cannot be empty")
errReservedFieldName = fmt.Errorf("'%s' is a reserved field name", IDReservedFieldName)
errEmptyDocument = errors.New("document cannot be empty")
)

// IDReservedFieldName is the field name reserved for IDs.
Expand Down Expand Up @@ -89,7 +88,7 @@ func (f Fields) shallowCopy() Fields {

// Document represents a document to be indexed.
type Document struct {
// Fields contains the list of fields by which to index the document.
ID []byte
Fields []Field
}

Expand All @@ -105,6 +104,10 @@ func (d Document) Get(fieldName []byte) ([]byte, bool) {

// Equal returns whether this document is equal to another.
func (d Document) Equal(other Document) bool {
if !bytes.Equal(d.ID, other.ID) {
return false
}

if len(d.Fields) != len(other.Fields) {
return false
}
Expand Down Expand Up @@ -133,35 +136,31 @@ func (d Document) Equal(other Document) bool {
return true
}

// Validate validates the given document and returns its ID if it has one.
func (d Document) Validate() ([]byte, error) {
// Validate returns a bool indicating whether the document is valid.
func (d Document) Validate() error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this call HasID()? maybe add a test for this case too

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to require an ID here, in fact, in the segment we currently check if a document is valid before checking its ID. We might want to remove the check for the fields though in case we ever want to index just an ID. Not sure if that would ever be needed though.

if len(d.Fields) == 0 {
return nil, errEmptyDocument
return errEmptyDocument
}

var id []byte
for _, f := range d.Fields {
// TODO: Should we enforce uniqueness of field names?
if !utf8.Valid(f.Name) {
return nil, fmt.Errorf("document contains invalid field name: %v", f.Name)
return fmt.Errorf("document contains invalid field name: %v", f.Name)
}

if bytes.Equal(f.Name, IDReservedFieldName) {
if id != nil {
return nil, errMultipleIDs
}

if len(f.Value) == 0 {
return nil, errZeroLengthID
}

id = f.Value
return errReservedFieldName
}

if !utf8.Valid(f.Value) {
return nil, fmt.Errorf("document contains invalid field value: %v", f.Value)
return fmt.Errorf("document contains invalid field value: %v", f.Value)
}
}

return id, nil
return nil
}

// HasID returns a bool indicating whether the document has an ID or not.
func (d Document) HasID() bool {
return len(d.ID) > 0
}
84 changes: 72 additions & 12 deletions doc/document_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ func TestDocumentEquality(t *testing.T) {
{
name: "documents with the same fields in the same order are equal",
l: Document{
ID: []byte("831992"),
Fields: []Field{
Field{
Name: []byte("apple"),
Expand All @@ -171,6 +172,7 @@ func TestDocumentEquality(t *testing.T) {
},
},
r: Document{
ID: []byte("831992"),
Fields: []Field{
Field{
Name: []byte("apple"),
Expand All @@ -187,6 +189,7 @@ func TestDocumentEquality(t *testing.T) {
{
name: "documents with the same fields in different order are equal",
l: Document{
ID: []byte("831992"),
Fields: []Field{
Field{
Name: []byte("banana"),
Expand All @@ -199,6 +202,7 @@ func TestDocumentEquality(t *testing.T) {
},
},
r: Document{
ID: []byte("831992"),
Fields: []Field{
Field{
Name: []byte("apple"),
Expand All @@ -215,6 +219,7 @@ func TestDocumentEquality(t *testing.T) {
{
name: "documents with different fields are unequal",
l: Document{
ID: []byte("831992"),
Fields: []Field{
Field{
Name: []byte("apple"),
Expand All @@ -227,6 +232,7 @@ func TestDocumentEquality(t *testing.T) {
},
},
r: Document{
ID: []byte("831992"),
Fields: []Field{
Field{
Name: []byte("apple"),
Expand All @@ -240,6 +246,28 @@ func TestDocumentEquality(t *testing.T) {
},
expected: false,
},
{
name: "documents with different IDs are unequal",
l: Document{
ID: []byte("831992"),
Fields: []Field{
Field{
Name: []byte("apple"),
Value: []byte("red"),
},
},
},
r: Document{
ID: []byte("080292"),
Fields: []Field{
Field{
Name: []byte("apple"),
Value: []byte("red"),
},
},
},
expected: false,
},
}

for _, test := range tests {
Expand All @@ -254,7 +282,6 @@ func TestDocumentValidation(t *testing.T) {
name string
input Document
expectedErr bool
expectedID []byte
}{
{
name: "empty document",
Expand Down Expand Up @@ -286,46 +313,79 @@ func TestDocumentValidation(t *testing.T) {
expectedErr: true,
},
{
name: "valid document with ID",
name: "document contains field with reserved field name",
input: Document{
Fields: []Field{
Field{
Name: []byte("apple"),
Value: []byte("red"),
},
Field{
Name: IDReservedFieldName,
Value: []byte("123"),
},
},
},
expectedErr: false,
expectedID: nil,
expectedErr: true,
},
{
name: "valid document with ID",
name: "valid document",
input: Document{
Fields: []Field{
Field{
Name: []byte("apple"),
Value: []byte("red"),
},
Field{
Name: IDReservedFieldName,
Value: []byte("123"),
},
},
},
expectedErr: false,
expectedID: []byte("123"),
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
id, err := test.input.Validate()
err := test.input.Validate()
if test.expectedErr {
require.Error(t, err)
return
}
require.NoError(t, err)
require.Equal(t, test.expectedID, id)
})
}
}

func TestDocumentHasID(t *testing.T) {
tests := []struct {
name string
input Document
expected bool
}{
{
name: "nil ID",
input: Document{
ID: nil,
},
expected: false,
},
{
name: "zero-length ID",
input: Document{
ID: make([]byte, 0, 16),
},
expected: false,
},
{
name: "valid ID",
input: Document{
ID: []byte("831992"),
},
expected: true,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
require.Equal(t, test.expected, test.input.HasID())
})
}
}
29 changes: 17 additions & 12 deletions generated/generics/generate.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
#!/bin/bash

if [ -z $GOPATH ]; then
echo 'GOPATH is not set'
exit 1
echo 'GOPATH is not set'
exit 1
fi

GENERIC_MAP_PATH=${GOPATH}/src/github.com/m3db/m3ninx/vendor/github.com/m3db/m3x/generics/hashmap
GENERIC_MAP_IMPL=${GENERIC_MAP_PATH}/map.go

if [ ! -f "$GENERIC_MAP_IMPL" ]; then
echo "${GENERIC_MAP_IMPL} does not exist"
exit 1
echo "${GENERIC_MAP_IMPL} does not exist"
exit 1
fi

GENERATED_PATH=${GOPATH}/src/github.com/m3db/m3ninx/index/segment/mem
if [ ! -d "$GENERATED_PATH" ]; then
echo "${GENERATED_PATH} does not exist"
exit 1
echo "${GENERATED_PATH} does not exist"
exit 1
fi

# NB: We use (genny)[1] to combat the lack of generics in Go. It allows us
Expand All @@ -32,11 +32,16 @@ fi
# [1]: https://github.com/cheekybits/genny

mkdir -p $GENERATED_PATH/postingsgen
cat $GENERIC_MAP_IMPL \
| genny -out=${GENERATED_PATH}/postingsgen/generated_map.go \
-pkg=postingsgen gen "KeyType=[]byte ValueType=postings.MutableList"
cat $GENERIC_MAP_IMPL \
| genny -out=${GENERATED_PATH}/postingsgen/map_gen.go \
-pkg=postingsgen gen "KeyType=[]byte ValueType=postings.MutableList"

mkdir -p $GENERATED_PATH/fieldsgen
cat $GENERIC_MAP_IMPL \
| genny -out=${GENERATED_PATH}/fieldsgen/generated_map.go \
-pkg=fieldsgen gen "KeyType=[]byte ValueType=*postingsgen.ConcurrentMap"
cat $GENERIC_MAP_IMPL \
| genny -out=${GENERATED_PATH}/fieldsgen/map_gen.go \
-pkg=fieldsgen gen "KeyType=[]byte ValueType=*postingsgen.ConcurrentMap"

mkdir -p $GENERATED_PATH/idsgen
cat $GENERIC_MAP_IMPL \
| genny -out=${GENERATED_PATH}/idsgen/map_gen.go \
-pkg=idsgen gen "KeyType=[]byte ValueType=struct{}"
Loading