Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement io.StringWriter #32

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/segmentio/murmur3

go 1.17
18 changes: 5 additions & 13 deletions murmur.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,34 @@ Package murmur3 implements Austin Appleby's non-cryptographic MurmurHash3.
*/
package murmur3

type bmixer interface {
bmix(p []byte) (tail []byte)
Size() (n int)
reset()
}

// digest holds the input-buffering state that the concrete hash types in
// this package embed: the running input length, the pending bytes that do
// not yet form a full block, and the seed.
type digest struct {
clen int // Digested input cumulative length.
tail []byte // 0 to Size()-1 bytes view of `buf'.
buf [16]byte // Expected (but not required) to be Size() large.
seed uint32 // Seed for initializing the hash.
bmixer
}

// BlockSize always reports 1.
func (d *digest) BlockSize() int {
	const blockSize = 1
	return blockSize
}

func (d *digest) Write(p []byte) (n int, err error) {
func (d *digest) write(p []byte, size int, bmix func([]byte) []byte) (n int, err error) {
n = len(p)
d.clen += n

if len(d.tail) > 0 {
// Stick back pending bytes.
nfree := d.Size() - len(d.tail) // nfree ∈ [1, d.Size()-1].
nfree := size - len(d.tail) // nfree ∈ [1, d.Size()-1].
if nfree < len(p) {
// One full block can be formed.
block := append(d.tail, p[:nfree]...)
p = p[nfree:]
_ = d.bmix(block) // No tail.
_ = bmix(block) // No tail.
} else {
// Tail's buf is large enough to prevent reallocs.
p = append(d.tail, p...)
}
}

d.tail = d.bmix(p)
d.tail = bmix(p)

// Keep own copy of the 0 to Size()-1 pending bytes.
nn := copy(d.buf[:], d.tail)
Expand All @@ -57,8 +50,7 @@ func (d *digest) Write(p []byte) (n int, err error) {
return n, nil
}

func (d *digest) Reset() {
func (d *digest) reset() {
d.clen = 0
d.tail = nil
d.bmixer.reset()
}
50 changes: 31 additions & 19 deletions murmur128.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package murmur3

import (
//"encoding/binary"
// "encoding/binary"
"hash"
"io"
"math/bits"
"unsafe"
)
Expand All @@ -14,9 +15,9 @@ const (

// Make sure interfaces are correctly implemented.
var (
_ hash.Hash = new(digest128)
_ Hash128 = new(digest128)
_ bmixer = new(digest128)
_ hash.Hash = (*Digest128)(nil)
_ Hash128 = (*Digest128)(nil)
_ io.StringWriter = (*Digest128)(nil)
)

// Hash128 represents a 128-bit hasher
Expand All @@ -26,30 +27,41 @@ type Hash128 interface {
Sum128() (uint64, uint64)
}

// digest128 represents a partial evaluation of a 128 bites hash.
type digest128 struct {
// Digest128 represents a partial evaluation of a 128-bit hash.
// It embeds digest for the input-buffering state and adds the two 64-bit
// words of the running hash.
type Digest128 struct {
digest
h1 uint64 // Unfinalized running hash part 1.
h2 uint64 // Unfinalized running hash part 2.
}

// New128 returns a 128-bit hasher
func New128() Hash128 { return New128WithSeed(0) }
// New128 returns a 128-bit hasher that uses a seed of 0.
func New128() *Digest128 {
	var seed uint32 // zero seed
	return New128WithSeed(seed)
}

// New128WithSeed returns a 128-bit hasher set with explicit seed value
func New128WithSeed(seed uint32) Hash128 {
d := new(digest128)
d.seed = seed
d.bmixer = d
d.Reset()
return d
// New128WithSeed returns a 128-bit hasher whose stored seed and both
// running hash words are initialised from seed.
func New128WithSeed(seed uint32) *Digest128 {
	d := new(Digest128)
	d.seed = seed
	d.h1, d.h2 = uint64(seed), uint64(seed)
	return d
}

func (d *digest128) Size() int { return 16 }
// Size reports 16, the byte length of a 128-bit digest. Write also hands
// this value to the shared write routine as the block size.
func (d *Digest128) Size() int {
	const digestBytes = 16
	return digestBytes
}

// WriteString hashes the bytes of s. The string is reinterpreted as a byte
// slice by unsafeStringToBytes and passed to Write.
func (d *Digest128) WriteString(s string) (int, error) {
	raw := unsafeStringToBytes(s)
	return d.Write(raw)
}

func (d *digest128) reset() { d.h1, d.h2 = uint64(d.seed), uint64(d.seed) }
// Write adds b to the running hash by delegating to the shared write
// routine with this type's block size and block mixer.
func (d *Digest128) Write(b []byte) (int, error) {
	blockSize := d.Size()
	return d.write(b, blockSize, d.bmix)
}

// Reset discards the buffered state via reset and re-seeds both running
// hash words from the stored seed.
func (d *Digest128) Reset() {
	d.reset()
	seed := uint64(d.seed)
	d.h1 = seed
	d.h2 = seed
}

func (d *digest128) Sum(b []byte) []byte {
func (d *Digest128) Sum(b []byte) []byte {
h1, h2 := d.Sum128()
return append(b,
byte(h1>>56), byte(h1>>48), byte(h1>>40), byte(h1>>32),
Expand All @@ -60,7 +72,7 @@ func (d *digest128) Sum(b []byte) []byte {
)
}

func (d *digest128) bmix(p []byte) (tail []byte) {
func (d *Digest128) bmix(p []byte) (tail []byte) {
h1, h2 := d.h1, d.h2

nblocks := len(p) / 16
Expand Down Expand Up @@ -90,7 +102,7 @@ func (d *digest128) bmix(p []byte) (tail []byte) {
return p[nblocks*d.Size():]
}

func (d *digest128) Sum128() (h1, h2 uint64) {
func (d *Digest128) Sum128() (h1, h2 uint64) {

h1, h2 = d.h1, d.h2

Expand Down Expand Up @@ -190,7 +202,7 @@ func Sum128(data []byte) (h1 uint64, h2 uint64) { return Sum128WithSeed(data, 0)
// hasher.Write(data)
// return hasher.Sum128()
func Sum128WithSeed(data []byte, seed uint32) (h1 uint64, h2 uint64) {
d := digest128{h1: uint64(seed), h2: uint64(seed)}
d := Digest128{h1: uint64(seed), h2: uint64(seed)}
d.seed = seed
d.tail = d.bmix(data)
d.clen = len(data)
Expand Down
47 changes: 29 additions & 18 deletions murmur32.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,51 +4,62 @@ package murmur3

import (
"hash"
"io"
"math/bits"
"unsafe"
)

// Make sure interfaces are correctly implemented.
var (
_ hash.Hash = new(digest32)
_ hash.Hash32 = new(digest32)
_ bmixer = new(digest32)
_ hash.Hash = (*Digest32)(nil)
_ hash.Hash32 = (*Digest32)(nil)
_ io.StringWriter = (*Digest32)(nil)
)

const (
c1_32 uint32 = 0xcc9e2d51
c2_32 uint32 = 0x1b873593
)

// digest32 represents a partial evaluation of a 32 bites hash.
type digest32 struct {
// Digest32 represents a partial evaluation of a 32-bit hash.
// It embeds digest for the input-buffering state and adds the single
// 32-bit running hash word.
type Digest32 struct {
digest
h1 uint32 // Unfinalized running hash.
}

// New32 returns new 32-bit hasher
func New32() hash.Hash32 { return New32WithSeed(0) }
// New32 returns a 32-bit hasher that uses a seed of 0.
func New32() *Digest32 {
	var seed uint32 // zero seed
	return New32WithSeed(seed)
}

// New32WithSeed returns new 32-bit hasher set with explicit seed value
func New32WithSeed(seed uint32) hash.Hash32 {
d := new(digest32)
d.seed = seed
d.bmixer = d
d.Reset()
return d
// New32WithSeed returns a 32-bit hasher whose stored seed and running hash
// word are both initialised from seed.
func New32WithSeed(seed uint32) *Digest32 {
	d := new(Digest32)
	d.seed = seed
	d.h1 = seed
	return d
}

// Reset discards the buffered state via reset and restores the running
// hash word to the stored seed.
func (d *Digest32) Reset() {
	d.reset()
	seed := d.seed
	d.h1 = seed
}

func (d *digest32) Size() int { return 4 }
// Size reports 4, the number of bytes Sum appends. Write also hands this
// value to the shared write routine as the block size.
func (d *Digest32) Size() int {
	const digestBytes = 4
	return digestBytes
}

func (d *digest32) reset() { d.h1 = d.seed }
// WriteString hashes the bytes of s. The string is reinterpreted as a byte
// slice by unsafeStringToBytes and passed to Write.
func (d *Digest32) WriteString(s string) (int, error) {
	raw := unsafeStringToBytes(s)
	return d.Write(raw)
}

// Write adds b to the running hash by delegating to the shared write
// routine with this type's block size and block mixer.
func (d *Digest32) Write(b []byte) (int, error) {
	blockSize := d.Size()
	return d.write(b, blockSize, d.bmix)
}

func (d *digest32) Sum(b []byte) []byte {
// Sum appends the current 32-bit hash to b, most significant byte first,
// and returns the extended slice.
func (d *Digest32) Sum(b []byte) []byte {
	sum := d.Sum32()
	var be [4]byte
	for i := range be {
		// Byte i holds bits [24-8i, 32-8i) of the sum.
		be[i] = byte(sum >> (24 - 8*uint(i)))
	}
	return append(b, be[:]...)
}

// Digest as many blocks as possible.
func (d *digest32) bmix(p []byte) (tail []byte) {
// digest as many blocks as possible.
func (d *Digest32) bmix(p []byte) (tail []byte) {
h1 := d.h1

nblocks := len(p) / 4
Expand All @@ -67,7 +78,7 @@ func (d *digest32) bmix(p []byte) (tail []byte) {
return p[nblocks*d.Size():]
}

func (d *digest32) Sum32() (h1 uint32) {
func (d *Digest32) Sum32() (h1 uint32) {

h1 = d.h1

Expand Down
40 changes: 28 additions & 12 deletions murmur64.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,55 @@ package murmur3

import (
"hash"
"io"
)

// Make sure interfaces are correctly implemented.
var (
_ hash.Hash = new(digest64)
_ hash.Hash64 = new(digest64)
_ bmixer = new(digest64)
_ hash.Hash = (*Digest64)(nil)
_ hash.Hash64 = (*Digest64)(nil)
_ io.StringWriter = (*Digest64)(nil)
)

// digest64 is half a digest128.
type digest64 digest128
// Digest64 is half a Digest128: it has the same underlying struct and mixes
// blocks with Digest128's bmix, while Sum64 returns only the first word of
// Sum128.
type Digest64 Digest128

// New64 returns a 64-bit hasher
func New64() hash.Hash64 { return New64WithSeed(0) }
// New64 returns a 64-bit hasher that uses a seed of 0.
func New64() *Digest64 {
	var seed uint32 // zero seed
	return New64WithSeed(seed)
}

// New64WithSeed returns a 64-bit hasher set with explicit seed value
func New64WithSeed(seed uint32) hash.Hash64 {
d := (*digest64)(New128WithSeed(seed).(*digest128))
// New64WithSeed returns a 64-bit hasher built by converting a freshly
// seeded 128-bit hasher to *Digest64.
func New64WithSeed(seed uint32) *Digest64 {
	wide := New128WithSeed(seed)
	return (*Digest64)(wide)
}

func (d *digest64) Sum(b []byte) []byte {
// Sum appends the current 64-bit hash to b, most significant byte first,
// and returns the extended slice.
func (d *Digest64) Sum(b []byte) []byte {
	sum := d.Sum64()
	var be [8]byte
	for i := range be {
		// Byte i holds bits [56-8i, 64-8i) of the sum.
		be[i] = byte(sum >> (56 - 8*uint(i)))
	}
	return append(b, be[:]...)
}

func (d *digest64) Sum64() uint64 {
h1, _ := (*digest128)(d).Sum128()
// Sum64 returns the first of the two words produced by the 128-bit Sum128
// over the same state.
func (d *Digest64) Sum64() uint64 {
	wide := (*Digest128)(d)
	first, _ := wide.Sum128()
	return first
}

// Size reports 8, the number of bytes Sum appends.
func (d *Digest64) Size() int {
	const digestBytes = 8
	return digestBytes
}

// WriteString hashes the bytes of s. The string is reinterpreted as a byte
// slice by unsafeStringToBytes and passed to Write.
func (d *Digest64) WriteString(s string) (int, error) {
	raw := unsafeStringToBytes(s)
	return d.Write(raw)
}

// Write adds b to the running hash. Input is mixed in 16-byte blocks using
// Digest128's bmix; the block size is independent of the value Size reports.
func (d *Digest64) Write(b []byte) (int, error) {
	const blockSize = 16
	wide := (*Digest128)(d)
	return d.write(b, blockSize, wide.bmix)
}

// Reset discards the buffered state via reset and re-seeds both running
// hash words from the stored seed.
func (d *Digest64) Reset() {
	d.reset()
	seed := uint64(d.seed)
	d.h1 = seed
	d.h2 = seed
}

// Sum64 returns the MurmurHash3 sum of data. It is equivalent to the
// following sequence (without the extra burden and the extra allocation):
// hasher := New64()
Expand All @@ -48,7 +64,7 @@ func Sum64(data []byte) uint64 { return Sum64WithSeed(data, 0) }
// hasher.Write(data)
// return hasher.Sum64()
func Sum64WithSeed(data []byte, seed uint32) uint64 {
d := digest128{h1: uint64(seed), h2: uint64(seed)}
d := Digest128{h1: uint64(seed), h2: uint64(seed)}
d.seed = seed
d.tail = d.bmix(data)
d.clen = len(data)
Expand Down
17 changes: 17 additions & 0 deletions murmur_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package murmur3

import (
"fmt"
"io"
"strconv"
"testing"
)
Expand Down Expand Up @@ -187,3 +188,19 @@ func Benchmark128(b *testing.B) {
})
}
}

// BenchmarkWriteString measures io.WriteString into a 32-bit hasher for
// string lengths that double from 1 up to 8192 bytes. Each length runs as
// its own sub-benchmark named after the length.
func BenchmarkWriteString(b *testing.B) {
	data := make([]byte, 8192)
	for size := 1; size <= cap(data); size *= 2 {
		b.Run(strconv.Itoa(size), func(b *testing.B) {
			input := string(data[:size])
			hasher := New32()
			b.SetBytes(int64(size))
			b.ReportAllocs()
			// Exclude the setup above from the measurement.
			b.ResetTimer()
			for remaining := b.N; remaining > 0; remaining-- {
				io.WriteString(hasher, input)
			}
		})
	}
}
12 changes: 12 additions & 0 deletions unsafe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package murmur3

import "unsafe"

// sliceHeader is a string followed by an int. unsafeStringToBytes
// reinterprets a value of this type as a []byte, which relies on the string
// supplying the data pointer and length words and cap supplying the
// capacity word. Field order must not change.
type sliceHeader struct {
	str string
	cap int
}

// unsafeStringToBytes returns a byte slice view of s with length and
// capacity equal to len(s), built by reinterpreting a sliceHeader rather
// than by converting with []byte(s).
// NOTE(review): the result presumably aliases the string's storage and so
// must be treated as read-only; confirm that no caller writes to it.
func unsafeStringToBytes(s string) []byte {
	hdr := sliceHeader{str: s, cap: len(s)}
	return *(*[]byte)(unsafe.Pointer(&hdr))
}