Skip to content

Commit

Permalink
Merge pull request #11 from nyaruka/urn_queries
Browse files Browse the repository at this point in the history
Improved URN parsing
  • Loading branch information
rowanseymour authored Mar 8, 2018
2 parents d5464b9 + 0895007 commit 981b096
Show file tree
Hide file tree
Showing 27 changed files with 5,464 additions and 86 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: go

go:
- 1.8
- "1.10"

before_script:
- go get github.com/mattn/goveralls
Expand Down
110 changes: 110 additions & 0 deletions urns/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package urns

import (
"fmt"
"strings"
)

// Simple URN parser loosely based on RFC2141 (https://www.ietf.org/rfc/rfc2141.txt)

// escapes maps each reserved rune to its percent-encoded form. escape()
// substitutes these when a URN component is stringified and unescape()
// reverses them after parsing. The reserved runes are the ones the parser
// would otherwise read as component delimiters ('?' and '#') plus '%' itself,
// which introduces an escape sequence.
var escapes = map[rune]string{
'#': `%23`,
'%': `%25`,
// '/': `%2F`, can't enable this until we fix our URNs with slashes
'?': `%3F`,
}

// parsedURN holds the unescaped components of a URN of the form
// scheme:path[?query][#fragment].
type parsedURN struct {
// text before the first ':'
scheme string
// text after the first ':' up to any '?' or '#'; may itself contain ':'
path string
// text between '?' and any '#'; empty when there is no query
query string
// text after '#'; empty when there is no fragment
fragment string
}

// String reassembles the URN as scheme:path[?query][#fragment], escaping the
// reserved characters of every component. An empty query or fragment is left
// out together with its delimiter.
func (u *parsedURN) String() string {
	out := []byte(escape(u.scheme))
	out = append(out, ':')
	out = append(out, escape(u.path)...)

	if len(u.query) > 0 {
		out = append(out, '?')
		out = append(out, escape(u.query)...)
	}
	if len(u.fragment) > 0 {
		out = append(out, '#')
		out = append(out, escape(u.fragment)...)
	}
	return string(out)
}

// Parser states. Each one names the URN component that parseURN is currently
// reading and is also used as the key of that component's buffer.
const (
stateScheme = iota
statePath
stateQuery
stateFragment
)

// parseURN splits urn into its scheme, path, query and fragment components in
// a single pass and returns them unescaped.
//
// The first ':' ends the scheme; any later ':' is kept as literal text. A '?'
// is only accepted while reading the path and a '#' only while reading the
// path or the query, so repeated or out-of-order delimiters are rejected. An
// error is also returned when the scheme or the path turns out to be empty.
func parseURN(urn string) (*parsedURN, error) {
	// one buffer per component, indexed by the state constants
	var parts [stateFragment + 1]strings.Builder
	current := stateScheme

	for _, r := range urn {
		switch r {
		case ':':
			// only the first colon is a delimiter, the rest fall through as text
			if current == stateScheme {
				current = statePath
				continue
			}
		case '?':
			if current != statePath {
				return nil, fmt.Errorf("query component can only come after path component")
			}
			current = stateQuery
			continue
		case '#':
			if current != statePath && current != stateQuery {
				return nil, fmt.Errorf("fragment component can only come after path or query components")
			}
			current = stateFragment
			continue
		}

		parts[current].WriteRune(r)
	}

	if parts[stateScheme].Len() == 0 {
		return nil, fmt.Errorf("scheme cannot be empty")
	}
	if parts[statePath].Len() == 0 {
		return nil, fmt.Errorf("path cannot be empty")
	}

	result := &parsedURN{
		scheme:   unescape(parts[stateScheme].String()),
		path:     unescape(parts[statePath].String()),
		query:    unescape(parts[stateQuery].String()),
		fragment: unescape(parts[stateFragment].String()),
	}
	return result, nil
}

// escape returns s with every rune that has an entry in escapes replaced by
// its percent-encoded form; all other runes are copied through unchanged.
func escape(s string) string {
	var out strings.Builder
	for _, r := range s {
		if encoded, found := escapes[r]; found {
			out.WriteString(encoded)
			continue
		}
		out.WriteRune(r)
	}
	return out.String()
}

// unescaper decodes every sequence listed in escapes back to its rune in a
// single left-to-right pass over the input.
var unescaper = newUnescaper()

// newUnescaper builds the decoding replacer from the escapes table so that
// encoding and decoding always agree on the set of reserved runes.
func newUnescaper() *strings.Replacer {
	pairs := make([]string, 0, 2*len(escapes))
	for ch, esc := range escapes {
		pairs = append(pairs, esc, string(ch))
	}
	return strings.NewReplacer(pairs...)
}

// unescape returns s with every percent-encoded sequence from escapes replaced
// by the rune it stands for. It is the exact inverse of escape.
//
// Decoding happens in one pass, so text produced by a replacement is never
// decoded a second time. Replacing one sequence at a time over the whole
// string would let "%2523" (the escaped form of the literal text "%23") come
// out as either "%23" or "#" depending on the order in which the sequences
// were processed.
func unescape(s string) string {
	return unescaper.Replace(s)
}
63 changes: 63 additions & 0 deletions urns/parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package urns

import (
"testing"
)

// TestParseURNAndBack checks that parseURN splits each input into the expected
// components and that calling String() on the result reproduces the input
// exactly. Cases marked hasError must be rejected by the parser.
func TestParseURNAndBack(t *testing.T) {
	testCases := []struct {
		input    string
		scheme   string
		path     string
		query    string
		fragment string
		hasError bool
	}{
		{input: "scheme:path", scheme: "scheme", path: "path"},
		{input: "scheme:path#frag", scheme: "scheme", path: "path", fragment: "frag"},
		{input: "scheme:path?query", scheme: "scheme", path: "path", query: "query"},
		{input: "scheme:path?query#frag", scheme: "scheme", path: "path", query: "query", fragment: "frag"},
		{input: "scheme:path?bar=foo&bar=zap#frag", scheme: "scheme", path: "path", query: "bar=foo&bar=zap", fragment: "frag"},
		{input: "scheme:pa%25th?qu%23ery#fra%3Fg", scheme: "scheme", path: "pa%th", query: "qu#ery", fragment: "fra?g"},

		{input: "scheme:path:morepath", scheme: "scheme", path: "path:morepath"},
		{input: "scheme:path:morepath?foo=bar", scheme: "scheme", path: "path:morepath", query: "foo=bar"},

		// can't be empty
		{input: "", hasError: true},

		// can't be a single part
		{input: "xyz", hasError: true},

		// can't omit scheme or path
		{input: ":path", hasError: true},
		{input: "scheme:", hasError: true},

		// can't have multiple queries or fragments
		{input: "scheme:path?query?query", hasError: true},
		{input: "scheme:path#frag#frag", hasError: true},

		// can't have query after fragment
		{input: "scheme:path#frag?query", hasError: true},
	}

	for _, tc := range testCases {
		p, err := parseURN(tc.input)

		// error cases only need to be rejected; there is nothing to compare
		if tc.hasError {
			if err == nil {
				t.Errorf("Expected error parsing URN '%s', got %s|%s|%s|%s", tc.input, p.scheme, p.path, p.query, p.fragment)
			}
			continue
		}

		if err != nil {
			t.Errorf("Failed parsing URN '%s', got unexpected error: %s", tc.input, err.Error())
			continue
		}

		if p.scheme != tc.scheme || p.path != tc.path || p.query != tc.query || p.fragment != tc.fragment {
			t.Errorf("Failed parsing URN, got %s|%s|%s|%s, expected %s|%s|%s|%s for '%s'", p.scheme, p.path, p.query, p.fragment, tc.scheme, tc.path, tc.query, tc.fragment, tc.input)
			continue
		}

		// a parsed URN must stringify back to exactly what was parsed
		if backToStr := p.String(); backToStr != tc.input {
			t.Errorf("Failed stringifying URN, got '%s', expected '%s' for %s|%s|%s|%s", backToStr, tc.input, tc.scheme, tc.path, tc.query, tc.fragment)
		}
	}
}
106 changes: 54 additions & 52 deletions urns/urns.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package urns

import (
"fmt"
"net/url"
"regexp"
"strconv"
"strings"
Expand Down Expand Up @@ -84,36 +85,43 @@ type URN string

// NewTelURNForCountry returns a URN for the passed in telephone number and country code ("US")
func NewTelURNForCountry(number string, country string) (URN, error) {
return NewURNFromParts(TelScheme, normalizeNumber(number, country), "")
return NewURNFromParts(TelScheme, normalizeNumber(number, country), "", "")
}

// NewTelegramURN returns a URN for the passed in telegram identifier
func NewTelegramURN(identifier int64, display string) (URN, error) {
return NewURNFromParts(TelegramScheme, strconv.FormatInt(identifier, 10), display)
return NewURNFromParts(TelegramScheme, strconv.FormatInt(identifier, 10), "", display)
}

// NewWhatsAppURN returns a URN for the passed in whatsapp identifier
func NewWhatsAppURN(identifier string) (URN, error) {
return NewURNFromParts(WhatsAppScheme, identifier, "")
return NewURNFromParts(WhatsAppScheme, identifier, "", "")
}

// NewFirebaseURN returns a URN for the passed in firebase identifier
func NewFirebaseURN(identifier string) (URN, error) {
return NewURNFromParts(FCMScheme, identifier, "")
return NewURNFromParts(FCMScheme, identifier, "", "")
}

// NewFacebookURN returns a URN for the passed in facebook identifier
func NewFacebookURN(identifier string) (URN, error) {
return NewURNFromParts(FacebookScheme, identifier, "")
return NewURNFromParts(FacebookScheme, identifier, "", "")
}

// NewURNFromParts returns a new URN for the given scheme, path and display
func NewURNFromParts(scheme string, path string, display string) (URN, error) {
urnString := fmt.Sprintf("%s:%s", scheme, path)
if display != "" {
urnString = fmt.Sprintf("%s#%s", urnString, display)
// newURNFromParts returns a new URN for the given scheme, path, query and display without validating it
func newURNFromParts(scheme string, path string, query string, display string) URN {
u := &parsedURN{
scheme: scheme,
path: path,
query: query,
fragment: display,
}
urn := URN(urnString)
return URN(u.String())
}

// NewURNFromParts returns a validated URN for the given scheme, path, query and display
func NewURNFromParts(scheme string, path string, query string, display string) (URN, error) {
urn := newURNFromParts(scheme, path, query, display)
err := urn.Validate()
if err != nil {
return NilURN, err
Expand All @@ -122,28 +130,18 @@ func NewURNFromParts(scheme string, path string, display string) (URN, error) {
}

// ToParts splits the URN into scheme, path, query and display parts
func (u URN) ToParts() (string, string, string) {
parts := strings.SplitN(string(u), ":", 2)
if len(parts) != 2 {
return "", string(u), ""
}

scheme := parts[0]
path := parts[1]
display := ""

pathParts := strings.SplitN(path, "#", 2)
if len(pathParts) == 2 {
path = pathParts[0]
display = pathParts[1]
func (u URN) ToParts() (string, string, string, string) {
parsed, err := parseURN(string(u))
if err != nil {
return "", string(u), "", ""
}

return scheme, path, display
return parsed.scheme, parsed.path, parsed.query, parsed.fragment
}

// Normalize normalizes the URN into its canonical form and should be performed before URN comparisons
func (u URN) Normalize(country string) (URN, error) {
scheme, path, display := u.ToParts()
func (u URN) Normalize(country string) URN {
scheme, path, query, display := u.ToParts()
normPath := strings.TrimSpace(path)

switch scheme {
Expand Down Expand Up @@ -171,12 +169,16 @@ func (u URN) Normalize(country string) (URN, error) {
normPath = strings.ToLower(normPath)
}

return NewURNFromParts(scheme, normPath, display)
return newURNFromParts(scheme, normPath, query, display)
}

// Validate returns whether this URN is considered valid
func (u URN) Validate() error {
scheme, path, display := u.ToParts()
scheme, path, _, display := u.ToParts()

if scheme == "" || path == "" {
return fmt.Errorf("scheme or path cannot be empty")
}
if !IsValidScheme(scheme) {
return fmt.Errorf("invalid scheme: '%s'", scheme)
}
Expand Down Expand Up @@ -244,56 +246,56 @@ func (u URN) Validate() error {
}
}

if path == "" {
return fmt.Errorf("invalid path: '%s'", path)
}

return nil // anything goes for external schemes
}

// Scheme returns the scheme portion for the URN
func (u URN) Scheme() string {
scheme, _, _ := u.ToParts()
scheme, _, _, _ := u.ToParts()
return scheme
}

// Path returns the path portion for the URN
func (u URN) Path() string {
_, path, _ := u.ToParts()
_, path, _, _ := u.ToParts()
return path
}

// Display returns the display portion for the URN (if any)
func (u URN) Display() string {
_, _, display := u.ToParts()
_, _, _, display := u.ToParts()
return display
}

// Identity returns the URN with any display attributes stripped
func (u URN) Identity() string {
parts := strings.SplitN(string(u), "#", 2)
if len(parts) == 2 {
return parts[0]
}
return string(u)
// RawQuery returns the unparsed query portion for the URN (if any)
func (u URN) RawQuery() string {
_, _, query, _ := u.ToParts()
return query
}

// Query returns the parsed query portion for the URN (if any)
func (u URN) Query() (url.Values, error) {
_, _, query, _ := u.ToParts()
return url.ParseQuery(query)
}

// Identity returns the URN with any query or display attributes stripped
func (u URN) Identity() URN {
scheme, path, _, _ := u.ToParts()
return newURNFromParts(scheme, path, "", "")
}

// Localize returns a new URN which is local to the given country
func (u URN) Localize(country string) URN {
scheme, path, display := u.ToParts()
scheme, path, query, display := u.ToParts()

if scheme == TelScheme {
parsed, err := phonenumbers.Parse(path, country)
if err == nil {
path = strconv.FormatUint(parsed.GetNationalNumber(), 10)
}
}
urnString := fmt.Sprintf("%s:%s", scheme, path)
if display != "" {
urnString = fmt.Sprintf("%s#%s", urnString, display)
}

return URN(urnString)
return newURNFromParts(scheme, path, query, display)
}

// IsFacebookRef returns whether this URN is a facebook referral
Expand Down Expand Up @@ -330,7 +332,7 @@ func (u URN) String() string { return string(u) }

// Format formats this URN as a human friendly string
func (u URN) Format() string {
scheme, path, display := u.ToParts()
scheme, path, _, display := u.ToParts()

if scheme == TelScheme {
parsed, err := phonenumbers.Parse(path, "")
Expand Down
Loading

0 comments on commit 981b096

Please sign in to comment.