-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from nyaruka/urn_queries
Improved URN parsing
- Loading branch information
Showing
27 changed files
with
5,464 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
language: go | ||
|
||
go: | ||
- 1.8 | ||
- "1.10" | ||
|
||
before_script: | ||
- go get github.com/mattn/goveralls | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
package urns | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
) | ||
|
||
// Simple URN parser loosely based on RFC 2141 (https://www.ietf.org/rfc/rfc2141.txt)
|
||
// escapes maps each reserved character to the percent-encoded sequence that
// replaces it inside a URN component. '?' and '#' introduce the query and
// fragment when parsing, and '%' introduces an escape sequence, so all three
// have to be encoded when they occur literally.
var escapes = map[rune]string{
	'#': `%23`,
	'%': `%25`,
	// '/': `%2F`, can't enable this until we fix our URNs with slashes
	'?': `%3F`,
}
|
||
// parsedURN holds the components of a URN written as
// scheme:path[?query][#fragment].
type parsedURN struct {
	// scheme is the text before the first ':'.
	scheme string
	// path is the text after the first ':' up to any '?' or '#'.
	path string
	// query is the text following '?'; empty when there is no query.
	query string
	// fragment is the text following '#'; empty when there is no fragment.
	fragment string
}
|
||
func (u *parsedURN) String() string { | ||
s := escape(u.scheme) + ":" + escape(u.path) | ||
if u.query != "" { | ||
s += "?" + escape(u.query) | ||
} | ||
if u.fragment != "" { | ||
s += "#" + escape(u.fragment) | ||
} | ||
return s | ||
} | ||
|
||
// Parser states. Each one names the URN component that parseURN is currently
// accumulating characters into.
const (
	// stateScheme is the initial state, before the first ':' has been seen.
	stateScheme = iota
	// statePath is entered on the first ':'.
	statePath
	// stateQuery is entered on a '?' that follows the path.
	stateQuery
	// stateFragment is entered on a '#' that follows the path or the query.
	stateFragment
)
|
||
func parseURN(urn string) (*parsedURN, error) { | ||
state := stateScheme | ||
|
||
buffers := map[int]*strings.Builder{ | ||
stateScheme: {}, | ||
statePath: {}, | ||
stateQuery: {}, | ||
stateFragment: {}, | ||
} | ||
|
||
for _, c := range urn { | ||
if c == ':' { | ||
if state == stateScheme { | ||
state = statePath | ||
continue | ||
} | ||
} else if c == '?' { | ||
if state == statePath { | ||
state = stateQuery | ||
continue | ||
} else { | ||
return nil, fmt.Errorf("query component can only come after path component") | ||
} | ||
} else if c == '#' { | ||
if state == statePath || state == stateQuery { | ||
state = stateFragment | ||
continue | ||
} else { | ||
return nil, fmt.Errorf("fragment component can only come after path or query components") | ||
} | ||
} | ||
|
||
buffers[state].WriteRune(c) | ||
} | ||
|
||
if buffers[stateScheme].Len() == 0 { | ||
return nil, fmt.Errorf("scheme cannot be empty") | ||
} | ||
if buffers[statePath].Len() == 0 { | ||
return nil, fmt.Errorf("path cannot be empty") | ||
} | ||
|
||
return &parsedURN{ | ||
scheme: unescape(buffers[stateScheme].String()), | ||
path: unescape(buffers[statePath].String()), | ||
query: unescape(buffers[stateQuery].String()), | ||
fragment: unescape(buffers[stateFragment].String()), | ||
}, nil | ||
} | ||
|
||
func escape(s string) string { | ||
b := strings.Builder{} | ||
for _, c := range s { | ||
esc, isEsc := escapes[c] | ||
if isEsc { | ||
b.WriteString(esc) | ||
} else { | ||
b.WriteRune(c) | ||
} | ||
} | ||
return b.String() | ||
} | ||
|
||
func unescape(s string) string { | ||
for ch, esc := range escapes { | ||
s = strings.Replace(s, esc, string(ch), -1) | ||
} | ||
return s | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package urns | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
func TestParseURNAndBack(t *testing.T) { | ||
testCases := []struct { | ||
input string | ||
scheme string | ||
path string | ||
query string | ||
fragment string | ||
hasError bool | ||
}{ | ||
{input: "scheme:path", scheme: "scheme", path: "path"}, | ||
{input: "scheme:path#frag", scheme: "scheme", path: "path", fragment: "frag"}, | ||
{input: "scheme:path?query", scheme: "scheme", path: "path", query: "query"}, | ||
{input: "scheme:path?query#frag", scheme: "scheme", path: "path", query: "query", fragment: "frag"}, | ||
{input: "scheme:path?bar=foo&bar=zap#frag", scheme: "scheme", path: "path", query: "bar=foo&bar=zap", fragment: "frag"}, | ||
{input: "scheme:pa%25th?qu%23ery#fra%3Fg", scheme: "scheme", path: "pa%th", query: "qu#ery", fragment: "fra?g"}, | ||
|
||
{input: "scheme:path:morepath", scheme: "scheme", path: "path:morepath"}, | ||
{input: "scheme:path:morepath?foo=bar", scheme: "scheme", path: "path:morepath", query: "foo=bar"}, | ||
|
||
// can't be empty | ||
{input: "", hasError: true}, | ||
|
||
// can't single part | ||
{input: "xyz", hasError: true}, | ||
|
||
// can't omit scheme or path | ||
{input: ":path", hasError: true}, | ||
{input: "scheme:", hasError: true}, | ||
|
||
// can't have multiple queries or fragments | ||
{input: "scheme:path?query?query", hasError: true}, | ||
{input: "scheme:path#frag#frag", hasError: true}, | ||
|
||
// can't have query after fragment | ||
{input: "scheme:path#frag?query", hasError: true}, | ||
} | ||
for _, tc := range testCases { | ||
p, err := parseURN(tc.input) | ||
|
||
if err != nil { | ||
if !tc.hasError { | ||
t.Errorf("Failed parsing URN, got unxpected error: %s", err.Error()) | ||
} | ||
} else { | ||
if p.scheme != tc.scheme || p.path != tc.path || p.query != tc.query || p.fragment != tc.fragment { | ||
t.Errorf("Failed parsing URN, got %s|%s|%s|%s, expected %s|%s|%s|%s for '%s'", p.scheme, p.path, p.query, p.fragment, tc.scheme, tc.path, tc.query, tc.fragment, tc.input) | ||
} else { | ||
backToStr := p.String() | ||
|
||
if backToStr != tc.input { | ||
t.Errorf("Failed stringifying URN, got '%s', expected '%s' for %s|%s|%s|%s", backToStr, tc.input, tc.scheme, tc.path, tc.query, tc.fragment) | ||
} | ||
} | ||
} | ||
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.