-
Notifications
You must be signed in to change notification settings - Fork 88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added Unicode and emoji support #35
base: master
Are you sure you want to change the base?
Changes from all commits
b6e87d8
f5e9805
dc2a194
74fc794
1abc7b9
73e5f59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
module github.com/gjvnq/xpath | ||
|
||
go 1.12 | ||
|
||
require github.com/antchfx/xpath v0.0.0-20190319080838-ce1d48779e67 | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
github.com/antchfx/xpath v0.0.0-20190319080838-ce1d48779e67 h1:uj4UuiIs53RhHSySIupR1TEIouckjSfnljF3QbN1yh0= | ||
github.com/antchfx/xpath v0.0.0-20190319080838-ce1d48779e67/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ import ( | |
"fmt" | ||
"strconv" | ||
"unicode" | ||
"unicode/utf8" | ||
) | ||
|
||
// A XPath expression token type. | ||
|
@@ -329,6 +330,7 @@ func (p *parser) parsePathExpr(n node) node { | |
} | ||
} else { | ||
opnd = p.parseLocationPath(nil) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why add this empty line? |
||
} | ||
return opnd | ||
} | ||
|
@@ -363,6 +365,7 @@ func (p *parser) parseLocationPath(n node) (opnd node) { | |
p.next() | ||
opnd = newRootNode("//") | ||
opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd)) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto: why add this empty line? |
||
default: | ||
opnd = p.parseRelativeLocationPath(n) | ||
} | ||
|
@@ -649,8 +652,9 @@ func (s *scanner) nextChar() bool { | |
s.curr = rune(0) | ||
return false | ||
} | ||
s.curr = rune(s.text[s.pos]) | ||
s.pos++ | ||
size := 0 | ||
s.curr, size = utf8.DecodeRuneInString(s.text[s.pos:]) | ||
s.pos += size | ||
return true | ||
} | ||
|
||
|
@@ -770,7 +774,7 @@ func (s *scanner) scanFraction() float64 { | |
) | ||
for isDigit(s.curr) { | ||
s.nextChar() | ||
c++ | ||
c += utf8.RuneLen(s.curr) | ||
} | ||
v, err := strconv.ParseFloat(s.text[i:i+c], 64) | ||
if err != nil { | ||
|
@@ -786,14 +790,14 @@ func (s *scanner) scanNumber() float64 { | |
) | ||
for isDigit(s.curr) { | ||
s.nextChar() | ||
c++ | ||
c += utf8.RuneLen(s.curr) | ||
} | ||
if s.curr == '.' { | ||
s.nextChar() | ||
c++ | ||
c += utf8.RuneLen(s.curr) | ||
for isDigit(s.curr) { | ||
s.nextChar() | ||
c++ | ||
c += utf8.RuneLen(s.curr) | ||
} | ||
} | ||
v, err := strconv.ParseFloat(s.text[i:i+c], 64) | ||
|
@@ -814,7 +818,7 @@ func (s *scanner) scanString() string { | |
if !s.nextChar() { | ||
panic(errors.New("xpath: scanString got unclosed string")) | ||
} | ||
c++ | ||
c += utf8.RuneLen(s.curr) | ||
} | ||
s.nextChar() | ||
return s.text[i : i+c] | ||
|
@@ -826,7 +830,7 @@ func (s *scanner) scanName() string { | |
i = s.pos - 1 | ||
) | ||
for isName(s.curr) { | ||
c++ | ||
c += utf8.RuneLen(s.curr) | ||
if !s.nextChar() { | ||
break | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,14 @@ func TestCompile(t *testing.T) { | |
t.Fatalf("/a/b/(c, .[not(c)]) should be correct but got error %s", err) | ||
} | ||
} | ||
|
||
func TestCompile2(t *testing.T) { | ||
_, err := Compile("//ul/li/@clášs日本語") | ||
if err != nil { | ||
t.Fatalf("//ul/li/@clášs日本語 should be correct but got error %s", err) | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. While this does seem to cover the additions you made in the scanner/parse.go code, it would be nice to also have one or more tests showing that a compiled UTF-8-friendly expression actually works against a real HTML/XML document, i.e. that select works. |
||
|
||
func TestSelf(t *testing.T) { | ||
testXPath(t, html, ".", "html") | ||
testXPath(t, html.FirstChild, ".", "head") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wait, why do you add go mod in this patch, which has nothing to do with moving the library to go mod? Have we discussed the module management strategy moving forward and agreed on this approach?