Skip to content

Commit

Permalink
use xpyth-parser for parsing xpath (#91)
Browse files Browse the repository at this point in the history
* use xpyth-parser for parsing xpath

* added a comment
  • Loading branch information
bbonf authored Dec 6, 2023
1 parent 10de2e4 commit e4531f4
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 8 deletions.
3 changes: 2 additions & 1 deletion backend/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ alpino-query>=2.1.10
celery[redis]>=5.2.0
mwe-query>=0.0.4
urllib3<=2.0.0
psycopg2
psycopg2
xpyth-parser
11 changes: 9 additions & 2 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ idna==3.4
iniconfig==2.0.0
# via pytest
isodate==0.6.1
# via rdflib
# via
# rdflib
# xpyth-parser
jinja2==3.1.2
# via spacy
kombu==5.3.1
Expand All @@ -114,6 +116,7 @@ lxml==4.9.3
# folia
# sastadev
# tei-reader
# xpyth-parser
markupsafe==2.1.3
# via jinja2
murmurhash==1.0.9
Expand Down Expand Up @@ -155,7 +158,9 @@ pydantic==2.1.1
pydantic-core==2.4.0
# via pydantic
pyparsing==3.1.1
# via rdflib
# via
# rdflib
# xpyth-parser
pytest==7.4.0
# via
# -r requirements.in
Expand Down Expand Up @@ -258,6 +263,8 @@ wcwidth==0.2.6
# via prompt-toolkit
xlsxwriter==3.1.2
# via sastadev
xpyth-parser==0.0.10
# via -r requirements.in

# The following packages are considered to be unsafe in a requirements file:
# setuptools
15 changes: 10 additions & 5 deletions backend/search/basex_search.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Auxiliary functions to facilitate searching in BaseX."""

import lxml.etree
import logging
import string
from io import StringIO
from typing import List

from xpyth_parser.parse import Parser
from .types import BaseXMatch, Result

log = logging.getLogger()

# Character whitelist for database names (going by the constant's name):
# ASCII letters, digits and a fixed set of punctuation marks.
ALLOWED_DBNAME_CHARS = ''.join((
    string.ascii_letters,
    string.digits,
    '!#$%&\'()+-=@[]^_`{}~.',
))
Expand All @@ -16,11 +18,14 @@
def check_xpath(xpath: str) -> bool:
    """Return True if a string is (only) a valid XPath, otherwise False.

    The expression is handed to xpyth-parser's ``Parser`` with
    ``no_resolve=True`` (presumably: parse only, do not evaluate the
    expression -- confirm against the xpyth-parser documentation).

    Any exception raised while preparing or parsing the expression is
    logged together with its traceback and reported as "not valid" by
    returning False; nothing is re-raised to the caller.
    """
    try:
        # for some reason ".." isn't being properly parsed. since it's an
        # abbreviation for parent::node(), we can replace it ahead of time
        Parser(xpath.replace('..', 'parent::node()'), no_resolve=True)
    except Exception:
        # Deliberately broad: every parser failure means "invalid XPath".
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
        # and SystemExit still propagate.
        log.exception('XPath parse exception')
        return False

    return True


def check_db_name(db_name: str) -> bool:
Expand Down

0 comments on commit e4531f4

Please sign in to comment.