From e4531f40abfbf214202456bc13e21c0296747448 Mon Sep 17 00:00:00 2001 From: Ben Bonfil Date: Wed, 6 Dec 2023 11:29:44 +0100 Subject: [PATCH] use xpyth-parser for parsing xpath (#91) * use xpyth-parser for parsing xpath * added a comment --- backend/requirements.in | 3 ++- backend/requirements.txt | 11 +++++++++-- backend/search/basex_search.py | 15 ++++++++++----- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/backend/requirements.in b/backend/requirements.in index 59f41045..aea877fc 100644 --- a/backend/requirements.in +++ b/backend/requirements.in @@ -11,4 +11,5 @@ alpino-query>=2.1.10 celery[redis]>=5.2.0 mwe-query>=0.0.4 urllib3<=2.0.0 -psycopg2 \ No newline at end of file +psycopg2 +xpyth-parser \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 23f11163..1adf1d39 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -101,7 +101,9 @@ idna==3.4 iniconfig==2.0.0 # via pytest isodate==0.6.1 - # via rdflib + # via + # rdflib + # xpyth-parser jinja2==3.1.2 # via spacy kombu==5.3.1 @@ -114,6 +116,7 @@ lxml==4.9.3 # folia # sastadev # tei-reader + # xpyth-parser markupsafe==2.1.3 # via jinja2 murmurhash==1.0.9 @@ -155,7 +158,9 @@ pydantic==2.1.1 pydantic-core==2.4.0 # via pydantic pyparsing==3.1.1 - # via rdflib + # via + # rdflib + # xpyth-parser pytest==7.4.0 # via # -r requirements.in @@ -258,6 +263,8 @@ wcwidth==0.2.6 # via prompt-toolkit xlsxwriter==3.1.2 # via sastadev +xpyth-parser==0.0.10 + # via -r requirements.in # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/backend/search/basex_search.py b/backend/search/basex_search.py index 996fe3fe..91fa97cf 100644 --- a/backend/search/basex_search.py +++ b/backend/search/basex_search.py @@ -1,12 +1,14 @@ """Auxiliary functions to facilitate searching in BaseX.""" import lxml.etree +import logging import string from io import StringIO from typing import List - +from xpyth_parser.parse import Parser from .types import BaseXMatch, Result +log = logging.getLogger() ALLOWED_DBNAME_CHARS = string.ascii_letters + string.digits + \ '!#$%&\'()+-=@[]^_`{}~.' @@ -16,11 +18,14 @@ def check_xpath(xpath: str) -> bool: """Return True if a string is (only) a valid XPath, otherwise False.""" try: - lxml.etree.XPath(xpath) - except lxml.etree.XPathError: - return False - else: + # for some reason ".." isn't being properly parsed. since it's an abbreviation for + # parent::node(), we can replace it ahead of time + Parser(xpath.replace('..', 'parent::node()'), no_resolve=True) return True + except: + log.exception('XPath parse exception') + + return False def check_db_name(db_name: str) -> bool: