Optimize with new cPyparsing
evhub committed May 26, 2024
1 parent e2d8a20 commit e2ccf35
Showing 5 changed files with 82 additions and 60 deletions.
18 changes: 1 addition & 17 deletions coconut/_pyparsing.py
@@ -20,7 +20,6 @@
from coconut.root import * # NOQA

import os
import re
import sys
import traceback
from warnings import warn
@@ -146,6 +145,7 @@
# -----------------------------------------------------------------------------------------------------------------------

if MODERN_PYPARSING:
ParserElement.leaveWhitespace = ParserElement.leave_whitespace
SUPPORTS_PACKRAT_CONTEXT = False

elif CPYPARSING:
@@ -290,22 +290,6 @@ def enableIncremental(*args, **kwargs):
all_parse_elements = None


# -----------------------------------------------------------------------------------------------------------------------
# MISSING OBJECTS:
# -----------------------------------------------------------------------------------------------------------------------

python_quoted_string = getattr(_pyparsing, "python_quoted_string", None)
if python_quoted_string is None:
python_quoted_string = _pyparsing.Combine(
# multiline strings must come first
(_pyparsing.Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').setName("multiline double quoted string")
| (_pyparsing.Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''").setName("multiline single quoted string")
| (_pyparsing.Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("double quoted string")
| (_pyparsing.Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("single quoted string")
).setName("Python quoted string")
_pyparsing.python_quoted_string = python_quoted_string


# -----------------------------------------------------------------------------------------------------------------------
# FAST REPRS:
# -----------------------------------------------------------------------------------------------------------------------
11 changes: 6 additions & 5 deletions coconut/compiler/compiler.py
@@ -187,6 +187,7 @@
manage,
sub_all,
ComputationNode,
StartOfStrGrammar,
)
from coconut.compiler.header import (
minify_header,
@@ -1305,7 +1306,7 @@ def streamline(self, grammars, inputstring=None, force=False, inner=False):
input_len = 0 if inputstring is None else len(inputstring)
if force or (streamline_grammar_for_len is not None and input_len > streamline_grammar_for_len):
start_time = get_clock_time()
prep_grammar(grammar, streamline=True)
prep_grammar(grammar, for_scan=False, streamline=True)
logger.log_lambda(
lambda: "Streamlined {grammar} in {time} seconds{info}.".format(
grammar=get_name(grammar),
@@ -1502,7 +1503,7 @@ def str_proc(self, inputstring, **kwargs):
hold["exprs"][-1] += c
elif hold["paren_level"] > 0:
raise self.make_err(CoconutSyntaxError, "imbalanced parentheses in format string expression", inputstring, i, reformat=False)
elif match_in(self.end_f_str_expr, remaining_text):
elif does_parse(self.end_f_str_expr, remaining_text):
hold["in_expr"] = False
hold["str_parts"].append(c)
else:
@@ -2128,11 +2129,11 @@ def tre_return_handle(loc, tokens):
type_ignore=self.type_ignore_comment(),
)
self.tre_func_name <<= base_keyword(func_name).suppress()
return attach(
self.tre_return,
return StartOfStrGrammar(attach(
self.tre_return_base,
tre_return_handle,
greedy=True,
)
))

def detect_is_gen(self, raw_lines):
"""Determine if the given function code is for a generator."""
71 changes: 38 additions & 33 deletions coconut/compiler/grammar.py
@@ -40,15 +40,13 @@
Optional,
ParserElement,
StringEnd,
StringStart,
Word,
ZeroOrMore,
hexnums,
nums,
originalTextFor,
nestedExpr,
FollowedBy,
python_quoted_string,
restOfLine,
)

@@ -119,6 +117,7 @@
using_fast_grammar_methods,
disambiguate_literal,
any_of,
StartOfStrGrammar,
)


@@ -924,7 +923,6 @@ class Grammar(object):
# rparen handles simple stmts ending parenthesized stmt lambdas
end_simple_stmt_item = FollowedBy(newline | semicolon | rparen)

start_marker = StringStart()
moduledoc_marker = condense(ZeroOrMore(lineitem) - Optional(moduledoc_item))
end_marker = StringEnd()
indent = Literal(openindent)
@@ -2669,19 +2667,19 @@ class Grammar(object):
line = newline | stmt

file_input = condense(moduledoc_marker - ZeroOrMore(line))
raw_file_parser = start_marker - file_input - end_marker
raw_file_parser = StartOfStrGrammar(file_input - end_marker)
line_by_line_file_parser = (
start_marker - moduledoc_marker - stores_loc_item,
start_marker - line - stores_loc_item,
StartOfStrGrammar(moduledoc_marker - stores_loc_item),
StartOfStrGrammar(line - stores_loc_item),
)
file_parser = line_by_line_file_parser if USE_LINE_BY_LINE else raw_file_parser

single_input = condense(Optional(line) - ZeroOrMore(newline))
eval_input = condense(testlist - ZeroOrMore(newline))

single_parser = start_marker - single_input - end_marker
eval_parser = start_marker - eval_input - end_marker
some_eval_parser = start_marker + eval_input
single_parser = StartOfStrGrammar(single_input - end_marker)
eval_parser = StartOfStrGrammar(eval_input - end_marker)
some_eval_parser = StartOfStrGrammar(eval_input)

parens = originalTextFor(nestedExpr("(", ")", ignoreExpr=None))
brackets = originalTextFor(nestedExpr("[", "]", ignoreExpr=None))
@@ -2699,15 +2697,16 @@ class Grammar(object):
)
)
unsafe_xonsh_parser, _impl_call_ref = disable_inside(
single_parser,
single_input - end_marker,
unsafe_impl_call_ref,
)
impl_call_ref <<= _impl_call_ref
xonsh_parser, _anything_stmt, _xonsh_command = disable_outside(
_xonsh_parser, _anything_stmt, _xonsh_command = disable_outside(
unsafe_xonsh_parser,
unsafe_anything_stmt,
unsafe_xonsh_command,
)
xonsh_parser = StartOfStrGrammar(_xonsh_parser)
anything_stmt <<= _anything_stmt
xonsh_command <<= _xonsh_command

@@ -2731,7 +2730,7 @@

noqa_regex = compile_regex(r"\b[Nn][Oo][Qq][Aa]\b")

just_non_none_atom = start_marker + ~keyword("None") + known_atom + end_marker
just_non_none_atom = StartOfStrGrammar(~keyword("None") + known_atom + end_marker)

original_function_call_tokens = (
lparen.suppress() + rparen.suppress()
@@ -2741,19 +2740,17 @@
)

tre_func_name = Forward()
tre_return = (
start_marker
+ keyword("return").suppress()
tre_return_base = (
keyword("return").suppress()
+ maybeparens(
lparen,
tre_func_name + original_function_call_tokens,
rparen,
) + end_marker
)

tco_return = attach(
start_marker
+ keyword("return").suppress()
tco_return = StartOfStrGrammar(attach(
keyword("return").suppress()
+ maybeparens(
lparen,
disallow_keywords(untcoable_funcs, with_suffix="(")
@@ -2778,7 +2775,7 @@
tco_return_handle,
# this is the root in what it's used for, so might as well evaluate greedily
greedy=True,
)
))

rest_of_lambda = Forward()
lambdas = keyword("lambda") - rest_of_lambda - colon
Expand Down Expand Up @@ -2818,9 +2815,8 @@ class Grammar(object):
))
)

split_func = (
start_marker
- keyword("def").suppress()
split_func = StartOfStrGrammar(
keyword("def").suppress()
- unsafe_dotted_name
- Optional(brackets).suppress()
- lparen.suppress()
@@ -2834,13 +2830,13 @@
| ~indent + ~dedent + any_char + keyword("for") + unsafe_name + keyword("in")
)

just_a_string = start_marker + string_atom + end_marker
just_a_string = StartOfStrGrammar(string_atom + end_marker)

end_of_line = end_marker | Literal("\n") | pound

unsafe_equals = Literal("=")

parse_err_msg = start_marker + (
parse_err_msg = StartOfStrGrammar(
# should be in order of most likely to actually be the source of the error first
fixto(
ZeroOrMore(~questionmark + ~Literal("\n") + any_char)
@@ -2859,22 +2855,31 @@
start_f_str_regex = compile_regex(r"\br?fr?$")
start_f_str_regex_len = 4

end_f_str_expr = combine(start_marker + (rbrace | colon | bang))
end_f_str_expr = StartOfStrGrammar(combine(rbrace | colon | bang).leaveWhitespace())

python_quoted_string = regex_item(
# multiline strings must come first
r'"""(?:[^"\\]|\n|""(?!")|"(?!"")|\\.)*"""'
r"|'''(?:[^'\\]|\n|''(?!')|'(?!'')|\\.)*'''"
r'|"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*"'
r"|'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*'"
)

string_start = start_marker + python_quoted_string
string_start = StartOfStrGrammar(python_quoted_string)

no_unquoted_newlines = start_marker + ZeroOrMore(python_quoted_string | ~Literal("\n") + any_char) + end_marker
no_unquoted_newlines = StartOfStrGrammar(
ZeroOrMore(python_quoted_string | ~Literal("\n") + any_char)
+ end_marker
)

operator_stmt = (
start_marker
+ keyword("operator").suppress()
operator_stmt = StartOfStrGrammar(
keyword("operator").suppress()
+ restOfLine
)

unsafe_import_from_name = condense(ZeroOrMore(unsafe_dot) + unsafe_dotted_name | OneOrMore(unsafe_dot))
from_import_operator = (
start_marker
+ keyword("from").suppress()
from_import_operator = StartOfStrGrammar(
keyword("from").suppress()
+ unsafe_import_from_name
+ keyword("import").suppress()
+ keyword("operator").suppress()
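A minimal standalone sketch of the new python_quoted_string definition above (moved here from coconut/_pyparsing.py and rebuilt as a single regex_item), using plain re rather than the project's regex_item wrapper, to show why the "multiline strings must come first" comment matters; the variable name and prints are illustrative only:

    import re

    # Same pattern as the new python_quoted_string in grammar.py; the
    # triple-quoted alternatives must come first, otherwise '"""x"""' would
    # match as the empty single-line string '""' plus leftover text.
    python_quoted_string = re.compile(
        r'"""(?:[^"\\]|\n|""(?!")|"(?!"")|\\.)*"""'
        r"|'''(?:[^'\\]|\n|''(?!')|'(?!'')|\\.)*'''"
        r'|"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*"'
        r"|'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*'"
    )

    print(python_quoted_string.match('"""multi\nline"""').group(0))  # full triple-quoted string
    print(python_quoted_string.match('"one line"').group(0))         # full single-quoted string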
40 changes: 36 additions & 4 deletions coconut/compiler/util.py
@@ -72,6 +72,7 @@
ParserElement,
MatchFirst,
And,
StringStart,
_trim_arity,
_ParseResultsWithOffset,
all_parse_elements,
@@ -610,8 +611,31 @@ def parsing_context(inner_parse=None):
yield


def prep_grammar(grammar, streamline=False):
class StartOfStrGrammar(object):
"""A container object that denotes grammars that should always be parsed at the start of the string."""
__slots__ = ("grammar",)
start_marker = StringStart()

def __init__(self, grammar):
self.grammar = grammar

def with_start_marker(self):
"""Get the grammar with the start marker."""
internal_assert(not CPYPARSING, "StartOfStrGrammar.with_start_marker() should only be necessary without cPyparsing")
return self.start_marker + self.grammar

@property
def name(self):
return get_name(self.grammar)


def prep_grammar(grammar, for_scan, streamline=False):
"""Prepare a grammar item to be used as the root of a parse."""
if isinstance(grammar, StartOfStrGrammar):
if for_scan:
grammar = grammar.with_start_marker()
else:
grammar = grammar.grammar
grammar = trace(grammar)
if streamline:
grammar.streamlined = False
@@ -624,7 +648,7 @@ def prep_grammar(grammar, streamline=False):
def parse(grammar, text, inner=None, eval_parse_tree=True):
"""Parse text using grammar."""
with parsing_context(inner):
result = prep_grammar(grammar).parseString(text)
result = prep_grammar(grammar, for_scan=False).parseString(text)
if eval_parse_tree:
result = unpack(result)
return result
@@ -645,8 +669,12 @@ def does_parse(grammar, text, inner=None):

def all_matches(grammar, text, inner=None, eval_parse_tree=True):
"""Find all matches for grammar in text."""
kwargs = {}
if CPYPARSING and isinstance(grammar, StartOfStrGrammar):
grammar = grammar.grammar
kwargs["maxStartLoc"] = 0
with parsing_context(inner):
for tokens, start, stop in prep_grammar(grammar).scanString(text):
for tokens, start, stop in prep_grammar(grammar, for_scan=True).scanString(text, **kwargs):
if eval_parse_tree:
tokens = unpack(tokens)
yield tokens, start, stop
@@ -668,8 +696,12 @@ def match_in(grammar, text, inner=None):

def transform(grammar, text, inner=None):
"""Transform text by replacing matches to grammar."""
kwargs = {}
if CPYPARSING and isinstance(grammar, StartOfStrGrammar):
grammar = grammar.grammar
kwargs["maxStartLoc"] = 0
with parsing_context(inner):
result = prep_grammar(add_action(grammar, unpack)).transformString(text)
result = prep_grammar(add_action(grammar, unpack), for_scan=True).transformString(text, **kwargs)
if result == text:
result = None
return result
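The StartOfStrGrammar changes above boil down to one idea: a grammar that should only ever match at position 0 can either be anchored with an explicit StringStart() element (the with_start_marker() fallback), or scanned with maxStartLoc=0, which the util.py changes only pass when running under cPyparsing. A rough sketch in plain pyparsing, not the project's code; scan_at_start and return_stmt are hypothetical names:

    from pyparsing import Keyword, StringStart, Word, alphas

    # Hypothetical example grammar; matches "return <name>".
    return_stmt = Keyword("return") + Word(alphas)("value")

    def scan_at_start(grammar, text, have_new_cpyparsing=False):
        """Return only matches of grammar that begin at position 0 of text."""
        if have_new_cpyparsing:
            # cPyparsing at or above the minimum version this commit requires
            # (2.4.7.2.3.3) accepts maxStartLoc, so scanString never tries
            # start positions past 0 and no StringStart() element is needed.
            return list(grammar.scanString(text, maxStartLoc=0))
        # Plain-pyparsing fallback, as StartOfStrGrammar.with_start_marker()
        # does: prepend an explicit StringStart() anchor.
        return list((StringStart() + grammar).scanString(text))

    print(scan_at_start(return_stmt, "return x"))    # one match, anchored at 0
    print(scan_at_start(return_stmt, "# return x"))  # no match: not at the start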
2 changes: 1 addition & 1 deletion coconut/constants.py
@@ -1019,7 +1019,7 @@ def get_path_env_var(env_var, default):

# min versions are inclusive
unpinned_min_versions = {
"cPyparsing": (2, 4, 7, 2, 3, 2),
"cPyparsing": (2, 4, 7, 2, 3, 3),
("pre-commit", "py3"): (3,),
("psutil", "py>=27"): (5,),
"jupyter": (1, 0),
