Commit

Warn on implicit str concat
Resolves #837.
evhub committed May 4, 2024
1 parent 42958a6 commit 89825ed
Showing 12 changed files with 241 additions and 111 deletions.
1 change: 1 addition & 0 deletions DOCS.md
@@ -336,6 +336,7 @@ If the `--strict` (`-s` for short) flag is enabled, Coconut will perform additional ...
The style issues which will cause `--strict` to throw an error are:

- mixing of tabs and spaces
- use of `"hello" "world"` implicit string concatenation (use `+` instead for clarity; Coconut will compile the `+` away)
- use of `from __future__` imports (Coconut does these automatically)
- inheriting from `object` in classes (Coconut does this automatically)
- semicolons at end of lines
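A minimal illustration of the new strict check documented above (written as Python, which matches Coconut syntax here; the name greeting is only for the example):

    greeting = "hello, " "world"    # implicit concatenation: flagged under --strict by this commit
    greeting = "hello, " + "world"  # explicit '+': accepted, and folded into a single literal by the compiler

Both forms produce the same string; the explicit `+` just makes the concatenation visible in the source, and the compiler changes below evaluate it away at compile time.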
8 changes: 4 additions & 4 deletions coconut/_pyparsing.py
@@ -39,7 +39,6 @@
min_versions,
max_versions,
pure_python_env_var,
enable_pyparsing_warnings,
use_left_recursion_if_available,
get_bool_env_var,
use_computation_graph_env_var,
@@ -243,8 +242,9 @@ def enableIncremental(*args, **kwargs):
use_computation_graph_env_var,
default=(
not MODERN_PYPARSING # not yet supported
# commented out to minimize memory footprint when running tests:
# and not PYPY # experimentally determined
# technically PYPY is faster without the computation graph, but
# it breaks some features and balloons the memory footprint
# and not PYPY
),
)

@@ -265,7 +265,7 @@ def enableIncremental(*args, **kwargs):

maybe_make_safe = getattr(_pyparsing, "maybe_make_safe", None)

if enable_pyparsing_warnings:
if DEVELOP:
if MODERN_PYPARSING:
_pyparsing.enable_all_warnings()
else:
37 changes: 31 additions & 6 deletions coconut/compiler/compiler.py
@@ -813,6 +813,7 @@ def bind(cls):
cls.unsafe_typedef_tuple <<= attach(cls.unsafe_typedef_tuple_ref, cls.method("unsafe_typedef_tuple_handle"))
cls.impl_call <<= attach(cls.impl_call_ref, cls.method("impl_call_handle"))
cls.protocol_intersect_expr <<= attach(cls.protocol_intersect_expr_ref, cls.method("protocol_intersect_expr_handle"))
cls.and_expr <<= attach(cls.and_expr_ref, cls.method("and_expr_handle"))

# these handlers just do strict/target checking
cls.u_string <<= attach(cls.u_string_ref, cls.method("u_string_check"))
@@ -4567,18 +4568,40 @@ def async_with_for_stmt_handle(self, original, loc, tokens):
loop=loop
)

def string_atom_handle(self, tokens):
def string_atom_handle(self, original, loc, tokens, allow_silent_concat=False):
"""Handle concatenation of string literals."""
internal_assert(len(tokens) >= 1, "invalid string literal tokens", tokens)
if any(s.endswith(")") for s in tokens): # has .format() calls
return "(" + " + ".join(tokens) + ")"
elif any(s.startswith(("f", "rf")) for s in tokens): # has f-strings
return " ".join(tokens)
if len(tokens) == 1:
return tokens[0]
else:
return self.eval_now(" ".join(tokens))
if not allow_silent_concat:
self.strict_err_or_warn("found Python-style implicit string concatenation (use explicit '+' for clarity; Coconut will compile it away)", original, loc)
if any(s.endswith(")") for s in tokens): # has .format() calls
# parens are necessary for string_atom_handle
return "(" + " + ".join(tokens) + ")"
elif any(s.startswith(("f", "rf")) for s in tokens): # has f-strings
return " ".join(tokens)
else:
return self.eval_now(" ".join(tokens))

string_atom_handle.ignore_one_token = True
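The branching in the new string_atom_handle can be hard to follow inside the diff, so here is a standalone sketch that mirrors it. The function name concat_string_literals and the use of ast.literal_eval as a stand-in for the compiler's eval_now are assumptions for illustration only, and the strict-mode warning is omitted:

    import ast

    def concat_string_literals(tokens):
        # tokens are the raw source text of the string literals being joined
        if any(s.endswith(")") for s in tokens):  # has .format() calls
            return "(" + " + ".join(tokens) + ")"  # keep the concatenation at runtime
        elif any(s.startswith(("f", "rf")) for s in tokens):  # has f-strings
            return " ".join(tokens)  # emit adjacent literals in the generated code
        else:
            # pure literals: fold everything into a single literal at compile time
            return repr(ast.literal_eval(" ".join(tokens)))

    print(concat_string_literals(['"hello, "', '"world"']))  # 'hello, world'
    print(concat_string_literals(['f"x={x}"', '" units"']))  # f"x={x}" " units"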

def and_expr_handle(self, original, loc, tokens):
"""Handle expressions that could be explicit string concatenation."""
item, labels = tokens[0]
out = [item]
all_items = [item]
is_str_concat = "IS_STR" in labels
for i in range(1, len(tokens), 2):
op, (item, labels) = tokens[i:i + 2]
out += [op, item]
all_items.append(item)
is_str_concat = is_str_concat and "IS_STR" in labels and op == "+"
if is_str_concat:
return self.string_atom_handle(original, loc, all_items, allow_silent_concat=True)
else:
return " ".join(out)

def unsafe_typedef_tuple_handle(self, original, loc, tokens):
"""Handle Tuples in typedefs."""
tuple_items = self.testlist_star_expr_handle(original, loc, tokens)
@@ -4595,6 +4618,8 @@ def term_handle(self, tokens):
out += [op, term]
return " ".join(out)

term_handle.ignore_one_token = True

def impl_call_handle(self, loc, tokens):
"""Process implicit function application or coefficient syntax."""
internal_assert(len(tokens) >= 2, "invalid implicit call / coefficient tokens", tokens)
28 changes: 10 additions & 18 deletions coconut/compiler/grammar.py
@@ -119,6 +119,7 @@
using_fast_grammar_methods,
disambiguate_literal,
any_of,
add_labels,
)


@@ -592,15 +593,6 @@ def join_match_funcdef(tokens):
)


def kwd_err_msg_handle(tokens):
"""Handle keyword parse error messages."""
kwd, = tokens
if kwd == "def":
return "invalid function definition"
else:
return 'invalid use of the keyword "' + kwd + '"'


def alt_ternary_handle(tokens):
"""Handle if ... then ... else ternary operator."""
cond, if_true, if_false = tokens
@@ -1378,7 +1370,8 @@ class Grammar(object):
# for known_atom, type should be known at compile time
known_atom = (
const_atom
| string_atom
# IS_STR is used by and_expr_handle
| string_atom("IS_STR")
| list_item
| dict_literal
| dict_comp
@@ -1582,14 +1575,13 @@ class Grammar(object):
# arith_expr = exprlist(term, addop)
# shift_expr = exprlist(arith_expr, shift)
# and_expr = exprlist(shift_expr, amp)
and_expr = exprlist(
term,
any_of(
addop,
shift,
amp,
),
term_op = any_of(
addop,
shift,
amp,
)
and_expr = Forward()
and_expr_ref = tokenlist(attach(term, add_labels), term_op, allow_trailing=False, suppress=False)

protocol_intersect_expr = Forward()
protocol_intersect_expr_ref = tokenlist(and_expr, amp_colon, allow_trailing=False)
@@ -2863,7 +2855,7 @@ class Grammar(object):
"misplaced '?' (naked '?' is only supported inside partial application arguments)",
)
| fixto(Optional(keyword("if") + skip_to_in_line(unsafe_equals)) + equals, "misplaced assignment (maybe should be '==')")
| attach(any_keyword_in(keyword_vars + reserved_vars), kwd_err_msg_handle)
| fixto(keyword("def"), "invalid function definition")
| fixto(end_of_line, "misplaced newline (maybe missing ':')")
)

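For context on the grammar changes above: known_atom now tags string_atom with the IS_STR result name, and and_expr is rebuilt as a tokenlist of labeled items so and_expr_handle can tell which operands are string literals. A plain-Python stand-in for that labeling step (not the actual pyparsing-based add_labels helper) might look like:

    def add_labels_sketch(item, matched_names):
        # pair the parsed text with the result names it matched under,
        # mirroring the (item, labels) shape consumed by and_expr_handle
        return (item, sorted(matched_names))

    print(add_labels_sketch('"hello"', {"IS_STR"}))  # ('"hello"', ['IS_STR'])
    print(add_labels_sketch("x", set()))             # ('x', [])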
52 changes: 29 additions & 23 deletions coconut/compiler/matching.py
@@ -639,6 +639,22 @@ def proc_sequence_match(self, tokens, iter_match=False):
elif "elem" in group:
group_type = "elem_matches"
group_contents = group
# must check for f_string before string, since a mixture will be tagged as both
elif "f_string" in group:
group_type = "f_string"
# f strings are always unicode
if seq_type is None:
seq_type = '"'
elif seq_type != '"':
raise CoconutDeferredSyntaxError("string literals and byte literals cannot be mixed in string patterns", self.loc)
for str_literal in group:
if str_literal.startswith("b"):
raise CoconutDeferredSyntaxError("string literals and byte literals cannot be mixed in string patterns", self.loc)
if len(group) == 1:
str_item = group[0]
else:
str_item = self.comp.string_atom_handle(self.original, self.loc, group, allow_silent_concat=True)
group_contents = (str_item, "_coconut.len(" + str_item + ")")
elif "string" in group:
group_type = "string"
for str_literal in group:
@@ -655,16 +671,6 @@
else:
str_item = self.comp.eval_now(" ".join(group))
group_contents = (str_item, len(self.comp.literal_eval(str_item)))
elif "f_string" in group:
group_type = "f_string"
# f strings are always unicode
if seq_type is None:
seq_type = '"'
elif seq_type != '"':
raise CoconutDeferredSyntaxError("string literals and byte literals cannot be mixed in string patterns", self.loc)
internal_assert(len(group) == 1, "invalid f string sequence match group", group)
str_item = group[0]
group_contents = (str_item, "_coconut.len(" + str_item + ")")
else:
raise CoconutInternalException("invalid sequence match group", group)
seq_groups.append((group_type, group_contents))
@@ -682,12 +688,12 @@ def handle_sequence(self, seq_type, seq_groups, item, iter_match=False):
bounded = False
elif gtype == "elem_matches":
min_len_int += len(gcontents)
elif gtype == "string":
str_item, str_len = gcontents
min_len_int += str_len
elif gtype == "f_string":
str_item, str_len = gcontents
min_len_strs.append(str_len)
elif gtype == "string":
str_item, str_len = gcontents
min_len_int += str_len
else:
raise CoconutInternalException("invalid sequence match group type", gtype)
min_len = add_int_and_strs(min_len_int, min_len_strs)
@@ -711,17 +717,17 @@ def handle_sequence(self, seq_type, seq_groups, item, iter_match=False):
self.add_check("_coconut.len(" + head_var + ") == " + str(len(matches)))
self.match_all_in(matches, head_var)
start_ind_int += len(matches)
elif seq_groups[0][0] == "f_string":
internal_assert(not iter_match, "cannot be both f string and iter match")
_, (str_item, str_len) = seq_groups.pop(0)
self.add_check(item + ".startswith(" + str_item + ")")
start_ind_strs.append(str_len)
elif seq_groups[0][0] == "string":
internal_assert(not iter_match, "cannot be both string and iter match")
_, (str_item, str_len) = seq_groups.pop(0)
if str_len > 0:
self.add_check(item + ".startswith(" + str_item + ")")
start_ind_int += str_len
elif seq_groups[0][0] == "f_string":
internal_assert(not iter_match, "cannot be both f string and iter match")
_, (str_item, str_len) = seq_groups.pop(0)
self.add_check(item + ".startswith(" + str_item + ")")
start_ind_strs.append(str_len)
if not seq_groups:
return
start_ind = add_int_and_strs(start_ind_int, start_ind_strs)
@@ -735,17 +741,17 @@ def handle_sequence(self, seq_type, seq_groups, item, iter_match=False):
for i, match in enumerate(matches):
self.match(match, item + "[-" + str(len(matches) - i) + "]")
last_ind_int -= len(matches)
elif seq_groups[-1][0] == "f_string":
internal_assert(not iter_match, "cannot be both f string and iter match")
_, (str_item, str_len) = seq_groups.pop()
self.add_check(item + ".endswith(" + str_item + ")")
last_ind_strs.append("-" + str_len)
elif seq_groups[-1][0] == "string":
internal_assert(not iter_match, "cannot be both string and iter match")
_, (str_item, str_len) = seq_groups.pop()
if str_len > 0:
self.add_check(item + ".endswith(" + str_item + ")")
last_ind_int -= str_len
elif seq_groups[-1][0] == "f_string":
internal_assert(not iter_match, "cannot be both f string and iter match")
_, (str_item, str_len) = seq_groups.pop()
self.add_check(item + ".endswith(" + str_item + ")")
last_ind_strs.append("-" + str_len)
if not seq_groups:
return
last_ind = add_int_and_strs(last_ind_int, last_ind_strs)
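To make the string-versus-f-string split in handle_sequence above concrete, here is a standalone sketch of the prefix check it emits for a leading string group. The function name emit_prefix_checks and the placeholder item name arg are assumptions; the real matcher works on its own generated variable names:

    def emit_prefix_checks(group_type, str_item, str_len, item="arg"):
        checks = []
        if group_type == "f_string":
            # an f-string's length is only known at runtime, so it stays symbolic
            checks.append(item + ".startswith(" + str_item + ")")
            start_ind = "_coconut.len(" + str_item + ")"
        else:  # plain "string" group: the length is known at compile time
            if str_len > 0:
                checks.append(item + ".startswith(" + str_item + ")")
            start_ind = str(str_len)
        return checks, start_ind

    print(emit_prefix_checks("string", "'hi'", 2))
    # (["arg.startswith('hi')"], '2')
    print(emit_prefix_checks("f_string", 'f"{x}"', None))
    # (['arg.startswith(f"{x}")'], '_coconut.len(f"{x}")')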
(Diffs for the remaining 7 changed files are not shown.)
