Skip to content

Commit

Permalink
Merge pull request #60 from alaviss/improve-grammar
Browse files Browse the repository at this point in the history
improve grammar parsing capability

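- Unicode is back 🎉: identifiers may contain any non-ASCII character again, and the Unicode operators (e.g. `±`, `⊛`, `∨`) are re-enabled
- Tuple types and generic parameter lists are now allowed to be empty (`tuple[]`, `proc generic[]()`)
- Term rewriting pattern syntax is fully implemented, including patterns made of several `;`-separated statements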
  • Loading branch information
alaviss authored Oct 22, 2023
2 parents 6e7f09a + c9dd5ef commit bbbbf39
Show file tree
Hide file tree
Showing 7 changed files with 1,361,063 additions and 1,358,919 deletions.
72 changes: 64 additions & 8 deletions corpus/declarations.txt
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ l_e_t
b: uint32 = 42
(e, f) = (1, 2)

let 😂 = fun

--------------------------------------------------------------------------------

(source_file
Expand Down Expand Up @@ -221,7 +223,13 @@ l_e_t
name: (identifier))))
value: (tuple_construction
(integer_literal)
(integer_literal)))))
(integer_literal))))
(let_section
(variable_declaration
(symbol_declaration_list
(symbol_declaration
name: (identifier)))
value: (identifier))))

================================================================================
Var declarations
Expand Down Expand Up @@ -912,6 +920,15 @@ proc trailing(x,)

proc trailing_fun(x, y,: float)

proc generic[]() = discard

template foo{x; y += 10}() = discard

template foo{
var x = 100;
x * x + 100
}() = discard

--------------------------------------------------------------------------------

(source_file
Expand All @@ -930,12 +947,13 @@ proc trailing_fun(x, y,: float)
(template_declaration
name: (identifier)
rewrite_pattern: (term_rewriting_pattern
(call
function: (accent_quoted
(identifier))
(argument_list
(identifier)
(integer_literal))))
(statement_list
(call
function: (accent_quoted
(identifier))
(argument_list
(identifier)
(integer_literal)))))
parameters: (parameter_declaration_list
(parameter_declaration
(symbol_declaration_list
Expand Down Expand Up @@ -1050,7 +1068,45 @@ proc trailing_fun(x, y,: float)
(symbol_declaration
name: (identifier)))
type: (type_expression
(identifier))))))
(identifier)))))
(proc_declaration
name: (identifier)
generic_parameters: (generic_parameter_list)
parameters: (parameter_declaration_list)
body: (statement_list
(discard_statement)))
(template_declaration
name: (identifier)
rewrite_pattern: (term_rewriting_pattern
(statement_list
(identifier)
(infix_expression
left: (identifier)
operator: (operator)
right: (integer_literal))))
parameters: (parameter_declaration_list)
body: (statement_list
(discard_statement)))
(template_declaration
name: (identifier)
rewrite_pattern: (term_rewriting_pattern
(statement_list
(var_section
(variable_declaration
(symbol_declaration_list
(symbol_declaration
name: (identifier)))
value: (integer_literal)))
(infix_expression
left: (infix_expression
left: (identifier)
operator: (operator)
right: (identifier))
operator: (operator)
right: (integer_literal))))
parameters: (parameter_declaration_list)
body: (statement_list
(discard_statement))))

================================================================================
Enum declarations
Expand Down
26 changes: 24 additions & 2 deletions corpus/expressions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ echo $a

*:bar

±10

--------------------------------------------------------------------------------

(source_file
Expand Down Expand Up @@ -103,7 +105,10 @@ echo $a
(prefix_expression
operator: (operator)
(ERROR)
(identifier)))
(identifier))
(prefix_expression
operator: (operator)
(integer_literal)))

================================================================================
Infix expressions
Expand Down Expand Up @@ -142,6 +147,9 @@ else: 4

1+ 1

1 * 2 ⊛ 4 ∨ 5


--------------------------------------------------------------------------------

(source_file
Expand Down Expand Up @@ -255,6 +263,16 @@ else: 4
(infix_expression
left: (integer_literal)
operator: (operator)
right: (integer_literal))
(infix_expression
left: (infix_expression
left: (infix_expression
left: (integer_literal)
operator: (operator)
right: (integer_literal))
operator: (operator)
right: (integer_literal))
operator: (operator)
right: (integer_literal)))

================================================================================
Expand Down Expand Up @@ -1436,6 +1454,8 @@ ptr ptr char

(var int, var int)

tuple[]

--------------------------------------------------------------------------------

(source_file
Expand All @@ -1456,7 +1476,9 @@ ptr ptr char
(identifier)))
(var_type
(type_expression
(identifier)))))
(identifier))))
(tuple_type
(field_declaration_list)))

================================================================================
Routine expressions
Expand Down
90 changes: 44 additions & 46 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ const NumericLiteral = token(
)
)
);
const Identifier = /[a-zA-Z\x80-\xff](_?[a-zA-Z0-9\x80-\xff])*/;

// Nim doesn't define any Unicode categories for identifiers, so accept every
// non-ASCII code point except the Unicode operator characters excluded below.
const Identifier =
/[a-zA-Z\U00000080-\U0010FFFF&&[^∙∘×★⊗⊘⊙⊛⊠⊡∩∧⊓±⊕⊖⊞⊟∪∨⊔]](_?[a-zA-Z0-9\U00000080-\U0010FFFF&&[^∙∘×★⊗⊘⊙⊛⊠⊡∩∧⊓±⊕⊖⊞⊟∪∨⊔]])*/;
const CharEscapeSequence = /[rRcCnNlLfFtTvV\\"'aAbBeE]|\d+|[xX][0-9a-fA-F]{2}/;

const Templates = {
Expand Down Expand Up @@ -103,27 +106,27 @@ const Operators = [
".",
":",
"\\",
// "∙",
// "∘",
// "×",
// "★",
// "⊗",
// "⊘",
// "⊙",
// "⊛",
// "⊠",
// "⊡",
// "∩",
// "∧",
// "⊓",
// "±",
// "⊕",
// "⊖",
// "⊞",
// "⊟",
// "∪",
// "∨",
// "⊔",
"∙",
"∘",
"×",
"★",
"⊗",
"⊘",
"⊙",
"⊛",
"⊠",
"⊡",
"∩",
"∧",
"⊓",
"±",
"⊕",
"⊖",
"⊞",
"⊟",
"∪",
"∨",
"⊔",
];
const InfixOperators = {
R10: token(seq("^", repeat(choice(...Operators)))),
Expand All @@ -133,34 +136,28 @@ const InfixOperators = {
choice(
"%",
"\\",
"/"
// "∙",
// "∘",
// "×",
// "★",
// "⊗",
// "⊘",
// "⊙",
// "⊛",
// "⊠",
// "⊡",
// "∩",
// "∧",
// "⊓"
"/",
"∙",
"∘",
"×",
"★",
"⊗",
"⊘",
"⊙",
"⊛",
"⊠",
"⊡",
"∩",
"∧",
"⊓"
),
repeat(choice(...Operators))
)
),
L9Star: token(seq("*", repeat(choice(...Operators.filter(x => x != ":"))))),
L8: token(
seq(
choice(
"+",
"-",
"~",
"|"
// "±", "⊕", "⊖", "⊞", "⊟", "∪", "∨", "⊔"
),
choice("+", "-", "~", "|", "±", "⊕", "⊖", "⊞", "⊟", "∪", "∨", "⊔"),
repeat(choice(...Operators))
)
),
Expand Down Expand Up @@ -521,8 +518,9 @@ module.exports = grammar({
field("body", optional(seq("=", $.statement_list)))
),
generic_parameter_list: $ =>
seq("[", $._parameter_declaration_list, $._bracket_close),
term_rewriting_pattern: $ => seq("{", $._statement, $._curly_close),
seq("[", optional($._parameter_declaration_list), $._bracket_close),
term_rewriting_pattern: $ =>
seq("{", alias($._semi_statement_list, $.statement_list), $._curly_close),

using_section: $ => seq(keyword("using"), $._variable_declaration_section),
const_section: $ => seq(keyword("const"), $._variable_declaration_section),
Expand Down Expand Up @@ -1064,7 +1062,7 @@ module.exports = grammar({
_tuple_field_declaration_list: $ =>
seq(
choice("[", token.immediate("[")),
$._field_declaration_list,
optional($._field_declaration_list),
$._bracket_close
),

Expand Down
Loading

0 comments on commit bbbbf39

Please sign in to comment.