From d6ce10891b6914dc5f3d5c19794ac038e30b5a57 Mon Sep 17 00:00:00 2001 From: dragoncoder047 <101021094+dragoncoder047@users.noreply.github.com> Date: Thu, 21 Mar 2024 15:12:04 +0000 Subject: [PATCH] add subexpression parser --- pickle.cpp | 122 ++++++++++++++++++++++++++++++++++---------- pickle_test.cpp | 2 +- test/out32.txt | 6 +-- test/out64.txt | 6 +-- test/valgrind32.txt | 28 +++++----- test/valgrind64.txt | 28 +++++----- 6 files changed, 131 insertions(+), 61 deletions(-) diff --git a/pickle.cpp b/pickle.cpp index 51eb711..e9d3d94 100644 --- a/pickle.cpp +++ b/pickle.cpp @@ -172,35 +172,40 @@ static void bufcat(char** b, const char* c, int n) { asprintf(b, "%s%.*s", *b ? *b : "", n, c); free(ob); } +static char revparen(char p) { + const char* a = "([{}])"; + const char* b = ")]}{[("; + return b[strchr(a, p) - a]; +} - -static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) { +static object* do_parse(pvm* vm, pstate* s, object** errors, char* special) { char c = look; char* b = NULL; char* b2 = NULL; object* result = nil; if (isalpha(c)) { + DBG("symbol"); size_t p = pos; while (!eofp && test(isalpha)) next; bufcat(&b, at(p), pos - p); result = vm->sym(b); } else if (isdigit(c)) { + DBG("number"); double d; int64_t n; int num; int ok = sscanf(here, "%lg%n", &d, &num); - if (ok == 2) result = vm->number(d); + if (ok) result = vm->number(d); else { ok = sscanf(here, "%" SCNi64 "%n", &n, &num); - if (ok == 2) result = vm->integer(n); - else { - *error = true; - result = vm->string("scanf error"); - } + if (ok) result = vm->integer(n); + else vm->push(vm->string("scanf error"), *errors); } - if (ok == 2) advance num; + DBG("ok = %i", ok); + if (ok) advance num; } else if (isspace(c) && c != '\n') { + DBG("small space"); result = vm->sym("SPACE"); while (test(isspace) && c != '\n') next; } @@ -208,23 +213,25 @@ static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) { // get comment or 1-character # operator next; if (look != '#') { + DBG("hash"); // it's a # operator result = vm->sym("#"); } else { next; if (look != '#') { + DBG("line comment"); // it's a line comment do bufadd(&b, look), next; while (look != '\n'); result = vm->string(b); } else { + DBG("block comment"); // it's a block comment bufcat(&b2, "###", 3); next; while (look == '#') bufadd(&b2, '#'), next; do bufadd(&b, look), next; while (!eofp && !chomp(b2)); if (eofp) { - *error = true; - result = vm->string("error: unterminated block comment"); + vm->push(vm->string("unterminated block comment"), *errors); goto done; } result = vm->string(b); @@ -232,9 +239,11 @@ static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) { } } else if (c == '"' || c == '\'') { + DBG("quote string"); char start = c; next; while (look != start && !eofp && look != '\n') { + // TODO: paren stack for nested interpolations (like PEP 701) char ch = look; if (ch == '\\') { next; @@ -244,12 +253,12 @@ static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) { next; } if (look != start) { - *error = true; - result = vm->string("error: unclosed string"); + vm->push(vm->string("unclosed string"), *errors); } else result = vm->string(b); } else if (c == '\n') { + DBG("block string"); getindent: // parser block next; // eat newline @@ -258,19 +267,25 @@ static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) { next; } if (look == '\n') { + // Blank line free(b2); b2 = NULL; goto getindent; } + if (!b2 || !strlen(b2)) { + // no indent + result = vm->sym("NEWLINE"); + goto done; + } // validate indent for (char* c = b2; *c; c++) { if (*c != *b2) { - *error = true; - result = vm->string("error: mix of spaces and tabs indenting block"); + vm->push(vm->string("mix of spaces and tabs indenting block"), *errors); goto done; } } for (;;) { + // TODO: stop chomping with parens // get one line do bufadd(&b, look), next; while (!eofp && look != '\n'); bufadd(&b, '\n'); @@ -288,8 +303,7 @@ static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) { } // not a blank line if (has_indent) { - result = vm->string("error: unindent does not match previous indent"); - *error = true; + vm->push(vm->string("unindent does not match previous indent"), *errors); goto done; } // completely unindented @@ -298,17 +312,74 @@ static object* do_parse(pvm* vm, pstate* s, bool* error, char* special) { } result = vm->string(b); } + else if (c == '{') { + DBG("curly string"); + object* stack = vm->cons(vm->integer((int64_t)c), nil); + next; + while (!eofp) { + char ch = look; + next; + // Check for open parens + if (strchr("([{", ch)) { + vm->push(vm->integer((int64_t)ch), stack); + } + // Check for close parens + if (strchr("}])", ch)) { + char got_opener = revparen(ch); + object* top = vm->pop(stack); + if (!top) { + vm->push(vm->string("internal parser error (curlystack empty)"), *errors); + } else { + char expected_opener = (char)vm->intof(top); + if (got_opener != expected_opener) { + vm->push(vm->string("mismatched paren"), *errors); + } + } + if (!stack) break; + } + bufadd(&b, ch); + } + if (eofp) { + vm->push(vm->string("unclosed curly string"), *errors); + } + else result = vm->string(b); + } + else if (c == '(' || c == '[') { + DBG("subexpression"); + // Subexpressions and lists are treated the same right now. + // TODO: fix this + object** tail = &result; + next; + char close = revparen(c); + do { + object* item = do_parse(vm, s, errors, special); + if (*special) { + if (*special == close) goto closed; + vm->push(vm->string("mismatched paren"), *errors); + } + *tail = vm->cons(item, nil); + tail = &cdr(*tail); + } while (!eofp); + vm->push(vm->string("unclosed subexpression"), *errors); + closed: + *special = 0; + } else if (strchr("(){}[]", c)) { + DBG("other special"); + next; *special = c; } else if (ispunct(c)) { + DBG("punctuation symbol"); // must test for other punctuation last to allow other special cases to take precedence + next; bufadd(&b, c); result = vm->sym(b); } else { - *error = true; - result = vm->string("unknown parser error"); + DBG("other crap"); + next; + vm->push(vm->string("unexpected crap"), *errors); } done: free(b); @@ -322,20 +393,19 @@ object* parse(pvm* vm, object* cookie, object* inst_type) { DBG("parsing"); object* string = vm->pop(); if (string->type != &string_type) { - vm->push_data(vm->string("error: non string to parse()")); + vm->push_data(vm->cons(vm->string("non string to parse()"), nil)); return vm->sym("error"); } const char* str = vm->stringof(string); pstate s = { .data = str, .i = 0, .len = strlen(str) }; - bool error = false; char special = 0; - object* result = do_parse(vm, &s, &error, &special); + object* errors = nil; + object* result = do_parse(vm, &s, &errors, &special); if (special) { - result = vm->string("unknown syntax error"); - error = true; + vm->push(vm->string("unknown syntax error"), errors); } - vm->push_data(result); - return error ? vm->sym("error") : nil; + vm->push_data(errors ? errors : result); + return errors ? vm->sym("error") : nil; } static object* get_best_match(pvm* vm, object* ast, object** env) { diff --git a/pickle_test.cpp b/pickle_test.cpp index bfe3eca..315b037 100644 --- a/pickle_test.cpp +++ b/pickle_test.cpp @@ -36,7 +36,7 @@ int main() { vm.push_data(vm.integer(42)); vm.push_data(st); vm.push_data(vm.integer(42)); - vm.push_data(vm.string("(+ 1 2) ## #### block comment '\n\n ###### \n\n\n foo")); + vm.push_data(vm.string("[(+ 1 2)\n## #### block comment '\n\n ###### \n\n\n foo]")); printf("\nqueue with data: "); vm.dump(vm.queue); putchar('\n'); diff --git a/test/out32.txt b/test/out32.txt index 6297f64..9923982 100644 --- a/test/out32.txt +++ b/test/out32.txt @@ -1,12 +1,12 @@ st data: #1=(1 2 1 2 . #1#) -queue with data: #1=((("(+ 1 2) ## #### block comment '\n\n ###### \n\n\n foo" 42 #2=(1 2 1 2 . #2#) 42) NIL (NIL parse . "normal") (NIL test_test . "output result") (error test_test . "from error handler")) (NIL NIL) . #1#) +queue with data: #1=((("[(+ 1 2)\n## #### block comment '\n\n ###### \n\n\n foo]" 42 #2=(1 2 1 2 . #2#) 42) NIL (NIL parse . "normal") (NIL test_test . "output result") (error test_test . "from error handler")) (NIL NIL) . #1#) -queue = #1=((NIL NIL) (("unknown syntax error" 42 #2=(1 2 1 2 . #2#) 42) error (NIL test_test . "output result") (error test_test . "from error handler")) . #1#) +queue = #1=((NIL NIL) ((("unclosed subexpression") 42 #2=(1 2 1 2 . #2#) 42) error (NIL test_test . "output result") (error test_test . "from error handler")) . #1#) Hello from test_test()! inst_type = error cookie = "from error handler" -top of stack = "unknown syntax error" +top of stack = ("unclosed subexpression") queue = #1=(((42 #2=(1 2 1 2 . #2#) 42) debug (debug test_test . "from inside test_test()")) . #1#) diff --git a/test/out64.txt b/test/out64.txt index 6297f64..9923982 100644 --- a/test/out64.txt +++ b/test/out64.txt @@ -1,12 +1,12 @@ st data: #1=(1 2 1 2 . #1#) -queue with data: #1=((("(+ 1 2) ## #### block comment '\n\n ###### \n\n\n foo" 42 #2=(1 2 1 2 . #2#) 42) NIL (NIL parse . "normal") (NIL test_test . "output result") (error test_test . "from error handler")) (NIL NIL) . #1#) +queue with data: #1=((("[(+ 1 2)\n## #### block comment '\n\n ###### \n\n\n foo]" 42 #2=(1 2 1 2 . #2#) 42) NIL (NIL parse . "normal") (NIL test_test . "output result") (error test_test . "from error handler")) (NIL NIL) . #1#) -queue = #1=((NIL NIL) (("unknown syntax error" 42 #2=(1 2 1 2 . #2#) 42) error (NIL test_test . "output result") (error test_test . "from error handler")) . #1#) +queue = #1=((NIL NIL) ((("unclosed subexpression") 42 #2=(1 2 1 2 . #2#) 42) error (NIL test_test . "output result") (error test_test . "from error handler")) . #1#) Hello from test_test()! inst_type = error cookie = "from error handler" -top of stack = "unknown syntax error" +top of stack = ("unclosed subexpression") queue = #1=(((42 #2=(1 2 1 2 . #2#) 42) debug (debug test_test . "from inside test_test()")) . #1#) diff --git a/test/valgrind32.txt b/test/valgrind32.txt index fff79a4..c70e1cc 100644 --- a/test/valgrind32.txt +++ b/test/valgrind32.txt @@ -1,14 +1,14 @@ -==7447== Memcheck, a memory error detector -==7447== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. -==7447== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info -==7447== Command: ./pickletest32 -==7447== -==7447== -==7447== HEAP SUMMARY: -==7447== in use at exit: 0 bytes in 0 blocks -==7447== total heap usage: 14 allocs, 14 frees, 25,776 bytes allocated -==7447== -==7447== All heap blocks were freed -- no leaks are possible -==7447== -==7447== For lists of detected and suppressed errors, rerun with: -s -==7447== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) +==21676== Memcheck, a memory error detector +==21676== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. +==21676== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info +==21676== Command: ./pickletest32 +==21676== +==21676== +==21676== HEAP SUMMARY: +==21676== in use at exit: 0 bytes in 0 blocks +==21676== total heap usage: 99 allocs, 99 frees, 30,259 bytes allocated +==21676== +==21676== All heap blocks were freed -- no leaks are possible +==21676== +==21676== For lists of detected and suppressed errors, rerun with: -s +==21676== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) diff --git a/test/valgrind64.txt b/test/valgrind64.txt index 68b5f3b..db71841 100644 --- a/test/valgrind64.txt +++ b/test/valgrind64.txt @@ -1,14 +1,14 @@ -==7425== Memcheck, a memory error detector -==7425== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. -==7425== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info -==7425== Command: ./pickletest64 -==7425== -==7425== -==7425== HEAP SUMMARY: -==7425== in use at exit: 0 bytes in 0 blocks -==7425== total heap usage: 14 allocs, 14 frees, 81,076 bytes allocated -==7425== -==7425== All heap blocks were freed -- no leaks are possible -==7425== -==7425== For lists of detected and suppressed errors, rerun with: -s -==7425== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) +==21657== Memcheck, a memory error detector +==21657== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. +==21657== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info +==21657== Command: ./pickletest64 +==21657== +==21657== +==21657== HEAP SUMMARY: +==21657== in use at exit: 0 bytes in 0 blocks +==21657== total heap usage: 99 allocs, 99 frees, 85,559 bytes allocated +==21657== +==21657== All heap blocks were freed -- no leaks are possible +==21657== +==21657== For lists of detected and suppressed errors, rerun with: -s +==21657== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)