From 6624b6db57ff1019389f8b4b5c26ba37ed094baa Mon Sep 17 00:00:00 2001 From: sinclair Date: Mon, 25 Nov 2024 04:35:38 +0900 Subject: [PATCH] Documentation --- readme.md | 118 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 56 deletions(-) diff --git a/readme.md b/readme.md index c18b2fb..c9285b3 100644 --- a/readme.md +++ b/readme.md @@ -56,9 +56,9 @@ type R = Static.Parse // type R = [['X', 'Y', 'Z'] ## Overview -ParseBox is a parsing library designed to embed domain-specific languages (DSLs) within the TypeScript type system. It provides a set of runtime and type-level combinators that enable EBNF notation to be encoded as types within TypeScript's type system. These combinators can then be used to parse content at runtime or interactively in editor via static type inference. +ParseBox is a parsing library designed to embed domain-specific languages (DSLs) within the TypeScript type system. It provides a set of runtime and type-level combinators that enable EBNF notation to be encoded as TypeScript types. These combinators can then be used to parse content at runtime or interactively in the editor via static type inference. -This project was originally developed as a parsing system for the [TypeBox](https://github.com/sinclairzx81/typebox) project, where it facilitates parsing TypeScript syntax into runtime types. It offers a robust, standalone system for type-level parsing within TypeScript's type system. +This project was developed as a generalized parsing solution for the [TypeBox](https://github.com/sinclairzx81/typebox) project, where it is currently used to parse TypeScript syntax into runtime types. It aims to provide a robust foundation for parsing a variety of domain-specific languages, with the encoded information in the language able to be reconciled with TypeScript's type system. 
License: MIT @@ -84,11 +84,11 @@ License: MIT ## Combinators -ParseBox provides a minimal set of combinators that map to structures expressible in BNF (Backus-Naur Form). These combinators serve as building blocks for constructing parsers. +ParseBox provides two sets of symmetric combinators for runtime and static environments, modeled on EBNF constructs. Instead of functions, these combinators are composable schema fragments that describe parse operations. Their schematics can be reflected to reconstruct BNF or remapped to other tools. The following section describes the Runtime combinators and their relation to EBNF. ### Const -The Const combinator parses for the next occurrence of the specified string. Whitespace and newline characters are ignored during parsing, unless the specified string explicitly matches those characters. +The Const combinator parses the next occurrence of a specified string, ignoring whitespace and newline characters unless explicitly specified as parameters. **BNF** @@ -109,7 +109,7 @@ const R = Runtime.Parse(T, 'X Y Z') // const R = ['X', ' Y Z'] ### Tuple -The Tuple parser matches a sequence of interior parsers. An empty tuple can be used to represent Epsilon (the empty production). +The Tuple parser matches a sequence of parsers, with an empty tuple representing Epsilon (the empty production). **BNF** @@ -135,7 +135,7 @@ const R = Runtime.Parse(T, 'X Y Z W') // const R = [['X', 'Y', 'Z' ### Union -The Union combinator parses using one of the interior parsers, attempting each in sequence until one matches. +The Union combinator tries each interior parser in sequence until one matches. **BNF** @@ -164,7 +164,7 @@ const R3 = Runtime.Parse(T, 'Z E') // const R3 = ['Z', ' E'] ### Array -The Array combinator will parse for zero or more the interior parser. This combinator will always return a result with an empty array given for no matches. 
+The Array combinator parses zero or more occurrences of the interior parser, returning an empty array if there are no matches. **EBNF** @@ -189,7 +189,7 @@ const R3 = Runtime.Parse(T, 'Y Z') // const R3 = [[], 'Y Z'] ### Optional -The Optional combinator will parse for zero or one of the interior parser. This combinator always succeeds, returning either a tuple with one element, or zero elements for no match. +The Optional combinator parses zero or one occurrence of the interior parser, returning a tuple with one element or an empty tuple if there is no match. **EBNF** @@ -212,7 +212,7 @@ const R2 = Runtime.Parse(T, 'Y Z') // const R2 = [[], 'Y Z'] ## Terminals -ParseBox provides combinators that can be used to parse common terminals. +ParseBox provides combinators for parsing common lexical tokens, such as numbers, identifiers, and strings, enabling static, optimized parsing of typical JavaScript constructs. ### Number @@ -234,7 +234,7 @@ const E = Runtime.Parse(T, '01') // const E = [] ### String -The String combinator will parse for quoted string literals. Thgit is combinator accepts an array of permissable quote characters. The result of this parser is the interior wrapped string. +The String combinator parses quoted string literals, accepting an array of permissible quote characters. The result is the interior string. ```typescript const T = Runtime.String(['"']) @@ -246,10 +246,10 @@ const R = Runtime.Parse(T, '"hello"') // const R = ['hello', ''] ### Ident -The Ident combinator will parse for a valid JavaScript identifiers. The following parses a let statement. +The Ident combinator parses valid JavaScript identifiers. The following parses a `let` statement. ```bnf - ::= "let" "=" + ::= "let" "=" ``` ```typescript @@ -270,11 +270,11 @@ const R = Runtime.Parse(Let, 'let n = 10') // const R = [[ 'let', 'n', ## Mapping -ParseBox supports semantic actions (i.e., mapping) for both Static and Runtime parsing. 
These actions allow parsed content to be transformed into complex structures, such as abstract syntax trees (ASTs). Below is an explanation of how mapping works in both Runtime and Static environments. +ParseBox supports semantic actions (i.e., mappings) for both static and runtime parsing, enabling parsed content to be transformed into complex structures like abstract syntax trees (ASTs). Below is an explanation of how mapping works in both environments. ### Runtime -In Runtime parsing, combinators can accept an optional callback function as their last argument. This callback receives the parsed elements, which can then be mapped to arbitrary return values. The following example demonstrates how a let statement is parsed and how a mapping function is used to transform the result into a syntax node. +Runtime combinators can accept an optional callback as their last argument, which receives the parsed elements and maps them to arbitrary return values. The following example shows how a let statement is parsed and mapped into a syntax node. ```typescript const LetMapping = (_0: 'let', _1: string, _2: '=', _3: string) => { @@ -300,7 +300,7 @@ const R = Runtime.Parse(Let, 'let value = 10') // const R = [{ ### Static -In Static combinators, an optional type of IMapping is provided as the last generic argument. Unlike Runtime callbacks, which receive parsed values directly as parameters, Static actions use the `this['input']` property to access input values, and they store the mapped results in the `output` property. The following example demonstrates how to implement the Let parser using Static actions. +Static combinators accept an optional higher-kinded type, IMapping, as the last generic argument. Static mapping uses the `this['input']` property to read input values, assigning the mapping to the `output` property. The following example demonstrates implementing the Let parser using static actions. 
```typescript type ParseFloat = ( @@ -330,20 +330,22 @@ type R = Static.Parse // type R = [{ ## Context -ParseBox provides a context mechanism that allows parsed content to interact with the host environment. A context value can be passed as the last argument to the Parse function, and ParseBox will propagate the context to each mapping function, enabling more dynamic parsing behavior. +ParseBox allows exterior values to be passed into and referenced within semantic actions. A context is passed as the last argument to the Static and Runtime parse types/functions, and is propagated into each action. The following demonstrates its usage. ### Runtime -In Runtime parsing, the context is passed as the second argument to the mapping functions. This allows the parser to access external data or state during the parsing process. +The Runtime Parse function accepts a context as the last argument, which is received as the second argument to the OptionMapping function. ```typescript import { Runtime } from '@sinclair/parsebox' -// use matched input as indexer on context -const OptionMapping = (input: 'A' | 'B' | 'C', context: Record) => { - return input in context - ? context[input] - : void 0 +// Context Received as Second Argument +const OptionMapping = (input: 'A' | 'B' | 'C', context: Record) => { + return ( + input in context + ? context[input] + : undefined + ) } const Option = Runtime.Union([ Runtime.Const('A'), @@ -360,16 +362,18 @@ const R = Runtime.Parse(Option, 'A', { // const R = ['Matched Foo', ### Static -In Static combinators, the context is accessible via the `this['context']` property within the mapping action type. +The Static Parse type accepts a context as the last generic argument, which is received via the `this['context']` property on the OptionMapping type. 
```typescript import { Static } from '@sinclair/parsebox' -// use input as indexer on context +// Context Received on Context Property interface OptionMapping extends Static.IMapping { - output: this['input'] extends keyof this['context'] - ? this['context'][this['input']] - : never + output: ( + this['input'] extends keyof this['context'] + ? this['context'][this['input']] + : undefined + ) } type Option = Static.Union<[ Static.Const<'A'>, @@ -386,48 +390,51 @@ type R = Static.Parse values.flat()) +// Item ::= "X" | "Y" | "Z" + +const Item = Runtime.Union([ + Runtime.Const('X'), + Runtime.Const('Y'), + Runtime.Const('Z'), +]) + +// List ::= Item List | ε + +const List = Runtime.Union([ + Runtime.Tuple([Runtime.Ref('Item'), Runtime.Ref('List')]), // Recursive Self + Runtime.Tuple([]) // Epsilon +], values => values.flat()) + +// Embed inside Module + +const Module = new Runtime.Module({ + Item, + List }) +// Use Module.Parse + const R = Module.Parse('List', 'X Y Z Y X E') // const R = [['X', 'Y', 'Z', 'Y', 'X'], ' E'] ``` ## Advanced -ParseBox is an LL(1) parser. When building parsers for complex grammars, care must be taken to avoid infinite left recursion, which can occur if a recursive grammar refers back to itself in a way that causes the parser to enter an infinite loop. This is particularly common in expression parsers. - -### Expression Parsing - -The following shows a reference expression parser using LL(1) techniques to avoid left recursion. The following parser respects operator precedence and grouping. +The following example demonstrates using ParseBox to parse a mathematical expression with LL(1) parsing techniques, avoiding left recursion and respecting operator precedence rules. 
```typescript import { Static } from '@sinclair/parsebox' -// BinaryMapping: Reduces matched Term and Expr into binary expression node +// Static Mapping Actions to remap Productions + type BinaryReduce = ( Right extends [infer Operator, infer Right, infer Rest extends unknown[]] ? { left: Left, operator: Operator, right: BinaryReduce } @@ -438,7 +445,6 @@ interface BinaryMapping extends Static.IMapping { ? BinaryReduce : never } -// FactorMapping: Map either grouped Expr or Operand interface FactorMapping extends Static.IMapping { output: ( this['input'] extends ['(', infer Expr, ')'] ? Expr : @@ -447,7 +453,7 @@ interface FactorMapping extends Static.IMapping { ) } -// ... +// Expression Grammar type Operand = Static.Ident @@ -472,9 +478,9 @@ type Term = Static.Tuple<[Factor, TermTail], BinaryMapping> type Expr = Static.Tuple<[Term, ExprTail], BinaryMapping> -// ... +// Parse! -type R = Static.Parse // type R = [{ +type Result = Static.Parse // type Result = [{ // left: "x"; // operator: "*"; // right: {