diff --git a/cli/src/main/java/me/fourteendoggo/mathexpressionparser/Main.java b/cli/src/main/java/me/fourteendoggo/mathexpressionparser/Main.java index da66102..ce4db52 100644 --- a/cli/src/main/java/me/fourteendoggo/mathexpressionparser/Main.java +++ b/cli/src/main/java/me/fourteendoggo/mathexpressionparser/Main.java @@ -14,6 +14,7 @@ public static void main(String[] args) { running = false; return 0; }); + env.insertVariable("x0", 1); Scanner in = new Scanner(System.in); @@ -28,7 +29,7 @@ public static void main(String[] args) { } catch (NoSuchElementException e) { // ctrl-c break; } catch (Throwable t) { - System.out.println("\u001B[31m[✘] Error: " + t.getMessage() + "\u001B[0m"); + System.out.printf("\u001B[31m[✘] Error: %s\u001B[0m\n", t.getMessage()); } } } diff --git a/core/src/main/java/me/fourteendoggo/mathexpressionparser/function/FunctionCallSite.java b/core/src/main/java/me/fourteendoggo/mathexpressionparser/function/FunctionCallSite.java index 4cccf88..6fc327c 100644 --- a/core/src/main/java/me/fourteendoggo/mathexpressionparser/function/FunctionCallSite.java +++ b/core/src/main/java/me/fourteendoggo/mathexpressionparser/function/FunctionCallSite.java @@ -3,7 +3,6 @@ import me.fourteendoggo.mathexpressionparser.symbol.Symbol; import me.fourteendoggo.mathexpressionparser.symbol.SymbolType; import me.fourteendoggo.mathexpressionparser.utils.Assert; -import me.fourteendoggo.mathexpressionparser.utils.Utility; import java.util.function.ToDoubleFunction; @@ -22,11 +21,11 @@ public FunctionCallSite(String name, int numArgs, ToDoubleFunction function) { - Assert.isTrue(Utility.isValidIdentifierName(name), "invalid function name %s", name); - - if (minArgs < 0 || maxArgs < 0 || minArgs > maxArgs) { - throw new IllegalArgumentException("minArgs must be >= 0, maxArgs must be >= 0 and minArgs must be <= maxArgs"); - } + Assert.isValidIdentifierName(name); + Assert.isTrue( + minArgs >= 0 && maxArgs >= 0 && maxArgs >= minArgs, + "minArgs must be >= 0, maxArgs must be >= 0 and minArgs must be <= maxArgs" + ); this.name = name; this.minArgs = minArgs; this.maxArgs = maxArgs; diff --git a/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/ExecutionEnv.java b/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/ExecutionEnv.java index f02ee9e..27394b1 100644 --- a/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/ExecutionEnv.java +++ b/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/ExecutionEnv.java @@ -19,6 +19,7 @@ * only symbols found in this environment will be seen. */ public class ExecutionEnv { + //private static final Pattern IDENTIFIER_PATTERN = Pattern.compile("[a-zA-Z][a-zA-Z0-9]*"); private final SymbolLookup symbolLookup; /** @@ -38,9 +39,6 @@ public static ExecutionEnv createDefault() { return BuiltinSymbols.createExecutionEnv(); } - /** - * Inserts a variable - */ public void insertVariable(String name, double value) { insertSymbol(new Variable(name, value)); } @@ -113,7 +111,7 @@ public Symbol lookupSymbol(char[] buf, int pos) { if (symbol == null) { String bufAsStr = new String(buf, pos, buf.length - pos); // TODO: also change when valid chars for symbol name change - String symbolName = bufAsStr.split("[^a-zA-Z]")[0]; + String symbolName = bufAsStr.split("[^a-zA-Z0-9]")[0]; throw new SymbolNotFoundException(symbolName); } return symbol; diff --git a/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/SymbolLookup.java b/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/SymbolLookup.java index 1b23770..8f56063 100644 --- a/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/SymbolLookup.java +++ b/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/SymbolLookup.java @@ -3,17 +3,64 @@ import me.fourteendoggo.mathexpressionparser.utils.Assert; import me.fourteendoggo.mathexpressionparser.utils.Utility; +import java.util.Arrays; import java.util.function.Supplier; -// TODO: allow more different chars as symbol name - /** * An efficient lookup tree for {@link Symbol}s. */ public class SymbolLookup { - private static final int CHILDREN_WIDTH = 'z' - 'a' + 1; + private static final byte INVALID_IDX = Byte.MAX_VALUE; + /** Last included ascii char of the range that {@link SymbolLookup#indexLookup} encompasses */ + private static final char MAX_RANGE_CHAR = 'z'; // (ascii value 122, 0x7A) + /** + * Stores indices into every {@link Node}'s children table, for the continuous ascii range that starts + * at '\0' (NUL) and consists of the only valid ranges '0'..'9', 'A'..'Z' and 'a'..'z' (in that order). + *

The fact that the range starts at '\0' means we store a leading range that is useless to us, as we only + * really need to start at '0', this to avoid an extra start bounds check for inclusion of a certain character. + *

Note: we also store useless ranges in between useful ranges to us, f.e. in between 'A'..'Z' and 'a'..'z', + * all these useless positions are filled with {@link SymbolLookup#INVALID_IDX}, which is clearly invalid because it is + * bigger than {@link Node#CHILDREN_WIDTH}. + *

+ * Every index into this table is a character cast to an int, as per java requirements. + *

Example: + *

{@code
+     *     char symbolChar = 'a';
+     *     // note that we don't need a start range check, because indexLookup starts at 0
+     *     // this to benefit from the fact that java chars are unsigned.
+     *     assert c <= MAX_RANGE_CHAR : "char is not contained within lookup table";
+     *     int idx = indexLookup[c];
+     *     assert idx != INVALID_IDX : "illegal char in symbol name";
+     *     Node child = root.children[idx];
+     *     assert child.value == symbolChar;
+     * }
+ */ + private static final byte[] indexLookup = new byte[MAX_RANGE_CHAR + 1]; private final Node root = new Node(' '); + static { + //noinspection ConstantConditions + if (INVALID_IDX <= Node.CHILDREN_WIDTH) { + throw new AssertionError("static assertion failed: INVALID_IDX must not be less than " + Node.CHILDREN_WIDTH); + } + + Arrays.fill(indexLookup, INVALID_IDX); + + /* relative start offsets for a certain range */ + final byte UPPERCASE_RANGE_START_OFFSET = 10; // ascii idx 65, child idx 10 + final byte LOWERCASE_RANGE_START_OFFSET = 26 + 10; // ascii idx 97, child idx 36 + + for (char c = '0'; c <= '9'; c++) { + indexLookup[c] = (byte) (c - '0'); + } + for (char c = 'A'; c <= 'Z'; c++) { + indexLookup[c] = (byte) (c - 'A' + UPPERCASE_RANGE_START_OFFSET); + } + for (char c = 'a'; c <= 'z'; c++) { + indexLookup[c] = (byte) (c - 'a' + LOWERCASE_RANGE_START_OFFSET); + } + } + public void insert(Symbol symbol) { Node node = root; char[] chars = symbol.getName().toCharArray(); @@ -35,10 +82,14 @@ public void insert(Symbol symbol) { * @return a {@link Symbol} or null if not found. */ public Symbol lookup(char[] buf, int pos) { + // NOTE: buf[pos] points to the first char of the symbol, which is already considered valid + // otherwise we wouldn't be here, no need for Utility.isValidIdentifierFirstChar() Node node = root; char current; - while (pos < buf.length && Utility.isLowercaseLetter(current = buf[pos++])) { - node = node.children[current - 'a']; + + while (pos < buf.length && Utility.isValidIdentifierChar(current = buf[pos++])) { + int childIdx = indexLookup[current]; + node = node.children[childIdx]; if (node == null) return null; } if (node instanceof ValueHoldingNode valueNode) { @@ -53,7 +104,15 @@ public String toString() { } private static class Node { + /** + * Following the specification, the first char of an identifier must be alphabetical, + * all following characters must be alphanumerical. + * We are wasting some child nodes in the root node, but those cannot (not even accidentally) be used, + * as guaranteed by the respective insert or lookup method in {@link SymbolLookup}. + */ + private static final int CHILDREN_WIDTH = 10 + 26 + 26; private final char value; // for debugging only + /* '0'..'9' 'A'..'Z' 'a'..'z' */ private final Node[] children = new Node[CHILDREN_WIDTH]; public Node(char value) { @@ -76,8 +135,10 @@ public Node insert(char value, Node node) { } private static int indexOrThrow(char value) { - Assert.isTrue(Utility.isLowercaseLetter(value), "character %s is not allowed to be used", value); - return value - 'a'; + Assert.isTrue(value < MAX_RANGE_CHAR, "character %s is not allowed in a symbol name", value); + byte idx = indexLookup[value]; + Assert.isFalse(idx == INVALID_IDX, "character %s is not allowed in a symbol name", value); + return idx; } // FIXME: more tree like string representation diff --git a/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/Variable.java b/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/Variable.java index 65417b3..adeaf78 100644 --- a/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/Variable.java +++ b/core/src/main/java/me/fourteendoggo/mathexpressionparser/symbol/Variable.java @@ -1,12 +1,11 @@ package me.fourteendoggo.mathexpressionparser.symbol; import me.fourteendoggo.mathexpressionparser.utils.Assert; -import me.fourteendoggo.mathexpressionparser.utils.Utility; public record Variable(String getName, double value) implements Symbol { public Variable { - Assert.isTrue(Utility.isValidIdentifierName(getName), "invalid variable name %s", getName); + Assert.isValidIdentifierName(getName); } @Override diff --git a/core/src/main/java/me/fourteendoggo/mathexpressionparser/token/Expression.java b/core/src/main/java/me/fourteendoggo/mathexpressionparser/token/Expression.java index 783b0dd..f163d4f 100644 --- a/core/src/main/java/me/fourteendoggo/mathexpressionparser/token/Expression.java +++ b/core/src/main/java/me/fourteendoggo/mathexpressionparser/token/Expression.java @@ -23,23 +23,22 @@ public void pushToken(Token token) { } // if we could not reuse the last operand of tail, allocate a new tail - if (!tail.isComplete() || tail.simplify()) { - tail.pushToken(token); - } else { + if (tail.isComplete() && !tail.simplify()) { tail.next = new LinkedCalculation(tail, tail.right, (Operator) token); tail = tail.next; numCalculations++; + } else { + tail.pushToken(token); } } private void checkType(TokenType type) { if (lastType != type) return; - String message = switch (type) { + throw new SyntaxException(switch (type) { case OPERAND -> "expected operator, got operand"; case OPERATOR -> "expected operand, got operator"; - }; - throw new SyntaxException(message); + }); } /** @@ -73,14 +72,13 @@ public double solve() { LinkedCalculation first = head; LinkedCalculation second = first.next; - // `x operator y operator` was previously throwing a npe Assert.notNull(second.right, "unexpected trailing operator"); if (first.mayExecuteFirst()) { // f.e. 2*3+2 - double leftOperand = first.solve(); + double firstOperand = first.solve(); double secondOperand = second.right.getValue(); - yield second.operator.apply(leftOperand, secondOperand); + yield second.operator.apply(firstOperand, secondOperand); } // f.e. 2+3*2 double firstOperand = first.left.getValue(); @@ -191,7 +189,7 @@ public boolean mayExecuteFirst() { /** * Pushes a token to this calculation object. - * The caller should check {@link #isComplete()} before to ensure they don't overwrite the same fields again, + * The caller should check {@link #isComplete()} before, to ensure they don't overwrite the same fields again, * as this just sets fields and doesn't check anything. * @param token the token to push, either an operand or an operator */ diff --git a/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Assert.java b/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Assert.java index c8c3281..7b47818 100644 --- a/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Assert.java +++ b/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Assert.java @@ -28,4 +28,10 @@ static void indexWithinBounds(int index, int size, String message, Object... pla throw new IndexOutOfBoundsException(message.formatted(placeholders)); } } + + static void isValidIdentifierName(String name) { + if (!Utility.isValidIdentifierName(name)) { + throw new SyntaxException("Invalid identifier name: " + name); + } + } } diff --git a/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Utility.java b/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Utility.java index 2aceba4..8ff841a 100644 --- a/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Utility.java +++ b/core/src/main/java/me/fourteendoggo/mathexpressionparser/utils/Utility.java @@ -29,6 +29,32 @@ public static int requireInt(double x) { return intVal; } + public static boolean isValidIdentifierFirstChar(char c) { + c |= ' '; // set lowercase bit + return c >= 'a' && c <= 'z'; + } + + // same as isValidIdentifierCharLowerCase but also handles uppercase chars + public static boolean isValidIdentifierChar(char c) { + if (c >= '0' && c <= '9') return true; + c |= ' '; + return (c >= 'a' && c <= 'z'); + } + + private static boolean isValidIdentifierCharLowerCase(char c) { + return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); + } + + public static boolean isValidIdentifierName(String name) { + if (name.isEmpty() || !isValidIdentifierFirstChar(name.charAt(0))) return false; + for (int i = 1; i < name.length(); i++) { + // numbers are immune to OR ' ' + char c = (char) (name.charAt(i) | ' '); + if (!isValidIdentifierCharLowerCase(c)) return false; + } + return true; + } + public static double boolToDouble(boolean x) { return x ? 1 : 0; } diff --git a/core/src/test/java/me/fourteendoggo/mathexpressionparser/ExpressionParserTest.java b/core/src/test/java/me/fourteendoggo/mathexpressionparser/ExpressionParserTest.java index 5ba4547..36f2b5d 100644 --- a/core/src/test/java/me/fourteendoggo/mathexpressionparser/ExpressionParserTest.java +++ b/core/src/test/java/me/fourteendoggo/mathexpressionparser/ExpressionParserTest.java @@ -22,7 +22,9 @@ void testPositiveTestCases(String expression, String expectedResult) { @ParameterizedTest @CsvFileSource(resources = "/negative-input.csv") void testNegativeTestCases(String expression) { - assertThatCode(() -> ExpressionParser.parse(expression)).isInstanceOf(SyntaxException.class); + assertThatCode(() -> ExpressionParser.parse(expression)) + .withFailMessage(expression) + .isInstanceOf(SyntaxException.class); } @Test