Skip to content

Commit

Permalink
Allow numbers and uppercase characters in identifier names
Browse files Browse the repository at this point in the history
  • Loading branch information
FourteenBrush committed Mar 3, 2024
1 parent 7c29db4 commit b76f9a7
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public static void main(String[] args) {
running = false;
return 0;
});
env.insertVariable("x0", 1);

Scanner in = new Scanner(System.in);

Expand All @@ -28,7 +29,7 @@ public static void main(String[] args) {
} catch (NoSuchElementException e) { // ctrl-c
break;
} catch (Throwable t) {
System.out.println("\u001B[31m[✘] Error: " + t.getMessage() + "\u001B[0m");
System.out.printf("\u001B[31m[✘] Error: %s\u001B[0m\n", t.getMessage());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import me.fourteendoggo.mathexpressionparser.symbol.Symbol;
import me.fourteendoggo.mathexpressionparser.symbol.SymbolType;
import me.fourteendoggo.mathexpressionparser.utils.Assert;
import me.fourteendoggo.mathexpressionparser.utils.Utility;

import java.util.function.ToDoubleFunction;

Expand All @@ -22,11 +21,11 @@ public FunctionCallSite(String name, int numArgs, ToDoubleFunction<FunctionConte
}

public FunctionCallSite(String name, int minArgs, int maxArgs, ToDoubleFunction<FunctionContext> function) {
Assert.isTrue(Utility.isValidIdentifierName(name), "invalid function name %s", name);

if (minArgs < 0 || maxArgs < 0 || minArgs > maxArgs) {
throw new IllegalArgumentException("minArgs must be >= 0, maxArgs must be >= 0 and minArgs must be <= maxArgs");
}
Assert.isValidIdentifierName(name);
Assert.isTrue(
minArgs >= 0 && maxArgs >= 0 && maxArgs >= minArgs,
"minArgs must be >= 0, maxArgs must be >= 0 and minArgs must be <= maxArgs"
);
this.name = name;
this.minArgs = minArgs;
this.maxArgs = maxArgs;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* only symbols found in this environment will be seen.
*/
public class ExecutionEnv {
//private static final Pattern IDENTIFIER_PATTERN = Pattern.compile("[a-zA-Z][a-zA-Z0-9]*");
private final SymbolLookup symbolLookup;

/**
Expand All @@ -38,9 +39,6 @@ public static ExecutionEnv createDefault() {
return BuiltinSymbols.createExecutionEnv();
}

/**
* Inserts a variable
*/
public void insertVariable(String name, double value) {
insertSymbol(new Variable(name, value));
}
Expand Down Expand Up @@ -113,7 +111,7 @@ public Symbol lookupSymbol(char[] buf, int pos) {
if (symbol == null) {
String bufAsStr = new String(buf, pos, buf.length - pos);
// TODO: also change when valid chars for symbol name change
String symbolName = bufAsStr.split("[^a-zA-Z]")[0];
String symbolName = bufAsStr.split("[^a-zA-Z0-9]")[0];
throw new SymbolNotFoundException(symbolName);
}
return symbol;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,64 @@
import me.fourteendoggo.mathexpressionparser.utils.Assert;
import me.fourteendoggo.mathexpressionparser.utils.Utility;

import java.util.Arrays;
import java.util.function.Supplier;

// TODO: allow more different chars as symbol name

/**
* An efficient lookup tree for {@link Symbol}s.
*/
public class SymbolLookup {
private static final int CHILDREN_WIDTH = 'z' - 'a' + 1;
private static final byte INVALID_IDX = Byte.MAX_VALUE;
/** Last included ascii char of the range that {@link SymbolLookup#indexLookup} encompasses */
private static final char MAX_RANGE_CHAR = 'z'; // (ascii value 122, 0x7A)
/**
* Stores indices into every {@link Node}'s children table, for the continuous ascii range that starts
* at '\0' (NUL) and consists of the only valid ranges '0'..'9', 'A'..'Z' and 'a'..'z' (in that order).
* <p>The fact that the range starts at '\0' means we store a leading range that is useless to us, as we only
* really need to start at '0', this to avoid an extra start bounds check for inclusion of a certain character.
* <p>Note: we also store useless ranges in between useful ranges to us, f.e. in between 'A'..'Z' and 'a'..'z',
* all these useless positions are filled with {@link SymbolLookup#INVALID_IDX}, which is clearly invalid because it is
* bigger than {@link Node#CHILDREN_WIDTH}.
* <p>
* Every index into this table is a character cast to an int, as per java requirements.
* <p>Example:
* <pre>{@code
* char symbolChar = 'a';
* // note that we don't need a start range check, because indexLookup starts at 0
* // this to benefit from the fact that java chars are unsigned.
* assert c <= MAX_RANGE_CHAR : "char is not contained within lookup table";
* int idx = indexLookup[c];
* assert idx != INVALID_IDX : "illegal char in symbol name";
* Node child = root.children[idx];
* assert child.value == symbolChar;
* }</pre>
*/
private static final byte[] indexLookup = new byte[MAX_RANGE_CHAR + 1];
private final Node root = new Node(' ');

static {
//noinspection ConstantConditions
if (INVALID_IDX <= Node.CHILDREN_WIDTH) {
throw new AssertionError("static assertion failed: INVALID_IDX must not be less than " + Node.CHILDREN_WIDTH);
}

Arrays.fill(indexLookup, INVALID_IDX);

/* relative start offsets for a certain range */
final byte UPPERCASE_RANGE_START_OFFSET = 10; // ascii idx 65, child idx 10
final byte LOWERCASE_RANGE_START_OFFSET = 26 + 10; // ascii idx 97, child idx 36

for (char c = '0'; c <= '9'; c++) {
indexLookup[c] = (byte) (c - '0');
}
for (char c = 'A'; c <= 'Z'; c++) {
indexLookup[c] = (byte) (c - 'A' + UPPERCASE_RANGE_START_OFFSET);
}
for (char c = 'a'; c <= 'z'; c++) {
indexLookup[c] = (byte) (c - 'a' + LOWERCASE_RANGE_START_OFFSET);
}
}

public void insert(Symbol symbol) {
Node node = root;
char[] chars = symbol.getName().toCharArray();
Expand All @@ -35,10 +82,14 @@ public void insert(Symbol symbol) {
* @return a {@link Symbol} or null if not found.
*/
public Symbol lookup(char[] buf, int pos) {
// NOTE: buf[pos] points to the first char of the symbol, which is already considered valid
// otherwise we wouldn't be here, no need for Utility.isValidIdentifierFirstChar()
Node node = root;
char current;
while (pos < buf.length && Utility.isLowercaseLetter(current = buf[pos++])) {
node = node.children[current - 'a'];

while (pos < buf.length && Utility.isValidIdentifierChar(current = buf[pos++])) {
int childIdx = indexLookup[current];
node = node.children[childIdx];
if (node == null) return null;
}
if (node instanceof ValueHoldingNode valueNode) {
Expand All @@ -53,7 +104,15 @@ public String toString() {
}

private static class Node {
/**
* Following the specification, the first char of an identifier must be alphabetical,
* all following characters must be alphanumerical.
* We are wasting some child nodes in the root node, but those cannot (not even accidentally) be used,
* as guaranteed by the respective insert or lookup method in {@link SymbolLookup}.
*/
private static final int CHILDREN_WIDTH = 10 + 26 + 26;
private final char value; // for debugging only
/* '0'..'9' 'A'..'Z' 'a'..'z' */
private final Node[] children = new Node[CHILDREN_WIDTH];

public Node(char value) {
Expand All @@ -76,8 +135,10 @@ public Node insert(char value, Node node) {
}

private static int indexOrThrow(char value) {
Assert.isTrue(Utility.isLowercaseLetter(value), "character %s is not allowed to be used", value);
return value - 'a';
Assert.isTrue(value < MAX_RANGE_CHAR, "character %s is not allowed in a symbol name", value);
byte idx = indexLookup[value];
Assert.isFalse(idx == INVALID_IDX, "character %s is not allowed in a symbol name", value);
return idx;
}

// FIXME: more tree like string representation
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package me.fourteendoggo.mathexpressionparser.symbol;

import me.fourteendoggo.mathexpressionparser.utils.Assert;
import me.fourteendoggo.mathexpressionparser.utils.Utility;

public record Variable(String getName, double value) implements Symbol {

public Variable {
Assert.isTrue(Utility.isValidIdentifierName(getName), "invalid variable name %s", getName);
Assert.isValidIdentifierName(getName);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,22 @@ public void pushToken(Token token) {
}

// if we could not reuse the last operand of tail, allocate a new tail
if (!tail.isComplete() || tail.simplify()) {
tail.pushToken(token);
} else {
if (tail.isComplete() && !tail.simplify()) {
tail.next = new LinkedCalculation(tail, tail.right, (Operator) token);
tail = tail.next;
numCalculations++;
} else {
tail.pushToken(token);
}
}

private void checkType(TokenType type) {
if (lastType != type) return;

String message = switch (type) {
throw new SyntaxException(switch (type) {
case OPERAND -> "expected operator, got operand";
case OPERATOR -> "expected operand, got operator";
};
throw new SyntaxException(message);
});
}

/**
Expand Down Expand Up @@ -73,14 +72,13 @@ public double solve() {
LinkedCalculation first = head;
LinkedCalculation second = first.next;

// `x operator y operator` was previously throwing a npe
Assert.notNull(second.right, "unexpected trailing operator");

if (first.mayExecuteFirst()) {
// f.e. 2*3+2
double leftOperand = first.solve();
double firstOperand = first.solve();
double secondOperand = second.right.getValue();
yield second.operator.apply(leftOperand, secondOperand);
yield second.operator.apply(firstOperand, secondOperand);
}
// f.e. 2+3*2
double firstOperand = first.left.getValue();
Expand Down Expand Up @@ -191,7 +189,7 @@ public boolean mayExecuteFirst() {

/**
* Pushes a token to this calculation object.
* The caller should check {@link #isComplete()} before to ensure they don't overwrite the same fields again,
* The caller should check {@link #isComplete()} before, to ensure they don't overwrite the same fields again,
* as this just sets fields and doesn't check anything.
* @param token the token to push, either an operand or an operator
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,10 @@ static void indexWithinBounds(int index, int size, String message, Object... pla
throw new IndexOutOfBoundsException(message.formatted(placeholders));
}
}

static void isValidIdentifierName(String name) {
if (!Utility.isValidIdentifierName(name)) {
throw new SyntaxException("Invalid identifier name: " + name);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,32 @@ public static int requireInt(double x) {
return intVal;
}

public static boolean isValidIdentifierFirstChar(char c) {
c |= ' '; // set lowercase bit
return c >= 'a' && c <= 'z';
}

// same as isValidIdentifierCharLowerCase but also handles uppercase chars
public static boolean isValidIdentifierChar(char c) {
if (c >= '0' && c <= '9') return true;
c |= ' ';
return (c >= 'a' && c <= 'z');
}

private static boolean isValidIdentifierCharLowerCase(char c) {
return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
}

public static boolean isValidIdentifierName(String name) {
if (name.isEmpty() || !isValidIdentifierFirstChar(name.charAt(0))) return false;
for (int i = 1; i < name.length(); i++) {
// numbers are immune to OR ' '
char c = (char) (name.charAt(i) | ' ');
if (!isValidIdentifierCharLowerCase(c)) return false;
}
return true;
}

public static double boolToDouble(boolean x) {
return x ? 1 : 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ void testPositiveTestCases(String expression, String expectedResult) {
@ParameterizedTest
@CsvFileSource(resources = "/negative-input.csv")
void testNegativeTestCases(String expression) {
assertThatCode(() -> ExpressionParser.parse(expression)).isInstanceOf(SyntaxException.class);
assertThatCode(() -> ExpressionParser.parse(expression))
.withFailMessage(expression)
.isInstanceOf(SyntaxException.class);
}

@Test
Expand Down

0 comments on commit b76f9a7

Please sign in to comment.