Skip to content

Commit

Permalink
Fix #1
Browse files Browse the repository at this point in the history
  • Loading branch information
FourteenBrush committed Mar 16, 2024
1 parent 8dd12c8 commit 4fa92e0
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import me.fourteendoggo.mathexpressionparser.function.FunctionCallSite;
import me.fourteendoggo.mathexpressionparser.function.FunctionContext;
import org.jetbrains.annotations.ApiStatus;
import org.jetbrains.annotations.VisibleForTesting;

import java.util.function.DoubleBinaryOperator;
import java.util.function.DoubleSupplier;
Expand All @@ -21,7 +22,8 @@
*/
public class ExecutionEnv {
private static final Pattern INVERSE_IDENTIFIER_PATTERN = Pattern.compile("[^a-zA-Z_0-9]");
private final SymbolLookup symbolLookup;
@VisibleForTesting
final SymbolLookup symbolLookup;

/**
* @see ExecutionEnv#empty()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import me.fourteendoggo.mathexpressionparser.utils.Assert;
import me.fourteendoggo.mathexpressionparser.utils.Utility;
import me.fourteendoggo.mathexpressionparser.exceptions.SyntaxException;
import org.jetbrains.annotations.Debug;
import org.jetbrains.annotations.VisibleForTesting;

import java.util.Arrays;

Expand All @@ -11,8 +13,10 @@
*/
public class SymbolLookup {
private static final byte INVALID_IDX = Byte.MAX_VALUE;
/** Last included ascii char of the range that {@link SymbolLookup#indexLookup} encompasses */
private static final char MAX_RANGE_CHAR = 'z'; // decimal 122
/**
* Last included ascii char of the range that {@link SymbolLookup#indexLookup} encompasses
*/
private static final char MAX_RANGE_CHAR = 'z';
/**
* Stores a mapping between characters and indices into a {@link Node}'s children table. Every index into this table
* is a character cast to an int. Every value is an absolute index into {@link Node#children}.
Expand All @@ -32,14 +36,13 @@ public class SymbolLookup {
* assert child.value == symbolChar;
* }</pre>
*/
private static final byte[] indexLookup = new byte[MAX_RANGE_CHAR + 1];
private final Node root = new Node(' ');
private static final byte[] indexLookup = new byte[MAX_RANGE_CHAR + 1]; // 123
@VisibleForTesting
final Node root = new Node(/* must be a valid identifier char */'_');

static {
//noinspection ConstantConditions
if (INVALID_IDX <= Node.CHILDREN_WIDTH) {
throw new AssertionError("static assertion failed: INVALID_IDX must not be less than " + Node.CHILDREN_WIDTH);
}
assert INVALID_IDX > Node.CHILDREN_WIDTH : "static assertion failed: INVALID_IDX must not be less than " + Node.CHILDREN_WIDTH;

Arrays.fill(indexLookup, INVALID_IDX);

Expand All @@ -63,24 +66,25 @@ public class SymbolLookup {

/**
* Inserts a symbol
* @throws SyntaxException when this symbol already exists.
*
* @throws SyntaxException when this symbol is already inserted.
*/
public void insert(Symbol symbol) {
char[] chars = symbol.getName().toCharArray();
String name = symbol.getName();
Node node = root;

for (int i = 0; i < chars.length - 1; i++) {
char current = chars[i];
node = node.getOrInsertChild(current);
for (int i = 0; i < name.length() - 1; i++) {
node = node.getOrInsertChild(name.charAt(i));
}

char lastChar = chars[chars.length - 1];
char lastChar = name.charAt(name.length() - 1);
Node lastNode = node.insertValue(lastChar, symbol);
Assert.isFalse(lastNode instanceof ValueHoldingNode, "symbol %s was already inserted", symbol.getName());
Assert.isFalse(lastNode instanceof ValueHoldingNode, "symbol %s was already inserted", name);
}

/**
* Looks up a {@link Symbol} in the given char buffer, starting at the given position.
*
* @param buf the char buffer supplied by the tokenizer.
* @param pos the position to start looking at.
* @return a {@link Symbol} or null if not found.
Expand All @@ -104,61 +108,99 @@ public String toString() {
return "SymbolLookup{root=" + root + '}';
}

private static class Node {
@VisibleForTesting
@Debug.Renderer(text = """
"%s data=0b%s %s".formatted(this.toString(),
String.format("%8s", Integer.toString(this.data >> 8, 2)).replace(' ', '0'),
String.format("%8s", Integer.toString(this.getCharacter(), 2)).replace(' ', '0'))""")
static class Node {
/**
* Following the specification, the first char of an identifier must be alphabetical,
* all following characters must be alphanumerical or an underscore.
* We are wasting some child nodes in the root node, but those cannot (not even accidentally) be used,
* as guaranteed by the respective insert or lookup method in {@link SymbolLookup}.
*/
private static final int CHILDREN_WIDTH = 10 + 26 + 1 + 26;
static final int HAS_CHILDREN_SHIFT = 8;
static final int HAS_CHILDREN = 1;

/* '0'..'9' 'A'..'Z' '_' 'a'..'z' */
private final Node[] children = new Node[CHILDREN_WIDTH];
private final char value; // for debugging only
final Node[] children;
/**
* High byte either stores {@link Node#HAS_CHILDREN} or 0, to indicate
* whether any node in {@link Node#children} is not null.
* Low byte stores the low byte of the character this node represents.
* The high byte that would've been in the character is never used.
* | byte 0: indicates children | byte 1: character value |
*/
private short data;

public Node(char value) {
this.value = value;
private Node(char value) {
this(value, new Node[CHILDREN_WIDTH]);
}

public Node getOrInsertChild(char value) {
private Node(char value, Node[] children) {
assert value <= SymbolLookup.MAX_RANGE_CHAR;
assert indexLookup[value] != INVALID_IDX;
this.data = (short) value;
this.children = children;
}

private Node getOrInsertChild(char value) {
int idx = indexOrThrow(value);
if (children[idx] == null) {
children[idx] = new Node(value);
Node child = children[idx];
if (child == null) {
child = children[idx] = new Node(value);
}
return children[idx];
return child;
}

public Node insertValue(char value, Symbol symbol) {
int index = indexOrThrow(value);
Node oldValue = children[index];
// TODO: insert children copy
children[index] = new ValueHoldingNode(value, symbol);
private Node insertValue(char value, Symbol symbol) {
int idx = indexOrThrow(value);
Node oldValue = children[idx];

children[idx] = oldValue != null
? new ValueHoldingNode(value, oldValue.children, symbol)
: new ValueHoldingNode(value, symbol);
this.data |= (HAS_CHILDREN << HAS_CHILDREN_SHIFT);
return oldValue;
}

boolean hasChildren() {
return data >> HAS_CHILDREN_SHIFT == HAS_CHILDREN;
}

short getData() {
return data;
}

char getCharacter() {
return (char) (data & 0x00ff);
}

private static int indexOrThrow(char value) {
Assert.isTrue(value <= MAX_RANGE_CHAR, "character %s is not allowed in a symbol name", value);
byte idx = indexLookup[value];
Assert.isFalse(idx == INVALID_IDX, "character %s is not allowed in a symbol name", value);
Assert.isTrue(idx != INVALID_IDX, "character %s is not allowed in a symbol name", value);
return idx;
}

// FIXME: more tree like string representation
@Override
public String toString() {
StringBuilder sb = new StringBuilder(100);
StringBuilder sb = new StringBuilder();
sb.append(getClass().getSimpleName());
sb.append("{'").append(value).append("', children=");

for (int i = 0; i < children.length; i++) {
if (children[i] == null) continue;
if (i > 0 && children[i - 1] != null) {
sb.append(", ");
sb.append("{'").append(getCharacter()).append("', children=");

if (hasChildren()) {
for (int i = 0; i < children.length; i++) {
if (children[i] == null) continue;
if (i > 0 && children[i - 1] != null) {
sb.append(", ");
}
sb.append(children[i]);
}
sb.append(children[i]);
}
// no children
if (sb.charAt(sb.length() - 1) == '=') {
} else {
sb.append("[]");
}
return sb.append('}').toString();
Expand All @@ -171,9 +213,14 @@ public String toString() {
private static class ValueHoldingNode extends Node {
private final Symbol symbol;

public ValueHoldingNode(char value, Symbol symbol) {
private ValueHoldingNode(char value, Symbol symbol) {
super(value);
this.symbol = symbol;
}

private ValueHoldingNode(char value, Node[] children, Symbol symbol) {
super(value, children);
this.symbol = symbol;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,15 @@ public Expression readTokens() {
advance();
expr.pushToken(Operator.NOT_EQUALS);
} else { // one of the highest priority operators, can be solved immediately
// TODO: outline
Tokenizer tokenizer = branchOff(loopCondition, pos);
double toBeNegated = tokenizer.readTokens().solve();
pos = tokenizer.pos;
expr.pushToken(Utility.boolNot(toBeNegated));
}
}
case '~' -> { // one of the highest priority operators, can be solved immediately
// TODO: outline
Tokenizer tokenizer = branchOff(loopCondition, pos);
int input = Utility.requireInt(tokenizer.readTokens().solve());
pos = tokenizer.pos;
Expand Down Expand Up @@ -303,19 +305,17 @@ private Operand readFunctionCall(FunctionCallSite desc) {

char maybeClosingParenthesis = currentOrThrow("missing closing parenthesis for function call %s", functionName);
FunctionContext parameters = desc.allocateParameters();
if (maybeClosingParenthesis != ')') { // arguments were provided
// TODO: when calling f.e. exit( ), the space gets interpreted as parameters too
Assert.isTrue(desc.supportsArgs(), "did not expect any parameters for function %s", functionName);

// do while doesn't really work here due to that + 1
Tokenizer paramTokenizer = branchOff(Utility::isValidArgument, pos);
parameters.add(paramTokenizer.readTokens().solve());
if (maybeClosingParenthesis != ')') { // parameters were provided
// TODO: when calling f.e. exit( ), the space gets interpreted as parameters too
Assert.isTrue(desc.supportsArgs(), "function %s did not expect any parameters", functionName);

while (paramTokenizer.currentOrDefault() == ',') {
// TODO: can we reuse the tokenizer?
paramTokenizer = paramTokenizer.branchOff(Utility::isValidArgument, paramTokenizer.pos + 1); // + 1 to consume comma
// TODO: can we reuse the tokenizer and avoid an extra allocation per parameter?
Tokenizer paramTokenizer = this;
do {
paramTokenizer = paramTokenizer.branchOff(Utility::isValidArgument, paramTokenizer.pos);
parameters.add(paramTokenizer.readTokens().solve());
}
} while (paramTokenizer.match(','));
pos = paramTokenizer.pos; // move to ')' (or something else if there's an error)
}
matchOrThrow(')', "missing closing parenthesis for function %s", functionName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ static void isTrue(boolean condition, String message, Object... placeholders) {
}
}

static void indexWithinBounds(int index, int size, String message, Object... placeholders) {
if (index < 0 || index >= size) {
static void indexWithinBounds(int idx, int size, String message, Object... placeholders) {
if (idx < 0 || idx >= size) {
throw new IndexOutOfBoundsException(message.formatted(placeholders));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ public static int getPowerOfTen(int exponent) {
return (int) Math.pow(10, exponent);
}

public static String getOrdinalName(int index) {
if (index < COMMON_ORDINAL_NAMES.length) {
return COMMON_ORDINAL_NAMES[index];
public static String getOrdinalName(int idx) {
if (idx < COMMON_ORDINAL_NAMES.length) {
return COMMON_ORDINAL_NAMES[idx];
}
return (index + 1) + "th";
return (idx + 1) + "th";
}

public static int requireInt(double x) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import me.fourteendoggo.mathexpressionparser.exceptions.SyntaxException;
import me.fourteendoggo.mathexpressionparser.symbol.ExecutionEnv;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvFileSource;
Expand All @@ -24,7 +23,6 @@ void setUp() {
void testPositiveTestCases(String expression, String expectedResult) {
double expected = ExpressionParser.parse(expectedResult);
double result = assertDoesNotThrow(() -> ExpressionParser.parse(expression));
// delta 0 to make 0.0 == -0.0 pass, junit uses Double.doubleToLongBits for comparison
assertThat(result)
.withFailMessage("%s: got %f instead of %f", expression, result, expected)
.isEqualTo(expected);
Expand All @@ -47,14 +45,13 @@ void testThrowingExpressions() {
assertThatThrownBy(() -> ExpressionParser.parse("")).isInstanceOf(SyntaxException.class);
}

@Test
@Disabled // TODO: fix #1
@Test // was an issue in #1
void reproduce() {
ExecutionEnv env = ExecutionEnv.empty();
env.insertVariable("xy", 1);
env.insertVariable("x", 2);
env.insertVariable("y", 3);
assertThat(ExpressionParser.parse("xy", env)).isEqualTo(1); // "xy" not found
assertThat(ExpressionParser.parse("xy", env)).isEqualTo(1);
assertThat(ExpressionParser.parse("x", env)).isEqualTo(2);
assertThat(ExpressionParser.parse("y", env)).isEqualTo(3);
}
Expand All @@ -75,12 +72,11 @@ void testIdentifierValidChars() {

assertThatThrownBy(() -> ExpressionParser.parse("a_00__1____", env)).isInstanceOf(SyntaxException.class);

// TODO: #1
String[] identifiers = {
"aa", "lA", /*"z_", "z9" , */
"u0pz"/*,"zAA", "zZ_" */, "_E01",
"aa", "lA", "z_", "z9" ,
"u0pz","zAA", "zZ_", "_E01",
"__z", "_UXs", "___Az0", "_01_9z",
"i", "z"/*, "_"*/, "e0", "t"/*, "__", "_0"*/
"i", "z", "_", "e0", "t", "__", "_0"
};

for (String ident : identifiers) {
Expand Down
Loading

0 comments on commit 4fa92e0

Please sign in to comment.