Merge pull request #19 from bigyihsuan/base36-numbers

Base36 numbers
bigyihsuan · Jun 23, 2020 · 6c78f2a · 6c78f2a
2 parents 35a3ca8 + dc841fa
commit 6c78f2a
Show file tree

Hide file tree

Showing 8 changed files with 175 additions and 125 deletions.
diff --git a/src/enums.py b/src/enums.py
diff --git a/src/evaluator.py b/src/evaluator.py
@@ -1,5 +1,6 @@
-from enums import Token as T
-from enums import EvalState as E
+from util import Token as T
+from util import EvalState as E
+from util import convert_base
 from instructions import executeInstruction
 
 import math

diff --git a/src/instructions.py b/src/instructions.py
@@ -1,4 +1,4 @@
-import math, string
+import math, string, util
 
 def executeInstruction(instruction, unvoiced, voiced, currentStack):
     """
@@ -233,69 +233,66 @@ def executeInstruction(instruction, unvoiced, voiced, currentStack):
     elif instruction == "ɪ":
         ele = input()
         try:
-            if "{" in ele[0] and "}" in ele[-1]:
-                ele = eval(ele[1:-1])
+            if len(ele) > 0 and "{" in ele[0] and "}" in ele[-1]:
+                base = 10
+                for d in ele:
+                    if d in string.ascii_letters:
+                        base = 36
+                        break
+                ele = util.convert_base(ele[1:-1], base)
             elif ele in string.digits:
                 ele = eval(ele)
-            elif "[" in ele[0] and "]" in ele[-1]:
-                o = []
-                inNum, inStr, inList = False, False, False
-                numList = 0
-                n, s, l = "", "", ""
-                for i,c in enumerate(ele):
-                    if not inNum and not inStr and not inList:
-                        if c in "{":
-                            inNum = True
-                        elif c in '"':
-                            inStr = True
-                            s += c
-                        elif c in "[":
-                            inList = True
-                            numList += 0
-                        elif c in string.digits:
-                            o.append(c)
-                    if inNum and not inList:
-                        if c in string.digits or c in "." or c in "-":
-                            n += c
-                        elif c in "}":
-                            o.append(eval(n))
-                            n = ""
-                            inNum = False
-                    elif inStr and not inList:
-                        s += c
-                        if c in '"':
-                            s = bytearray(s+c, "utf-8").decode("unicode_escape")
-                            o.append(eval(s))
-                            inStr = False
-                    elif inList:
-                        if c in "[":
-                            l += c
-                            numList += 1
-                        elif not inStr and c in "]" and numList > 0:
-                            l += c
-                            numList -= 1
-                        elif not inNum and not inStr and c in ".":
-                            l += ","
-                        elif c in "{":
-                            inNum = True
-                        elif inNum:
-                            if c in "}":
-                                l += ""
-                                inNum = False
+            elif len(ele) > 0 and "[" in ele[0] and "]" in ele[-1]:
+                # copy-pasted from interpreter.py and evaluator.py
+                import lexer, parser
+                ele = ele + " "
+                lexes = []
+                outputList = []
+                lastlex = lexer.Lex(util.Token.BEGIN, "")
+                while lastlex.token != util.Token.END:
+                    ele, lex = parser.Parser().getNextToken(ele)
+                    if (lex.token != util.Token.COMMENT):
+                        lexes.append(lex)
+                    lastlex = lex
+
+                for ep,lex in enumerate(lexes):
+                    if lexes[ep].token == util.Token.NUMBER:
+                        base = 10
+                        for d in lexes[ep].lexeme:
+                            if d in string.ascii_letters:
+                                base = 36
+                                break
+                        outputList.append(util.convert_base(lexes[ep].lexeme, base))
+                    elif lexes[ep].token == util.Token.STRING:
+                        outputList.append(lexes[ep].lexeme[1:-1])
+                        continue
+                    elif lexes[ep].token == util.Token.LISTBEGIN:
+                        numList = 1
+                        list = "["
+                        while numList > 0:
+                            ep += 1
+                            if lexes[ep].token == util.Token.NUMBER:
+                                base = 10
+                                for d in lexes[ep].lexeme:
+                                    if d in string.ascii_letters:
+                                        base = 36
+                                        break
+                                list += str(util.convert_base(lexes[ep].lexeme, base))
                             else:
-                                l += c
-                        else:
-                            l += c
-                if l != "": # not sure what happened here
-                    ele = eval(l)
-                else:
-                    ele = o
-            elif '"' in ele[0] and '"' in ele[-1]:
+                                list += lexes[ep].lexeme if lexes[ep].token != util.Token.LISTSEP else ","
+                            if lexes[ep].token == util.Token.LISTBEGIN:
+                                numList += 1
+                            if lexes[ep].token == util.Token.LISTEND:
+                                numList -= 1
+                        outputList = eval(list)
+                        break
+                    elif lexes[ep].token == util.Token.END:
+                        break
+                ele = outputList
+            elif len(ele) > 0 and '"' in ele[0] and '"' in ele[-1]:
                 ele = eval(ele)
             else:
                 ele = ele
-        except:
-            pass
         finally:
             currentStack.append(ele)
     elif instruction == "o":

diff --git a/src/interpreter.py b/src/interpreter.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
-import enums as E
-from enums import Token as T
+import util as E
+from util import Token as T
 import sys
 import io
 import lexer

diff --git a/src/lexer.py b/src/lexer.py
@@ -1,5 +1,5 @@
-from enums import Token as T
-from enums import LexState as LS
+from util import Token as T
+from util import LexState as LS
 import string
 
 class Lex:
@@ -82,30 +82,30 @@ def getNextToken(code):
                 return (code[start+len(lexeme):], Lex(T.COMMENT, lexeme))
 
         elif lexstate == LS.INSTRING:
-            lexeme += c
             if c == '\\':
                 sawEscape = True
                 lexeme += c
-            if c == '"' and lexeme[-1] not in "\\" and lexeme[-2] not in "\\" and not sawEscape:
-                lexeme = lexeme.replace("\\\\", "\\")
+            else:
+                lexeme += c
+            if c == '"' and not sawEscape:
                 return (code[start+len(lexeme):], Lex(T.STRING, bytearray(lexeme, "utf-8").decode("unicode_escape")))
             if sawEscape:
                 sawEscape = False
 
         elif lexstate == LS.INNUMBER:
             lexeme += c
             if c == "}":
-                return (code[start+len(lexeme):], Lex(T.NUMBER, lexeme[1:-1]))
+                return (code[start+len(lexeme):], Lex(T.NUMBER, lexeme[1:-1].upper()))
             if c == ".":
                 lexstate = LS.INFLOAT
-            elif c not in string.digits and c not in "-":
+            elif c not in string.digits + string.ascii_letters and c not in "-":
                 return (code[start+len(lexeme):], Lex(T.ERR, "Invalid character in multidigit number '{}'".format(c)))
 
         elif lexstate == LS.INFLOAT:
             lexeme += c
             if c == "}":
-                return (code[start+len(lexeme):], Lex(T.NUMBER, lexeme[1:-1]))
-            if c not in string.digits:
+                return (code[start+len(lexeme):], Lex(T.NUMBER, lexeme[1:-1].upper()))
+            if c not in string.digits + string.ascii_letters:
                 return (code[start+len(lexeme):], Lex(T.ERR, "Invalid character in float number '{}'".format(c)))
 
         elif lexstate == LS.INLABEL:

diff --git a/src/parser.py b/src/parser.py
@@ -1,4 +1,4 @@
-from enums import Token as T
+from util import Token as T
 import lexer
 import os
 

diff --git a/src/util.py b/src/util.py
@@ -0,0 +1,95 @@
+from enum import Flag, auto
+
+class Token(Flag):
+    """
+    Kinds of token that a lexeme can be tagged with.
+    """
+    # Used as the placeholder "last token" before any input has been read.
+    BEGIN = auto()
+    # Token-collecting loops stop once a lexeme with this kind is produced.
+    END = auto()
+    # The accompanying lexeme holds an error message rather than source text.
+    ERR = auto()
+    # Skipped when lexemes are collected for evaluation.
+    COMMENT = auto()
+    # For {...} literals the lexeme is the upper-cased text inside the braces.
+    NUMBER = auto()
+    STRING = auto()
+    LISTBEGIN = auto()
+    LISTSEP = auto()
+    LISTEND = auto()
+    FUNNAME = auto()
+    FUNDEFSTART = auto()
+    FUNDEFEND = auto()
+    LABEL = auto()
+    JUMP = auto()
+    INSTRUCTION = auto()
+    LOOPSTART = auto()
+    LOOPEND = auto()
+    LOOPEXIT = auto()
+    DONE = auto()
+
+class LexState(Flag):
+    """
+    Represents the lexer state.
+
+    The lexer compares its current state against these members to decide
+    how the next character is handled.
+    """
+    BEGIN = auto()
+    # Inside a multi-character number; a "}" ends it, a "." switches to INFLOAT.
+    INNUMBER = auto()
+    # Fractional part of a number, entered from INNUMBER; a "}" ends it.
+    INFLOAT = auto()
+    # Inside a string; an unescaped '"' ends it.
+    INSTRING = auto()
+    INLIST = auto()
+    INCOMMENT = auto()
+    INFUNCTIONNAME = auto()
+    INFUNCTIONCODE = auto()
+    INLABEL = auto()
+    ININSTRUCTION = auto()
+
+class EvalState(Flag):
+    """
+    Represents the interpreter state.
+
+    Imported by the evaluator module under the alias E.
+    """
+    DEFAULT = auto()
+    # NOTE(review): presumably active while a function call is being
+    # evaluated - confirm against evaluator.py.
+    CALLING = auto()
+
+# Value of every digit character understood by convert_base: "0"-"9" map to
+# 0-9 and the letters map to 10-35 in both upper and lower case, which is
+# enough for any base up to 36.
+digits = {
+"0":0, "1":1,
+"2":2, "3":3,
+"4":4, "5":5,
+"6":6, "7":7,
+"8":8, "9":9,
+"A":10, "a":10,
+"B":11, "b":11,
+"C":12, "c":12,
+"D":13, "d":13,
+"E":14, "e":14,
+"F":15, "f":15,
+"G":16, "g":16,
+"H":17, "h":17,
+"I":18, "i":18,
+"J":19, "j":19,
+"K":20, "k":20,
+"L":21, "l":21,
+"M":22, "m":22,
+"N":23, "n":23,
+"O":24, "o":24,
+"P":25, "p":25,
+"Q":26, "q":26,
+"R":27, "r":27,
+"S":28, "s":28,
+"T":29, "t":29,
+"U":30, "u":30,
+"V":31, "v":31,
+"W":32, "w":32,
+"X":33, "x":33,
+"Y":34, "y":34,
+"Z":35, "z":35 }
+
+# https://stackoverflow.com/a/20170279/8143168
+def convert_base(s, base=10):
+    """
+    Convert the digit string `s`, written in `base`, to a number.
+
+    Each character is looked up in the module-level `digits` table, so
+    letters are accepted in either case. Digits before an optional "." are
+    weighted by non-negative powers of `base`; digits after it are weighted
+    by negative powers. A single leading "-" negates the result.
+
+    Raises ValueError if a digit's value is not below `base`, or if `s`
+    contains more than one ".". Raises KeyError for a character that is not
+    a key of `digits`.
+    """
+    # The lexer lets "-" through inside {...} number literals, but "-" is not
+    # a key of `digits`; peel the sign off here so negative literals convert
+    # instead of failing with KeyError.
+    negative = s.startswith("-")
+    if negative:
+        s = s[1:]
+    has_fraction = "." in s
+    if has_fraction:
+        # Unpacking into exactly two names rejects a second "." (ValueError).
+        bef, aft = s.split(".")
+    else:
+        bef, aft = s, ""
+    ret = 0
+    # Integer part: the rightmost digit has weight base**0.
+    for power, char in enumerate(reversed(bef)):
+        integer = digits[char]
+        if integer >= base: raise ValueError
+        ret += base**power * integer
+    if has_fraction:
+        # Fractional part: the first digit after the point has weight base**-1.
+        for place, char in enumerate(aft):
+            integer = digits[char]
+            if integer >= base: raise ValueError
+            ret += base**-(place + 1) * integer
+    return -ret if negative else ret
diff --git a/states.mermaid b/states.mermaid
@@ -7,15 +7,16 @@ graph LR
     BEGIN -- "[0-9]" --> DIGIT
     BEGIN -- "{" --> MULTIDIGIT
     DIGIT --> NUMBER
-    MULTIDIGIT -- "[0-9]+" --> MULTIDIGIT
-    FLOAT -- "[0-9]*" --> FLOAT
-    MULTIDIGIT -- "." --> MULTIFLOAT
-    MULTIDIGIT -- "}" --> NUMBER
-    MULTIDIGIT -- "else" --> ERR
-    MULTIFLOAT -- "else" --> ERR
-    MULTIFLOAT -- "[0-9]" --> FLOAT
-    FLOAT -- "}" --> NUMBER
-    FLOAT -- "else" --> ERR
+    MULTIDIGIT -- "[0-9]+" --> MULTIDIGIT10
+    MULTIDIGIT -- "[0-9A-Za-z]+" --> MULTIDIGIT36
+    MULTIDIGIT10 -- "." --> MULTIFLOAT10
+    MULTIDIGIT36 -- "." --> MULTIFLOAT36
+    MULTIFLOAT10 -- "[0-9]*" --> MULTIFLOAT10
+    MULTIFLOAT36 -- "[0-9A-Za-z]*" --> MULTIFLOAT36
+    MULTIDIGIT10 -- "else" --> ERR
+    MULTIDIGIT36 -- "else" --> ERR
+    MULTIFLOAT10 & MULTIFLOAT36 -- "}" --> NUMBER
+    MULTIFLOAT10 & MULTIFLOAT36 -- "else" --> ERR
     NUMBER --> END
 
     %% Strings