From 0201b6107990eb43f8f33df0690a2cf5324fbf13 Mon Sep 17 00:00:00 2001 From: Florin Iucha Date: Sat, 16 May 2020 12:23:48 -0400 Subject: [PATCH] [Core] Synchronize lexer output with C++ implementation --- README.md | 5 ++ build.gradle | 3 +- src/main/antlr/SamXLexer.g4 | 2 +- src/main/java/net/signbit/samx/RawTokens.java | 63 +++++++++++++++++++ 4 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 src/main/java/net/signbit/samx/RawTokens.java diff --git a/README.md b/README.md index 8f1153a..4de24f1 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,14 @@ Semantic Authoring Markdown Implementation of a SAM toolset in Java using an ANTLRv4 Grammar. +For a C++ implementation please see [SamX-C++](https://github.com/0x8000-0000/samx-cpp) + + Please see [SAM Documentation](https://mbakeranalecta.github.io/sam/) for an overview and scope of SAM. Please refer to https://github.com/mbakeranalecta/sam for the original implementation. + Dependencies ------------ @@ -82,6 +86,7 @@ License for SAM [Original SAM code is available](https://github.com/mbakeranalecta/sam/blob/master/license.txt) under Apache 2.0 license or Eclipse Public License v1.0. 
+ License for SAMx ---------------- diff --git a/build.gradle b/build.gradle index f6a09c4..f8ccc74 100644 --- a/build.gradle +++ b/build.gradle @@ -7,7 +7,7 @@ plugins { } group 'net.signbit.samx' -version '0.4.7' +version '0.4.8' sourceCompatibility = 1.8 @@ -53,6 +53,7 @@ task createAllStartScripts() { } def scripts = ['tokenize' : 'net.signbit.samx.Tokenize', + 'raw_tokens' : 'net.signbit.samx.RawTokens', 'to_xml' : 'net.signbit.samx.ConvertToXml', 'to_html' : 'net.signbit.samx.ConvertToHtml', 'pretty_print' : 'net.signbit.samx.PrettyPrint', diff --git a/src/main/antlr/SamXLexer.g4 b/src/main/antlr/SamXLexer.g4 index b557749..1d24f7f 100644 --- a/src/main/antlr/SamXLexer.g4 +++ b/src/main/antlr/SamXLexer.g4 @@ -152,7 +152,7 @@ tokens { INDENT, DEDENT, END, INVALID, BOL } private void addCodeIndent(int indentLevel) { java.lang.StringBuilder builder = new java.lang.StringBuilder(indentLevel + 1); - for (int ii = 0; ii < indentLevel; ++ii) + for (int ii = 0; ii <= indentLevel; ++ii) { builder.append(' '); } diff --git a/src/main/java/net/signbit/samx/RawTokens.java b/src/main/java/net/signbit/samx/RawTokens.java new file mode 100644 index 0000000..2f31629 --- /dev/null +++ b/src/main/java/net/signbit/samx/RawTokens.java @@ -0,0 +1,63 @@ +/* + Copyright 2020 Florin Iucha + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/
+
+package net.signbit.samx;
+
+import java.io.IOException;
+
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Token;
+
+import net.signbit.samx.parser.SamXLexer;
+import net.signbit.samx.parser.SamXParser; // NOTE(review): not referenced anywhere in this class
+/** Command-line tool: runs SamXLexer over the file named by args[0] and prints every resulting token, one per line, to standard output. */
+public final class RawTokens
+{
+   public static void main(String[] args)
+   {
+      if (args.length < 1)
+      {
+         System.err.println("No arguments provided");
+         return; // nothing to tokenize; main returns normally after reporting on stderr
+      }
+      // Only args[0] is used; any additional arguments are ignored.
+      try
+      {
+         CharStream input = CharStreams.fromFileName(args[0]);
+         // Only the lexer is run; no SamXParser is created, so the input is tokenized but never parsed.
+         SamXLexer lexer = new SamXLexer(input);
+         // Buffer the lexer output in a token stream so it can be read back as a list.
+         CommonTokenStream tokens = new CommonTokenStream(lexer);
+         // fill() reads from the lexer until end of input, so getTokens() below sees the whole file.
+         tokens.fill();
+         // Print each token on its own line using the token's toString() rendering.
+         for (Token tok : tokens.getTokens())
+         {
+            System.out.println(tok);
+         }
+      }
+      catch (IOException ioe) // declared by CharStreams.fromFileName, e.g. when the file cannot be read
+      {
+         System.err.println("Caught i/o exception: " + ioe.getMessage());
+      }
+      catch (Exception ee) // any other failure in the try block; only the message is printed, not the stack trace
+      {
+         System.err.println("Caught exception: " + ee.getMessage());
+      }
+   }
+}