
Commit

Merge pull request #5 from Jkutkut/3-empty-line-with-content-line
Fix #3: empty line with content line
Jkutkut authored Jun 4, 2024
2 parents ae8321c + eaf05e0 commit be74d77
Showing 9 changed files with 564 additions and 489 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "osmia"
version = "1.9.3"
version = "1.10.0"
edition = "2021"
authors = ["Jkutkut"]

208 changes: 138 additions & 70 deletions src/lexer/lexer.rs
@@ -1,6 +1,7 @@
+use std::collections::VecDeque;
+
use crate::lexer::Token;
use crate::lexer::Tokenizer;
-use std::collections::LinkedList;

pub struct Lexer<'a> {
delimiter_start: &'a str,
@@ -15,95 +16,162 @@ impl<'a> Lexer<'a> {
		}
	}

	pub fn scan(&self, input: &'a str) -> Result<Vec<Token>, String> {
		let mut tokens: Vec<Token> = Vec::new();
		let input_arr: &[u8] = input.as_bytes();
		let mut i = 0;
		let mut start = 0;
		while i < input.len() {
			let current_char = input_arr[i] as char;
			if current_char == '\n' {
				Self::add_raw_token(&mut tokens, Self::cut_input(&input, start, i));
				Self::add_raw_token(&mut tokens, Some("\n".to_string()));
				start = i + 1;
				while i < input.len() {
					let c = input_arr[i] as char;
					if !c.is_whitespace() {
						break;
					}
					i += 1;
				}
				Self::add_raw_token(&mut tokens, Self::cut_input(&input, start, i));
				start = i;
				continue;
			}
			else if
				self.delimiter_start.starts_with(current_char) &&
				input[i..].starts_with(self.delimiter_start)
			{
				Self::add_raw_token(&mut tokens, Self::cut_input(&input, start, i));
				Self::add_token(&mut tokens, Some(Token::DelimiterStart));
				i += self.delimiter_start.len();
				start = i;
				let mut stack: VecDeque<char> = VecDeque::new();
				while i < input.len() - self.delimiter_end.len() {
					let next_char = input_arr[i] as char;
					if
						self.delimiter_end.starts_with(next_char) &&
						input[i..].starts_with(self.delimiter_end) &&
						stack.is_empty()
					{
						break;
					}
					let current_char = input_arr[i] as char;
					for (s, e) in [('{', '}'), ('[', ']'), ('(', ')')] {
						if current_char == s {
							stack.push_back(e);
							break;
						}
						if current_char == e {
							if let Some(stack_element) = stack.pop_back() {
								if stack_element != e {
									return Err(format!("Invalid close delimiter: {}", current_char));
								}
							}
							else {
								return Err(format!("Invalid close delimiter: {}", current_char));
							}
							break;
						}
					}
					i += 1;
				}
				if !stack.is_empty() {
					return Err(format!("{} was not closed", stack.back().unwrap()));
				}
				if i >= input.len() || !input[i..].starts_with(self.delimiter_end) {
					return Err(format!("'{}' was not closed with '{}'", self.delimiter_start, self.delimiter_end));
				}
				for token in Tokenizer::new(&input[start..i]) {
					let token = token?;
					match Token::from_str(token) {
						Some(t) => tokens.push(t),
						None => tokens.push(Token::Value(token.to_string()))
					}
				}
				tokens.push(Token::DelimiterEnd);
				start = i + self.delimiter_end.len();
				i = start;
				continue;
			}
			i += 1;
		}
		if start < input.len() {
			Self::add_raw_token(&mut tokens, Self::cut_input(&input, start, input.len()));
		}
		Self::add_token(&mut tokens, Some(Token::Eof));
		Self::clean_tokens(&mut tokens);
		Ok(tokens)
	}

	fn cut_input(input: &str, from: usize, to: usize) -> Option<String> {
		if from >= to {
			return None;
		}
		Some(input[from..to].to_string())
	}

	fn add_token(tokens: &mut Vec<Token>, token: Option<Token>) {
		if let Some(token) = token {
			tokens.push(token);
		}
	}

	fn add_raw_token(tokens: &mut Vec<Token>, token: Option<String>) {
		if let Some(token) = token {
			tokens.push(Token::Raw(token));
		}
	}

	fn clean_tokens(tokens: &mut Vec<Token>) {
		let mut i = 0;
		while i < tokens.len() - 1 {
			i += 1;
			if !Self::is_new_line_token(&tokens[i - 1]) {
				continue;
			}
			if !Self::is_whitespace_token(&tokens[i]) {
				continue;
			}
			let mut j = 1;
			let mut is_printable_token_in_row = false;
			while i + j < tokens.len() - 1 {
				if Self::is_new_line_token(&tokens[i + j]) {
					break;
				}
				else if let Token::Raw(_) = &tokens[i + j] {
					is_printable_token_in_row = true;
					break;
				}
				else if let Token::DelimiterStart = &tokens[i + j] {
					match &tokens[i + j + 1] {
						Token::Value(_) => {
							is_printable_token_in_row = true;
							break;
						},
						_ => ()
					}
				}
				j += 1;
			}
			if !is_printable_token_in_row {
				tokens.drain(i - 1..i + 1);
				i = i + j - 2;
			}
		}
	}

	fn is_new_line_token(token: &Token) -> bool {
		match token {
			Token::Raw(chunk) => chunk == "\n",
			_ => false
		}
	}

	fn is_whitespace_token(token: &Token) -> bool {
		match token {
			Token::Raw(chunk) => chunk.chars().all(|c| c.is_whitespace()),
			_ => false
		}
	}
}
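The practical effect of the new clean_tokens pass: a line whose only content is indentation plus a non-printing statement no longer leaves a blank line behind, because the Raw("\n") and whitespace tokens emitted before it are drained. A minimal sketch of that behaviour in the style of the existing lexer tests; the "{{" / "}}" delimiters and the treatment of "if" as a keyword token (rather than a Value) are assumptions here, not taken from this diff:

use crate::lexer::Lexer;
use crate::lexer::Token;

#[test]
fn statement_only_line_leaves_no_blank_line() {
	// Assumed default delimiters; adjust if the crate uses different ones.
	let lexer = Lexer::new("{{", "}}");
	// The middle line holds only indentation and a non-printing statement.
	let tokens = lexer.scan("a\n\t{{if cond}}\nb").unwrap();
	// clean_tokens drains the Raw("\n") / Raw("\t") pair emitted before the
	// statement, so no empty line survives between "a" and "b".
	assert!(!tokens.iter().any(|t| matches!(t, Token::Raw(s) if s == "\t")));
	// The Eof token is never drained and stays at the end of the stream.
	assert!(matches!(tokens.last(), Some(Token::Eof)));
}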
2 changes: 0 additions & 2 deletions src/lib.rs
@@ -13,7 +13,6 @@ use interpreter::Interpreter;
use model::Ctx;
use parser::Parser;
use lexer::Lexer;
-use lexer::Token;
use model::Stmt;

/// This simple API is designed to use the osmia template engine.
@@ -214,7 +213,6 @@ impl Osmia {
pub fn custom_code(code: &str, start_delimiter: &str, end_delimiter: &str) -> Result<Stmt, String> {
let lexer = Lexer::new(start_delimiter, end_delimiter);
let tokens = lexer.scan(code)?;
-let tokens = tokens.iter().map(|t| t.clone()).collect::<Vec<Token>>();
Parser::new(tokens).parse()
}
}
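Since scan() now returns Vec<Token> directly, custom_code can hand the tokens straight to Parser::new without the removed collect step. A minimal usage sketch of this entry point; the custom_code signature is taken from this file, but the osmia::Osmia path, the template syntax, and a Debug impl on Stmt are assumptions:

use osmia::Osmia;

fn main() -> Result<(), String> {
	// Lex and parse a template that uses custom delimiters.
	let stmt = Osmia::custom_code("Hello <% name %>!", "<%", "%>")?;
	// Assumes Stmt implements Debug; prints the parsed statement tree.
	println!("{:?}", stmt);
	Ok(())
}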
2 changes: 1 addition & 1 deletion src/model/ctx.rs
@@ -51,7 +51,7 @@ impl Ctx {
JsonTree::Float(f) => Ok(Literal::Float(*f)),
JsonTree::Null => Ok(Literal::Null),
JsonTree::Bool(b) => Ok(Literal::Bool(*b)),
_ => Err("Cannot get a array or object".to_string())
_ => Err("Cannot get an array or object as a literal".to_string())
}
}

4 changes: 1 addition & 3 deletions src/tests/lexer/lexer.rs
@@ -1,7 +1,6 @@
use crate::lexer::Token;
use crate::lexer::Lexer;

-use std::collections::LinkedList;
use crate::macro_tests;

#[cfg(test)]
@@ -11,14 +10,13 @@ fn lexer() -> Lexer<'static> {

#[cfg(test)]
fn check_result(
-real: Result<LinkedList<Token>, String>,
+real: Result<Vec<Token>, String>,
expected: Vec<Token>
) {
if let Err(err) = real {
panic!("Error: {}", err);
}
let real = real.unwrap();
-let real = real.into_iter().collect::<Vec<Token>>();
println!("real : {:?}", real);
println!("expected: {:?}", expected);
for (real, expected) in real.iter().zip(expected.iter()) {
2 changes: 2 additions & 0 deletions src/tests/mod.rs
@@ -5,3 +5,5 @@ mod model;
mod lexer;

mod syntax_tree_printer;

+mod tests;
(The remaining changed files are not rendered here.)

0 comments on commit be74d77
