Skip to content
This repository has been archived by the owner on Jun 2, 2020. It is now read-only.

Commit

Permalink
Add some gross logic to handle unwanted character consumption when pa…
Browse files Browse the repository at this point in the history
…rsing numbers, strings.
  • Loading branch information
mcon committed Feb 15, 2020
1 parent 2c32bbf commit f5efda4
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 45 deletions.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
extern crate proc_macro;
extern crate core;
extern crate itertools;

pub mod scanner;
pub mod ast;
Expand Down
2 changes: 0 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ pub fn run_file(source_file: &String) {

pub fn run(source: String) {
let mut scanner = loxrust::scanner::Scanner::new(source);
let tokens_as_string: Vec<String>;
{
let tokens = scanner.scan_tokens();
let mut parser = Parser::new(tokens);
Expand All @@ -60,6 +59,5 @@ pub fn run(source: String) {
}
Err(err) => println!("Errors in statement: {:?}", err)
}
tokens_as_string = tokens.iter().map(|x| format!("{:?}", x)).collect::<Vec<String>>();
}
}
50 changes: 49 additions & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ impl<'a> Parser<'a> {
let valid_tokens = &[TokenType::BANG, TokenType::MINUS];
// TODO: If this whole scheme works, then make consume_valid_tokens a top level function and re-use
fn consume_valid_tokens(instance : &mut Parser, valid_tokens : &[TokenType]) -> bool {
if instance.data.len() <= instance.current_position {
return false
}
let current = instance.data.index(instance.current_position);
if current.token_type.matches(valid_tokens)
&& instance.current_position != instance.data.len() {
Expand All @@ -164,6 +167,9 @@ impl<'a> Parser<'a> {

fn primary(&mut self) -> Result<Exp, String> {
fn consume_valid_tokens<'a>(instance : &mut Parser, valid_tokens : &'a [TokenType]) -> bool {
if instance.data.len() <= instance.current_position {
return false
}
let current = instance.data.index(instance.current_position);
if current.token_type.matches(valid_tokens)
&& instance.current_position != instance.data.len() {
Expand All @@ -177,7 +183,12 @@ impl<'a> Parser<'a> {
instance.current_position += 1;
}
}

// TODO: this consume_valid_tokens and position checking logic is duplicated a bunch - clean it up
// Match literals
if self.data.len() <= self.current_position {
return Err("No tokens to parse".to_string())
}
let current = self.data.index(self.current_position);
match current.token_type {
TokenType::NIL => {
Expand All @@ -203,7 +214,7 @@ impl<'a> Parser<'a> {
}
return Err("Expect ')' after expression.".to_string())
}
Err("No valid token".to_string()) // TODO: Unlear why this needs to be String not &str
Err("No valid ')' found after '('".to_string()) // TODO: Unlear why this needs to be String not &str
},
TokenType::RightParen => panic!("Did not expect to reach this branch of match statement"),
_ => Err("Didn't expect to get anything other than a literal or paren here".to_string())
Expand Down Expand Up @@ -243,4 +254,41 @@ mod tests {
Err(err) => panic!(err)
}
}

#[test]
fn parse_expresion_with_brackets()
{
    // Token stream for the source `(foobar == 2)`.
    let valid_tokens = vec![
        Token{token_type: TokenType::LeftParen, lexeme: "(".to_string(), line: 0},
        Token{token_type: TokenType::Literal(Literal::IDENTIFIER("foobar".to_string())), lexeme: "f".to_string(), line: 0},
        Token{token_type: TokenType::EqualEqual, lexeme: "==".to_string(), line: 0},
        Token{token_type: TokenType::Literal(Literal::NUMBER(i64::from(2))), lexeme: "2".to_string(), line: 0},
        Token{token_type: TokenType::RightParen, lexeme: ")".to_string(), line: 0},
    ];
    // Expected AST: a grouping node wrapping the binary expression `foobar == 2`.
    let expected_exp: Exp =
        Exp::GroupingExp(
            GroupingExp {
                exp: Box::new(
                    Exp::BinaryExp(
                        BinaryExp {
                            left: Box::new(Exp::LiteralExp(LiteralExp{ value: Literal::IDENTIFIER("foobar".to_string()) })
                            ),
                            operator: Token {
                                token_type: TokenType::EqualEqual,
                                lexeme: "==".to_string(),
                                line: 0
                            },
                            right: Box::new(Exp::LiteralExp(LiteralExp{value: Literal::NUMBER(2)}))}
                    )
                )
            }
        );
    let exp_result = Parser::new(valid_tokens.as_ref()).expression();
    match exp_result {
        Ok(exp) => {
            assert_eq!(exp, expected_exp)
        },
        // Fix: `panic!(err)` with a non-literal argument is deprecated and is a
        // hard error from the 2021 edition onward; use an explicit format string.
        Err(err) => panic!("{}", err)
    }
}
}
157 changes: 115 additions & 42 deletions src/scanner.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::str::Chars;
use std::iter::FromIterator;
use std::iter::{FromIterator, Peekable};
use std::mem::discriminant;
use itertools::{Itertools, PeekingNext};
use std::slice::Iter;


#[derive(Clone)]
Expand Down Expand Up @@ -118,7 +120,7 @@ impl Scanner {
pub fn scan_tokens(&mut self) -> &Vec<Token> {
// TODO: clone hack
let temp_copy = self.source.clone();
let mut source_iter = temp_copy.chars();
let mut source_iter = temp_copy.chars().peekable();
let mut remaining_chars = source_iter.clone().count();

// TODO: error here
Expand All @@ -132,51 +134,105 @@ impl Scanner {
return &self.tokens;
}

fn scan_token(&mut self, remaining_source: &mut Chars) -> Option<Token> {
let next_char = remaining_source.next().expect("Have asserted that char is there");
fn scan_token(&mut self, remaining_source: &mut Peekable<Chars>) -> Option<Token>
{
// TODO: Refactor this in terms of peek_next
let next_char = remaining_source.peek().expect("Have asserted that char is there").clone();
let token_match = match next_char {
'(' => Some(TokenType::LeftParen),
')' => Some(TokenType::RightParen),
'{' => Some(TokenType::LeftBrace),
'}' => Some(TokenType::RightBrace),
',' => Some(TokenType::COMMA),
'.' => Some(TokenType::DOT),
'-' => Some(TokenType::MINUS),
'+' => Some(TokenType::PLUS),
';' => Some(TokenType::SEMICOLON),
'*' => Some(TokenType::STAR),
'!' => if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::BangEqual) } else { Some(TokenType::BANG) },
'=' => if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::EqualEqual) } else { Some(TokenType::EQUAL) },
'<' => if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::LessEqual) } else { Some(TokenType::LESS) },
'>' => if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::GreaterEqual) } else { Some(TokenType::GREATER) },
'/' => if remaining_source.next().expect("Have asserted that char is there") == '/'
'(' => {
remaining_source.next();
Some(TokenType::LeftParen)
},
')' => {
remaining_source.next();
Some(TokenType::RightParen)
},
'{' => {
remaining_source.next();
Some(TokenType::LeftBrace)
},
'}' => {
remaining_source.next();
Some(TokenType::RightBrace)
},
',' => {
remaining_source.next();
Some(TokenType::COMMA)
},
'.' => {
remaining_source.next();
Some(TokenType::DOT)
},
'-' => {
remaining_source.next();
Some(TokenType::MINUS)
},
'+' => {
remaining_source.next();
Some(TokenType::PLUS)
},
';' => {
remaining_source.next();
Some(TokenType::SEMICOLON)
},
'*' => {
remaining_source.next();
Some(TokenType::STAR)
},
'!' => {
remaining_source.next();
if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::BangEqual) } else { Some(TokenType::BANG) }
},
'=' => {
remaining_source.next();
if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::EqualEqual) } else { Some(TokenType::EQUAL) }
},
'<' => {
remaining_source.next();
if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::LessEqual) } else { Some(TokenType::LESS) }
},
'>' => {
remaining_source.next();
if remaining_source.next().expect("Have asserted that char is there") == '='
{ Some(TokenType::GreaterEqual) } else { Some(TokenType::GREATER) }
},
'/' => {
remaining_source.next();
if remaining_source.next().expect("Have asserted that char is there") == '/'
{
remaining_source.skip_while(|x| *x != '\n').next();
None
} else { Some(TokenType::SLASH) }
' ' => None,
'\r' => None,
'\t' => None,
}
' ' => {
remaining_source.next();
None
},
'\r' => {
remaining_source.next();
None
},
'\t' => {
remaining_source.next();
None
},
'\n' => {
remaining_source.next();
self.line += 1;
None
}
'"' => self.scan_string(remaining_source),
'"' => {
remaining_source.next();
self.scan_string(remaining_source)
},
character @ '0'...'9' => {
let mut last_character = character.to_string();
let mut larger_string_iter = last_character.chars().chain(remaining_source);

self.scan_number(&mut larger_string_iter)
self.scan_number(remaining_source)
},
character => {
let mut last_character = character.to_string();
let mut larger_string_iter = last_character.chars().chain(remaining_source);

self.scan_keyword_or_identifier(&mut larger_string_iter)
_ => {
self.scan_keyword_or_identifier(remaining_source)
},
};
Some(Token {
Expand All @@ -186,7 +242,7 @@ impl Scanner {
})
}

fn scan_string(&mut self, remaining_source: &mut Chars) -> Option<TokenType> {
fn scan_string(&mut self, remaining_source: &mut Peekable<Chars>) -> Option<TokenType> {
let string: String;
{
let string_iter =
Expand All @@ -203,12 +259,11 @@ impl Scanner {
Some(TokenType::Literal(Literal::STRING(string)))
}

fn scan_number<I>(&mut self, remaining_source: &mut I) -> Option<TokenType>
where I: Iterator<Item=char>
fn scan_number(&mut self, remaining_source: &mut Peekable<Chars>) -> Option<TokenType>
{
{
let string_iter =
remaining_source.take_while(|x| x.is_digit(10) || x.eq(&'.'));
remaining_source.peeking_take_while(|x| x.is_digit(10) || x.eq(&'.'));
let string: String = string_iter.collect();
if string.ends_with('.') {
self.errors.push(format!("Number not permitted to end with '.' on line {}", self.line));
Expand All @@ -228,10 +283,10 @@ impl Scanner {
}

fn scan_keyword_or_identifier<I>(&mut self, remaining_source: &mut I) -> Option<TokenType>
where I: Iterator<Item=char>
where I: PeekingNext<Item=char>
{
let mut string_iter =
remaining_source.take_while(|x| x.is_alphanumeric());
remaining_source.peeking_take_while(|x| x.is_alphanumeric());
let string: String = string_iter.collect();

match string.as_str() {
Expand Down Expand Up @@ -274,4 +329,22 @@ mod tests {
];
assert_eq!(&expected, tokens);
}

#[test]
fn statement_with_brackets()
{
    // Scan a parenthesised assignment and verify the emitted token stream.
    let source = "(foobar = 2)";
    let mut scanner = Scanner::new(source.to_string());

    // The five tokens we expect back, in source order.
    let expected = vec![
        Token{token_type: TokenType::LeftParen, lexeme: "(".to_string(), line: 0},
        Token{token_type: TokenType::Literal(Literal::IDENTIFIER("foobar".to_string())), lexeme: "f".to_string(), line: 0},
        Token{token_type: TokenType::EQUAL, lexeme: "=".to_string(), line: 0},
        Token{token_type: TokenType::Literal(Literal::NUMBER(i64::from(2))), lexeme: "2".to_string(), line: 0},
        Token{token_type: TokenType::RightParen, lexeme: ")".to_string(), line: 0},
    ];

    let actual = scanner.scan_tokens();
    assert_eq!(actual, &expected);
}
}

0 comments on commit f5efda4

Please sign in to comment.