UPDATE: Dump of initial files

This commit is contained in:
Nicolás Hatcher
2023-11-18 21:26:18 +01:00
commit c5b8efd83d
279 changed files with 42654 additions and 0 deletions

View File

@@ -0,0 +1,762 @@
//! A tokenizer for spreadsheet formulas.
//!
//! This is meant to feed a formula parser.
//!
//! You will need to instantiate it with a language and a locale.
//!
//! It supports two working modes:
//!
//! 1. A1 or display mode
//! This is for user formulas. References are like `D4`, `D$4` or `F5:T10`
//! 2. R1C1, internal or runtime mode
//! A reference like R1C1 refers to $A$1 and R3C4 to $D$4
//! R[2]C[5] refers to a cell two rows below and five columns to the right
//! It uses the 'en' locale and language.
//! This is used internally at runtime.
//!
//! Formulas look different in different locales:
//!
//! =IF(A1, B1, NA()) versus =IF(A1; B1; NA())
//!
//! Also numbers are different:
//!
//! 1,123.45 versus 1.123,45
//!
//! The names of the errors and functions are different in different languages,
//! but they stay the same in different locales.
//!
//! Note that in IronCalc, even if you are using a locale or a language different from 'en',
//! you will still need the 'en' locale and language, because formulas are stored in that language and locale.
//!
//! # Examples:
//! ```
//! use ironcalc_base::expressions::lexer::{Lexer, LexerMode};
//! use ironcalc_base::expressions::token::{TokenType, OpCompare};
//! use ironcalc_base::locale::get_locale;
//! use ironcalc_base::language::get_language;
//!
//! let locale = get_locale("en").unwrap();
//! let language = get_language("en").unwrap();
//! let mut lexer = Lexer::new("=A1*SUM(Sheet2!C3:D5)", LexerMode::A1, &locale, &language);
//! assert_eq!(lexer.next_token(), TokenType::Compare(OpCompare::Equal));
//! assert!(matches!(lexer.next_token(), TokenType::Reference { .. }));
//! ```
use crate::expressions::token::{OpCompare, OpProduct, OpSum};
use crate::language::Language;
use crate::locale::Locale;
use super::token::{index, Error, TokenType};
use super::types::*;
use super::utils;
pub mod util;
#[cfg(test)]
mod test;
mod ranges;
mod structured_references;
/// Error produced by the lexer when the input cannot be tokenized.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexerError {
/// Position reported by the code that raised the error
/// (an index into the formula's characters, not a byte offset).
pub position: usize,
/// Human readable description of the problem.
pub message: String,
}
/// Result alias used by the lexer internals: the error type is always [`LexerError`].
pub(super) type Result<T> = std::result::Result<T, LexerError>;
/// Reference notation understood by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexerMode {
    /// Display mode: references look like `D4`, `D$4` or `F5:T10`.
    A1,
    /// Internal/runtime mode: references look like `R1C1` or `R[2]C[5]`.
    R1C1,
}
/// Tokenizes a formula, producing one token per call to `next_token`.
#[derive(Clone)]
pub struct Lexer {
// Index (into `chars`) of the next character to be read.
position: usize,
// Position right after the token returned by the last `peek_token`, if any.
// Consumed by `advance_token`.
next_token_position: Option<usize>,
// Number of characters in `chars`.
len: usize,
// The formula, split into characters so that positions are character indices.
chars: Vec<char>,
// Reference notation being parsed (A1 or R1C1).
mode: LexerMode,
// Locale; its decimal symbol is used when reading numbers and deciding what ',' and '.' mean.
locale: Locale,
// Language; provides the names of booleans and errors.
language: Language,
}
impl Lexer {
/// Creates a new `Lexer` that returns the tokens of a formula.
pub fn new(formula: &str, mode: LexerMode, locale: &Locale, language: &Language) -> Lexer {
let chars: Vec<char> = formula.chars().collect();
let len = chars.len();
Lexer {
chars,
position: 0,
next_token_position: None,
len,
mode,
locale: locale.clone(),
language: language.clone(),
}
}
/// Changes the lexer mode.
///
/// Only the mode is replaced; the formula and the current position are left untouched.
pub fn set_lexer_mode(&mut self, mode: LexerMode) {
self.mode = mode;
}
// FIXME: I don't think we should have `is_a1_mode` and `get_formula`.
// The caller already knows those two
/// Tells whether the lexer is currently working in A1 mode.
pub fn is_a1_mode(&self) -> bool {
    matches!(self.mode, LexerMode::A1)
}
/// Returns a copy of the formula being tokenized, rebuilt from the stored characters.
pub fn get_formula(&self) -> String {
    let mut formula = String::with_capacity(self.len);
    formula.extend(self.chars.iter());
    formula
}
// FIXME: This is used to get the "marked tokens"
// I think a better API would be to return the marked tokens
/// Returns the current position of the lexer (a character index) as an `i32`.
pub fn get_position(&self) -> i32 {
    let current = self.position;
    current as i32
}
/// Replaces the formula and rewinds the lexer to its start.
///
/// Any pending peeked token is forgotten. The mode, locale and language are kept.
pub fn set_formula(&mut self, content: &str) {
    let fresh: Vec<char> = content.chars().collect();
    self.len = fresh.len();
    self.chars = fresh;
    self.next_token_position = None;
    self.position = 0;
}
/// Reads the next token and checks that it is of the same kind as `tk`.
///
/// Tokens are compared through `index`, not by equality.
/// On mismatch the returned error carries the position reached after reading the token.
pub fn expect(&mut self, tk: TokenType) -> Result<()> {
    let found = self.next_token();
    if index(&found) == index(&tk) {
        return Ok(());
    }
    let message = format!("Error, expected {}", tk);
    let at = self.position;
    Err(self.set_error(&message, at))
}
/// Returns the next token but leaves the lexer where it was.
///
/// The position right after the peeked token is remembered so that
/// [advance_token](Self::advance_token) can jump there without lexing again.
pub fn peek_token(&mut self) -> TokenType {
    let start = self.position;
    let token = self.next_token();
    // Swap the original position back in, keeping the position after the token
    let after = std::mem::replace(&mut self.position, start);
    self.next_token_position = Some(after);
    token
}
/// Moves past the token returned by the last [`peek_token`](Self::peek_token).
/// It is a noop if there has not been a previous `peek_token`.
pub fn advance_token(&mut self) {
    // `take` clears the remembered position so a second call does nothing
    if let Some(after) = self.next_token_position.take() {
        self.position = after;
    }
}
/// Returns the next token
pub fn next_token(&mut self) -> TokenType {
self.next_token_position = None;
self.consume_whitespace();
match self.read_next_char() {
Some(char) => {
match char {
'+' => TokenType::Addition(OpSum::Add),
'-' => TokenType::Addition(OpSum::Minus),
'*' => TokenType::Product(OpProduct::Times),
'/' => TokenType::Product(OpProduct::Divide),
'(' => TokenType::LeftParenthesis,
')' => TokenType::RightParenthesis,
'=' => TokenType::Compare(OpCompare::Equal),
'{' => TokenType::LeftBrace,
'}' => TokenType::RightBrace,
'[' => TokenType::LeftBracket,
']' => TokenType::RightBracket,
':' => TokenType::Colon,
';' => TokenType::Semicolon,
',' => {
if self.locale.numbers.symbols.decimal == "," {
match self.consume_number(',') {
Ok(number) => TokenType::Number(number),
Err(error) => TokenType::Illegal(error),
}
} else {
TokenType::Comma
}
}
'.' => {
if self.locale.numbers.symbols.decimal == "." {
match self.consume_number('.') {
Ok(number) => TokenType::Number(number),
Err(error) => TokenType::Illegal(error),
}
} else {
// There is no TokenType::PERIOD
TokenType::Illegal(self.set_error("Expecting a number", self.position))
}
}
'!' => TokenType::Bang,
'^' => TokenType::Power,
'%' => TokenType::Percent,
'&' => TokenType::And,
'$' => self.consume_absolute_reference(),
'<' => {
let next_token = self.peek_char();
if next_token == Some('=') {
self.position += 1;
TokenType::Compare(OpCompare::LessOrEqualThan)
} else if next_token == Some('>') {
self.position += 1;
TokenType::Compare(OpCompare::NonEqual)
} else {
TokenType::Compare(OpCompare::LessThan)
}
}
'>' => {
if self.peek_char() == Some('=') {
self.position += 1;
TokenType::Compare(OpCompare::GreaterOrEqualThan)
} else {
TokenType::Compare(OpCompare::GreaterThan)
}
}
'#' => self.consume_error(),
'"' => TokenType::String(self.consume_string()),
'\'' => self.consume_quoted_sheet_reference(),
'0'..='9' => {
let position = self.position - 1;
match self.consume_number(char) {
Ok(number) => {
if self.peek_token() == TokenType::Colon
&& self.mode == LexerMode::A1
{
// Its a row range 3:5
// FIXME: There are faster ways of parsing this
// Like checking that 'number' is integer and that the next token is integer
self.position = position;
match self.consume_range_a1() {
Ok(ParsedRange { left, right }) => {
if let Some(right) = right {
TokenType::Range {
sheet: None,
left,
right,
}
} else {
TokenType::Illegal(
self.set_error("Expecting row range", position),
)
}
}
Err(error) => {
// Examples:
// * 'Sheet 1'!3.4:5
// * 'Sheet 1'!3:A2
// * 'Sheet 1'!3:
TokenType::Illegal(error)
}
}
} else {
TokenType::Number(number)
}
}
Err(error) => {
// tried to read a number but failed
self.position = self.len;
TokenType::Illegal(error)
}
}
}
_ => {
if char.is_alphabetic() || char == '_' {
// At this point is one of the following:
// 1. A range with sheet: Sheet3!A3:D7
// 2. A boolean: TRUE or FALSE (dependent on the language)
// 3. A reference like WS34 or R3C5
// 4. A range without sheet ER4:ER7
// 5. A column range E:E
// 6. An identifier like a function name or a defined name
// 7. A range operator A1:OFFSET(...)
// 8. An Invalid token
let position = self.position;
self.position -= 1;
let name = self.consume_identifier();
let position_indent = self.position;
let peek_char = self.peek_char();
let next_char_is_colon = self.peek_char() == Some(':');
if peek_char == Some('!') {
// reference
self.position += 1;
return self.consume_range(Some(name));
} else if peek_char == Some('$') {
self.position = position - 1;
return self.consume_range(None);
}
let name_upper = name.to_ascii_uppercase();
if name_upper == self.language.booleans.true_value {
return TokenType::Boolean(true);
} else if name_upper == self.language.booleans.false_value {
return TokenType::Boolean(false);
}
if self.mode == LexerMode::A1 {
let parsed_reference = utils::parse_reference_a1(&name_upper);
if parsed_reference.is_some()
|| (utils::is_valid_column(name_upper.trim_start_matches('$'))
&& next_char_is_colon)
{
self.position = position - 1;
match self.consume_range_a1() {
Ok(ParsedRange { left, right }) => {
if let Some(right) = right {
return TokenType::Range {
sheet: None,
left,
right,
};
} else {
return TokenType::Reference {
sheet: None,
column: left.column,
row: left.row,
absolute_row: left.absolute_row,
absolute_column: left.absolute_column,
};
}
}
Err(error) => {
// This could be the range operator: ":"
if let Some(r) = parsed_reference {
if next_char_is_colon {
self.position = position_indent;
return TokenType::Reference {
sheet: None,
row: r.row,
column: r.column,
absolute_column: r.absolute_column,
absolute_row: r.absolute_row,
};
}
}
self.position = self.len;
return TokenType::Illegal(error);
}
}
} else if utils::is_valid_identifier(&name) {
if peek_char == Some('[') {
if let Ok(r) = self.consume_structured_reference(&name) {
return r;
}
return TokenType::Illegal(self.set_error(
"Invalid structured reference",
self.position,
));
}
return TokenType::Ident(name);
} else {
return TokenType::Illegal(
self.set_error("Invalid identifier (A1)", self.position),
);
}
} else {
let pos = self.position;
self.position = position - 1;
match self.consume_range_r1c1() {
// it's a valid R1C1 range
// We need to check it's not something like R1C1P
Ok(ParsedRange { left, right }) => {
if pos > self.position {
self.position = pos;
if utils::is_valid_identifier(&name) {
return TokenType::Ident(name);
} else {
self.position = self.len;
return TokenType::Illegal(
self.set_error(
"Invalid identifier (R1C1)",
pos,
),
);
}
}
if let Some(right) = right {
return TokenType::Range {
sheet: None,
left,
right,
};
} else {
return TokenType::Reference {
sheet: None,
column: left.column,
row: left.row,
absolute_row: left.absolute_row,
absolute_column: left.absolute_column,
};
}
}
Err(error) => {
self.position = position - 1;
if let Ok(r) = self.consume_reference_r1c1() {
if self.peek_char() == Some(':') {
return TokenType::Reference {
sheet: None,
row: r.row,
column: r.column,
absolute_column: r.absolute_column,
absolute_row: r.absolute_row,
};
}
}
self.position = pos;
if utils::is_valid_identifier(&name) {
return TokenType::Ident(name);
} else {
return TokenType::Illegal(self.set_error(
&format!("Invalid identifier (R1C1): {name}"),
error.position,
));
}
}
}
}
}
TokenType::Illegal(self.set_error("Unknown error", self.position))
}
}
}
None => TokenType::EOF,
}
}
// Private methods
fn set_error(&mut self, message: &str, position: usize) -> LexerError {
self.position = self.len;
LexerError {
position,
message: message.to_string(),
}
}
// Returns the character at the current position without consuming it,
// or `None` at the end of the input.
fn peek_char(&mut self) -> Option<char> {
    let at = self.position;
    (at < self.len).then(|| self.chars[at])
}
// Consumes one character, which must be `ch_expected`.
// Returns an error (moving the lexer to the end of the input) if the input is
// exhausted or the character is a different one.
fn expect_char(&mut self, ch_expected: char) -> Result<()> {
    let at = self.position;
    if at >= self.len {
        let message = format!("Error, expected {} found EOF", &ch_expected);
        return Err(self.set_error(&message, at));
    }
    let found = self.chars[at];
    if found != ch_expected {
        let message = format!("Error, expected {} found {}", &ch_expected, &found);
        return Err(self.set_error(&message, at));
    }
    self.position = at + 1;
    Ok(())
}
// Consumes and returns the character at the current position,
// or returns `None` (without moving) at the end of the input.
fn read_next_char(&mut self) -> Option<char> {
    let at = self.position;
    if at >= self.len {
        return None;
    }
    self.position = at + 1;
    Some(self.chars[at])
}
// Reads an integer whose first character, `first`, has already been consumed.
// Every following ASCII digit is appended and the result is parsed as an `i32`.
// On failure the digits read so far stay consumed.
fn consume_integer(&mut self, first: char) -> Result<i32> {
    let mut digits = String::from(first);
    let mut cursor = self.position;
    while cursor < self.len && self.chars[cursor].is_ascii_digit() {
        digits.push(self.chars[cursor]);
        cursor += 1;
    }
    self.position = cursor;
    match digits.parse::<i32>() {
        Ok(value) => Ok(value),
        Err(_) => Err(LexerError {
            position: cursor,
            message: format!("Failed to parse to int: {}", digits),
        }),
    }
}
// Consumes a number in the current locale.
// `first` is the first character of the number, already consumed by the caller:
// either a digit or the locale's decimal separator (for numbers like `.5` or `,5`).
//
// It only takes into account the decimal separator
// Note that we do not parse the thousands separator
// Let's say ',' is the thousands separator. Then 1,234 would be an error.
// This is ok for most cases:
// =IF(A1=1,234, TRUE, FALSE) will not work
// If a user introduces a single number in the cell 1,234 we should be able to parse
// and format the cell appropriately
//
// Returns an error (moving the lexer to the end of the input) if the characters read
// do not form a valid number.
fn consume_number(&mut self, first: char) -> Result<f64> {
    let mut position = self.position;
    let len = self.len;
    // The text is parsed with `str::parse::<f64>`, which only understands '.' as the
    // decimal separator. If the number starts with the locale separator (e.g. ",5")
    // it must be normalized, otherwise parsing always fails.
    let mut chars = if first.to_string() == self.locale.numbers.symbols.decimal {
        ".".to_string()
    } else {
        first.to_string()
    };
    // numbers before the decimal point
    while position < len {
        let x = self.chars[position];
        if x.is_ascii_digit() {
            chars.push(x);
        } else {
            break;
        }
        position += 1;
    }
    if position < len && self.chars[position].to_string() == self.locale.numbers.symbols.decimal
    {
        // numbers after the decimal point
        chars.push('.');
        position += 1;
        while position < len {
            let x = self.chars[position];
            if x.is_ascii_digit() {
                chars.push(x);
            } else {
                break;
            }
            position += 1;
        }
    }
    if position + 1 < len && (self.chars[position] == 'e' || self.chars[position] == 'E') {
        // exponential side
        let x = self.chars[position + 1];
        if x == '-' || x == '+' || x.is_ascii_digit() {
            chars.push('e');
            chars.push(x);
            position += 2;
            while position < len {
                let x = self.chars[position];
                if x.is_ascii_digit() {
                    chars.push(x);
                } else {
                    break;
                }
                position += 1;
            }
        }
    }
    self.position = position;
    match chars.parse::<f64>() {
        Err(_) => {
            Err(self.set_error(&format!("Failed to parse to double: {}", chars), position))
        }
        Ok(v) => Ok(v),
    }
}
// Reads an identifier starting at the current position.
// An identifier is the longest run of alphanumeric characters, '_' and '.'.
fn consume_identifier(&mut self) -> String {
    let start = self.position;
    let mut end = start;
    while end < self.len {
        let c = self.chars[end];
        let belongs = c == '_' || c == '.' || c.is_alphanumeric();
        if !belongs {
            break;
        }
        end += 1;
    }
    let name = self.chars[start..end].iter().collect();
    self.position = end;
    name
}
// Reads the contents of a double-quoted string; the opening quote has already been consumed.
// Reading stops after the closing quote or at the end of the input.
// A doubled quote ("") does not end the string and is kept as two quotes in the result.
fn consume_string(&mut self) -> String {
    let end = self.len;
    let mut cursor = self.position;
    let mut text = String::new();
    while cursor < end {
        let current = self.chars[cursor];
        cursor += 1;
        if current != '"' {
            text.push(current);
            continue;
        }
        let doubled = cursor < end && self.chars[cursor] == '"';
        if !doubled {
            // This was the closing quote
            break;
        }
        text.push('"');
        text.push('"');
        cursor += 1;
    }
    self.position = cursor;
    text
}
// Reads a single-quoted string; the opening quote has already been consumed:
//    'This is a quoted string'
//    ' Also is a ''quoted'' string'
// A doubled quote ('') stands for one quote in the result.
// Returns an error if the closing quote is missing.
fn consume_single_quote_string(&mut self) -> Result<String> {
    let start = self.position;
    let end = self.len;
    let mut cursor = start;
    let mut has_doubled_quote = false;
    let mut closed = false;
    while cursor < end {
        let current = self.chars[cursor];
        cursor += 1;
        if current != '\'' {
            continue;
        }
        if cursor < end && self.chars[cursor] == '\'' {
            // In Excel we escape "'" with "''"
            has_doubled_quote = true;
            cursor += 1;
            continue;
        }
        closed = true;
        break;
    }
    if !closed {
        // We reached the end without the closing quote
        return Err(self.set_error("Expected closing \"'\" but found end of input", cursor));
    }
    // `cursor` is one past the closing quote, which is not part of the content
    let content: String = self.chars[start..cursor - 1].iter().collect();
    self.position = cursor;
    if has_doubled_quote {
        // Most strings have no escaped quotes, so we only pay for this when needed
        Ok(content.replace("''", "'"))
    } else {
        Ok(content)
    }
}
// Reads an error from the input stream
fn consume_error(&mut self) -> TokenType {
let errors = &self.language.errors;
let rest_of_formula: String = self.chars[self.position - 1..self.len].iter().collect();
if rest_of_formula.starts_with(&errors.ref_value) {
self.position += errors.ref_value.chars().count() - 1;
return TokenType::Error(Error::REF);
} else if rest_of_formula.starts_with(&errors.name) {
self.position += errors.name.chars().count() - 1;
return TokenType::Error(Error::NAME);
} else if rest_of_formula.starts_with(&errors.value) {
self.position += errors.value.chars().count() - 1;
return TokenType::Error(Error::VALUE);
} else if rest_of_formula.starts_with(&errors.div) {
self.position += errors.div.chars().count() - 1;
return TokenType::Error(Error::DIV);
} else if rest_of_formula.starts_with(&errors.na) {
self.position += errors.na.chars().count() - 1;
return TokenType::Error(Error::NA);
} else if rest_of_formula.starts_with(&errors.num) {
self.position += errors.num.chars().count() - 1;
return TokenType::Error(Error::NUM);
} else if rest_of_formula.starts_with(&errors.error) {
self.position += errors.error.chars().count() - 1;
return TokenType::Error(Error::ERROR);
} else if rest_of_formula.starts_with(&errors.nimpl) {
self.position += errors.nimpl.chars().count() - 1;
return TokenType::Error(Error::NIMPL);
} else if rest_of_formula.starts_with(&errors.spill) {
self.position += errors.spill.chars().count() - 1;
return TokenType::Error(Error::SPILL);
} else if rest_of_formula.starts_with(&errors.calc) {
self.position += errors.calc.chars().count() - 1;
return TokenType::Error(Error::CALC);
} else if rest_of_formula.starts_with(&errors.null) {
self.position += errors.null.chars().count() - 1;
return TokenType::Error(Error::NULL);
} else if rest_of_formula.starts_with(&errors.circ) {
self.position += errors.circ.chars().count() - 1;
return TokenType::Error(Error::CIRC);
}
TokenType::Illegal(self.set_error("Invalid error.", self.position))
}
// Skips whitespace, leaving the lexer at the first non-whitespace character
// (or at the end of the input).
fn consume_whitespace(&mut self) {
    let mut cursor = self.position;
    while cursor < self.len && self.chars[cursor].is_whitespace() {
        cursor += 1;
    }
    self.position = cursor;
}
fn consume_absolute_reference(&mut self) -> TokenType {
// This is an absolute reference.
// $A$4
if self.mode == LexerMode::R1C1 {
return TokenType::Illegal(
self.set_error("Cannot parse A1 reference in R1C1 mode", self.position),
);
}
self.position -= 1;
self.consume_range(None)
}
fn consume_quoted_sheet_reference(&mut self) -> TokenType {
// This is a reference:
// 'First Sheet'!A34
let sheet_name = match self.consume_single_quote_string() {
Ok(v) => v,
Err(error) => {
return TokenType::Illegal(error);
}
};
if self.next_token() != TokenType::Bang {
return TokenType::Illegal(self.set_error("Expected '!'", self.position));
}
self.consume_range(Some(sheet_name))
}
fn consume_range(&mut self, sheet: Option<String>) -> TokenType {
let m = if self.mode == LexerMode::A1 {
self.consume_range_a1()
} else {
self.consume_range_r1c1()
};
match m {
Ok(ParsedRange { left, right }) => {
if let Some(right) = right {
TokenType::Range { sheet, left, right }
} else {
TokenType::Reference {
sheet,
column: left.column,
row: left.row,
absolute_row: left.absolute_row,
absolute_column: left.absolute_column,
}
}
}
Err(error) => TokenType::Illegal(error),
}
}
}

View File

@@ -0,0 +1,319 @@
use crate::constants::{LAST_COLUMN, LAST_ROW};
use crate::expressions::{token::TokenType, utils::column_to_number};
use super::Lexer;
use super::{ParsedRange, ParsedReference, Result};
impl Lexer {
// Reads a single cell reference in A1 style, for instance:
//    AS23, $AS23, AS$23, $AS$23, R12
// The shape is: optional '$', column letters (any case), optional '$', row digits.
// Returns an error (moving the lexer to the end of the input) if the column is missing or
// invalid, if the row is missing or cannot be parsed, or if the row is larger than LAST_ROW.
fn consume_reference_a1(&mut self) -> Result<ParsedReference> {
    let end = self.len;
    let mut cursor = self.position;

    let absolute_column = cursor < end && self.chars[cursor] == '$';
    if absolute_column {
        cursor += 1;
    }

    let mut column_name = String::new();
    while cursor < end {
        let letter = self.chars[cursor].to_ascii_uppercase();
        if !letter.is_ascii_uppercase() {
            break;
        }
        column_name.push(letter);
        cursor += 1;
    }
    if column_name.is_empty() {
        return Err(self.set_error("Failed to parse reference", cursor));
    }

    let absolute_row = cursor < end && self.chars[cursor] == '$';
    if absolute_row {
        cursor += 1;
    }

    // Note that row numbers could start with 0
    let mut row_digits = String::new();
    while cursor < end && self.chars[cursor].is_ascii_digit() {
        row_digits.push(self.chars[cursor]);
        cursor += 1;
    }
    self.position = cursor;

    let column = match column_to_number(&column_name) {
        Ok(number) => number,
        Err(error) => return Err(self.set_error(&error, cursor)),
    };
    let row = match row_digits.parse::<i32>() {
        Ok(number) => number,
        Err(..) => return Err(self.set_error("Failed to parse integer", cursor)),
    };
    if row > LAST_ROW {
        return Err(self.set_error("Row too large in reference", cursor));
    }
    Ok(ParsedReference {
        column,
        row,
        absolute_column,
        absolute_row,
    })
}
// Parsing a range is a parser in its own right. Here is the grammar:
//
// range -> cell | cell ':' cell | row ':' row | column ':' column
// cell -> column row
// column -> '$' column_name | column_name
// row -> '$' row_name | row_name
// column_name -> 'A'..'XFD'
// row_name -> 1..1_048_576
//
// Returns a `ParsedRange` whose `right` is `None` for a single cell.
// Row ranges span columns 1..=LAST_COLUMN and column ranges span rows 1..=LAST_ROW;
// the spanned side is marked as absolute.
// Errors are created with `set_error`, so the lexer ends at the end of the input.
pub(super) fn consume_range_a1(&mut self) -> Result<ParsedRange> {
// first let's try to parse a cell
let mut position = self.position;
match self.consume_reference_a1() {
Ok(cell) => {
if self.peek_char() == Some(':') {
// It's a range
self.position += 1;
if let Ok(cell2) = self.consume_reference_a1() {
Ok(ParsedRange {
left: cell,
right: Some(cell2),
})
} else {
Err(self.set_error("Expecting reference in range", self.position))
}
} else {
// just a reference
Ok(ParsedRange {
left: cell,
right: None,
})
}
}
Err(_) => {
// The failed attempt moved the lexer (set_error goes to the end of the input): rewind
self.position = position;
// It's either a row range or a column range (or not a range at all)
let len = self.len;
let mut absolute_left = false;
if position < len && self.chars[position] == '$' {
absolute_left = true;
position += 1;
}
// Letters and digits of the left side are collected separately;
// a valid row/column range has only one of the two kinds.
let mut column_left = "".to_string();
let mut row_left = "".to_string();
while position < len {
let x = self.chars[position].to_ascii_uppercase();
match x {
'A'..='Z' => column_left.push(x),
'0'..='9' => row_left.push(x),
_ => break,
}
position += 1;
}
if position >= len || self.chars[position] != ':' {
return Err(self.set_error("Expecting reference in range", self.position));
}
// Skip the ':'
position += 1;
let mut absolute_right = false;
if position < len && self.chars[position] == '$' {
absolute_right = true;
position += 1;
}
let mut column_right = "".to_string();
let mut row_right = "".to_string();
while position < len {
let x = self.chars[position].to_ascii_uppercase();
match x {
'A'..='Z' => column_right.push(x),
'0'..='9' => row_right.push(x),
_ => break,
}
position += 1;
}
self.position = position;
// At this point either the columns are the empty string or the rows are the empty string
if !row_left.is_empty() {
// It is a row range 23:56
if row_right.is_empty() || !column_left.is_empty() || !column_right.is_empty() {
return Err(self.set_error("Error parsing Range", position));
}
// Note that row numbers can start with 0
let row_left = match row_left.parse::<i32>() {
Ok(n) => n,
Err(_) => {
return Err(self
.set_error(&format!("Failed parsing row {}", row_left), position))
}
};
let row_right = match row_right.parse::<i32>() {
Ok(n) => n,
Err(_) => {
return Err(self
.set_error(&format!("Failed parsing row {}", row_right), position))
}
};
if row_left > LAST_ROW {
return Err(self.set_error("Row too large in reference", position));
}
if row_right > LAST_ROW {
return Err(self.set_error("Row too large in reference", position));
}
// A row range covers every column, from the first to the last one
return Ok(ParsedRange {
left: ParsedReference {
row: row_left,
absolute_row: absolute_left,
column: 1,
absolute_column: true,
},
right: Some(ParsedReference {
row: row_right,
absolute_row: absolute_right,
column: LAST_COLUMN,
absolute_column: true,
}),
});
}
// It is a column range
if column_right.is_empty() || !row_right.is_empty() {
return Err(self.set_error("Error parsing Range", position));
}
let column_left = column_to_number(&column_left)
.map_err(|error| self.set_error(&error, position))?;
let column_right = column_to_number(&column_right)
.map_err(|error| self.set_error(&error, position))?;
// A column range covers every row, from the first to the last one
Ok(ParsedRange {
left: ParsedReference {
row: 1,
absolute_row: true,
column: column_left,
absolute_column: absolute_left,
},
right: Some(ParsedReference {
row: LAST_ROW,
absolute_row: true,
column: column_right,
absolute_column: absolute_right,
}),
})
}
}
}
// Reads a reference or a range in R1C1 style:
//    range -> cell | cell ':' cell
// Returns a `ParsedRange` whose `right` is `None` for a single cell.
pub(super) fn consume_range_r1c1(&mut self) -> Result<ParsedRange> {
    // first let's try to parse a cell
    let left = self.consume_reference_r1c1()?;
    if self.peek_char() != Some(':') {
        // just a reference
        return Ok(ParsedRange { left, right: None });
    }
    // It's a range
    self.position += 1;
    match self.consume_reference_r1c1() {
        Ok(right) => Ok(ParsedRange {
            left,
            right: Some(right),
        }),
        Err(_) => Err(self.set_error("Expecting reference in range", self.position)),
    }
}
// Reads a single cell reference in R1C1 style:
//    R12C3, R[2]C[-2], R3C[6], R[-3]C4
// A number in brackets is relative (the corresponding `absolute_*` flag is false),
// a bare number is absolute.
// Returns an error (moving the lexer to the end of the input) if the reference is
// malformed or is directly followed by an alphanumeric character (like R1C1P).
pub(super) fn consume_reference_r1c1(&mut self) -> Result<ParsedReference> {
    // All errors point to the start of the reference
    let position = self.position;
    self.expect_char('R')?;
    let (row, absolute_row) = self.consume_r1c1_index("row", position)?;
    self.expect_char('C')?;
    let (column, absolute_column) = self.consume_r1c1_index("column", position)?;
    if let Some(c) = self.peek_char() {
        if c.is_alphanumeric() {
            return Err(self.set_error("Expected end of reference", position));
        }
    }
    Ok(ParsedReference {
        column,
        row,
        absolute_column,
        absolute_row,
    })
}
// Reads the number after 'R' or 'C', either bare (`12`) or in brackets (`[-2]`).
// Returns the number and whether it is absolute (bare) or not (in brackets).
// `kind` ("row" or "column") is only used in error messages and `start` is the
// position reported in them.
fn consume_r1c1_index(&mut self, kind: &str, start: usize) -> Result<(i32, bool)> {
    match self.peek_char() {
        Some('[') => {
            self.expect_char('[')?;
            // The first character might be a sign, `consume_integer` takes it as given
            let first = match self.read_next_char() {
                Some(c) => c,
                None => {
                    return Err(self.set_error(&format!("Expected {} number", kind), start));
                }
            };
            let value = match self.consume_integer(first) {
                Ok(v) => v,
                Err(_) => {
                    return Err(self.set_error(&format!("Expected {} number", kind), start));
                }
            };
            self.expect(TokenType::RightBracket)?;
            Ok((value, false))
        }
        Some(first) => {
            self.expect_char(first)?;
            match self.consume_integer(first) {
                Ok(v) => Ok((v, true)),
                Err(_) => Err(self.set_error(&format!("Expected {} number", kind), start)),
            }
        }
        None => Err(self.set_error(&format!("Expected {} number or '['", kind), start)),
    }
}
}

View File

@@ -0,0 +1,188 @@
// Grammar:
// structured references -> table_name "[" arguments "]"
// arguments -> table_reference | "["specifier"]" "," table_reference
// specifier -> "#All" |
// "#This Row" |
// "#Data" |
// "#Headers" |
// "#Totals"
// table_reference -> column_reference | range_reference
// column_reference -> column_name | "["column_name"]"
// range_reference -> column_reference":"column_reference
use crate::expressions::token::TokenType;
use crate::expressions::token::{TableReference, TableSpecifier};
use super::Result;
use super::{Lexer, LexerError};
impl Lexer {
// Reads a table specifier together with its closing bracket, for instance `#Totals]`.
// Returns `Ok(None)` (without consuming anything) if the next character is not '#',
// and an error if the text after the '#' is not a known specifier.
fn consume_table_specifier(&mut self) -> Result<Option<TableSpecifier>> {
    if self.peek_char() != Some('#') {
        return Ok(None);
    }
    // It's a specifier
    // TODO(TD): There are better ways of doing this :)
    let rest_of_formula: String = self.chars[self.position..self.len].iter().collect();
    let known = [
        ("#This Row]", TableSpecifier::ThisRow),
        ("#All]", TableSpecifier::All),
        ("#Data]", TableSpecifier::Data),
        ("#Headers]", TableSpecifier::Headers),
        ("#Totals]", TableSpecifier::Totals),
    ];
    for (text, specifier) in known {
        if rest_of_formula.starts_with(text) {
            // The names are ASCII, so the byte length is the number of characters
            self.position += text.len();
            return Ok(Some(specifier));
        }
    }
    Err(LexerError {
        position: self.position,
        message: "Invalid structured reference".to_string(),
    })
}
// Reads a column name, skipping leading whitespace.
// If the name starts with '[' it runs up to the matching ']' (which is consumed),
// otherwise it runs up to (not including) the next ')' or the end of the input.
// The character "'" escapes the character that follows it; the escapes
// '[ '] '# '@ and '' are undone in the returned name.
// Returns an error if the input ends right after an escape character or if a
// bracketed name is not closed.
fn consume_column_reference(&mut self) -> Result<String> {
    self.consume_whitespace();
    let end_char = if self.peek_char() == Some('[') {
        self.position += 1;
        ']'
    } else {
        ')'
    };
    let mut position = self.position;
    while position < self.len {
        let next_char = self.chars[position];
        if next_char == end_char {
            break;
        }
        position += 1;
        if next_char == '\'' {
            if position == self.len {
                return Err(LexerError {
                    position: self.position,
                    message: "Invalid column name".to_string(),
                });
            }
            // skip next char
            position += 1
        }
    }
    if end_char == ']' && position >= self.len {
        // The closing bracket is missing. Without this check the name would be accepted
        // and the lexer would end up one character past the end of the input.
        return Err(LexerError {
            position: self.position,
            message: "Expected closing ']' in column name".to_string(),
        });
    }
    let chars: String = self.chars[self.position..position].iter().collect();
    if end_char == ']' {
        // Consume the closing bracket
        position += 1;
    }
    self.position = position;
    Ok(chars
        .replace("'[", "[")
        .replace("']", "]")
        .replace("'#", "#")
        .replace("'@", "@")
        .replace("''", "'"))
}
// Reads the bracketed part of a structured reference; the lexer must be positioned at
// the '[' that follows the table name.
//
// Possibilities:
// 1. MyTable[#Totals] or MyTable[#This Row]
// 2. MyTable[MyColumn]
// 3. MyTable[[My Column]]
// 4. MyTable[[#This Row], [My Column]]
// 5. MyTable[[#Totals], [MyColumn]]
// 6. MyTable[[#This Row], [Jan]:[Dec]]
// 7. MyTable[]
//
// Multiple specifiers are not supported yet:
// 1. MyTable[[#Data], [#Totals], [MyColumn]]
//
// In particular note that names of columns are escaped only when they are in the first argument
// We use '[' and ']'
// When there is only a specifier but not a reference the specifier is not in brackets
//
// Invalid:
// * MyTable[#Totals, [Jan]:[March]] => MyTable[[#Totals], [Jan]:[March]]
//
// NOTES:
// * MyTable[[#Totals]] is translated into MyTable[#Totals]
// * Excel shows '@' instead of '#This Row':
//     MyTable[[#This Row], [Jan]:[Dec]] => MyTable[@[Jan]:[Dec]]
//   But this is only a UI thing that we will ignore for now.
//
// Returns an identifier token for `MyTable[]` and a structured reference token otherwise.
pub(crate) fn consume_structured_reference(&mut self, table_name: &str) -> Result<TokenType> {
    self.expect(TokenType::LeftBracket)?;
    let peek_char = self.peek_char();
    if peek_char == Some(']') {
        // This is just a reference to the full table
        self.expect(TokenType::RightBracket)?;
        return Ok(TokenType::Ident(table_name.to_string()));
    }
    if peek_char == Some('#') {
        // Expecting MyTable[#Totals]
        if let Some(specifier) = self.consume_table_specifier()? {
            return Ok(TokenType::StructuredReference {
                table_name: table_name.to_string(),
                specifier: Some(specifier),
                table_reference: None,
            });
        } else {
            return Err(LexerError {
                position: self.position,
                message: "Invalid structured reference".to_string(),
            });
        }
    } else if peek_char != Some('[') {
        // Expecting MyTable[MyColumn]
        // Step back to the '[' so that the column name is read as a bracketed name
        self.position -= 1;
        let column_name = self.consume_column_reference()?;
        return Ok(TokenType::StructuredReference {
            table_name: table_name.to_string(),
            specifier: None,
            table_reference: Some(TableReference::ColumnReference(column_name)),
        });
    }
    self.expect(TokenType::LeftBracket)?;
    let specifier = self.consume_table_specifier()?;
    if specifier.is_some() {
        let peek_token = self.peek_token();
        if peek_token == TokenType::Comma {
            self.advance_token();
            self.expect(TokenType::LeftBracket)?;
        } else if peek_token == TokenType::RightBracket {
            // MyTable[[#Totals]]: consume the outer closing bracket too, otherwise it
            // would be returned as a stray token by the next call to `next_token`
            self.advance_token();
            return Ok(TokenType::StructuredReference {
                table_name: table_name.to_string(),
                specifier,
                table_reference: None,
            });
        } else {
            // After a specifier only ',' or ']' make sense. Stepping back here would land
            // on the specifier's closing bracket, not on the '[' of a column name.
            return Err(LexerError {
                position: self.position,
                message: "Invalid structured reference".to_string(),
            });
        }
    }
    // Now it's either:
    // [Column Name]
    // [Column Name]:[Column Name]
    // Step back to the '[' that was just consumed
    self.position -= 1;
    let column_reference = self.consume_column_reference()?;
    let table_reference = if self.peek_char() == Some(':') {
        self.position += 1;
        let column_reference_right = self.consume_column_reference()?;
        self.expect(TokenType::RightBracket)?;
        Some(TableReference::RangeReference((
            column_reference,
            column_reference_right,
        )))
    } else {
        self.expect(TokenType::RightBracket)?;
        Some(TableReference::ColumnReference(column_reference))
    };
    Ok(TokenType::StructuredReference {
        table_name: table_name.to_string(),
        specifier,
        table_reference,
    })
}
}

View File

@@ -0,0 +1,6 @@
mod test_common;
mod test_language;
mod test_locale;
mod test_ranges;
mod test_tables;
mod test_util;

View File

@@ -0,0 +1,508 @@
#![allow(clippy::unwrap_used)]
use crate::language::get_language;
use crate::locale::get_locale;
use crate::expressions::{
lexer::{Lexer, LexerMode},
token::TokenType::*,
token::{Error, OpSum},
};
/// Builds a lexer over `formula` using the "en" locale and the "en" language.
///
/// `a1_mode == true` selects `LexerMode::A1`, otherwise `LexerMode::R1C1` is used.
fn new_lexer(formula: &str, a1_mode: bool) -> Lexer {
    let en_locale = get_locale("en").unwrap();
    let en_language = get_language("en").unwrap();
    let lexer_mode = if a1_mode { LexerMode::A1 } else { LexerMode::R1C1 };
    Lexer::new(formula, lexer_mode, en_locale, en_language)
}
// A bare `0` is a single number token.
#[test]
fn test_number_zero() {
    let mut lexer = new_lexer("0", true);
    assert_eq!(lexer.next_token(), Number(0.0));
    assert_eq!(lexer.next_token(), EOF);
}
// An integer literal is lexed as a floating point number token.
#[test]
fn test_number_integer() {
    let mut lexer = new_lexer("42", true);
    assert_eq!(lexer.next_token(), Number(42.0));
    assert_eq!(lexer.next_token(), EOF);
}
// A number with a decimal part (note: the literal is 3.415, not pi).
#[test]
fn test_number_pi() {
    let mut lexer = new_lexer("3.415", true);
    assert_eq!(lexer.next_token(), Number(3.415));
    assert_eq!(lexer.next_token(), EOF);
}
// A number may start directly with the decimal separator.
#[test]
fn test_number_less_than_one() {
    let mut lexer = new_lexer(".1415", true);
    assert_eq!(lexer.next_token(), Number(0.1415));
    assert_eq!(lexer.next_token(), EOF);
}
// Same value as `.1415`, written with an explicit leading zero.
#[test]
fn test_number_less_than_one_bis() {
    let mut lexer = new_lexer("0.1415", true);
    assert_eq!(lexer.next_token(), Number(0.1415));
    assert_eq!(lexer.next_token(), EOF);
}
// Scientific notation with a positive exponent.
#[test]
fn test_number_scientific() {
    let mut lexer = new_lexer("1.1415e12", true);
    assert_eq!(lexer.next_token(), Number(1.1415e12));
    assert_eq!(lexer.next_token(), EOF);
}
// Scientific notation with a negative exponent and a lowercase `e`.
#[test]
fn test_number_scientific_1() {
    let mut lexer = new_lexer("2.4e-12", true);
    assert_eq!(lexer.next_token(), Number(2.4e-12));
    assert_eq!(lexer.next_token(), EOF);
}
// Scientific notation with a negative exponent and an uppercase `E`.
#[test]
fn test_number_scientific_1b() {
    let mut lexer = new_lexer("2.4E-12", true);
    assert_eq!(lexer.next_token(), Number(2.4e-12));
    assert_eq!(lexer.next_token(), EOF);
}
// Two consecutive periods are not a number: one illegal token, then EOF.
#[test]
fn test_not_a_number() {
    let mut lexer = new_lexer("..", true);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// A double-quoted literal yields a string token without the quotes.
#[test]
fn test_string() {
    let mut lexer = new_lexer("\"Hello World!\"", true);
    let expected = String("Hello World!".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// String literals may contain non-ASCII characters.
#[test]
fn test_string_unicode() {
    let mut lexer = new_lexer("\"你好,世界!\"", true);
    let expected = String("你好,世界!".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `FALSE` is lexed as a boolean token.
#[test]
fn test_boolean() {
    let mut lexer = new_lexer("FALSE", true);
    assert_eq!(lexer.next_token(), Boolean(false));
    assert_eq!(lexer.next_token(), EOF);
}
// Mixed-case `True` is still recognised as a boolean.
#[test]
fn test_boolean_true() {
    let mut lexer = new_lexer("True", true);
    assert_eq!(lexer.next_token(), Boolean(true));
    assert_eq!(lexer.next_token(), EOF);
}
// `A1` is a relative reference to column 1, row 1 with no sheet.
#[test]
fn test_reference() {
    let mut lexer = new_lexer("A1", true);
    let expected = Reference {
        sheet: None,
        column: 1,
        row: 1,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$A$1` marks both the column and the row as absolute.
#[test]
fn test_reference_absolute() {
    let mut lexer = new_lexer("$A$1", true);
    let expected = Reference {
        sheet: None,
        column: 1,
        row: 1,
        absolute_column: true,
        absolute_row: true,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `AB$12`: column AB is 28 and only the row is absolute.
#[test]
fn test_reference_absolute_1() {
    let mut lexer = new_lexer("AB$12", true);
    let expected = Reference {
        sheet: None,
        column: 28,
        row: 12,
        absolute_column: false,
        absolute_row: true,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$CC234`: column CC is 81 and only the column is absolute.
#[test]
fn test_reference_absolute_2() {
    let mut lexer = new_lexer("$CC234", true);
    let expected = Reference {
        sheet: None,
        column: 81,
        row: 234,
        absolute_column: true,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// An unquoted sheet name followed by `!` is attached to the reference.
#[test]
fn test_reference_sheet() {
    let mut lexer = new_lexer("Sheet1!C34", true);
    let expected = Reference {
        sheet: Some("Sheet1".to_string()),
        column: 3,
        row: 34,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A quoted sheet name may contain spaces and non-ASCII characters.
#[test]
fn test_reference_sheet_unicode() {
    // Note that this also tests a '!' inside the quoted sheet name
    let mut lexer = new_lexer("'A € world!'!C34", true);
    let expected = Reference {
        sheet: Some("A € world!".to_string()),
        column: 3,
        row: 34,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Quoted sheet name combined with a fully absolute reference.
#[test]
fn test_reference_sheet_unicode_absolute() {
    let mut lexer = new_lexer("'A €'!$C$34", true);
    let expected = Reference {
        sheet: Some("A €".to_string()),
        column: 3,
        row: 34,
        absolute_column: true,
        absolute_row: true,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A sheet name whose opening quote is never closed is illegal.
#[test]
fn test_unmatched_quote() {
    let mut lexer = new_lexer("'A €!$C$34", true);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// Two numbers joined by `+` produce number, addition, number, EOF.
#[test]
fn test_sum() {
    let mut lexer = new_lexer("2.4+3.415", true);
    for expected in [Number(2.4), Addition(OpSum::Add), Number(3.415), EOF] {
        assert_eq!(lexer.next_token(), expected);
    }
}
// Sum of a plain reference and a sheet-qualified absolute reference,
// with whitespace around the operator.
#[test]
fn test_sum_1() {
    let mut lexer = new_lexer("A2 + 'First Sheet'!$B$3", true);
    let left_operand = Reference {
        sheet: None,
        column: 1,
        row: 2,
        absolute_column: false,
        absolute_row: false,
    };
    let right_operand = Reference {
        sheet: Some("First Sheet".to_string()),
        column: 2,
        row: 3,
        absolute_column: true,
        absolute_row: true,
    };
    for expected in [left_operand, Addition(OpSum::Add), right_operand, EOF] {
        assert_eq!(lexer.next_token(), expected);
    }
}
// `#VALUE!` is lexed as the VALUE error.
#[test]
fn test_error_value() {
    let mut lexer = new_lexer("#VALUE!", true);
    assert_eq!(lexer.next_token(), Error(Error::VALUE));
    assert_eq!(lexer.next_token(), EOF);
}
// `#ERROR!` is lexed as the ERROR error.
#[test]
fn test_error_error() {
    let mut lexer = new_lexer("#ERROR!", true);
    assert_eq!(lexer.next_token(), Error(Error::ERROR));
    assert_eq!(lexer.next_token(), EOF);
}
// `#DIV/0!` is lexed as the DIV error.
#[test]
fn test_error_div() {
    let mut lexer = new_lexer("#DIV/0!", true);
    assert_eq!(lexer.next_token(), Error(Error::DIV));
    assert_eq!(lexer.next_token(), EOF);
}
// `#N/A` is lexed as the NA error.
#[test]
fn test_error_na() {
    let mut lexer = new_lexer("#N/A", true);
    assert_eq!(lexer.next_token(), Error(Error::NA));
    assert_eq!(lexer.next_token(), EOF);
}
// `#NAME?` is lexed as the NAME error.
#[test]
fn test_error_name() {
    let mut lexer = new_lexer("#NAME?", true);
    assert_eq!(lexer.next_token(), Error(Error::NAME));
    assert_eq!(lexer.next_token(), EOF);
}
// `#NUM!` is lexed as the NUM error.
#[test]
fn test_error_num() {
    let mut lexer = new_lexer("#NUM!", true);
    assert_eq!(lexer.next_token(), Error(Error::NUM));
    assert_eq!(lexer.next_token(), EOF);
}
// `#CALC!` is lexed as the CALC error.
#[test]
fn test_error_calc() {
    let mut lexer = new_lexer("#CALC!", true);
    assert_eq!(lexer.next_token(), Error(Error::CALC));
    assert_eq!(lexer.next_token(), EOF);
}
// `#NULL!` is lexed as the NULL error.
#[test]
fn test_error_null() {
    let mut lexer = new_lexer("#NULL!", true);
    assert_eq!(lexer.next_token(), Error(Error::NULL));
    assert_eq!(lexer.next_token(), EOF);
}
// `#SPILL!` is lexed as the SPILL error.
#[test]
fn test_error_spill() {
    let mut lexer = new_lexer("#SPILL!", true);
    assert_eq!(lexer.next_token(), Error(Error::SPILL));
    assert_eq!(lexer.next_token(), EOF);
}
// `#CIRC!` is lexed as the CIRC error.
#[test]
fn test_error_circ() {
    let mut lexer = new_lexer("#CIRC!", true);
    assert_eq!(lexer.next_token(), Error(Error::CIRC));
    assert_eq!(lexer.next_token(), EOF);
}
// A misspelled error name (`#VALU!`) is illegal.
#[test]
fn test_error_invalid() {
    let mut lexer = new_lexer("#VALU!", true);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// Error literals can be operands: the `/` and `!` inside them are not operators.
#[test]
fn test_add_errors() {
    let mut lexer = new_lexer("#DIV/0!+#NUM!", true);
    let expected_tokens = [
        Error(Error::DIV),
        Addition(OpSum::Add),
        Error(Error::NUM),
        EOF,
    ];
    for expected in expected_tokens {
        assert_eq!(lexer.next_token(), expected);
    }
}
// A name that is not a reference is an identifier, case preserved.
#[test]
fn test_variable_name() {
    let mut lexer = new_lexer("MyVar", true);
    let expected = Ident("MyVar".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `XFD1048576` (column 16384, row 1048576) is still a valid reference.
#[test]
fn test_last_reference() {
    let mut lexer = new_lexer("XFD1048576", true);
    let expected = Reference {
        sheet: None,
        column: 16384,
        row: 1048576,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `XFE10` is one column past `XFD`, so it is an identifier, not a reference.
#[test]
fn test_not_a_reference() {
    let mut lexer = new_lexer("XFE10", true);
    let expected = Ident("XFE10".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// In R1C1 mode `R1C1` is an absolute reference to row 1, column 1.
#[test]
fn test_reference_r1c1() {
    let mut lexer = new_lexer("R1C1", false);
    let expected = Reference {
        sheet: None,
        column: 1,
        row: 1,
        absolute_column: true,
        absolute_row: true,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// In A1 mode the text `R1C1` is rejected as illegal.
#[test]
fn test_reference_r1c1_true() {
    let mut lexer = new_lexer("R1C1", true);
    // NOTE: This is what google docs does.
    //       Excel will not let you enter this formula.
    //       Online Excel will let you and will mark the cell as in Error
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// In R1C1 mode, `R1C1P` has a trailing letter and is therefore an identifier.
#[test]
fn test_name_r1c1p() {
    let mut lexer = new_lexer("R1C1P", false);
    let expected = Ident("R1C1P".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// In R1C1 mode a sheet prefix followed by a bare number is illegal.
#[test]
fn test_name_wrong_ref() {
    let mut lexer = new_lexer("Sheet1!2", false);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// Bracketed R1C1 offsets are relative: `R[1]C[2]` gives row 1, column 2.
#[test]
fn test_reference_1() {
    let mut lexer = new_lexer("Sheet1!R[1]C[2]", false);
    let expected = Reference {
        sheet: Some("Sheet1".to_string()),
        column: 2,
        row: 1,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Quoted sheet name with a space in front of a relative R1C1 reference.
#[test]
fn test_reference_quotes() {
    let mut lexer = new_lexer("'Sheet 1'!R[1]C[2]", false);
    let expected = Reference {
        sheet: Some("Sheet 1".to_string()),
        column: 2,
        row: 1,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A doubled single quote inside a quoted sheet name stands for one quote.
#[test]
fn test_reference_escape_quotes() {
    let mut lexer = new_lexer("'Sheet ''one'' 1'!R[1]C[2]", false);
    let expected = Reference {
        sheet: Some("Sheet 'one' 1".to_string()),
        column: 2,
        row: 1,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// R1C1 mode: a quoted sheet name without the closing quote is illegal.
#[test]
fn test_reference_unfinished_quotes() {
    let mut lexer = new_lexer("'Sheet 1!R[1]C[2]", false);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// In R1C1 mode `ROUND` (which starts with `R`) is an identifier.
#[test]
fn test_round_function() {
    let mut lexer = new_lexer("ROUND", false);
    let expected = Ident("ROUND".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Identifiers may start with an underscore.
#[test]
fn test_ident_with_underscore() {
    let mut lexer = new_lexer("_IDENT", false);
    let expected = Ident("_IDENT".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A period inside an identifier is part of the name.
#[test]
fn test_ident_with_period() {
    let mut lexer = new_lexer("IDENT.IFIER", false);
    let expected = Ident("IDENT.IFIER".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A leading period does not start an identifier: the input is illegal.
#[test]
fn test_ident_cannot_start_with_period() {
    let mut lexer = new_lexer(".IFIER", false);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// The `_xlfn.` prefix is kept as part of the identifier.
#[test]
fn test_xlfn() {
    let mut lexer = new_lexer("_xlfn.MyVar", true);
    let expected = Ident("_xlfn.MyVar".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}

View File

@@ -0,0 +1,101 @@
#![allow(clippy::unwrap_used)]
use crate::{
expressions::{
lexer::{Lexer, LexerMode},
token::{Error, TokenType},
},
language::get_language,
locale::get_locale,
};
/// Builds an A1-mode lexer over `formula` with the "en" locale and the
/// language identified by `language` (e.g. "es", "de", "fr").
fn new_language_lexer(formula: &str, language: &str) -> Lexer {
    let en_locale = get_locale("en").unwrap();
    let selected_language = get_language(language).unwrap();
    Lexer::new(formula, LexerMode::A1, en_locale, selected_language)
}
// Spanish
// With the Spanish language, VERDADERO / FALSO are the boolean literals.
#[test]
fn test_verdadero_falso() {
    let mut lexer = new_language_lexer("IF(A1, VERDADERO, FALSO)", "es");
    assert_eq!(lexer.next_token(), TokenType::Ident("IF".to_string()));
    assert_eq!(lexer.next_token(), TokenType::LeftParenthesis);
    let condition = lexer.next_token();
    assert!(matches!(condition, TokenType::Reference { .. }));
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(lexer.next_token(), TokenType::Boolean(true));
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(lexer.next_token(), TokenType::Boolean(false));
    assert_eq!(lexer.next_token(), TokenType::RightParenthesis);
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// With the Spanish language, `#¡REF!` is the REF error.
#[test]
fn test_spanish_errors_ref() {
    let mut lexer = new_language_lexer("#¡REF!", "es");
    let expected = TokenType::Error(Error::REF);
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// German
// With the German language, WAHR / FALSCH are the boolean literals.
#[test]
fn test_wahr_falsch() {
    let mut lexer = new_language_lexer("IF(A1, WAHR, FALSCH)", "de");
    assert_eq!(lexer.next_token(), TokenType::Ident("IF".to_string()));
    assert_eq!(lexer.next_token(), TokenType::LeftParenthesis);
    let condition = lexer.next_token();
    assert!(matches!(condition, TokenType::Reference { .. }));
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(lexer.next_token(), TokenType::Boolean(true));
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(lexer.next_token(), TokenType::Boolean(false));
    assert_eq!(lexer.next_token(), TokenType::RightParenthesis);
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// With the German language, `#BEZUG!` is the REF error.
#[test]
fn test_german_errors_ref() {
    let mut lexer = new_language_lexer("#BEZUG!", "de");
    let expected = TokenType::Error(Error::REF);
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// French
// With the French language, VRAI / FAUX are the boolean literals.
#[test]
fn test_vrai_faux() {
    let mut lexer = new_language_lexer("IF(A1, VRAI, FAUX)", "fr");
    assert_eq!(lexer.next_token(), TokenType::Ident("IF".to_string()));
    assert_eq!(lexer.next_token(), TokenType::LeftParenthesis);
    let condition = lexer.next_token();
    assert!(matches!(condition, TokenType::Reference { .. }));
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(lexer.next_token(), TokenType::Boolean(true));
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(lexer.next_token(), TokenType::Boolean(false));
    assert_eq!(lexer.next_token(), TokenType::RightParenthesis);
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// With the French language, `#REF!` is the REF error.
#[test]
fn test_french_errors_ref() {
    let mut lexer = new_language_lexer("#REF!", "fr");
    let expected = TokenType::Error(Error::REF);
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// English with errors
// With the English language the Spanish boolean words are plain identifiers.
#[test]
fn test_english_with_spanish_words() {
    let mut lexer = new_language_lexer("IF(A1, VERDADERO, FALSO)", "en");
    assert_eq!(lexer.next_token(), TokenType::Ident("IF".to_string()));
    assert_eq!(lexer.next_token(), TokenType::LeftParenthesis);
    let condition = lexer.next_token();
    assert!(matches!(condition, TokenType::Reference { .. }));
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(
        lexer.next_token(),
        TokenType::Ident("VERDADERO".to_string())
    );
    assert_eq!(lexer.next_token(), TokenType::Comma);
    assert_eq!(lexer.next_token(), TokenType::Ident("FALSO".to_string()));
    assert_eq!(lexer.next_token(), TokenType::RightParenthesis);
    assert_eq!(lexer.next_token(), TokenType::EOF);
}

View File

@@ -0,0 +1,48 @@
#![allow(clippy::unwrap_used)]
use crate::{
expressions::{
lexer::{Lexer, LexerMode},
token::TokenType,
},
language::get_language,
locale::get_locale_fix,
};
/// Builds an A1-mode lexer over `formula` for the given locale and language
/// identifiers. The locale is looked up with `get_locale_fix`.
fn new_language_lexer(formula: &str, locale: &str, language: &str) -> Lexer {
    let selected_locale = get_locale_fix(locale).unwrap();
    let selected_language = get_language(language).unwrap();
    Lexer::new(formula, LexerMode::A1, selected_locale, selected_language)
}
// In the "de" locale the comma is the decimal separator.
#[test]
fn test_german_locale() {
    let mut lexer = new_language_lexer("2,34e-3", "de", "en");
    assert_eq!(lexer.next_token(), TokenType::Number(2.34e-3));
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// In the "de" locale a period does not continue the number: `2` is read,
// then the rest is illegal.
#[test]
fn test_german_locale_does_not_parse() {
    let mut lexer = new_language_lexer("2.34e-3", "de", "en");
    assert_eq!(lexer.next_token(), TokenType::Number(2.0));
    let rest = lexer.next_token();
    assert!(matches!(rest, TokenType::Illegal { .. }));
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// In the "en" locale the period is the decimal separator.
#[test]
fn test_english_locale() {
    let mut lexer = new_language_lexer("2.34e-3", "en", "en");
    assert_eq!(lexer.next_token(), TokenType::Number(2.34e-3));
    assert_eq!(lexer.next_token(), TokenType::EOF);
}
// In the "en" locale `2,34e-3` is two numbers split by a comma token.
#[test]
fn test_english_locale_does_not_parse() {
    // a comma is a separator
    let mut lexer = new_language_lexer("2,34e-3", "en", "en");
    let expected_tokens = [
        TokenType::Number(2.0),
        TokenType::Comma,
        TokenType::Number(34e-3),
        TokenType::EOF,
    ];
    for expected in expected_tokens {
        assert_eq!(lexer.next_token(), expected);
    }
}

View File

@@ -0,0 +1,487 @@
#![allow(clippy::unwrap_used)]
use crate::constants::{LAST_COLUMN, LAST_ROW};
use crate::expressions::lexer::LexerError;
use crate::expressions::{
lexer::{Lexer, LexerMode},
token::TokenType::*,
types::ParsedReference,
};
use crate::language::get_language;
use crate::locale::get_locale;
/// Builds an A1-mode lexer over `formula` with the "en" locale and language.
fn new_lexer(formula: &str) -> Lexer {
    let en_locale = get_locale("en").unwrap();
    let en_language = get_language("en").unwrap();
    Lexer::new(formula, LexerMode::A1, en_locale, en_language)
}
// `C4:D4` is a single range token with two relative corners.
#[test]
fn test_range() {
    let mut lexer = new_lexer("C4:D4");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 3,
            row: 4,
            absolute_column: false,
            absolute_row: false,
        },
        right: ParsedReference {
            column: 4,
            row: 4,
            absolute_column: false,
            absolute_row: false,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$A1:B$4`: each corner keeps its own absolute flags.
#[test]
fn test_range_absolute_column() {
    let mut lexer = new_lexer("$A1:B$4");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 1,
            row: 1,
            absolute_column: true,
            absolute_row: false,
        },
        right: ParsedReference {
            column: 2,
            row: 4,
            absolute_column: false,
            absolute_row: true,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A sheet prefix applies to the whole range.
#[test]
fn test_range_with_sheet() {
    let mut lexer = new_lexer("Sheet1!A1:B4");
    let expected = Range {
        sheet: Some("Sheet1".to_string()),
        left: ParsedReference {
            column: 1,
            row: 1,
            absolute_column: false,
            absolute_row: false,
        },
        right: ParsedReference {
            column: 2,
            row: 4,
            absolute_column: false,
            absolute_row: false,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Quoted sheet name containing a space, mixed absolute/relative corners.
#[test]
fn test_range_with_sheet_with_space() {
    let mut lexer = new_lexer("'New sheet'!$A$1:B44");
    let expected = Range {
        sheet: Some("New sheet".to_string()),
        left: ParsedReference {
            column: 1,
            row: 1,
            absolute_column: true,
            absolute_row: true,
        },
        right: ParsedReference {
            column: 2,
            row: 44,
            absolute_column: false,
            absolute_row: false,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A column range `C:D` spans rows 1..=LAST_ROW with absolute rows.
#[test]
fn test_range_column() {
    let mut lexer = new_lexer("C:D");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 3,
            row: 1,
            absolute_column: false,
            absolute_row: true,
        },
        right: ParsedReference {
            column: 4,
            row: LAST_ROW,
            absolute_column: false,
            absolute_row: true,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A column range whose right column (`XFE`) is invalid reports an error
// at position 5 with the message "Column is not valid.".
#[test]
fn test_range_column_out_of_range() {
    let mut lexer = new_lexer("C:XFE");
    let expected = Illegal(LexerError {
        position: 5,
        message: "Column is not valid.".to_string(),
    });
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$C:D`: only the left column is flagged absolute.
#[test]
fn test_range_column_absolute1() {
    let mut lexer = new_lexer("$C:D");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 3,
            row: 1,
            absolute_column: true,
            absolute_row: true,
        },
        right: ParsedReference {
            column: 4,
            row: LAST_ROW,
            absolute_column: false,
            absolute_row: true,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$C:$AA`: both columns absolute; AA is column 27.
#[test]
fn test_range_column_absolute2() {
    let mut lexer = new_lexer("$C:$AA");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 3,
            row: 1,
            absolute_column: true,
            absolute_row: true,
        },
        right: ParsedReference {
            column: 27,
            row: LAST_ROW,
            absolute_column: true,
            absolute_row: true,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// A row range `3:5` spans columns 1..=LAST_COLUMN with absolute columns.
#[test]
fn test_range_rows() {
    let mut lexer = new_lexer("3:5");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 1,
            row: 3,
            absolute_column: true,
            absolute_row: false,
        },
        right: ParsedReference {
            column: LAST_COLUMN,
            row: 5,
            absolute_column: true,
            absolute_row: false,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$3:5`: only the first row is flagged absolute.
#[test]
fn test_range_rows_absolute1() {
    let mut lexer = new_lexer("$3:5");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 1,
            row: 3,
            absolute_column: true,
            absolute_row: true,
        },
        right: ParsedReference {
            column: LAST_COLUMN,
            row: 5,
            absolute_column: true,
            absolute_row: false,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$3:$55`: both rows are flagged absolute.
#[test]
fn test_range_rows_absolute2() {
    let mut lexer = new_lexer("$3:$55");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 1,
            row: 3,
            absolute_column: true,
            absolute_row: true,
        },
        right: ParsedReference {
            column: LAST_COLUMN,
            row: 55,
            absolute_column: true,
            absolute_row: true,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Column range qualified with a sheet name.
#[test]
fn test_range_column_sheet() {
    let mut lexer = new_lexer("Sheet1!C:D");
    let expected = Range {
        sheet: Some("Sheet1".to_string()),
        left: ParsedReference {
            column: 3,
            row: 1,
            absolute_column: false,
            absolute_row: true,
        },
        right: ParsedReference {
            column: 4,
            row: LAST_ROW,
            absolute_column: false,
            absolute_row: true,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Absolute column range `$C:$D` behind an unquoted and a quoted sheet name:
// both formulas must yield the same range, differing only in the sheet.
#[test]
fn test_range_column_sheet_absolute() {
    let cases = [
        ("Sheet1!$C:$D", "Sheet1"),
        ("'Woops ans'!$C:$D", "Woops ans"),
    ];
    for (formula, sheet_name) in cases {
        let mut lexer = new_lexer(formula);
        let expected = Range {
            sheet: Some(sheet_name.to_string()),
            left: ParsedReference {
                column: 3,
                row: 1,
                absolute_column: true,
                absolute_row: true,
            },
            right: ParsedReference {
                column: 4,
                row: LAST_ROW,
                absolute_column: true,
                absolute_row: true,
            },
        };
        assert_eq!(lexer.next_token(), expected);
        assert_eq!(lexer.next_token(), EOF);
    }
}
// Row range `3:5` behind a quoted and an unquoted sheet name:
// both formulas must yield the same range, differing only in the sheet.
#[test]
fn test_range_rows_sheet() {
    let cases = [
        ("'A new sheet'!3:5", "A new sheet"),
        ("Sheet12!3:5", "Sheet12"),
    ];
    for (formula, sheet_name) in cases {
        let mut lexer = new_lexer(formula);
        let expected = Range {
            sheet: Some(sheet_name.to_string()),
            left: ParsedReference {
                column: 1,
                row: 3,
                absolute_column: true,
                absolute_row: false,
            },
            right: ParsedReference {
                column: LAST_COLUMN,
                row: 5,
                absolute_column: true,
                absolute_row: false,
            },
        };
        assert_eq!(lexer.next_token(), expected);
        assert_eq!(lexer.next_token(), EOF);
    }
}
// Non ranges
// Column letters alone (`AB`) are an identifier, not a range.
#[test]
fn test_non_range_variable_name() {
    let mut lexer = new_lexer("AB");
    let expected = Ident("AB".to_string());
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `$AB` on its own is neither a reference nor a name: it is illegal.
#[test]
fn test_non_range_invalid_variable_name() {
    let mut lexer = new_lexer("$AB");
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// A leading zero in the row is accepted: `A03` is the reference A3.
#[test]
fn test_non_range_invalid_variable_name_a03() {
    let mut lexer = new_lexer("A03");
    let expected = Reference {
        sheet: None,
        row: 3,
        column: 1,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Same as `A03`, with a sheet prefix: `Sheet1!A03` is Sheet1!A3.
#[test]
fn test_non_range_invalid_variable_name_sheet1_a03() {
    let mut lexer = new_lexer("Sheet1!A03");
    let expected = Reference {
        sheet: Some("Sheet1".to_string()),
        row: 3,
        column: 1,
        absolute_column: false,
        absolute_row: false,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// Leading zeros are accepted in row ranges: `03:05` equals `3:5`.
#[test]
fn test_range_rows_with_0() {
    let mut lexer = new_lexer("03:05");
    let expected = Range {
        sheet: None,
        left: ParsedReference {
            column: 1,
            row: 3,
            absolute_column: true,
            absolute_row: false,
        },
        right: ParsedReference {
            column: LAST_COLUMN,
            row: 5,
            absolute_column: true,
            absolute_row: false,
        },
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// After switching to R1C1 mode, an unterminated `R[` is illegal.
#[test]
fn test_range_incomplete_row() {
    let mut lexer = new_lexer("R[");
    lexer.set_lexer_mode(LexerMode::R1C1);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// After switching to R1C1 mode, `R[3][` (no column part) is illegal.
#[test]
fn test_range_incomplete_column() {
    let mut lexer = new_lexer("R[3][");
    lexer.set_lexer_mode(LexerMode::R1C1);
    let first = lexer.next_token();
    assert!(matches!(first, Illegal(_)));
    assert_eq!(lexer.next_token(), EOF);
}
// When the right side of `:` is a function call, the colon is emitted as a
// separate `Colon` token instead of being folded into a range token.
#[test]
fn range_operator() {
    let mut lexer = new_lexer("A1:OFFSET(B1,1,2)");
    lexer.set_lexer_mode(LexerMode::A1);
    let left_reference = lexer.next_token();
    assert!(matches!(left_reference, Reference { .. }));
    let colon = lexer.next_token();
    assert!(matches!(colon, Colon));
    let function_name = lexer.next_token();
    assert!(matches!(function_name, Ident(_)));
    let open = lexer.next_token();
    assert!(matches!(open, LeftParenthesis));
    let first_argument = lexer.next_token();
    assert!(matches!(first_argument, Reference { .. }));
    assert_eq!(lexer.next_token(), Comma);
    let second_argument = lexer.next_token();
    assert!(matches!(second_argument, Number(_)));
    assert_eq!(lexer.next_token(), Comma);
    let third_argument = lexer.next_token();
    assert!(matches!(third_argument, Number(_)));
    let close = lexer.next_token();
    assert!(matches!(close, RightParenthesis));
    assert_eq!(lexer.next_token(), EOF);
}

View File

@@ -0,0 +1,73 @@
#![allow(clippy::unwrap_used)]
use crate::expressions::{
lexer::{Lexer, LexerMode},
token::{TableReference, TableSpecifier, TokenType::*},
};
use crate::language::get_language;
use crate::locale::get_locale;
/// Builds an A1-mode lexer over `formula` with the "en" locale and language.
fn new_lexer(formula: &str) -> Lexer {
    let en_locale = get_locale("en").unwrap();
    let en_language = get_language("en").unwrap();
    Lexer::new(formula, LexerMode::A1, en_locale, en_language)
}
// `[#This Row]` specifier followed by a column range `[Jan]:[Dec]`.
#[test]
fn table_this_row() {
    let mut lexer = new_lexer("tbInfo[[#This Row], [Jan]:[Dec]]");
    let columns = TableReference::RangeReference(("Jan".to_string(), "Dec".to_string()));
    let expected = StructuredReference {
        table_name: "tbInfo".to_string(),
        specifier: Some(TableSpecifier::ThisRow),
        table_reference: Some(columns),
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `Table[Column]`: a single column reference with no specifier.
#[test]
fn table_no_specifier() {
    let mut lexer = new_lexer("tbInfo[December]");
    let column = TableReference::ColumnReference("December".to_string());
    let expected = StructuredReference {
        table_name: "tbInfo".to_string(),
        specifier: None,
        table_reference: Some(column),
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `Table[[Column Name]]`: double brackets around a column name with a space.
#[test]
fn table_no_specifier_white_spaces() {
    let mut lexer = new_lexer("tbInfo[[First Month]]");
    let column = TableReference::ColumnReference("First Month".to_string());
    let expected = StructuredReference {
        table_name: "tbInfo".to_string(),
        specifier: None,
        table_reference: Some(column),
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}
// `Table[#Totals]`: a specifier on its own, without any column reference.
#[test]
fn table_totals_no_reference() {
    let mut lexer = new_lexer("tbInfo[#Totals]");
    let expected = StructuredReference {
        table_name: "tbInfo".to_string(),
        specifier: Some(TableSpecifier::Totals),
        table_reference: None,
    };
    assert_eq!(lexer.next_token(), expected);
    assert_eq!(lexer.next_token(), EOF);
}

View File

@@ -0,0 +1,146 @@
use crate::expressions::{
lexer::util::get_tokens,
token::{OpCompare, OpSum, TokenType},
};
/// Lexes `formula` with `get_tokens` and returns only the token types,
/// dropping the start/end positions.
fn get_tokens_types(formula: &str) -> Vec<TokenType> {
    // The marked tokens are owned here and not needed afterwards, so move each
    // token out of the vector instead of cloning it.
    get_tokens(formula)
        .into_iter()
        .map(|marked| marked.token)
        .collect()
}
// Checks the number of marked tokens and the start/end recorded for one of them.
#[test]
fn test_get_tokens() {
    let tokens = get_tokens("1+1");
    assert_eq!(tokens.len(), 3);

    let tokens = get_tokens("1 + AA23 +");
    assert_eq!(tokens.len(), 4);
    // Third token (index 2)
    let third = tokens.get(2).expect("expected token");
    assert_eq!(third.start, 3);
    assert_eq!(third.end, 10);
}
// Exercises `get_tokens_types` on brackets, operators, identifiers, numbers,
// booleans, separators and A1 references (with and without a sheet name).
#[test]
fn test_simple_tokens() {
    let check = |formula: &str, expected: Vec<TokenType>| {
        assert_eq!(get_tokens_types(formula), expected);
    };

    // Brackets
    check(
        "()",
        vec![TokenType::LeftParenthesis, TokenType::RightParenthesis],
    );
    check("{}", vec![TokenType::LeftBrace, TokenType::RightBrace]);
    check("[]", vec![TokenType::LeftBracket, TokenType::RightBracket]);

    // Operators
    check("&", vec![TokenType::And]);
    check("<", vec![TokenType::Compare(OpCompare::LessThan)]);
    check(">", vec![TokenType::Compare(OpCompare::GreaterThan)]);
    check("<=", vec![TokenType::Compare(OpCompare::LessOrEqualThan)]);
    check(">=", vec![TokenType::Compare(OpCompare::GreaterOrEqualThan)]);

    // Identifiers and numbers
    check("IF", vec![TokenType::Ident("IF".to_owned())]);
    check("45", vec![TokenType::Number(45.0)]);
    // The lexer parses this as two tokens
    check(
        "-45",
        vec![TokenType::Addition(OpSum::Minus), TokenType::Number(45.0)],
    );
    check("23.45e-2", vec![TokenType::Number(23.45e-2)]);
    check(
        "4-3",
        vec![
            TokenType::Number(4.0),
            TokenType::Addition(OpSum::Minus),
            TokenType::Number(3.0),
        ],
    );

    // Booleans
    check("True", vec![TokenType::Boolean(true)]);
    check("FALSE", vec![TokenType::Boolean(false)]);

    // Separators
    check(
        "2,3.5",
        vec![
            TokenType::Number(2.0),
            TokenType::Comma,
            TokenType::Number(3.5),
        ],
    );
    check(
        "2.4;3.5",
        vec![
            TokenType::Number(2.4),
            TokenType::Semicolon,
            TokenType::Number(3.5),
        ],
    );

    // References
    check(
        "AB34",
        vec![TokenType::Reference {
            sheet: None,
            row: 34,
            column: 28,
            absolute_column: false,
            absolute_row: false,
        }],
    );
    check(
        "$A3",
        vec![TokenType::Reference {
            sheet: None,
            row: 3,
            column: 1,
            absolute_column: true,
            absolute_row: false,
        }],
    );
    check(
        "AB$34",
        vec![TokenType::Reference {
            sheet: None,
            row: 34,
            column: 28,
            absolute_column: false,
            absolute_row: true,
        }],
    );
    check(
        "$AB$34",
        vec![TokenType::Reference {
            sheet: None,
            row: 34,
            column: 28,
            absolute_column: true,
            absolute_row: true,
        }],
    );
    check(
        "'My House'!AB34",
        vec![TokenType::Reference {
            sheet: Some("My House".to_string()),
            row: 34,
            column: 28,
            absolute_column: false,
            absolute_row: false,
        }],
    );
}

View File

@@ -0,0 +1,85 @@
use std::fmt;
use crate::expressions::token;
use crate::language::get_language;
use crate::locale::get_locale;
use super::{Lexer, LexerMode};
/// A MarkedToken is a token together with its position on a formula
///
/// `start` and `end` are the values reported by the lexer's `get_position()`
/// immediately before and immediately after the token was read.
#[derive(Debug, PartialEq)]
pub struct MarkedToken {
/// The token that was read.
pub token: token::TokenType,
/// Lexer position just before the token was read.
pub start: i32,
/// Lexer position just after the token was read.
pub end: i32,
}
/// Returns a list of marked tokens for a formula
///
/// The formula is lexed in A1 mode with the 'en' locale and the 'en' language.
/// Each token is paired with the lexer position before and after it was read.
/// Lexing stops at the first `EOF` token, which is not included in the result.
///
/// # Panics
///
/// Panics if the 'en' locale or the 'en' language cannot be obtained.
///
/// # Examples
/// ```
/// use ironcalc_base::expressions::{
///      lexer::util::{get_tokens, MarkedToken},
///      token::{OpSum, TokenType},
/// };
///
/// let marked_tokens = get_tokens("A1+1");
/// let first_t = MarkedToken {
///     token: TokenType::Reference {
///         sheet: None,
///         row: 1,
///         column: 1,
///         absolute_column: false,
///         absolute_row: false,
///     },
///     start: 0,
///     end: 2,
/// };
/// let second_t = MarkedToken {
///     token: TokenType::Addition(OpSum::Add),
///     start: 2,
///     end: 3,
/// };
/// let third_t = MarkedToken {
///     token: TokenType::Number(1.0),
///     start: 3,
///     end: 4,
/// };
/// assert_eq!(marked_tokens, vec![first_t, second_t, third_t]);
/// ```
pub fn get_tokens(formula: &str) -> Vec<MarkedToken> {
    let mut tokens = Vec::new();
    let mut lexer = Lexer::new(
        formula,
        LexerMode::A1,
        get_locale("en").expect("the 'en' locale must be available"),
        get_language("en").expect("the 'en' language must be available"),
    );
    loop {
        // Record the position on both sides of the token so callers can map
        // it back onto the formula text.
        let start = lexer.get_position();
        let token = lexer.next_token();
        let end = lexer.get_position();
        if matches!(token, token::TokenType::EOF) {
            break;
        }
        tokens.push(MarkedToken { token, start, end });
    }
    tokens
}
/// A marked token is displayed exactly like the token it wraps; the
/// start/end positions are not part of the output.
impl fmt::Display for MarkedToken {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let inner = &self.token;
        f.write_fmt(format_args!("{}", inner))
    }
}