UPDATE: Introducing Arrays
# This PR introduces:
## Parsing arrays:
{1,2,3} and {1;2;3}
Note that array elements can be numbers, booleans and errors (#VALUE!)
## Evaluating arrays in the SUM function
=SUM({1,2,3}) works!
## Evaluating arithmetic operation with arrays
=SUM({1,2,3} * 8) or =SUM({1,2,3}+{2,4,5}) works
This is done with just one function (handle_arithmetic) for most operations
## Some mathematical functions implement arrays
=SUM(SIN({1,2,3})) works
This is done with macros. See fn_single_number
So that implementing new functions that support arrays is easy
# Not done in this PR
## Most functions are not supporting arrays
When that happens we either throw #N/IMPL! (not implemented error)
or do implicit intersection. Some functions will be rather trivial to "arraify"; some will be hard
## The final result in a cell cannot be an array
The formula ={1,2,3} in a cell will result in #N/IMPL!
## Exporting arrays to Excel might not work correctly
Excel uses the cm (cell metadata) for formulas that contain dynamic arrays.
Although the present PR does not introduce dynamic arrays, some formulas like =SUM(SIN({1,2,3}))
are considered dynamic formulas
## There are not a lot of tests in this delivery
The bulk of the tests will be added once we start going function by function
# This PR introduces:
## Parsing arrays:
{1,2,3} and {1;2;3}
Note that array elements can be numbers, booleans and errors (#VALUE!)
## Evaluating arrays in the SUM function
=SUM({1,2,3}) works!
## Evaluating arithmetic operation with arrays
=SUM({1,2,3} * 8) or =SUM({1,2,3}+{2,4,5}) works
This is done with just one function (handle_arithmetic) for most operations
## Some mathematical functions implement arrays
=SUM(SIN({1,2,3})) works
This is done with macros. See fn_single_number
So that implementing new functions that support arrays is easy
# Not done in this PR
## Most functions are not supporting arrays
When that happens we either throw #N/IMPL! (not implemented error)
or do implicit intersection. Some functions will be rather trivial to "arraify"; some will be hard
## The final result in a cell cannot be an array
The formula ={1,2,3} in a cell will result in #N/IMPL!
## Exporting arrays to Excel might not work correctly
Excel uses the cm (cell metadata) for formulas that contain dynamic arrays.
Although the present PR does not introduce dynamic arrays, some formulas like =SUM(SIN({1,2,3}))
are considered dynamic formulas
## There are not a lot of tests in this delivery
The bulk of the tests will be added once we start going function by function
## The array parsing does not respect the locale
Locales that use ',' as a decimal separator need to use something different for arrays
## This might introduce a small performance penalty
We haven't done any benchmarking, and having closures for every arithmetic operation and every function
evaluation will introduce a performance hit. Fixing that in the future is not so hard: it means writing tailored
code for each operation
This commit is contained in:
committed by
Nicolás Hatcher Andrés
parent
e07fdd2091
commit
e5ec75495a
@@ -94,6 +94,14 @@ pub(crate) struct Reference<'a> {
|
||||
column: i32,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Clone, Debug)]
|
||||
pub enum ArrayNode {
|
||||
Boolean(bool),
|
||||
Number(f64),
|
||||
String(String),
|
||||
Error(token::Error),
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Clone, Debug)]
|
||||
pub enum Node {
|
||||
BooleanKind(bool),
|
||||
@@ -167,7 +175,7 @@ pub enum Node {
|
||||
name: String,
|
||||
args: Vec<Node>,
|
||||
},
|
||||
ArrayKind(Vec<Node>),
|
||||
ArrayKind(Vec<Vec<ArrayNode>>),
|
||||
DefinedNameKind(DefinedNameS),
|
||||
TableNameKind(String),
|
||||
WrongVariableKind(String),
|
||||
@@ -454,6 +462,49 @@ impl Parser {
|
||||
self.parse_primary()
|
||||
}
|
||||
|
||||
fn parse_array_row(&mut self) -> Result<Vec<ArrayNode>, Node> {
|
||||
let mut row = Vec::new();
|
||||
// an array can only have numbers, strings, booleans or errors
|
||||
// otherwise it is a syntax error
|
||||
let first_element = match self.parse_expr() {
|
||||
Node::BooleanKind(s) => ArrayNode::Boolean(s),
|
||||
Node::NumberKind(s) => ArrayNode::Number(s),
|
||||
Node::StringKind(s) => ArrayNode::String(s),
|
||||
Node::ErrorKind(kind) => ArrayNode::Error(kind),
|
||||
error @ Node::ParseErrorKind { .. } => return Err(error),
|
||||
_ => {
|
||||
return Err(Node::ParseErrorKind {
|
||||
formula: self.lexer.get_formula(),
|
||||
message: "Invalid value in array".to_string(),
|
||||
position: self.lexer.get_position() as usize,
|
||||
});
|
||||
}
|
||||
};
|
||||
row.push(first_element);
|
||||
let mut next_token = self.lexer.peek_token();
|
||||
// FIXME: this is not respecting the locale
|
||||
while next_token == TokenType::Comma {
|
||||
self.lexer.advance_token();
|
||||
let value = match self.parse_expr() {
|
||||
Node::BooleanKind(s) => ArrayNode::Boolean(s),
|
||||
Node::NumberKind(s) => ArrayNode::Number(s),
|
||||
Node::StringKind(s) => ArrayNode::String(s),
|
||||
Node::ErrorKind(kind) => ArrayNode::Error(kind),
|
||||
error @ Node::ParseErrorKind { .. } => return Err(error),
|
||||
_ => {
|
||||
return Err(Node::ParseErrorKind {
|
||||
formula: self.lexer.get_formula(),
|
||||
message: "Invalid value in array".to_string(),
|
||||
position: self.lexer.get_position() as usize,
|
||||
});
|
||||
}
|
||||
};
|
||||
row.push(value);
|
||||
next_token = self.lexer.peek_token();
|
||||
}
|
||||
Ok(row)
|
||||
}
|
||||
|
||||
fn parse_primary(&mut self) -> Node {
|
||||
let next_token = self.lexer.next_token();
|
||||
match next_token {
|
||||
@@ -475,21 +526,35 @@ impl Parser {
|
||||
TokenType::Number(s) => Node::NumberKind(s),
|
||||
TokenType::String(s) => Node::StringKind(s),
|
||||
TokenType::LeftBrace => {
|
||||
let t = self.parse_expr();
|
||||
if let Node::ParseErrorKind { .. } = t {
|
||||
return t;
|
||||
}
|
||||
// It's an array. It's a collection of rows all of the same dimension
|
||||
|
||||
let first_row = match self.parse_array_row() {
|
||||
Ok(s) => s,
|
||||
Err(error) => return error,
|
||||
};
|
||||
let length = first_row.len();
|
||||
|
||||
let mut matrix = Vec::new();
|
||||
matrix.push(first_row);
|
||||
// FIXME: this is not respecting the locale
|
||||
let mut next_token = self.lexer.peek_token();
|
||||
let mut args: Vec<Node> = vec![t];
|
||||
while next_token == TokenType::Semicolon {
|
||||
self.lexer.advance_token();
|
||||
let p = self.parse_expr();
|
||||
if let Node::ParseErrorKind { .. } = p {
|
||||
return p;
|
||||
}
|
||||
let row = match self.parse_array_row() {
|
||||
Ok(s) => s,
|
||||
Err(error) => return error,
|
||||
};
|
||||
next_token = self.lexer.peek_token();
|
||||
args.push(p);
|
||||
if row.len() != length {
|
||||
return Node::ParseErrorKind {
|
||||
formula: self.lexer.get_formula(),
|
||||
position: self.lexer.get_position() as usize,
|
||||
message: "All rows in an array should be the same length".to_string(),
|
||||
};
|
||||
}
|
||||
matrix.push(row);
|
||||
}
|
||||
|
||||
if let Err(err) = self.lexer.expect(TokenType::RightBrace) {
|
||||
return Node::ParseErrorKind {
|
||||
formula: self.lexer.get_formula(),
|
||||
@@ -497,7 +562,7 @@ impl Parser {
|
||||
message: err.message,
|
||||
};
|
||||
}
|
||||
Node::ArrayKind(args)
|
||||
Node::ArrayKind(matrix)
|
||||
}
|
||||
TokenType::Reference {
|
||||
sheet,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use super::{
|
||||
stringify::{stringify_reference, DisplaceData},
|
||||
Node, Reference,
|
||||
ArrayNode, Node, Reference,
|
||||
};
|
||||
use crate::{
|
||||
constants::{LAST_COLUMN, LAST_ROW},
|
||||
@@ -56,6 +56,15 @@ fn move_function(name: &str, args: &Vec<Node>, move_context: &MoveContext) -> St
|
||||
format!("{}({})", name, arguments)
|
||||
}
|
||||
|
||||
pub(crate) fn to_string_array_node(node: &ArrayNode) -> String {
|
||||
match node {
|
||||
ArrayNode::Boolean(value) => format!("{}", value).to_ascii_uppercase(),
|
||||
ArrayNode::Number(number) => to_excel_precision_str(*number),
|
||||
ArrayNode::String(value) => format!("\"{}\"", value),
|
||||
ArrayNode::Error(kind) => format!("{}", kind),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_string_moved(node: &Node, move_context: &MoveContext) -> String {
|
||||
use self::Node::*;
|
||||
match node {
|
||||
@@ -362,18 +371,39 @@ fn to_string_moved(node: &Node, move_context: &MoveContext) -> String {
|
||||
move_function(name, args, move_context)
|
||||
}
|
||||
ArrayKind(args) => {
|
||||
// This code is a placeholder. Arrays are not yet implemented
|
||||
let mut first = true;
|
||||
let mut arguments = "".to_string();
|
||||
for el in args {
|
||||
if !first {
|
||||
arguments = format!("{},{}", arguments, to_string_moved(el, move_context));
|
||||
let mut first_row = true;
|
||||
let mut matrix_string = String::new();
|
||||
|
||||
// Each element in `args` is assumed to be one "row" (itself a `Vec<T>`).
|
||||
for row in args {
|
||||
if !first_row {
|
||||
matrix_string.push(',');
|
||||
} else {
|
||||
first = false;
|
||||
arguments = to_string_moved(el, move_context);
|
||||
first_row = false;
|
||||
}
|
||||
|
||||
// Build the string for the current row
|
||||
let mut first_col = true;
|
||||
let mut row_string = String::new();
|
||||
for el in row {
|
||||
if !first_col {
|
||||
row_string.push(',');
|
||||
} else {
|
||||
first_col = false;
|
||||
}
|
||||
|
||||
// Reuse your existing element-stringification function
|
||||
row_string.push_str(&to_string_array_node(el));
|
||||
}
|
||||
|
||||
// Enclose the row in braces
|
||||
matrix_string.push('{');
|
||||
matrix_string.push_str(&row_string);
|
||||
matrix_string.push('}');
|
||||
}
|
||||
format!("{{{}}}", arguments)
|
||||
|
||||
// Enclose the whole matrix in braces
|
||||
format!("{{{}}}", matrix_string)
|
||||
}
|
||||
DefinedNameKind((name, ..)) => name.to_string(),
|
||||
TableNameKind(name) => name.to_string(),
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use super::{super::utils::quote_name, Node, Reference};
|
||||
use crate::constants::{LAST_COLUMN, LAST_ROW};
|
||||
use crate::expressions::parser::move_formula::to_string_array_node;
|
||||
use crate::expressions::parser::static_analysis::add_implicit_intersection;
|
||||
use crate::expressions::token::OpUnary;
|
||||
use crate::{expressions::types::CellReferenceRC, number_format::to_excel_precision_str};
|
||||
@@ -258,6 +259,31 @@ fn format_function(
|
||||
format!("{}({})", name, arguments)
|
||||
}
|
||||
|
||||
// There is just one representation in the AST (Abstract Syntax Tree) of a formula.
|
||||
// But three different ways to convert it to a string.
|
||||
//
|
||||
// To stringify a formula we need a "context", that is in which cell are we doing the "stringifying"
|
||||
//
|
||||
// But there are three ways to stringify a formula:
|
||||
//
|
||||
// * To show it to the IronCalc user
|
||||
// * To store internally
|
||||
// * To export to Excel
|
||||
//
|
||||
// There are, of course correspondingly three "modes" when parsing a formula.
|
||||
//
|
||||
// The internal representation is the most different, as references are stored in the RC representation.
|
||||
// Since the AST of the formula is kept close to this representation we don't need a context
|
||||
//
|
||||
// In the export to Excel representation certain things are different:
|
||||
// * We add a _xlfn. in front of some (more modern) functions
|
||||
// * We remove the Implicit Intersection operator when it is automatic and add _xlfn.SINGLE when it is not
|
||||
//
|
||||
// Examples:
|
||||
// * =A1+B2
|
||||
// * =RC+R1C1
|
||||
// * =A1+B1
|
||||
|
||||
fn stringify(
|
||||
node: &Node,
|
||||
context: Option<&CellReferenceRC>,
|
||||
@@ -535,21 +561,28 @@ fn stringify(
|
||||
format_function(&name, args, context, displace_data, export_to_excel)
|
||||
}
|
||||
ArrayKind(args) => {
|
||||
let mut first = true;
|
||||
let mut arguments = "".to_string();
|
||||
for el in args {
|
||||
if !first {
|
||||
arguments = format!(
|
||||
"{},{}",
|
||||
arguments,
|
||||
stringify(el, context, displace_data, export_to_excel)
|
||||
);
|
||||
let mut first_row = true;
|
||||
let mut matrix_string = String::new();
|
||||
|
||||
for row in args {
|
||||
if !first_row {
|
||||
matrix_string.push(';');
|
||||
} else {
|
||||
first = false;
|
||||
arguments = stringify(el, context, displace_data, export_to_excel);
|
||||
first_row = false;
|
||||
}
|
||||
let mut first_column = true;
|
||||
let mut row_string = String::new();
|
||||
for el in row {
|
||||
if !first_column {
|
||||
row_string.push(',');
|
||||
} else {
|
||||
first_column = false;
|
||||
}
|
||||
row_string.push_str(&to_string_array_node(el));
|
||||
}
|
||||
matrix_string.push_str(&row_string);
|
||||
}
|
||||
format!("{{{}}}", arguments)
|
||||
format!("{{{}}}", matrix_string)
|
||||
}
|
||||
TableNameKind(value) => value.to_string(),
|
||||
DefinedNameKind((name, ..)) => name.to_string(),
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
mod test_add_implicit_intersection;
|
||||
mod test_arrays;
|
||||
mod test_general;
|
||||
mod test_implicit_intersection;
|
||||
mod test_issue_155;
|
||||
|
||||
92
base/src/expressions/parser/tests/test_arrays.rs
Normal file
92
base/src/expressions/parser/tests/test_arrays.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
#![allow(clippy::panic)]
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::expressions::parser::stringify::{to_rc_format, to_string};
|
||||
use crate::expressions::parser::{ArrayNode, Node, Parser};
|
||||
use crate::expressions::types::CellReferenceRC;
|
||||
|
||||
#[test]
|
||||
fn simple_horizontal() {
|
||||
let worksheets = vec!["Sheet1".to_string()];
|
||||
let mut parser = Parser::new(worksheets, vec![], HashMap::new());
|
||||
|
||||
// Reference cell is Sheet1!A1
|
||||
let cell_reference = CellReferenceRC {
|
||||
sheet: "Sheet1".to_string(),
|
||||
row: 1,
|
||||
column: 1,
|
||||
};
|
||||
let horizontal = parser.parse("{1, 2, 3}", &cell_reference);
|
||||
assert_eq!(
|
||||
horizontal,
|
||||
Node::ArrayKind(vec![vec![
|
||||
ArrayNode::Number(1.0),
|
||||
ArrayNode::Number(2.0),
|
||||
ArrayNode::Number(3.0)
|
||||
]])
|
||||
);
|
||||
|
||||
assert_eq!(to_rc_format(&horizontal), "{1,2,3}");
|
||||
assert_eq!(to_string(&horizontal, &cell_reference), "{1,2,3}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_vertical() {
|
||||
let worksheets = vec!["Sheet1".to_string()];
|
||||
let mut parser = Parser::new(worksheets, vec![], HashMap::new());
|
||||
|
||||
// Reference cell is Sheet1!A1
|
||||
let cell_reference = CellReferenceRC {
|
||||
sheet: "Sheet1".to_string(),
|
||||
row: 1,
|
||||
column: 1,
|
||||
};
|
||||
let vertical = parser.parse("{1;2; 3}", &cell_reference);
|
||||
assert_eq!(
|
||||
vertical,
|
||||
Node::ArrayKind(vec![
|
||||
vec![ArrayNode::Number(1.0)],
|
||||
vec![ArrayNode::Number(2.0)],
|
||||
vec![ArrayNode::Number(3.0)]
|
||||
])
|
||||
);
|
||||
assert_eq!(to_rc_format(&vertical), "{1;2;3}");
|
||||
assert_eq!(to_string(&vertical, &cell_reference), "{1;2;3}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_matrix() {
|
||||
let worksheets = vec!["Sheet1".to_string()];
|
||||
let mut parser = Parser::new(worksheets, vec![], HashMap::new());
|
||||
|
||||
// Reference cell is Sheet1!A1
|
||||
let cell_reference = CellReferenceRC {
|
||||
sheet: "Sheet1".to_string(),
|
||||
row: 1,
|
||||
column: 1,
|
||||
};
|
||||
let matrix = parser.parse("{1,2,3; 4, 5, 6; 7,8,9}", &cell_reference);
|
||||
assert_eq!(
|
||||
matrix,
|
||||
Node::ArrayKind(vec![
|
||||
vec![
|
||||
ArrayNode::Number(1.0),
|
||||
ArrayNode::Number(2.0),
|
||||
ArrayNode::Number(3.0)
|
||||
],
|
||||
vec![
|
||||
ArrayNode::Number(4.0),
|
||||
ArrayNode::Number(5.0),
|
||||
ArrayNode::Number(6.0)
|
||||
],
|
||||
vec![
|
||||
ArrayNode::Number(7.0),
|
||||
ArrayNode::Number(8.0),
|
||||
ArrayNode::Number(9.0)
|
||||
]
|
||||
])
|
||||
);
|
||||
assert_eq!(to_rc_format(&matrix), "{1,2,3;4,5,6;7,8,9}");
|
||||
assert_eq!(to_string(&matrix, &cell_reference), "{1,2,3;4,5,6;7,8,9}");
|
||||
}
|
||||
Reference in New Issue
Block a user