diff --git a/base/src/expressions/parser/mod.rs b/base/src/expressions/parser/mod.rs index af9e33a..2afa993 100644 --- a/base/src/expressions/parser/mod.rs +++ b/base/src/expressions/parser/mod.rs @@ -471,6 +471,20 @@ impl Parser { Node::NumberKind(s) => ArrayNode::Number(s), Node::StringKind(s) => ArrayNode::String(s), Node::ErrorKind(kind) => ArrayNode::Error(kind), + Node::UnaryKind { + kind: OpUnary::Minus, + right, + } => { + if let Node::NumberKind(n) = *right { + ArrayNode::Number(-n) + } else { + return Err(Node::ParseErrorKind { + formula: self.lexer.get_formula(), + message: "Invalid value in array".to_string(), + position: self.lexer.get_position() as usize, + }); + } + } error @ Node::ParseErrorKind { .. } => return Err(error), _ => { return Err(Node::ParseErrorKind { @@ -490,6 +504,20 @@ impl Parser { Node::NumberKind(s) => ArrayNode::Number(s), Node::StringKind(s) => ArrayNode::String(s), Node::ErrorKind(kind) => ArrayNode::Error(kind), + Node::UnaryKind { + kind: OpUnary::Minus, + right, + } => { + if let Node::NumberKind(n) = *right { + ArrayNode::Number(-n) + } else { + return Err(Node::ParseErrorKind { + formula: self.lexer.get_formula(), + message: "Invalid value in array".to_string(), + position: self.lexer.get_position() as usize, + }); + } + } error @ Node::ParseErrorKind { .. } => return Err(error), _ => { return Err(Node::ParseErrorKind { diff --git a/base/src/expressions/parser/static_analysis.rs b/base/src/expressions/parser/static_analysis.rs index e3e5ef1..04edf56 100644 --- a/base/src/expressions/parser/static_analysis.rs +++ b/base/src/expressions/parser/static_analysis.rs @@ -995,6 +995,18 @@ fn get_function_args_signature(kind: &Function, arg_count: usize) -> Vec vec![Signature::Vector; 2], Function::Slope => vec![Signature::Vector; 2], Function::Steyx => vec![Signature::Vector; 2], + Function::Gauss => args_signature_scalars(arg_count, 1, 0), + Function::Harmean => vec![Signature::Vector; arg_count], + Function::Kurt => vec![Signature::Vector; arg_count], + Function::Large => vec![Signature::Vector, Signature::Scalar], + Function::MaxA => vec![Signature::Vector; arg_count], + Function::Median => vec![Signature::Vector; arg_count], + Function::MinA => vec![Signature::Vector; arg_count], + Function::RankAvg => vec![Signature::Scalar, Signature::Vector, Signature::Scalar], + Function::RankEq => vec![Signature::Scalar, Signature::Vector, Signature::Scalar], + Function::Skew => vec![Signature::Vector; arg_count], + Function::SkewP => vec![Signature::Vector; arg_count], + Function::Small => vec![Signature::Vector, Signature::Scalar], } } @@ -1334,5 +1346,17 @@ fn static_analysis_on_function(kind: &Function, args: &[Node]) -> StaticResult { Function::Intercept => StaticResult::Scalar, Function::Slope => StaticResult::Scalar, Function::Steyx => StaticResult::Scalar, + Function::Gauss => StaticResult::Scalar, + Function::Harmean => StaticResult::Scalar, + Function::Kurt => StaticResult::Scalar, + Function::Large => StaticResult::Scalar, + Function::MaxA => StaticResult::Scalar, + Function::Median => StaticResult::Scalar, + Function::MinA => StaticResult::Scalar, + Function::RankAvg => StaticResult::Scalar, + Function::RankEq => StaticResult::Scalar, + Function::Skew => StaticResult::Scalar, + Function::SkewP => StaticResult::Scalar, + Function::Small => StaticResult::Scalar, } } diff --git a/base/src/functions/mod.rs b/base/src/functions/mod.rs index a66f7d9..f762722 100644 --- a/base/src/functions/mod.rs +++ b/base/src/functions/mod.rs @@ -207,7 +207,6 @@ pub enum Function { ChisqTest, ConfidenceNorm, ConfidenceT, - // Correl, CovarianceP, CovarianceS, Devsq, @@ -225,20 +224,18 @@ pub enum Function { GammaInv, GammaLn, GammaLnPrecise, - // Gauss, - // Growth, - // Harmean, + Gauss, + Harmean, HypGeomDist, - // Intercept, - // Kurt, - // Large, + Kurt, + Large, // Linest, // Logest, LogNormDist, LogNormInv, - // MaxA, - // Median, - // MinA, + MaxA, + Median, + MinA, // ModeMult, // ModeSingl, NegbinomDist, @@ -258,19 +255,16 @@ pub enum Function { // Prob, // QuartileExc, // QuartileInc, - // RankAvg, - // RankEq, - // Rsq - // Skew, - // SkewP, - // Slope, - // Small, + RankAvg, + RankEq, + Skew, + SkewP, + Small, Standardize, StDevP, StDevS, Stdeva, Stdevpa, - // Steyx, TDist, TDist2T, TDistRT, @@ -430,7 +424,7 @@ pub enum Function { } impl Function { - pub fn into_iter() -> IntoIter { + pub fn into_iter() -> IntoIter { [ Function::And, Function::False, @@ -765,6 +759,18 @@ impl Function { Function::Intercept, Function::Slope, Function::Steyx, + Function::Large, + Function::Median, + Function::Small, + Function::RankAvg, + Function::RankEq, + Function::Skew, + Function::SkewP, + Function::Harmean, + Function::Gauss, + Function::Kurt, + Function::MaxA, + Function::MinA, ] .into_iter() } @@ -887,6 +893,9 @@ impl Function { Function::WeibullDist => "_xlfn.WEIBULL.DIST".to_string(), Function::ZTest => "_xlfn.Z.TEST".to_string(), + Function::SkewP => "_xlfn.SKEW.P".to_string(), + Function::RankAvg => "_xlfn.RANK.AVG".to_string(), + Function::RankEq => "_xlfn.RANK.EQ".to_string(), _ => self.to_string(), } @@ -1251,6 +1260,20 @@ impl Function { "SLOPE" => Some(Function::Slope), "STEYX" => Some(Function::Steyx), + "SKEW.P" | "_XLFN.SKEW.P" => Some(Function::SkewP), + "SKEW" => Some(Function::Skew), + "KURT" => Some(Function::Kurt), + "HARMEAN" => Some(Function::Harmean), + "MEDIAN" => Some(Function::Median), + "GAUSS" => Some(Function::Gauss), + + "MINA" => Some(Function::MinA), + "MAXA" => Some(Function::MaxA), + "SMALL" => Some(Function::Small), + "LARGE" => Some(Function::Large), + "RANK.EQ" | "_XLFN.RANK.EQ" => Some(Function::RankEq), + "RANK.AVG" | "_XLFN.RANK.AVG" => Some(Function::RankAvg), + _ => None, } } @@ -1512,7 +1535,6 @@ impl fmt::Display for Function { Function::Combin => write!(f, "COMBIN"), Function::Combina => write!(f, "COMBINA"), Function::Sumsq => write!(f, "SUMSQ"), - Function::N => write!(f, "N"), Function::Cell => write!(f, "CELL"), Function::Info => write!(f, "INFO"), @@ -1529,7 +1551,6 @@ impl fmt::Display for Function { Function::Dvar => write!(f, "DVAR"), Function::Dvarp => write!(f, "DVARP"), Function::Dstdevp => write!(f, "DSTDEVP"), - Function::BetaDist => write!(f, "BETA.DIST"), Function::BetaInv => write!(f, "BETA.INV"), Function::BinomDist => write!(f, "BINOM.DIST"), @@ -1594,6 +1615,19 @@ impl fmt::Display for Function { Function::Intercept => write!(f, "INTERCEPT"), Function::Slope => write!(f, "SLOPE"), Function::Steyx => write!(f, "STEYX"), + // new ones + Function::Gauss => write!(f, "GAUSS"), + Function::Harmean => write!(f, "HARMEAN"), + Function::Kurt => write!(f, "KURT"), + Function::Large => write!(f, "LARGE"), + Function::MaxA => write!(f, "MAXA"), + Function::Median => write!(f, "MEDIAN"), + Function::MinA => write!(f, "MINA"), + Function::RankAvg => write!(f, "RANK.AVG"), + Function::RankEq => write!(f, "RANK.EQ"), + Function::Skew => write!(f, "SKEW"), + Function::SkewP => write!(f, "SKEW.P"), + Function::Small => write!(f, "SMALL"), } } } @@ -1955,6 +1989,18 @@ impl Model { Function::Intercept => self.fn_intercept(args, cell), Function::Slope => self.fn_slope(args, cell), Function::Steyx => self.fn_steyx(args, cell), + Function::Gauss => self.fn_gauss(args, cell), + Function::Harmean => self.fn_harmean(args, cell), + Function::Kurt => self.fn_kurt(args, cell), + Function::Large => self.fn_large(args, cell), + Function::MaxA => self.fn_maxa(args, cell), + Function::Median => self.fn_median(args, cell), + Function::MinA => self.fn_mina(args, cell), + Function::RankAvg => self.fn_rank_avg(args, cell), + Function::RankEq => self.fn_rank_eq(args, cell), + Function::Skew => self.fn_skew(args, cell), + Function::SkewP => self.fn_skew_p(args, cell), + Function::Small => self.fn_small(args, cell), } } } diff --git a/base/src/functions/statistical/count_and_average.rs b/base/src/functions/statistical/count_and_average.rs index eed5d51..23298f3 100644 --- a/base/src/functions/statistical/count_and_average.rs +++ b/base/src/functions/statistical/count_and_average.rs @@ -1,3 +1,5 @@ +use std::cmp::Ordering; + use crate::constants::{LAST_COLUMN, LAST_ROW}; use crate::expressions::parser::ArrayNode; use crate::expressions::types::CellReferenceIndex; @@ -6,77 +8,219 @@ use crate::{ }; impl Model { - pub(crate) fn fn_average(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { - if args.is_empty() { - return CalcResult::new_args_number_error(cell); - } - let mut count = 0.0; - let mut sum = 0.0; + fn for_each_value( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + mut f: F, + ) -> Result<(), CalcResult> + where + F: FnMut(f64), + { for arg in args { match self.evaluate_node_in_context(arg, cell) { CalcResult::Number(value) => { - count += 1.0; - sum += value; + f(value); } - CalcResult::Boolean(b) => { - if let Node::ReferenceKind { .. } = arg { - } else { - sum += if b { 1.0 } else { 0.0 }; - count += 1.0; + CalcResult::Boolean(value) => { + if !matches!(arg, Node::ReferenceKind { .. }) { + f(if value { 1.0 } else { 0.0 }); + } + } + CalcResult::String(value) => { + if !matches!(arg, Node::ReferenceKind { .. }) { + if let Some(parsed) = self.cast_number(&value) { + f(parsed); + } else { + return Err(CalcResult::new_error( + Error::VALUE, + cell, + "Argument cannot be cast into number".to_string(), + )); + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + f(value); + } + ArrayNode::Boolean(b) => { + f(if b { 1.0 } else { 0.0 }); + } + ArrayNode::Error(error) => { + return Err(CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + }); + } + _ => { + // ignore non-numeric + } + } + } } } CalcResult::Range { left, right } => { if left.sheet != right.sheet { - return CalcResult::new_error( + return Err(CalcResult::new_error( Error::VALUE, cell, "Ranges are in different sheets".to_string(), - ); + )); } - for row in left.row..(right.row + 1) { - for column in left.column..(right.column + 1) { + + for row in left.row..=right.row { + for column in left.column..=right.column { match self.evaluate_cell(CellReferenceIndex { sheet: left.sheet, row, column, }) { CalcResult::Number(value) => { - count += 1.0; - sum += value; + f(value); } - error @ CalcResult::Error { .. } => return error, + error @ CalcResult::Error { .. } => return Err(error), CalcResult::Range { .. } => { - return CalcResult::new_error( + return Err(CalcResult::new_error( Error::ERROR, cell, "Unexpected Range".to_string(), - ); + )); } _ => {} } } } } - error @ CalcResult::Error { .. } => return error, - CalcResult::String(s) => { - if let Node::ReferenceKind { .. } = arg { - // Do nothing - } else if let Ok(t) = s.parse::() { - sum += t; - count += 1.0; - } else { - return CalcResult::Error { - error: Error::VALUE, - origin: cell, - message: "Argument cannot be cast into number".to_string(), - }; + error @ CalcResult::Error { .. } => return Err(error), + // Everything else is ignored + _ => {} + } + } + + Ok(()) + } + + fn for_each_value_a( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + mut f: F, + ) -> Result<(), CalcResult> + where + F: FnMut(f64), + { + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + f(value); + } + CalcResult::Boolean(value) => { + if !matches!(arg, Node::ReferenceKind { .. }) { + f(if value { 1.0 } else { 0.0 }); } } - _ => { - // Ignore everything else + CalcResult::String(value) => { + if !matches!(arg, Node::ReferenceKind { .. }) { + if let Some(parsed) = self.cast_number(&value) { + f(parsed); + } else { + return Err(CalcResult::new_error( + Error::VALUE, + cell, + "Argument cannot be cast into number".to_string(), + )); + } + } } - }; + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + f(value); + } + ArrayNode::Boolean(b) => { + f(if b { 1.0 } else { 0.0 }); + } + ArrayNode::String(_) => { + f(0.0); + } + ArrayNode::Error(error) => { + return Err(CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + }); + } + } + } + } + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return Err(CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + )); + } + + for row in left.row..=right.row { + for column in left.column..=right.column { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + f(value); + } + CalcResult::Boolean(b) => { + f(if b { 1.0 } else { 0.0 }); + } + CalcResult::String(_) => { + f(0.0); + } + error @ CalcResult::Error { .. } => return Err(error), + CalcResult::Range { .. } => { + return Err(CalcResult::new_error( + Error::ERROR, + cell, + "Unexpected Range".to_string(), + )); + } + _ => {} + } + } + } + } + error @ CalcResult::Error { .. } => return Err(error), + // Everything else is ignored + _ => {} + } } + + Ok(()) + } + + pub(crate) fn fn_average(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + let mut count = 0.0; + let mut sum = 0.0; + if let Err(e) = self.for_each_value(args, cell, |f| { + count += 1.0; + sum += f; + }) { + return e; + } + if count == 0.0 { return CalcResult::Error { error: Error::DIV, @@ -86,6 +230,7 @@ impl Model { } CalcResult::Number(sum / count) } + pub(crate) fn fn_averagea(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { if args.is_empty() { return CalcResult::new_args_number_error(cell); @@ -443,4 +588,484 @@ impl Model { CalcResult::Number(sum_abs_dev / n) } + + pub(crate) fn fn_median(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut values: Vec = Vec::new(); + if let Err(e) = self.for_each_value(args, cell, |f| values.push(f)) { + return e; + } + + if values.is_empty() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "No numeric values for MEDIAN".to_string(), + }; + } + + values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); + + let n = values.len(); + let median = if n % 2 == 1 { + // odd + values[n / 2] + } else { + // even: average of the two middle values + let a = values[(n / 2) - 1]; + let b = values[n / 2]; + (a + b) / 2.0 + }; + + CalcResult::Number(median) + } + + pub(crate) fn fn_harmean(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut values: Vec = Vec::new(); + if let Err(e) = self.for_each_value(args, cell, |f| values.push(f)) { + return e; + } + + if values.is_empty() { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "Division by Zero".to_string(), + }; + } + + // Excel HARMEAN: all values must be > 0 + if values.iter().any(|&v| v <= 0.0) { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "HARMEAN requires strictly positive values".to_string(), + }; + } + + let n = values.len() as f64; + let sum_recip: f64 = values.iter().map(|v| 1.0 / v).sum(); + + if sum_recip == 0.0 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "Division by Zero".to_string(), + }; + } + + CalcResult::Number(n / sum_recip) + } + + pub(crate) fn fn_mina(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + let mut mina: Option = None; + if let Err(e) = self.for_each_value_a(args, cell, |f| { + if let Some(m) = mina { + mina = Some(m.min(f)); + } else { + mina = Some(f); + } + }) { + return e; + } + if let Some(mina) = mina { + CalcResult::Number(mina) + } else { + CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: "No numeric values for MINA".to_string(), + } + } + } + + pub(crate) fn fn_maxa(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + let mut maxa: Option = None; + if let Err(e) = self.for_each_value_a(args, cell, |f| { + if let Some(m) = maxa { + maxa = Some(m.max(f)); + } else { + maxa = Some(f); + } + }) { + return e; + } + if let Some(maxa) = maxa { + CalcResult::Number(maxa) + } else { + CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: "No numeric values for MAXA".to_string(), + } + } + } + + pub(crate) fn fn_skew(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // Sample skewness (Excel SKEW) + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut values: Vec = Vec::new(); + if let Err(e) = self.for_each_value(args, cell, |f| values.push(f)) { + return e; + } + + let n = values.len(); + if n < 3 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "SKEW requires at least 3 data points".to_string(), + }; + } + + let n_f = n as f64; + let mean = values.iter().sum::() / n_f; + + let mut m2 = 0.0; + for &x in &values { + let d = x - mean; + m2 += d * d; + } + + if m2 == 0.0 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "Zero variance in SKEW".to_string(), + }; + } + + let s = (m2 / (n_f - 1.0)).sqrt(); + + let mut sum_cubed = 0.0; + for &x in &values { + let z = (x - mean) / s; + sum_cubed += z * z * z; + } + + let skew = (n_f / ((n_f - 1.0) * (n_f - 2.0))) * sum_cubed; + CalcResult::Number(skew) + } + + pub(crate) fn fn_skew_p(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // Population skewness (Excel SKEW.P) + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut values: Vec = Vec::new(); + if let Err(e) = self.for_each_value(args, cell, |f| values.push(f)) { + return e; + } + + let n = values.len(); + if n < 2 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "SKEW.P requires at least 2 data points".to_string(), + }; + } + + let n_f = n as f64; + let mean = values.iter().sum::() / n_f; + + let mut m2 = 0.0; + for &x in &values { + let d = x - mean; + m2 += d * d; + } + + if m2 == 0.0 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "Zero variance in SKEW.P".to_string(), + }; + } + + let sigma = (m2 / n_f).sqrt(); + + let mut sum_cubed = 0.0; + for &x in &values { + let z = (x - mean) / sigma; + sum_cubed += z * z * z; + } + + let skew_p = sum_cubed / n_f; + CalcResult::Number(skew_p) + } + + pub(crate) fn fn_kurt(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut values: Vec = Vec::new(); + if let Err(e) = self.for_each_value(args, cell, |f| values.push(f)) { + return e; + } + + let n = values.len(); + if n < 4 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "KURT requires at least 4 data points".to_string(), + }; + } + + let n_f = n as f64; + let mean = values.iter().sum::() / n_f; + + let mut m2 = 0.0; + for &x in &values { + let d = x - mean; + m2 += d * d; + } + + if m2 == 0.0 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "Zero variance in KURT".to_string(), + }; + } + + let s = (m2 / (n_f - 1.0)).sqrt(); + + let mut sum_fourth = 0.0; + for &x in &values { + let z = (x - mean) / s; + sum_fourth += z * z * z * z; + } + + let term1 = (n_f * (n_f + 1.0)) / ((n_f - 1.0) * (n_f - 2.0) * (n_f - 3.0)) * sum_fourth; + let term2 = 3.0 * (n_f - 1.0) * (n_f - 1.0) / ((n_f - 2.0) * (n_f - 3.0)); + + let kurt = term1 - term2; + CalcResult::Number(kurt) + } + + pub(crate) fn fn_large(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + let values = match self.evaluate_node_in_context(&args[0], cell) { + CalcResult::Array(array) => match self.values_from_array(array) { + Ok(v) => v, + Err(e) => { + return CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: format!("Unsupported array argument: {}", e), + } + } + }, + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(e) => return e, + }, + CalcResult::Boolean(value) => { + if !matches!(args[0], Node::ReferenceKind { .. }) { + vec![Some(if value { 1.0 } else { 0.0 })] + } else { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } + CalcResult::Number(value) => { + if !matches!(args[0], Node::ReferenceKind { .. }) { + vec![Some(value)] + } else { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } + CalcResult::String(value) => { + if !matches!(args[0], Node::ReferenceKind { .. }) { + if let Some(parsed) = self.cast_number(&value) { + vec![Some(parsed)] + } else { + return CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } else { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } + _ => { + return CalcResult::Error { + error: Error::NIMPL, + origin: cell, + message: "Unsupported argument type".to_string(), + } + } + }; + let k = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(s) => return s, + }; + if k < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "K must be >= 1".to_string(), + }; + } + let mut numeric_values: Vec = values.into_iter().flatten().collect(); + if numeric_values.is_empty() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "No numeric values for LARGE".to_string(), + }; + } + numeric_values.sort_by(|a, b| b.partial_cmp(a).unwrap_or(Ordering::Equal)); + let k_usize = k as usize; + if k_usize > numeric_values.len() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "K is larger than the number of data points".to_string(), + }; + } + CalcResult::Number(numeric_values[k_usize - 1]) + } + + pub(crate) fn fn_small(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let values = match self.evaluate_node_in_context(&args[0], cell) { + CalcResult::Array(array) => match self.values_from_array(array) { + Ok(v) => v, + Err(e) => { + return CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: format!("Unsupported array argument: {}", e), + } + } + }, + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(e) => return e, + }, + CalcResult::Boolean(value) => { + if !matches!(args[0], Node::ReferenceKind { .. }) { + vec![Some(if value { 1.0 } else { 0.0 })] + } else { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } + CalcResult::Number(value) => { + if !matches!(args[0], Node::ReferenceKind { .. }) { + vec![Some(value)] + } else { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } + CalcResult::String(value) => { + if !matches!(args[0], Node::ReferenceKind { .. }) { + if let Some(parsed) = self.cast_number(&value) { + vec![Some(parsed)] + } else { + return CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } else { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Unsupported argument type".to_string(), + }; + } + } + _ => { + return CalcResult::Error { + error: Error::NIMPL, + origin: cell, + message: "Unsupported argument type".to_string(), + } + } + }; + + let k = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(s) => return s, + }; + + if k < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "K must be >= 1".to_string(), + }; + } + + let mut numeric_values: Vec = values.into_iter().flatten().collect(); + if numeric_values.is_empty() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "No numeric values for SMALL".to_string(), + }; + } + + // For SMALL, sort ascending + numeric_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); + + let k_usize = k as usize; + if k_usize > numeric_values.len() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "K is larger than the number of data points".to_string(), + }; + } + + CalcResult::Number(numeric_values[k_usize - 1]) + } } diff --git a/base/src/functions/statistical/gauss.rs b/base/src/functions/statistical/gauss.rs new file mode 100644 index 0000000..0e4d89d --- /dev/null +++ b/base/src/functions/statistical/gauss.rs @@ -0,0 +1,39 @@ +use statrs::distribution::{ContinuousCDF, Normal}; + +use crate::expressions::token::Error; +use crate::expressions::types::CellReferenceIndex; +use crate::{calc_result::CalcResult, expressions::parser::Node, model::Model}; + +impl Model { + pub(crate) fn fn_gauss(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 1 { + return CalcResult::new_args_number_error(cell); + } + let z = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(s) => return s, + }; + let dist = match Normal::new(0.0, 1.0) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::ERROR, + origin: cell, + message: "Failed to construct standard normal distribution".to_string(), + } + } + }; + + let result = dist.cdf(z) - 0.5; + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for GAUSS".to_string(), + }; + } + + CalcResult::Number(result) + } +} diff --git a/base/src/functions/statistical/mod.rs b/base/src/functions/statistical/mod.rs index 6e31366..7d9eb5c 100644 --- a/base/src/functions/statistical/mod.rs +++ b/base/src/functions/statistical/mod.rs @@ -8,6 +8,7 @@ mod devsq; mod exponential; mod fisher; mod gamma; +mod gauss; mod geomean; mod hypegeom; mod if_ifs; @@ -16,6 +17,7 @@ mod normal; mod pearson; mod phi; mod poisson; +mod rank_eq_avg; mod standard_dev; mod standardize; mod t_dist; diff --git a/base/src/functions/statistical/rank_eq_avg.rs b/base/src/functions/statistical/rank_eq_avg.rs new file mode 100644 index 0000000..c6ea771 --- /dev/null +++ b/base/src/functions/statistical/rank_eq_avg.rs @@ -0,0 +1,202 @@ +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // Helper to collect numeric values from the 2nd argument of RANK.* + fn collect_rank_values( + &mut self, + arg: &Node, + cell: CellReferenceIndex, + ) -> Result, CalcResult> { + let values = match self.evaluate_node_in_context(arg, cell) { + CalcResult::Array(array) => match self.values_from_array(array) { + Ok(v) => v, + Err(e) => { + return Err(CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: format!("Unsupported array argument: {}", e), + }) + } + }, + CalcResult::Range { left, right } => self.values_from_range(left, right)?, + CalcResult::Boolean(value) => { + if !matches!(arg, Node::ReferenceKind { .. }) { + vec![Some(if value { 1.0 } else { 0.0 })] + } else { + return Err(CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Unsupported argument type".to_string(), + }); + } + } + _ => { + return Err(CalcResult::Error { + error: Error::NIMPL, + origin: cell, + message: "Unsupported argument type".to_string(), + }) + } + }; + + let numeric_values: Vec = values.into_iter().flatten().collect(); + Ok(numeric_values) + } + + // RANK.EQ(number, ref, [order]) + pub(crate) fn fn_rank_eq(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if !(2..=3).contains(&args.len()) { + return CalcResult::new_args_number_error(cell); + } + + // number + let number = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // ref + let mut values = match self.collect_rank_values(&args[1], cell) { + Ok(v) => v, + Err(e) => return e, + }; + + if values.is_empty() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "No numeric values for RANK.EQ".to_string(), + }; + } + + // order: default 0 (descending) + let order = if args.len() == 2 { + 0.0 + } else { + match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + } + }; + + values.retain(|v| !v.is_nan()); + + // "better" = greater (descending) or smaller (ascending) + let mut better = 0; + let mut equal = 0; + + if order == 0.0 { + // descending + for v in &values { + if *v > number { + better += 1; + } else if *v == number { + equal += 1; + } + } + } else { + // ascending + for v in &values { + if *v < number { + better += 1; + } else if *v == number { + equal += 1; + } + } + } + + if equal == 0 { + return CalcResult::Error { + error: Error::NA, + origin: cell, + message: "Number not found in reference for RANK.EQ".to_string(), + }; + } + + let rank = (better as f64) + 1.0; + CalcResult::Number(rank) + } + + // RANK.AVG(number, ref, [order]) + pub(crate) fn fn_rank_avg(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if !(2..=3).contains(&args.len()) { + return CalcResult::new_args_number_error(cell); + } + + // number + let number = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // ref + let mut values = match self.collect_rank_values(&args[1], cell) { + Ok(v) => v, + Err(e) => return e, + }; + + if values.is_empty() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "No numeric values for RANK.AVG".to_string(), + }; + } + + // order: default 0 (descending) + let order = if args.len() == 2 { + 0.0 + } else { + match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + } + }; + + values.retain(|v| !v.is_nan()); + + // > or < depending on order + let mut better = 0; + let mut equal = 0; + + if order == 0.0 { + // descending + for v in &values { + if *v > number { + better += 1; + } else if *v == number { + equal += 1; + } + } + } else { + // ascending + for v in &values { + if *v < number { + better += 1; + } else if *v == number { + equal += 1; + } + } + } + + if equal == 0 { + return CalcResult::Error { + error: Error::NA, + origin: cell, + message: "Number not found in reference for RANK.AVG".to_string(), + }; + } + + // For ties, average of the ranks. If the equal values occupy positions + // (better+1) ..= (better+equal), the average is: + // better + (equal + 1) / 2 + let better_f = better as f64; + let equal_f = equal as f64; + let rank = better_f + (equal_f + 1.0) / 2.0; + + CalcResult::Number(rank) + } +} diff --git a/base/src/test/statistical/mod.rs b/base/src/test/statistical/mod.rs index 8f7c66a..6d7dd77 100644 --- a/base/src/test/statistical/mod.rs +++ b/base/src/test/statistical/mod.rs @@ -9,6 +9,7 @@ mod test_fn_expon_dist; mod test_fn_f; mod test_fn_f_test; mod test_fn_fisher; +mod test_fn_gauss; mod test_fn_hyp_geom_dist; mod test_fn_log_norm; mod test_fn_norm_dist; diff --git a/base/src/test/statistical/test_fn_gauss.rs b/base/src/test/statistical/test_fn_gauss.rs new file mode 100644 index 0000000..44b3647 --- /dev/null +++ b/base/src/test/statistical/test_fn_gauss.rs @@ -0,0 +1,35 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_gauss_smoke() { + let mut model = new_empty_model(); + model._set("A1", "=GAUSS(-3)"); + model._set("A2", "=GAUSS(-2.3)"); + model._set("A3", "=GAUSS(-1.7)"); + model._set("A4", "=GAUSS(0)"); + model._set("A5", "=GAUSS(0.5)"); + model._set("A6", "=GAUSS(1)"); + model._set("A7", "=GAUSS(1.3)"); + model._set("A8", "=GAUSS(3)"); + model._set("A9", "=GAUSS(4)"); + + model._set("G6", "=GAUSS()"); + model._set("G7", "=GAUSS(1, 1)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"-0.498650102"); + assert_eq!(model._get_text("A2"), *"-0.48927589"); + assert_eq!(model._get_text("A3"), *"-0.455434537"); + assert_eq!(model._get_text("A4"), *"0"); + assert_eq!(model._get_text("A5"), *"0.191462461"); + assert_eq!(model._get_text("A6"), *"0.341344746"); + assert_eq!(model._get_text("A7"), *"0.403199515"); + assert_eq!(model._get_text("A8"), *"0.498650102"); + assert_eq!(model._get_text("A9"), *"0.499968329"); + + assert_eq!(model._get_text("G6"), *"#ERROR!"); + assert_eq!(model._get_text("G7"), *"#ERROR!"); +} diff --git a/xlsx/tests/calc_tests/MINA_MAXA.xlsx b/xlsx/tests/calc_tests/MINA_MAXA.xlsx new file mode 100644 index 0000000..2d2b11d Binary files /dev/null and b/xlsx/tests/calc_tests/MINA_MAXA.xlsx differ diff --git a/xlsx/tests/calc_tests/RANK_EQ_RANK_AVG.xlsx b/xlsx/tests/calc_tests/RANK_EQ_RANK_AVG.xlsx new file mode 100644 index 0000000..51d8807 Binary files /dev/null and b/xlsx/tests/calc_tests/RANK_EQ_RANK_AVG.xlsx differ diff --git a/xlsx/tests/calc_tests/SMALL_LARGE.xlsx b/xlsx/tests/calc_tests/SMALL_LARGE.xlsx new file mode 100644 index 0000000..283cb34 Binary files /dev/null and b/xlsx/tests/calc_tests/SMALL_LARGE.xlsx differ diff --git a/xlsx/tests/statistical/MEADIAN_KURT_SKEW_HARMEAN.xlsx b/xlsx/tests/statistical/MEADIAN_KURT_SKEW_HARMEAN.xlsx new file mode 100644 index 0000000..d694e09 Binary files /dev/null and b/xlsx/tests/statistical/MEADIAN_KURT_SKEW_HARMEAN.xlsx differ