diff --git a/base/src/expressions/parser/static_analysis.rs b/base/src/expressions/parser/static_analysis.rs
index 773684e..c99316b 100644
--- a/base/src/expressions/parser/static_analysis.rs
+++ b/base/src/expressions/parser/static_analysis.rs
@@ -929,6 +929,7 @@ fn get_function_args_signature(kind: &Function, arg_count: usize) -> Vec<Signature> {
         Function::FDistRT => args_signature_scalars(arg_count, 3, 0),
         Function::FInv => args_signature_scalars(arg_count, 3, 0),
         Function::FInvRT => args_signature_scalars(arg_count, 3, 0),
+        Function::FTest => vec![Signature::Vector; 2],
         Function::Fisher => args_signature_scalars(arg_count, 1, 0),
         Function::FisherInv => args_signature_scalars(arg_count, 1, 0),
         Function::Gamma => args_signature_scalars(arg_count, 1, 0),
@@ -1287,6 +1288,7 @@ fn static_analysis_on_function(kind: &Function, args: &[Node]) -> StaticResult {
         Function::FDistRT => StaticResult::Scalar,
         Function::FInv => StaticResult::Scalar,
         Function::FInvRT => StaticResult::Scalar,
+        Function::FTest => StaticResult::Scalar,
         Function::Fisher => StaticResult::Scalar,
         Function::FisherInv => StaticResult::Scalar,
         Function::Gamma => StaticResult::Scalar,
diff --git a/base/src/functions/mod.rs b/base/src/functions/mod.rs
index 56f9ad1..47f82b5 100644
--- a/base/src/functions/mod.rs
+++ b/base/src/functions/mod.rs
@@ -212,7 +212,7 @@ pub enum Function {
     FDistRT,
     FInv,
     FInvRT,
-    // FTest,
+    FTest,
     Fisher,
     FisherInv,
     // Forecast,
@@ -420,7 +420,7 @@ pub enum Function {
 }
 
 impl Function {
-    pub fn into_iter() -> IntoIter {
+    pub fn into_iter() -> IntoIter {
         [
             Function::And,
             Function::False,
@@ -711,6 +711,7 @@ impl Function {
             Function::FDistRT,
             Function::FInv,
             Function::FInvRT,
+            Function::FTest,
             Function::Fisher,
             Function::FisherInv,
             Function::Gamma,
@@ -837,6 +838,7 @@ impl Function {
             Function::FDistRT => "_xlfn.F.DIST.RT".to_string(),
             Function::FInv => "_xlfn.F.INV".to_string(),
             Function::FInvRT => "_xlfn.F.INV.RT".to_string(),
+            Function::FTest => "_xlfn.F.TEST".to_string(),
             Function::HypGeomDist => "_xlfn.HYPGEOM.DIST".to_string(),
@@ -1186,6 +1188,7 @@ impl Function {
             "F.DIST.RT" | "_XLFN.F.DIST.RT" => Some(Function::FDistRT),
             "F.INV" | "_XLFN.F.INV" => Some(Function::FInv),
             "F.INV.RT" | "_XLFN.F.INV.RT" => Some(Function::FInvRT),
+            "F.TEST" | "_XLFN.F.TEST" => Some(Function::FTest),
             "FISHER" => Some(Function::Fisher),
             "FISHERINV" => Some(Function::FisherInv),
             "GAMMA" | "_XLFN.GAMMA" => Some(Function::Gamma),
@@ -1523,6 +1526,7 @@ impl fmt::Display for Function {
             Function::FInvRT => write!(f, "F.INV.RT"),
             Function::Fisher => write!(f, "FISHER"),
             Function::FisherInv => write!(f, "FISHERINV"),
+            Function::FTest => write!(f, "F.TEST"),
             Function::Gamma => write!(f, "GAMMA"),
             Function::GammaDist => write!(f, "GAMMA.DIST"),
             Function::GammaInv => write!(f, "GAMMA.INV"),
@@ -1875,6 +1879,7 @@ impl Model {
             Function::FInvRT => self.fn_f_inv_rt(args, cell),
             Function::Fisher => self.fn_fisher(args, cell),
             Function::FisherInv => self.fn_fisher_inv(args, cell),
+            Function::FTest => self.fn_f_test(args, cell),
             Function::Gamma => self.fn_gamma(args, cell),
             Function::GammaDist => self.fn_gamma_dist(args, cell),
             Function::GammaInv => self.fn_gamma_inv(args, cell),
diff --git a/base/src/functions/statistical/fisher.rs b/base/src/functions/statistical/fisher.rs
index 72fa41b..cd7e698 100644
--- a/base/src/functions/statistical/fisher.rs
+++ b/base/src/functions/statistical/fisher.rs
@@ -1,6 +1,7 @@
 use statrs::distribution::{Continuous, ContinuousCDF, FisherSnedecor};
 
 use crate::expressions::types::CellReferenceIndex;
+use crate::functions::statistical::t_dist::sample_var;
 use crate::{
     calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model,
 };
@@ -296,4 +297,110 @@ impl Model {
 
         CalcResult::Number(x)
     }
+
+    /// Evaluates one `F.TEST` argument and collects its numeric values.
+    ///
+    /// The argument must evaluate to a range or an array. Entries that the value
+    /// helpers report as `None` are dropped. On failure, `Err` carries the
+    /// `CalcResult` that `F.TEST` should return. `ordinal` ("first"/"second") and
+    /// `ordinal_title` ("First"/"Second") only feed the error messages.
+    fn f_test_sample(
+        &mut self,
+        node: &Node,
+        cell: CellReferenceIndex,
+        ordinal: &str,
+        ordinal_title: &str,
+    ) -> Result<Vec<f64>, CalcResult> {
+        let values = match self.evaluate_node_in_context(node, cell) {
+            CalcResult::Range { left, right } => self.values_from_range(left, right)?,
+            CalcResult::Array(a) => self.values_from_array(a).map_err(|error| {
+                CalcResult::new_error(
+                    Error::VALUE,
+                    cell,
+                    format!("Error in {} array: {:?}", ordinal, error),
+                )
+            })?,
+            _ => {
+                return Err(CalcResult::new_error(
+                    Error::VALUE,
+                    cell,
+                    format!("{} argument must be a range or array", ordinal_title),
+                ));
+            }
+        };
+        Ok(values.into_iter().flatten().collect())
+    }
+
+    /// F.TEST(array1, array2)
+    ///
+    /// Computes the two-tailed F-test p-value for the sample variances of the two
+    /// arguments. Returns an argument-count error unless exactly two arguments are
+    /// given, `Error::VALUE` for an argument that is neither a range nor an array, and
+    /// `Error::DIV` when a sample has fewer than two values or a non-positive variance.
+    pub(crate) fn fn_f_test(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
+        if args.len() != 2 {
+            return CalcResult::new_args_number_error(cell);
+        }
+
+        let values1 = match self.f_test_sample(&args[0], cell, "first", "First") {
+            Ok(v) => v,
+            Err(error) => return error,
+        };
+        let values2 = match self.f_test_sample(&args[1], cell, "second", "Second") {
+            Ok(v) => v,
+            Err(error) => return error,
+        };
+
+        let n1 = values1.len();
+        let n2 = values2.len();
+
+        // If fewer than 2 numeric values in either sample -> #DIV/0!
+        if n1 < 2 || n2 < 2 {
+            return CalcResult::new_error(
+                Error::DIV,
+                cell,
+                "F.TEST requires at least two numeric values in each sample".to_string(),
+            );
+        }
+
+        let v1 = sample_var(&values1);
+        let v2 = sample_var(&values2);
+
+        if v1 <= 0.0 || v2 <= 0.0 {
+            return CalcResult::new_error(
+                Error::DIV,
+                cell,
+                "Variance of one sample is zero in F.TEST".to_string(),
+            );
+        }
+
+        // F ratio: larger variance / smaller variance, with the degrees of freedom
+        // following their variances.
+        let ratio = v1 / v2;
+        let (f, df1, df2) = if ratio < 1.0 {
+            (1.0 / ratio, (n2 - 1) as f64, (n1 - 1) as f64)
+        } else {
+            (ratio, (n1 - 1) as f64, (n2 - 1) as f64)
+        };
+
+        let dist = match FisherSnedecor::new(df1, df2) {
+            Ok(d) => d,
+            Err(_) => {
+                return CalcResult::new_error(
+                    Error::NUM,
+                    cell,
+                    "Invalid parameters for F distribution in F.TEST".to_string(),
+                );
+            }
+        };
+
+        // F.TEST is two-tailed: p = 2 * min(left tail, right tail). Doubling the right
+        // tail alone is wrong when the degrees of freedom differ, because P(F >= f) can
+        // exceed 0.5 even for f >= 1, which would yield p > 1.
+        let cdf = dist.cdf(f);
+        let p = 2.0 * cdf.min(1.0 - cdf);
+
+        // Clamp tiny FP noise
+        CalcResult::Number(p.clamp(0.0, 1.0))
+    }
 }
diff --git a/base/src/functions/statistical/t_dist.rs b/base/src/functions/statistical/t_dist.rs
index f545be7..6e64100 100644
--- a/base/src/functions/statistical/t_dist.rs
+++ b/base/src/functions/statistical/t_dist.rs
@@ -17,7 +17,7 @@ fn mean(xs: &[f64]) -> f64 {
     s / (n as f64)
 }
 
-fn sample_var(xs: &[f64]) -> f64 {
+pub(crate) fn sample_var(xs: &[f64]) -> f64 {
     let n = xs.len();
     if n < 2 {
         return 0.0;
diff --git a/base/src/test/statistical/mod.rs b/base/src/test/statistical/mod.rs
index a96151a..8f7c66a 100644
--- a/base/src/test/statistical/mod.rs
+++ b/base/src/test/statistical/mod.rs
@@ -7,6 +7,7 @@ mod test_fn_covariance;
 mod test_fn_devsq;
 mod test_fn_expon_dist;
 mod test_fn_f;
+mod test_fn_f_test;
 mod test_fn_fisher;
 mod test_fn_hyp_geom_dist;
 mod test_fn_log_norm;
diff --git a/base/src/test/statistical/test_fn_f_test.rs b/base/src/test/statistical/test_fn_f_test.rs
new file mode 100644
index 0000000..6bda7e9
--- /dev/null
+++ b/base/src/test/statistical/test_fn_f_test.rs
@@ -0,0 +1,54 @@
+#![allow(clippy::unwrap_used)]
+
+use crate::test::util::new_empty_model;
+
+#[test]
+fn test_fn_f_test_sanity() {
+    let mut model = new_empty_model();
+
+    // Valid call
+    model._set("A1", "=F.TEST(A2:A7, B2:B7)");
+    model._set("A2", "9");
+    model._set("A3", "12");
+    model._set("A4", "14");
+    model._set("A5", "16");
+    model._set("A6", "18");
+    model._set("A7", "20");
+    model._set("B2", "11");
+    model._set("B3", "10");
+    model._set("B4", "15");
+    model._set("B5", "17");
+    model._set("B6", "19");
+    model._set("B7", "21");
+
+    // Too few args
+    model._set("A8", "=F.TEST(A2:A7)");
+
+    // Too many args
+    model._set("A9", "=F.TEST(A2:A7, B2:B7, C2:C7)");
+
+    model.evaluate();
+
+    assert_eq!(model._get_text("A1"), *"0.859284302");
+    assert_eq!(model._get_text("A8"), *"#ERROR!");
+    assert_eq!(model._get_text("A9"), *"#ERROR!");
+}
+
+#[test]
+fn test_fn_f_test_unequal_sample_sizes() {
+    let mut model = new_empty_model();
+
+    // Sample variances are 40.5 (1 degree of freedom) and 52 (2 degrees of freedom).
+    // The right tail of F(2, 1) at 52 / 40.5 is 9/17 > 0.5, so the two-tailed
+    // p-value is 2 * (1 - 9/17) = 16/17 and must never exceed 1.
+    model._set("A1", "=F.TEST(A2:A3, B2:B4)");
+    model._set("A2", "0");
+    model._set("A3", "9");
+    model._set("B2", "0");
+    model._set("B3", "10");
+    model._set("B4", "14");
+
+    model.evaluate();
+
+    assert_eq!(model._get_text("A1"), *"0.941176471");
+}
diff --git a/xlsx/tests/statistical/F_TEST.xlsx b/xlsx/tests/statistical/F_TEST.xlsx
new file mode 100644
index 0000000..83d4e40
Binary files /dev/null and b/xlsx/tests/statistical/F_TEST.xlsx differ