diff --git a/base/src/expressions/parser/static_analysis.rs b/base/src/expressions/parser/static_analysis.rs index 1e9aeb7..773684e 100644 --- a/base/src/expressions/parser/static_analysis.rs +++ b/base/src/expressions/parser/static_analysis.rs @@ -711,6 +711,7 @@ fn get_function_args_signature(kind: &Function, arg_count: usize) -> Vec args_signature_scalars(arg_count, 1, 0), Function::Valuetotext => args_signature_scalars(arg_count, 1, 1), Function::Average => vec![Signature::Vector; arg_count], + Function::Avedev => vec![Signature::Vector; arg_count], Function::Averagea => vec![Signature::Vector; arg_count], Function::Averageif => args_signature_sumif(arg_count), Function::Averageifs => vec![Signature::Vector; arg_count], @@ -889,6 +890,105 @@ fn get_function_args_signature(kind: &Function, arg_count: usize) -> Vec vec![Signature::Vector, Signature::Scalar, Signature::Vector], Function::Dvarp => vec![Signature::Vector, Signature::Scalar, Signature::Vector], Function::Dstdevp => vec![Signature::Vector, Signature::Scalar, Signature::Vector], + + Function::BetaDist => args_signature_scalars(arg_count, 4, 2), + Function::BetaInv => args_signature_scalars(arg_count, 3, 2), + Function::BinomDist => args_signature_scalars(arg_count, 4, 0), + Function::BinomDistRange => args_signature_scalars(arg_count, 3, 1), + Function::BinomInv => args_signature_scalars(arg_count, 3, 0), + Function::ChisqDist => args_signature_scalars(arg_count, 3, 0), // CHISQ.DIST(x, deg_freedom, cumulative) + Function::ChisqDistRT => args_signature_scalars(arg_count, 2, 0), // CHISQ.DIST.RT(x, deg_freedom) + Function::ChisqInv => args_signature_scalars(arg_count, 2, 0), // CHISQ.INV(probability, deg_freedom) + Function::ChisqInvRT => args_signature_scalars(arg_count, 2, 0), + Function::ChisqTest => { + if arg_count == 2 { + vec![Signature::Vector, Signature::Vector] + } else { + vec![Signature::Error; arg_count] + } + } + Function::ConfidenceNorm => args_signature_scalars(arg_count, 3, 0), + Function::ConfidenceT => args_signature_scalars(arg_count, 3, 0), + Function::CovarianceP => { + if arg_count == 2 { +
vec![Signature::Vector, Signature::Vector] + } else { + vec![Signature::Error; arg_count] + } + } + Function::CovarianceS => { + if arg_count == 2 { + vec![Signature::Vector, Signature::Vector] + } else { + vec![Signature::Error; arg_count] + } + } + Function::Devsq => vec![Signature::Vector; arg_count], + Function::ExponDist => args_signature_scalars(arg_count, 3, 0), + Function::FDist => args_signature_scalars(arg_count, 4, 0), + Function::FDistRT => args_signature_scalars(arg_count, 3, 0), + Function::FInv => args_signature_scalars(arg_count, 3, 0), + Function::FInvRT => args_signature_scalars(arg_count, 3, 0), + Function::Fisher => args_signature_scalars(arg_count, 1, 0), + Function::FisherInv => args_signature_scalars(arg_count, 1, 0), + Function::Gamma => args_signature_scalars(arg_count, 1, 0), + Function::GammaDist => args_signature_scalars(arg_count, 4, 0), + Function::GammaInv => args_signature_scalars(arg_count, 3, 0), + Function::GammaLn => args_signature_scalars(arg_count, 1, 0), + Function::GammaLnPrecise => args_signature_scalars(arg_count, 1, 0), + Function::HypGeomDist => args_signature_scalars(arg_count, 5, 0), + Function::LogNormDist => args_signature_scalars(arg_count, 4, 0), + Function::LogNormInv => args_signature_scalars(arg_count, 3, 0), + Function::NegbinomDist => args_signature_scalars(arg_count, 4, 0), + Function::NormDist => args_signature_scalars(arg_count, 4, 0), + Function::NormInv => args_signature_scalars(arg_count, 3, 0), + Function::NormSdist => args_signature_scalars(arg_count, 2, 0), + Function::NormSInv => args_signature_scalars(arg_count, 1, 0), + Function::Pearson => { + if arg_count == 2 { + vec![Signature::Vector, Signature::Vector] + } else { + vec![Signature::Error; arg_count] + } + } + Function::Phi => args_signature_scalars(arg_count, 1, 0), + Function::PoissonDist => args_signature_scalars(arg_count, 3, 0), + Function::Standardize => args_signature_scalars(arg_count, 3, 0), + Function::StDevP => vec![Signature::Vector; 
arg_count], + Function::StDevS => vec![Signature::Vector; arg_count], + Function::Stdeva => vec![Signature::Vector; arg_count], + Function::Stdevpa => vec![Signature::Vector; arg_count], + Function::TDist => args_signature_scalars(arg_count, 3, 0), + Function::TDist2T => args_signature_scalars(arg_count, 2, 0), + Function::TDistRT => args_signature_scalars(arg_count, 2, 0), + Function::TInv => args_signature_scalars(arg_count, 2, 0), + Function::TInv2T => args_signature_scalars(arg_count, 2, 0), + Function::TTest => { + if arg_count == 4 { + vec![ + Signature::Vector, + Signature::Vector, + Signature::Scalar, + Signature::Scalar, + ] + } else { + vec![Signature::Error; arg_count] + } + } + Function::VarP => vec![Signature::Vector; arg_count], + Function::VarS => vec![Signature::Vector; arg_count], + Function::VarpA => vec![Signature::Vector; arg_count], + Function::VarA => vec![Signature::Vector; arg_count], + Function::WeibullDist => args_signature_scalars(arg_count, 4, 0), + Function::ZTest => { + if arg_count == 2 { + vec![Signature::Vector, Signature::Scalar] + } else if arg_count == 3 { + vec![Signature::Vector, Signature::Scalar, Signature::Scalar] + } else { + vec![Signature::Error; arg_count] + } + } } } @@ -990,6 +1090,7 @@ fn static_analysis_on_function(kind: &Function, args: &[Node]) -> StaticResult { Function::Valuetotext => not_implemented(args), Function::Average => not_implemented(args), Function::Averagea => not_implemented(args), + Function::Avedev => not_implemented(args), Function::Averageif => not_implemented(args), Function::Averageifs => not_implemented(args), Function::Count => not_implemented(args), @@ -1165,5 +1266,61 @@ fn static_analysis_on_function(kind: &Function, args: &[Node]) -> StaticResult { Function::Dvar => not_implemented(args), Function::Dvarp => not_implemented(args), Function::Dstdevp => not_implemented(args), + + Function::BetaDist => StaticResult::Scalar, + Function::BetaInv => StaticResult::Scalar, + Function::BinomDist => 
StaticResult::Scalar, + Function::BinomDistRange => StaticResult::Scalar, + Function::BinomInv => StaticResult::Scalar, + Function::ChisqDist => StaticResult::Scalar, + Function::ChisqDistRT => StaticResult::Scalar, + Function::ChisqInv => StaticResult::Scalar, + Function::ChisqInvRT => StaticResult::Scalar, + Function::ChisqTest => StaticResult::Scalar, + Function::ConfidenceNorm => StaticResult::Scalar, + Function::ConfidenceT => StaticResult::Scalar, + Function::CovarianceP => StaticResult::Scalar, + Function::CovarianceS => StaticResult::Scalar, + Function::Devsq => StaticResult::Scalar, + Function::ExponDist => StaticResult::Scalar, + Function::FDist => StaticResult::Scalar, + Function::FDistRT => StaticResult::Scalar, + Function::FInv => StaticResult::Scalar, + Function::FInvRT => StaticResult::Scalar, + Function::Fisher => StaticResult::Scalar, + Function::FisherInv => StaticResult::Scalar, + Function::Gamma => StaticResult::Scalar, + Function::GammaDist => StaticResult::Scalar, + Function::GammaInv => StaticResult::Scalar, + Function::GammaLn => StaticResult::Scalar, + Function::GammaLnPrecise => StaticResult::Scalar, + Function::HypGeomDist => StaticResult::Scalar, + Function::LogNormDist => StaticResult::Scalar, + Function::LogNormInv => StaticResult::Scalar, + Function::NegbinomDist => StaticResult::Scalar, + Function::NormDist => StaticResult::Scalar, + Function::NormInv => StaticResult::Scalar, + Function::NormSdist => StaticResult::Scalar, + Function::NormSInv => StaticResult::Scalar, + Function::Pearson => StaticResult::Scalar, + Function::Phi => StaticResult::Scalar, + Function::PoissonDist => StaticResult::Scalar, + Function::Standardize => StaticResult::Scalar, + Function::StDevP => StaticResult::Scalar, + Function::StDevS => StaticResult::Scalar, + Function::Stdeva => StaticResult::Scalar, + Function::Stdevpa => StaticResult::Scalar, + Function::TDist => StaticResult::Scalar, + Function::TDist2T => StaticResult::Scalar, + Function::TDistRT => 
StaticResult::Scalar, + Function::TInv => StaticResult::Scalar, + Function::TInv2T => StaticResult::Scalar, + Function::TTest => StaticResult::Scalar, + Function::VarP => StaticResult::Scalar, + Function::VarS => StaticResult::Scalar, + Function::VarpA => StaticResult::Scalar, + Function::VarA => StaticResult::Scalar, + Function::WeibullDist => StaticResult::Scalar, + Function::ZTest => StaticResult::Scalar, } } diff --git a/base/src/functions/mod.rs b/base/src/functions/mod.rs index 7b84410..af80adc 100644 --- a/base/src/functions/mod.rs +++ b/base/src/functions/mod.rs @@ -190,6 +190,98 @@ pub enum Function { Minifs, Geomean, + Avedev, + BetaDist, + BetaInv, + BinomDist, + BinomDistRange, + BinomInv, + ChisqDist, + ChisqDistRT, + ChisqInv, + ChisqInvRT, + ChisqTest, + ConfidenceNorm, + ConfidenceT, + // Correl, + CovarianceP, + CovarianceS, + Devsq, + ExponDist, + FDist, + FDistRT, + FInv, + FInvRT, + // FTest, + Fisher, + FisherInv, + // Forecast, + Gamma, + GammaDist, + GammaInv, + GammaLn, + GammaLnPrecise, + // Gauss, + // Growth, + // Harmean, + HypGeomDist, + // Intercept, + // Kurt, + // Large, + // Linest, + // Logest, + LogNormDist, + LogNormInv, + // MaxA, + // Median, + // MinA, + // ModeMult, + // ModeSingl, + NegbinomDist, + NormDist, + NormInv, + NormSdist, + NormSInv, + Pearson, + // PercentileExc, + // PercentileInc, + // PercentrankExc, + // PercentrankInc, + // Permut, + // Permutationa, + Phi, + PoissonDist, + // Prob, + // QuartileExc, + // QuartileInc, + // RankAvg, + // RankEq, + // Rsq + // Skew, + // SkewP, + // Slope, + // Small, + Standardize, + StDevP, + StDevS, + Stdeva, + Stdevpa, + // Steyx, + TDist, + TDist2T, + TDistRT, + TInv, + TInv2T, + TTest, + // Trend, + // Trimmean, + VarP, + VarS, + VarpA, + VarA, + WeibullDist, + ZTest, + // Date and time Date, Datedif, @@ -328,7 +420,7 @@ pub enum Function { } impl Function { - pub fn into_iter() -> IntoIter { + pub fn into_iter() -> IntoIter { [ Function::And, Function::False, @@ -453,6 
+545,7 @@ impl Function { Function::Type, Function::Sheet, Function::Average, + Function::Avedev, Function::Averagea, Function::Averageif, Function::Averageifs, @@ -598,6 +691,61 @@ impl Function { Function::Dvar, Function::Dvarp, Function::Dstdevp, + Function::BetaDist, + Function::BetaInv, + Function::BinomDist, + Function::BinomDistRange, + Function::BinomInv, + Function::ChisqDist, + Function::ChisqDistRT, + Function::ChisqInv, + Function::ChisqInvRT, + Function::ChisqTest, + Function::ConfidenceNorm, + Function::ConfidenceT, + Function::CovarianceP, + Function::CovarianceS, + Function::Devsq, + Function::ExponDist, + Function::FDist, + Function::FDistRT, + Function::FInv, + Function::FInvRT, + Function::Fisher, + Function::FisherInv, + Function::Gamma, + Function::GammaDist, + Function::GammaInv, + Function::GammaLn, + Function::GammaLnPrecise, + Function::HypGeomDist, + Function::LogNormDist, + Function::LogNormInv, + Function::NegbinomDist, + Function::NormDist, + Function::NormInv, + Function::NormSdist, + Function::NormSInv, + Function::Pearson, + Function::Phi, + Function::PoissonDist, + Function::Standardize, + Function::StDevP, + Function::StDevS, + Function::Stdeva, + Function::Stdevpa, + Function::TDist, + Function::TDist2T, + Function::TDistRT, + Function::TInv, + Function::TInv2T, + Function::TTest, + Function::VarP, + Function::VarS, + Function::VarpA, + Function::VarA, + Function::WeibullDist, + Function::ZTest, ] .into_iter() } @@ -659,6 +807,12 @@ impl Function { Function::Sec => "_xlfn.SEC".to_string(), Function::Sech => "_xlfn.SECH".to_string(), Function::Acot => "_xlfn.ACOT".to_string(), + Function::GammaLnPrecise => "_xlfn.GAMMALN.PRECISE".to_string(), + Function::Gamma => "_xlfn.GAMMA".to_string(), + Function::GammaInv => "_xlfn.GAMMA.INV".to_string(), + Function::GammaDist => "_xlfn.GAMMA.DIST".to_string(), // GammaLn is intentionally absent: it falls through to the plain name below + Function::BetaDist => "_xlfn.BETA.DIST".to_string(), + Function::BetaInv => "_xlfn.BETA.INV".to_string(), _ => self.to_string(), } @@
-811,6 +965,7 @@ impl Function { "AVERAGE" => Some(Function::Average), "AVERAGEA" => Some(Function::Averagea), + "AVEDEV" => Some(Function::Avedev), "AVERAGEIF" => Some(Function::Averageif), "AVERAGEIFS" => Some(Function::Averageifs), "COUNT" => Some(Function::Count), @@ -957,6 +1112,62 @@ impl Function { "DVARP" => Some(Function::Dvarp), "DSTDEVP" => Some(Function::Dstdevp), + "BETA.DIST" | "_XLFN.BETA.DIST" => Some(Function::BetaDist), + "BETA.INV" | "_XLFN.BETA.INV" => Some(Function::BetaInv), + "BINOM.DIST" => Some(Function::BinomDist), + "BINOM.DIST.RANGE" => Some(Function::BinomDistRange), + "BINOM.INV" => Some(Function::BinomInv), + "CHISQ.DIST" => Some(Function::ChisqDist), + "CHISQ.DIST.RT" => Some(Function::ChisqDistRT), + "CHISQ.INV" => Some(Function::ChisqInv), + "CHISQ.INV.RT" => Some(Function::ChisqInvRT), + "CHISQ.TEST" => Some(Function::ChisqTest), + "CONFIDENCE.NORM" => Some(Function::ConfidenceNorm), + "CONFIDENCE.T" => Some(Function::ConfidenceT), + "COVARIANCE.P" => Some(Function::CovarianceP), + "COVARIANCE.S" => Some(Function::CovarianceS), + "DEVSQ" => Some(Function::Devsq), + "EXPON.DIST" => Some(Function::ExponDist), + "F.DIST" => Some(Function::FDist), + "F.DIST.RT" => Some(Function::FDistRT), + "F.INV" => Some(Function::FInv), + "F.INV.RT" => Some(Function::FInvRT), + "FISHER" => Some(Function::Fisher), + "FISHERINV" => Some(Function::FisherInv), + "GAMMA" | "_XLFN.GAMMA" => Some(Function::Gamma), + "GAMMA.DIST" | "_XLFN.GAMMA.DIST" => Some(Function::GammaDist), + "GAMMA.INV" | "_XLFN.GAMMA.INV" => Some(Function::GammaInv), + "GAMMALN" | "_XLFN.GAMMALN" => Some(Function::GammaLn), + "GAMMALN.PRECISE" | "_XLFN.GAMMALN.PRECISE" => Some(Function::GammaLnPrecise), + "HYPGEOM.DIST" => Some(Function::HypGeomDist), + "LOGNORM.DIST" => Some(Function::LogNormDist), + "LOGNORM.INV" => Some(Function::LogNormInv), + "NEGBINOM.DIST" => Some(Function::NegbinomDist), + "NORM.DIST" => Some(Function::NormDist), + "NORM.INV" => Some(Function::NormInv), + 
"NORM.S.DIST" => Some(Function::NormSdist), + "NORM.S.INV" => Some(Function::NormSInv), + "PEARSON" => Some(Function::Pearson), + "PHI" => Some(Function::Phi), + "POISSON.DIST" => Some(Function::PoissonDist), + "STANDARDIZE" => Some(Function::Standardize), + "STDEV.P" => Some(Function::StDevP), + "STDEV.S" => Some(Function::StDevS), + "STDEVA" => Some(Function::Stdeva), + "STDEVPA" => Some(Function::Stdevpa), + "T.DIST" => Some(Function::TDist), + "T.DIST.2T" => Some(Function::TDist2T), + "T.DIST.RT" => Some(Function::TDistRT), + "T.INV" => Some(Function::TInv), + "T.INV.2T" => Some(Function::TInv2T), + "T.TEST" => Some(Function::TTest), + "VAR.P" => Some(Function::VarP), + "VAR.S" => Some(Function::VarS), + "VARPA" => Some(Function::VarpA), + "VARA" => Some(Function::VarA), + "WEIBULL.DIST" => Some(Function::WeibullDist), + "Z.TEST" => Some(Function::ZTest), + _ => None, } } @@ -1065,6 +1276,7 @@ impl fmt::Display for Function { Function::Sheet => write!(f, "SHEET"), Function::Average => write!(f, "AVERAGE"), Function::Averagea => write!(f, "AVERAGEA"), + Function::Avedev => write!(f, "AVEDEV"), Function::Averageif => write!(f, "AVERAGEIF"), Function::Averageifs => write!(f, "AVERAGEIFS"), Function::Count => write!(f, "COUNT"), @@ -1234,6 +1446,62 @@ impl fmt::Display for Function { Function::Dvar => write!(f, "DVAR"), Function::Dvarp => write!(f, "DVARP"), Function::Dstdevp => write!(f, "DSTDEVP"), + + Function::BetaDist => write!(f, "BETA.DIST"), + Function::BetaInv => write!(f, "BETA.INV"), + Function::BinomDist => write!(f, "BINOM.DIST"), + Function::BinomDistRange => write!(f, "BINOM.DIST.RANGE"), + Function::BinomInv => write!(f, "BINOM.INV"), + Function::ChisqDist => write!(f, "CHISQ.DIST"), + Function::ChisqDistRT => write!(f, "CHISQ.DIST.RT"), + Function::ChisqInv => write!(f, "CHISQ.INV"), + Function::ChisqInvRT => write!(f, "CHISQ.INV.RT"), + Function::ChisqTest => write!(f, "CHISQ.TEST"), + Function::ConfidenceNorm => write!(f, "CONFIDENCE.NORM"), + 
Function::ConfidenceT => write!(f, "CONFIDENCE.T"), + Function::CovarianceP => write!(f, "COVARIANCE.P"), + Function::CovarianceS => write!(f, "COVARIANCE.S"), + Function::Devsq => write!(f, "DEVSQ"), + Function::ExponDist => write!(f, "EXPON.DIST"), + Function::FDist => write!(f, "F.DIST"), + Function::FDistRT => write!(f, "F.DIST.RT"), + Function::FInv => write!(f, "F.INV"), + Function::FInvRT => write!(f, "F.INV.RT"), + Function::Fisher => write!(f, "FISHER"), + Function::FisherInv => write!(f, "FISHERINV"), + Function::Gamma => write!(f, "GAMMA"), + Function::GammaDist => write!(f, "GAMMA.DIST"), + Function::GammaInv => write!(f, "GAMMA.INV"), + Function::GammaLn => write!(f, "GAMMALN"), + Function::GammaLnPrecise => write!(f, "GAMMALN.PRECISE"), + Function::HypGeomDist => write!(f, "HYPGEOM.DIST"), + Function::LogNormDist => write!(f, "LOGNORM.DIST"), + Function::LogNormInv => write!(f, "LOGNORM.INV"), + Function::NegbinomDist => write!(f, "NEGBINOM.DIST"), + Function::NormDist => write!(f, "NORM.DIST"), + Function::NormInv => write!(f, "NORM.INV"), + Function::NormSdist => write!(f, "NORM.S.DIST"), + Function::NormSInv => write!(f, "NORM.S.INV"), + Function::Pearson => write!(f, "PEARSON"), + Function::Phi => write!(f, "PHI"), + Function::PoissonDist => write!(f, "POISSON.DIST"), + Function::Standardize => write!(f, "STANDARDIZE"), + Function::StDevP => write!(f, "STDEV.P"), + Function::StDevS => write!(f, "STDEV.S"), + Function::Stdeva => write!(f, "STDEVA"), + Function::Stdevpa => write!(f, "STDEVPA"), + Function::TDist => write!(f, "T.DIST"), + Function::TDist2T => write!(f, "T.DIST.2T"), + Function::TDistRT => write!(f, "T.DIST.RT"), + Function::TInv => write!(f, "T.INV"), + Function::TInv2T => write!(f, "T.INV.2T"), + Function::TTest => write!(f, "T.TEST"), + Function::VarP => write!(f, "VAR.P"), + Function::VarS => write!(f, "VAR.S"), + Function::VarpA => write!(f, "VARPA"), + Function::VarA => write!(f, "VARA"), + Function::WeibullDist => write!(f, 
"WEIBULL.DIST"), + Function::ZTest => write!(f, "Z.TEST"), } } } @@ -1354,6 +1622,7 @@ impl Model { Function::Sheet => self.fn_sheet(args, cell), Function::Average => self.fn_average(args, cell), Function::Averagea => self.fn_averagea(args, cell), + Function::Avedev => self.fn_avedev(args, cell), Function::Averageif => self.fn_averageif(args, cell), Function::Averageifs => self.fn_averageifs(args, cell), Function::Count => self.fn_count(args, cell), @@ -1530,6 +1799,61 @@ impl Model { Function::Dvar => self.fn_dvar(args, cell), Function::Dvarp => self.fn_dvarp(args, cell), Function::Dstdevp => self.fn_dstdevp(args, cell), + Function::BetaDist => self.fn_beta_dist(args, cell), + Function::BetaInv => self.fn_beta_inv(args, cell), + Function::BinomDist => self.fn_binom_dist(args, cell), + Function::BinomDistRange => self.fn_binom_dist_range(args, cell), + Function::BinomInv => self.fn_binom_inv(args, cell), + Function::ChisqDist => self.fn_chisq_dist(args, cell), + Function::ChisqDistRT => self.fn_chisq_dist_rt(args, cell), + Function::ChisqInv => self.fn_chisq_inv(args, cell), + Function::ChisqInvRT => self.fn_chisq_inv_rt(args, cell), + Function::ChisqTest => self.fn_chisq_test(args, cell), + Function::ConfidenceNorm => self.fn_confidence_norm(args, cell), + Function::ConfidenceT => self.fn_confidence_t(args, cell), + Function::CovarianceP => self.fn_covariance_p(args, cell), + Function::CovarianceS => self.fn_covariance_s(args, cell), + Function::Devsq => self.fn_devsq(args, cell), + Function::ExponDist => self.fn_expon_dist(args, cell), + Function::FDist => self.fn_f_dist(args, cell), + Function::FDistRT => self.fn_f_dist_rt(args, cell), + Function::FInv => self.fn_f_inv(args, cell), + Function::FInvRT => self.fn_f_inv_rt(args, cell), + Function::Fisher => self.fn_fisher(args, cell), + Function::FisherInv => self.fn_fisher_inv(args, cell), + Function::Gamma => self.fn_gamma(args, cell), + Function::GammaDist => self.fn_gamma_dist(args, cell), + Function::GammaInv 
=> self.fn_gamma_inv(args, cell), + Function::GammaLn => self.fn_gamma_ln(args, cell), + Function::GammaLnPrecise => self.fn_gamma_ln_precise(args, cell), + Function::HypGeomDist => self.fn_hyp_geom_dist(args, cell), + Function::LogNormDist => self.fn_log_norm_dist(args, cell), + Function::LogNormInv => self.fn_log_norm_inv(args, cell), + Function::NegbinomDist => self.fn_negbinom_dist(args, cell), + Function::NormDist => self.fn_norm_dist(args, cell), + Function::NormInv => self.fn_norm_inv(args, cell), + Function::NormSdist => self.fn_norm_s_dist(args, cell), + Function::NormSInv => self.fn_norm_s_inv(args, cell), + Function::Pearson => self.fn_pearson(args, cell), + Function::Phi => self.fn_phi(args, cell), + Function::PoissonDist => self.fn_poisson_dist(args, cell), + Function::Standardize => self.fn_standardize(args, cell), + Function::StDevP => self.fn_stdev_p(args, cell), + Function::StDevS => self.fn_stdev_s(args, cell), + Function::Stdeva => self.fn_stdeva(args, cell), + Function::Stdevpa => self.fn_stdevpa(args, cell), + Function::TDist => self.fn_t_dist(args, cell), + Function::TDist2T => self.fn_t_dist_2t(args, cell), + Function::TDistRT => self.fn_t_dist_rt(args, cell), + Function::TInv => self.fn_t_inv(args, cell), + Function::TInv2T => self.fn_t_inv_2t(args, cell), + Function::TTest => self.fn_t_test(args, cell), + Function::VarP => self.fn_var_p(args, cell), + Function::VarS => self.fn_var_s(args, cell), + Function::VarpA => self.fn_varpa(args, cell), + Function::VarA => self.fn_vara(args, cell), + Function::WeibullDist => self.fn_weibull_dist(args, cell), + Function::ZTest => self.fn_z_test(args, cell), } } } diff --git a/base/src/functions/statistical/beta.rs b/base/src/functions/statistical/beta.rs new file mode 100644 index 0000000..a66ece8 --- /dev/null +++ b/base/src/functions/statistical/beta.rs @@ -0,0 +1,213 @@ +use statrs::distribution::{Beta, Continuous, ContinuousCDF}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + 
calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // BETA.DIST(x, alpha, beta, cumulative, [A], [B]) + pub(crate) fn fn_beta_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + let arg_count = args.len(); + if !(4..=6).contains(&arg_count) { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let alpha = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let beta_param = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // cumulative argument: interpret like Excel + let cumulative = match self.evaluate_node_in_context(&args[3], cell) { + CalcResult::Boolean(b) => b, + CalcResult::Number(n) => n != 0.0, + CalcResult::String(s) => { + let up = s.to_ascii_uppercase(); + if up == "TRUE" { + true + } else if up == "FALSE" { + false + } else { + return CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: "cumulative must be TRUE/FALSE or numeric".to_string(), + }; + } + } + error @ CalcResult::Error { .. } => return error, + _ => { + return CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: "Invalid cumulative argument".to_string(), + } + } + }; + + // Optional A, B + let a = if arg_count >= 5 { + match self.get_number_no_bools(&args[4], cell) { + Ok(f) => f, + Err(e) => return e, + } + } else { + 0.0 + }; + + let b = if arg_count >= 6 { + match self.get_number_no_bools(&args[5], cell) { + Ok(f) => f, + Err(e) => return e, + } + } else { + 1.0 + }; + + // Excel: alpha <= 0 or beta <= 0 → #NUM! + if alpha <= 0.0 || beta_param <= 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "alpha and beta must be > 0 in BETA.DIST".to_string(), + ); + } + + // Excel: if x < A, x > B, or A = B → #NUM! 
+ if b == a || x < a || x > b { + return CalcResult::new_error( + Error::NUM, + cell, + "x must be between A and B and A < B in BETA.DIST".to_string(), + ); + } + + // Transform to standard Beta(0,1) + let width = b - a; + let t = (x - a) / width; + + let dist = match Beta::new(alpha, beta_param) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Beta distribution".to_string(), + ) + } + }; + + let result = if cumulative { + dist.cdf(t) + } else { + // general-interval beta pdf: f_X(x) = f_T(t) / (B - A), t=(x-A)/(B-A) + dist.pdf(t) / width + }; + + if result.is_nan() || result.is_infinite() { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for BETA.DIST".to_string(), + ); + } + + CalcResult::Number(result) + } + + pub(crate) fn fn_beta_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + let arg_count = args.len(); + if !(3..=5).contains(&arg_count) { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let alpha = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let beta_param = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let a = if arg_count >= 4 { + match self.get_number_no_bools(&args[3], cell) { + Ok(f) => f, + Err(e) => return e, + } + } else { + 0.0 + }; + + let b = if arg_count >= 5 { + match self.get_number_no_bools(&args[4], cell) { + Ok(f) => f, + Err(e) => return e, + } + } else { + 1.0 + }; + + if alpha <= 0.0 || beta_param <= 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "alpha and beta must be > 0 in BETA.INV".to_string(), + ); + } + + // probability <= 0 or probability > 1 → #NUM! 
+ if p <= 0.0 || p > 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "probability must be in (0,1] in BETA.INV".to_string(), + ); + } + + if b <= a { + return CalcResult::new_error( + Error::NUM, + cell, + "A must be < B in BETA.INV".to_string(), + ); + } + + let dist = match Beta::new(alpha, beta_param) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Beta distribution".to_string(), + ) + } + }; + + let t = dist.inverse_cdf(p); + if t.is_nan() || t.is_infinite() { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for BETA.INV".to_string(), + ); + } + + // Map back from [0,1] to [A,B] + let x = a + t * (b - a); + CalcResult::Number(x) + } +} diff --git a/base/src/functions/statistical/binom.rs b/base/src/functions/statistical/binom.rs new file mode 100644 index 0000000..1cca7b2 --- /dev/null +++ b/base/src/functions/statistical/binom.rs @@ -0,0 +1,311 @@ +use statrs::distribution::{Binomial, Discrete, DiscreteCDF}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_binom_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + // number_s + let number_s = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // trials + let trials = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // probability_s + let p = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // cumulative (logical) + let cumulative = match self.get_boolean(&args[3], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + // Domain checks + if trials < 0.0 + || number_s < 0.0 + || number_s > trials + || p.is_nan() + || 
!(0.0..=1.0).contains(&p) + { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for BINOM.DIST".to_string(), + ); + } + + // Limit to u64 + if trials > u64::MAX as f64 { + return CalcResult::new_error( + Error::NUM, + cell, + "Number of trials too large".to_string(), + ); + } + + let n = trials as u64; + let k = number_s as u64; + + let dist = match Binomial::new(p, n) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for binomial distribution".to_string(), + ) + } + }; + + let prob = if cumulative { dist.cdf(k) } else { dist.pmf(k) }; + + if prob.is_nan() || prob.is_infinite() { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for BINOM.DIST".to_string(), + ); + } + + CalcResult::Number(prob) + } + + pub(crate) fn fn_binom_dist_range( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() < 3 || args.len() > 4 { + return CalcResult::new_args_number_error(cell); + } + + // trials + let trials = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // probability_s + let p = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // number_s (lower) + let number_s = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // number_s2 (upper, optional) + let number_s2 = if args.len() == 4 { + match self.get_number_no_bools(&args[3], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + } + } else { + number_s + }; + + if trials < 0.0 + || number_s < 0.0 + || number_s2 < 0.0 + || number_s > number_s2 + || number_s2 > trials + || p.is_nan() + || !(0.0..=1.0).contains(&p) + { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for BINOM.DIST.RANGE".to_string(), + ); + } + + if trials > u64::MAX as f64 { + return CalcResult::new_error( + Error::NUM, + cell, + "Number of trials 
too large".to_string(), + ); + } + + let n = trials as u64; + let lower = number_s as u64; + let upper = number_s2 as u64; + + let dist = match Binomial::new(p, n) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for binomial distribution".to_string(), + ) + } + }; + + let prob = if lower == 0 { + dist.cdf(upper) + } else { + let cdf_upper = dist.cdf(upper); + let cdf_below_lower = dist.cdf(lower - 1); + cdf_upper - cdf_below_lower + }; + + if prob.is_nan() || prob.is_infinite() { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for BINOM.DIST.RANGE".to_string(), + ); + } + + CalcResult::Number(prob) + } + + pub(crate) fn fn_binom_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + // trials + let trials = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // probability_s + let p = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // alpha + let alpha = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + if trials < 0.0 + || trials > u64::MAX as f64 + || p.is_nan() + || !(0.0..=1.0).contains(&p) + || alpha.is_nan() + || !(0.0..=1.0).contains(&alpha) + { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for BINOM.INV".to_string(), + ); + } + + let n = trials as u64; + + let dist = match Binomial::new(p, n) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for binomial distribution".to_string(), + ) + } + }; + + // DiscreteCDF::inverse_cdf returns u64 for binomial + let k = statrs::distribution::DiscreteCDF::inverse_cdf(&dist, alpha); + + CalcResult::Number(k as f64) + } + + pub(crate) fn fn_negbinom_dist( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> 
CalcResult { + use statrs::distribution::{Discrete, DiscreteCDF, NegativeBinomial}; + + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + let number_f = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + let number_s = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + let probability_s = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let cumulative = match self.get_boolean(&args[3], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if number_f < 0.0 || number_s < 1.0 || !(0.0..=1.0).contains(&probability_s) { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for NEGBINOM.DIST".to_string(), + }; + } + + // Guard against absurdly large failures that won't fit in u64 + if number_f > (u64::MAX as f64) { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for NEGBINOM.DIST".to_string(), + }; + } + + let dist = match NegativeBinomial::new(number_s, probability_s) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for NEGBINOM.DIST".to_string(), + } + } + }; + + let f_u = number_f as u64; + let result = if cumulative { + dist.cdf(f_u) + } else { + dist.pmf(f_u) + }; + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for NEGBINOM.DIST".to_string(), + }; + } + + CalcResult::Number(result) + } +} diff --git a/base/src/functions/statistical/chisq.rs b/base/src/functions/statistical/chisq.rs new file mode 100644 index 0000000..e997f00 --- /dev/null +++ b/base/src/functions/statistical/chisq.rs @@ -0,0 +1,565 @@ +use statrs::distribution::{ChiSquared, Continuous, ContinuousCDF}; + +use crate::expressions::parser::ArrayNode; +use 
crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +// Helper to check if two shapes are the same or compatible 1D shapes +pub(crate) fn is_same_shape_or_1d(rows1: i32, cols1: i32, rows2: i32, cols2: i32) -> bool { + (rows1 == rows2 && cols1 == cols2) + || (rows1 == 1 && cols2 == 1 && cols1 == rows2) + || (rows2 == 1 && cols1 == 1 && cols2 == rows1) +} + +impl Model { + // CHISQ.DIST(x, deg_freedom, cumulative) + pub(crate) fn fn_chisq_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[2], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "x must be >= 0 in CHISQ.DIST".to_string(), + ); + } + if df < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in CHISQ.DIST".to_string(), + ); + } + + let dist = match ChiSquared::new(df) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Chi-squared distribution".to_string(), + ) + } + }; + + let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; + + if result.is_nan() || result.is_infinite() { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for CHISQ.DIST".to_string(), + ); + } + + CalcResult::Number(result) + } + + // CHISQ.DIST.RT(x, deg_freedom) + pub(crate) fn fn_chisq_dist_rt( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let x = match 
self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df_raw = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = df_raw.trunc(); + + if x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "x must be >= 0 in CHISQ.DIST.RT".to_string(), + ); + } + if df < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in CHISQ.DIST.RT".to_string(), + ); + } + + let dist = match ChiSquared::new(df) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Chi-squared distribution".to_string(), + ) + } + }; + + // Right-tail probability: P(X > x). + // Use sf(x) directly for better numerical properties than 1 - cdf(x). + let result = dist.sf(x); + + if result.is_nan() || result.is_infinite() || result < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for CHISQ.DIST.RT".to_string(), + ); + } + + CalcResult::Number(result) + } + + // CHISQ.INV(probability, deg_freedom) + pub(crate) fn fn_chisq_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // if probability < 0 or > 1 → #NUM! 
+ if !(0.0..=1.0).contains(&p) { + return CalcResult::new_error( + Error::NUM, + cell, + "probability must be in [0,1] in CHISQ.INV".to_string(), + ); + } + if df < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in CHISQ.INV".to_string(), + ); + } + + let dist = match ChiSquared::new(df) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Chi-squared distribution".to_string(), + ) + } + }; + + let x = dist.inverse_cdf(p); + + if x.is_nan() || x.is_infinite() || x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for CHISQ.INV".to_string(), + ); + } + + CalcResult::Number(x) + } + + // CHISQ.INV.RT(probability, deg_freedom) + pub(crate) fn fn_chisq_inv_rt( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df_raw = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = df_raw.trunc(); + + // if probability < 0 or > 1 → #NUM! + if !(0.0..=1.0).contains(&p) { + return CalcResult::new_error( + Error::NUM, + cell, + "probability must be in [0,1] in CHISQ.INV.RT".to_string(), + ); + } + if df < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in CHISQ.INV.RT".to_string(), + ); + } + + let dist = match ChiSquared::new(df) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Chi-squared distribution".to_string(), + ) + } + }; + + // Right-tail inverse: p = P(X > x) = SF(x) = 1 - CDF(x) + // So x = inverse_cdf(1 - p). 
+ let x = dist.inverse_cdf(1.0 - p); + + if x.is_nan() || x.is_infinite() || x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for CHISQ.INV.RT".to_string(), + ); + } + + CalcResult::Number(x) + } + + pub(crate) fn values_from_range( + &mut self, + left: CellReferenceIndex, + right: CellReferenceIndex, + ) -> Result>, CalcResult> { + let mut values = Vec::new(); + for row_offset in 0..=(right.row - left.row) { + for col_offset in 0..=(right.column - left.column) { + let cell_ref = CellReferenceIndex { + sheet: left.sheet, + row: left.row + row_offset, + column: left.column + col_offset, + }; + let cell_value = self.evaluate_cell(cell_ref); + match cell_value { + CalcResult::Number(v) => { + values.push(Some(v)); + } + error @ CalcResult::Error { .. } => return Err(error), + _ => { + values.push(None); + } + } + } + } + Ok(values) + } + + pub(crate) fn values_from_array( + &mut self, + array: Vec>, + ) -> Result>, Error> { + let mut values = Vec::new(); + for row in array { + for item in row { + match item { + ArrayNode::Number(f) => { + values.push(Some(f)); + } + ArrayNode::Error(error) => { + return Err(error); + } + _ => { + values.push(None); + } + } + } + } + Ok(values) + } + + // CHISQ.TEST(actual_range, expected_range) + pub(crate) fn fn_chisq_test(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + let actual_range = self.evaluate_node_in_context(&args[0], cell); + let expected_range = self.evaluate_node_in_context(&args[1], cell); + + let (width, height, values_left, values_right) = match (actual_range, expected_range) { + ( + CalcResult::Range { + left: l1, + right: r1, + }, + CalcResult::Range { + left: l2, + right: r2, + }, + ) => { + if l1.sheet != l2.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + let rows1 = r1.row - l1.row + 1; + let cols1 = r1.column 
- l1.column + 1; + let rows2 = r2.row - l2.row + 1; + let cols2 = r2.column - l2.column + 1; + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges must be of the same shape".to_string(), + ); + } + let values_left = match self.values_from_range(l1, r1) { + Err(error) => { + return error; + } + Ok(v) => v, + }; + let values_right = match self.values_from_range(l2, r2) { + Err(error) => { + return error; + } + Ok(v) => v, + }; + (rows1, cols1, values_left, values_right) + } + ( + CalcResult::Array(left), + CalcResult::Range { + left: l2, + right: r2, + }, + ) => { + let rows2 = r2.row - l2.row + 1; + let cols2 = r2.column - l2.column + 1; + + let rows1 = left.len() as i32; + let cols1 = if rows1 > 0 { left[0].len() as i32 } else { 0 }; + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Array and range must be of the same shape".to_string(), + ); + } + let values_left = match self.values_from_array(left) { + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in first array: {:?}", error), + ); + } + Ok(v) => v, + }; + let values_right = match self.values_from_range(l2, r2) { + Err(error) => { + return error; + } + Ok(v) => v, + }; + (rows2, cols2, values_left, values_right) + } + ( + CalcResult::Range { + left: l1, + right: r1, + }, + CalcResult::Array(right), + ) => { + let rows1 = r1.row - l1.row + 1; + let cols1 = r1.column - l1.column + 1; + + let rows2 = right.len() as i32; + let cols2 = if rows2 > 0 { right[0].len() as i32 } else { 0 }; + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Range and array must be of the same shape".to_string(), + ); + } + let values_left = match self.values_from_range(l1, r1) { + Err(error) => { + return error; + } + Ok(v) => v, + }; + let values_right = match self.values_from_array(right) { + 
Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in second array: {:?}", error), + ); + } + Ok(v) => v, + }; + (rows1, cols1, values_left, values_right) + } + (CalcResult::Array(left), CalcResult::Array(right)) => { + let rows1 = left.len() as i32; + let rows2 = right.len() as i32; + let cols1 = if rows1 > 0 { left[0].len() as i32 } else { 0 }; + let cols2 = if rows2 > 0 { right[0].len() as i32 } else { 0 }; + + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Arrays must be of the same shape".to_string(), + ); + } + let values_left = match self.values_from_array(left) { + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in first array: {:?}", error), + ); + } + Ok(v) => v, + }; + let values_right = match self.values_from_array(right) { + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in second array: {:?}", error), + ); + } + Ok(v) => v, + }; + (rows1, cols1, values_left, values_right) + } + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Both arguments must be ranges or arrays".to_string(), + ); + } + }; + + let mut values = Vec::with_capacity(values_left.len()); + + // Now we have: + // - values: flattened (observed, expected) + // - width, height: shape + for i in 0..values_left.len() { + match (values_left[i], values_right[i]) { + (Some(v1), Some(v2)) => { + values.push((v1, v2)); + } + _ => { + values.push((1.0, 1.0)); + } + } + } + if width == 0 || height == 0 || values.len() < 2 { + return CalcResult::new_error( + Error::NUM, + cell, + "CHISQ.TEST requires at least two data points".to_string(), + ); + } + + let mut chi2 = 0.0; + for (obs, exp) in &values { + if *obs < 0.0 || *exp < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Negative value in CHISQ.TEST data".to_string(), + ); + } + if *exp == 0.0 { + return CalcResult::new_error( + 
Error::DIV, + cell, + "Zero expected value in CHISQ.TEST".to_string(), + ); + } + let diff = obs - exp; + chi2 += (diff * diff) / exp; + } + + if chi2 < 0.0 && chi2 > -1e-12 { + chi2 = 0.0; + } + + let total = width * height; + if total <= 1 { + return CalcResult::new_error( + Error::NUM, + cell, + "CHISQ.TEST degrees of freedom is zero".to_string(), + ); + } + + let df = if width > 1 && height > 1 { + (width - 1) * (height - 1) + } else { + total - 1 + }; + + let dist = match ChiSquared::new(df as f64) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid degrees of freedom in CHISQ.TEST".to_string(), + ); + } + }; + + let mut p = 1.0 - dist.cdf(chi2); + + // clamp tiny fp noise + if p < 0.0 && p > -1e-15 { + p = 0.0; + } + if p > 1.0 && p < 1.0 + 1e-15 { + p = 1.0; + } + + CalcResult::Number(p) + } +} diff --git a/base/src/functions/statistical.rs b/base/src/functions/statistical/count_and_average.rs similarity index 50% rename from base/src/functions/statistical.rs rename to base/src/functions/statistical/count_and_average.rs index a013688..eed5d51 100644 --- a/base/src/functions/statistical.rs +++ b/base/src/functions/statistical/count_and_average.rs @@ -1,14 +1,10 @@ use crate::constants::{LAST_COLUMN, LAST_ROW}; +use crate::expressions::parser::ArrayNode; use crate::expressions::types::CellReferenceIndex; use crate::{ - calc_result::{CalcResult, Range}, - expressions::parser::Node, - expressions::token::Error, - model::Model, + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, }; -use super::util::build_criteria; - impl Model { pub(crate) fn fn_average(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { if args.is_empty() { @@ -90,7 +86,6 @@ impl Model { } CalcResult::Number(sum / count) } - pub(crate) fn fn_averagea(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { if args.is_empty() { return CalcResult::new_args_number_error(cell); @@ 
-324,350 +319,26 @@ impl Model { CalcResult::Number(result) } - pub(crate) fn fn_countif(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { - if args.len() == 2 { - let arguments = vec![args[0].clone(), args[1].clone()]; - self.fn_countifs(&arguments, cell) - } else { - CalcResult::new_args_number_error(cell) - } - } - - /// AVERAGEIF(criteria_range, criteria, [average_range]) - /// if average_rage is missing then criteria_range will be used - pub(crate) fn fn_averageif(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { - if args.len() == 2 { - let arguments = vec![args[0].clone(), args[0].clone(), args[1].clone()]; - self.fn_averageifs(&arguments, cell) - } else if args.len() == 3 { - let arguments = vec![args[2].clone(), args[0].clone(), args[1].clone()]; - self.fn_averageifs(&arguments, cell) - } else { - CalcResult::new_args_number_error(cell) - } - } - - // FIXME: This function shares a lot of code with apply_ifs. Can we merge them? - pub(crate) fn fn_countifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { - let args_count = args.len(); - if args_count < 2 || !args_count.is_multiple_of(2) { - return CalcResult::new_args_number_error(cell); - } - - let case_count = args_count / 2; - // NB: this is a beautiful example of the borrow checker - // The order of these two definitions cannot be swapped. 
- let mut criteria = Vec::new(); - let mut fn_criteria = Vec::new(); - let ranges = &mut Vec::new(); - for case_index in 0..case_count { - let criterion = self.evaluate_node_in_context(&args[case_index * 2 + 1], cell); - criteria.push(criterion); - // NB: We cannot do: - // fn_criteria.push(build_criteria(&criterion)); - // because criterion doesn't live long enough - let result = self.evaluate_node_in_context(&args[case_index * 2], cell); - if result.is_error() { - return result; - } - if let CalcResult::Range { left, right } = result { - if left.sheet != right.sheet { - return CalcResult::new_error( - Error::VALUE, - cell, - "Ranges are in different sheets".to_string(), - ); - } - // TODO test ranges are of the same size as sum_range - ranges.push(Range { left, right }); - } else { - return CalcResult::new_error(Error::VALUE, cell, "Expected a range".to_string()); - } - } - for criterion in criteria.iter() { - fn_criteria.push(build_criteria(criterion)); - } - - let mut total = 0.0; - let first_range = &ranges[0]; - let left_row = first_range.left.row; - let left_column = first_range.left.column; - let right_row = first_range.right.row; - let right_column = first_range.right.column; - - let dimension = match self.workbook.worksheet(first_range.left.sheet) { - Ok(s) => s.dimension(), - Err(_) => { - return CalcResult::new_error( - Error::ERROR, - cell, - format!("Invalid worksheet index: '{}'", first_range.left.sheet), - ) - } - }; - let max_row = dimension.max_row; - let max_column = dimension.max_column; - - let open_row = left_row == 1 && right_row == LAST_ROW; - let open_column = left_column == 1 && right_column == LAST_COLUMN; - - for row in left_row..right_row + 1 { - if open_row && row > max_row { - // If the row is larger than the max row in the sheet then all cells are empty. 
- // We compute it only once - let mut is_true = true; - for fn_criterion in fn_criteria.iter() { - if !fn_criterion(&CalcResult::EmptyCell) { - is_true = false; - break; - } - } - if is_true { - total += ((LAST_ROW - max_row) * (right_column - left_column + 1)) as f64; - } - break; - } - for column in left_column..right_column + 1 { - if open_column && column > max_column { - // If the column is larger than the max column in the sheet then all cells are empty. - // We compute it only once - let mut is_true = true; - for fn_criterion in fn_criteria.iter() { - if !fn_criterion(&CalcResult::EmptyCell) { - is_true = false; - break; - } - } - if is_true { - total += (LAST_COLUMN - max_column) as f64; - } - break; - } - let mut is_true = true; - for case_index in 0..case_count { - // We check if value in range n meets criterion n - let range = &ranges[case_index]; - let fn_criterion = &fn_criteria[case_index]; - let value = self.evaluate_cell(CellReferenceIndex { - sheet: range.left.sheet, - row: range.left.row + row - first_range.left.row, - column: range.left.column + column - first_range.left.column, - }); - if !fn_criterion(&value) { - is_true = false; - break; - } - } - if is_true { - total += 1.0; - } - } - } - CalcResult::Number(total) - } - - pub(crate) fn apply_ifs( - &mut self, - args: &[Node], - cell: CellReferenceIndex, - mut apply: F, - ) -> Result<(), CalcResult> - where - F: FnMut(f64), - { - let args_count = args.len(); - if args_count < 3 || args_count.is_multiple_of(2) { - return Err(CalcResult::new_args_number_error(cell)); - } - let arg_0 = self.evaluate_node_in_context(&args[0], cell); - if arg_0.is_error() { - return Err(arg_0); - } - let sum_range = if let CalcResult::Range { left, right } = arg_0 { - if left.sheet != right.sheet { - return Err(CalcResult::new_error( - Error::VALUE, - cell, - "Ranges are in different sheets".to_string(), - )); - } - Range { left, right } - } else { - return Err(CalcResult::new_error( - Error::VALUE, - cell, - 
"Expected a range".to_string(), - )); - }; - - let case_count = (args_count - 1) / 2; - // NB: this is a beautiful example of the borrow checker - // The order of these two definitions cannot be swapped. - let mut criteria = Vec::new(); - let mut fn_criteria = Vec::new(); - let ranges = &mut Vec::new(); - for case_index in 1..=case_count { - let criterion = self.evaluate_node_in_context(&args[case_index * 2], cell); - // NB: criterion might be an error. That's ok - criteria.push(criterion); - // NB: We cannot do: - // fn_criteria.push(build_criteria(&criterion)); - // because criterion doesn't live long enough - let result = self.evaluate_node_in_context(&args[case_index * 2 - 1], cell); - if result.is_error() { - return Err(result); - } - if let CalcResult::Range { left, right } = result { - if left.sheet != right.sheet { - return Err(CalcResult::new_error( - Error::VALUE, - cell, - "Ranges are in different sheets".to_string(), - )); - } - // TODO test ranges are of the same size as sum_range - ranges.push(Range { left, right }); - } else { - return Err(CalcResult::new_error( - Error::VALUE, - cell, - "Expected a range".to_string(), - )); - } - } - for criterion in criteria.iter() { - fn_criteria.push(build_criteria(criterion)); - } - - let left_row = sum_range.left.row; - let left_column = sum_range.left.column; - let mut right_row = sum_range.right.row; - let mut right_column = sum_range.right.column; - - if left_row == 1 && right_row == LAST_ROW { - right_row = match self.workbook.worksheet(sum_range.left.sheet) { - Ok(s) => s.dimension().max_row, - Err(_) => { - return Err(CalcResult::new_error( - Error::ERROR, - cell, - format!("Invalid worksheet index: '{}'", sum_range.left.sheet), - )); - } - }; - } - if left_column == 1 && right_column == LAST_COLUMN { - right_column = match self.workbook.worksheet(sum_range.left.sheet) { - Ok(s) => s.dimension().max_column, - Err(_) => { - return Err(CalcResult::new_error( - Error::ERROR, - cell, - format!("Invalid 
worksheet index: '{}'", sum_range.left.sheet), - )); - } - }; - } - - for row in left_row..right_row + 1 { - for column in left_column..right_column + 1 { - let mut is_true = true; - for case_index in 0..case_count { - // We check if value in range n meets criterion n - let range = &ranges[case_index]; - let fn_criterion = &fn_criteria[case_index]; - let value = self.evaluate_cell(CellReferenceIndex { - sheet: range.left.sheet, - row: range.left.row + row - sum_range.left.row, - column: range.left.column + column - sum_range.left.column, - }); - if !fn_criterion(&value) { - is_true = false; - break; - } - } - if is_true { - let v = self.evaluate_cell(CellReferenceIndex { - sheet: sum_range.left.sheet, - row, - column, - }); - match v { - CalcResult::Number(n) => apply(n), - CalcResult::Error { .. } => return Err(v), - _ => {} - } - } - } - } - Ok(()) - } - - pub(crate) fn fn_averageifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { - let mut total = 0.0; - let mut count = 0.0; - - let average = |value: f64| { - total += value; - count += 1.0; - }; - if let Err(e) = self.apply_ifs(args, cell, average) { - return e; - } - - if count == 0.0 { - return CalcResult::Error { - error: Error::DIV, - origin: cell, - message: "division by 0".to_string(), - }; - } - CalcResult::Number(total / count) - } - - pub(crate) fn fn_minifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { - let mut min = f64::INFINITY; - let apply_min = |value: f64| min = value.min(min); - if let Err(e) = self.apply_ifs(args, cell, apply_min) { - return e; - } - - if min.is_infinite() { - min = 0.0; - } - CalcResult::Number(min) - } - - pub(crate) fn fn_maxifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { - let mut max = -f64::INFINITY; - let apply_max = |value: f64| max = value.max(max); - if let Err(e) = self.apply_ifs(args, cell, apply_max) { - return e; - } - if max.is_infinite() { - max = 0.0; - } - CalcResult::Number(max) - } - - 
pub(crate) fn fn_geomean(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + pub(crate) fn fn_avedev(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { if args.is_empty() { return CalcResult::new_args_number_error(cell); } - let mut count = 0.0; - let mut product = 1.0; + + let mut values: Vec = Vec::new(); + let mut sum = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(values: &mut Vec, sum: &mut f64, count: &mut u64, value: f64) { + values.push(value); + *sum += value; + *count += 1; + } + for arg in args { match self.evaluate_node_in_context(arg, cell) { CalcResult::Number(value) => { - count += 1.0; - product *= value; - } - CalcResult::Boolean(b) => { - if let Node::ReferenceKind { .. } = arg { - } else { - product *= if b { 1.0 } else { 0.0 }; - count += 1.0; - } + accumulate(&mut values, &mut sum, &mut count, value); } CalcResult::Range { left, right } => { if left.sheet != right.sheet { @@ -677,57 +348,99 @@ impl Model { "Ranges are in different sheets".to_string(), ); } - for row in left.row..(right.row + 1) { - for column in left.column..(right.column + 1) { + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..=row2 { + for column in column1..=column2 { match self.evaluate_cell(CellReferenceIndex { sheet: left.sheet, row, column, }) { CalcResult::Number(value) => { - count += 1.0; - 
product *= value; + accumulate(&mut values, &mut sum, &mut count, value); } error @ CalcResult::Error { .. } => return error, - CalcResult::Range { .. } => { - return CalcResult::new_error( - Error::ERROR, - cell, - "Unexpected Range".to_string(), - ); + _ => { + // ignore non-numeric + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut values, &mut sum, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric } - _ => {} } } } } error @ CalcResult::Error { .. } => return error, - CalcResult::String(s) => { - if let Node::ReferenceKind { .. } = arg { - // Do nothing - } else if let Ok(t) = s.parse::() { - product *= t; - count += 1.0; - } else { - return CalcResult::Error { - error: Error::VALUE, - origin: cell, - message: "Argument cannot be cast into number".to_string(), - }; - } - } _ => { - // Ignore everything else + // ignore non-numeric } - }; + } } - if count == 0.0 { - return CalcResult::Error { - error: Error::DIV, - origin: cell, - message: "Division by Zero".to_string(), - }; + + if count == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "AVEDEV with no numeric data".to_string(), + ); } - CalcResult::Number(product.powf(1.0 / count)) + + let n = count as f64; + let mean = sum / n; + + let mut sum_abs_dev = 0.0; + for v in &values { + sum_abs_dev += (v - mean).abs(); + } + + CalcResult::Number(sum_abs_dev / n) } } diff --git a/base/src/functions/statistical/covariance.rs b/base/src/functions/statistical/covariance.rs new file mode 100644 index 0000000..61eccbb --- /dev/null +++ b/base/src/functions/statistical/covariance.rs @@ -0,0 +1,264 @@ +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + 
+impl Model { + pub(crate) fn fn_covariance_p( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let values1_opts = match self.evaluate_node_in_context(&args[0], cell) { + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(error) => return error, + }, + CalcResult::Array(a) => match self.values_from_array(a) { + Ok(v) => v, + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in first array: {:?}", error), + ); + } + }, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "First argument must be a range or array".to_string(), + ); + } + }; + + let values2_opts = match self.evaluate_node_in_context(&args[1], cell) { + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(error) => return error, + }, + CalcResult::Array(a) => match self.values_from_array(a) { + Ok(v) => v, + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in second array: {:?}", error), + ); + } + }, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Second argument must be a range or array".to_string(), + ); + } + }; + + // Same number of cells + if values1_opts.len() != values2_opts.len() { + return CalcResult::new_error( + Error::NA, + cell, + "COVARIANCE.P requires arrays of the same size".to_string(), + ); + } + + // Count numeric data points in each array (ignoring text/booleans/empty) + let count1 = values1_opts.iter().filter(|v| v.is_some()).count(); + let count2 = values2_opts.iter().filter(|v| v.is_some()).count(); + + if count1 == 0 || count2 == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "COVARIANCE.P requires at least one numeric value in each array".to_string(), + ); + } + + if count1 != count2 { + return CalcResult::new_error( + Error::NA, + cell, + 
"COVARIANCE.P arrays must have the same number of numeric data points".to_string(), + ); + } + + // Build paired numeric vectors, position by position + let mut xs: Vec = Vec::with_capacity(count1); + let mut ys: Vec = Vec::with_capacity(count2); + + for (v1_opt, v2_opt) in values1_opts.into_iter().zip(values2_opts.into_iter()) { + if let (Some(x), Some(y)) = (v1_opt, v2_opt) { + xs.push(x); + ys.push(y); + } + } + + let n = xs.len(); + if n == 0 { + // Should be impossible given the checks above, but guard anyway + return CalcResult::new_error( + Error::DIV, + cell, + "COVARIANCE.P has no paired numeric data points".to_string(), + ); + } + + let n_f = n as f64; + + let mut sum_x = 0.0; + let mut sum_y = 0.0; + for i in 0..n { + sum_x += xs[i]; + sum_y += ys[i]; + } + let mean_x = sum_x / n_f; + let mean_y = sum_y / n_f; + + let mut sum_prod = 0.0; + for i in 0..n { + let dx = xs[i] - mean_x; + let dy = ys[i] - mean_y; + sum_prod += dx * dy; + } + + let cov = sum_prod / n_f; + CalcResult::Number(cov) + } + + pub(crate) fn fn_covariance_s( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let values1_opts = match self.evaluate_node_in_context(&args[0], cell) { + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(error) => return error, + }, + CalcResult::Array(a) => match self.values_from_array(a) { + Ok(v) => v, + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in first array: {:?}", error), + ); + } + }, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "First argument must be a range or array".to_string(), + ); + } + }; + + let values2_opts = match self.evaluate_node_in_context(&args[1], cell) { + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(error) => return error, + }, + CalcResult::Array(a) => 
match self.values_from_array(a) { + Ok(v) => v, + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in second array: {:?}", error), + ); + } + }, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Second argument must be a range or array".to_string(), + ); + } + }; + + // Same number of cells + if values1_opts.len() != values2_opts.len() { + return CalcResult::new_error( + Error::NA, + cell, + "COVARIANCE.S requires arrays of the same size".to_string(), + ); + } + + // Count numeric data points in each array (ignoring text/booleans/empty) + let count1 = values1_opts.iter().filter(|v| v.is_some()).count(); + let count2 = values2_opts.iter().filter(|v| v.is_some()).count(); + + if count1 == 0 || count2 == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "COVARIANCE.S requires numeric values in each array".to_string(), + ); + } + + if count1 != count2 { + return CalcResult::new_error( + Error::NA, + cell, + "COVARIANCE.S arrays must have the same number of numeric data points".to_string(), + ); + } + + // Build paired numeric vectors + let mut xs: Vec = Vec::with_capacity(count1); + let mut ys: Vec = Vec::with_capacity(count2); + + for (v1_opt, v2_opt) in values1_opts.into_iter().zip(values2_opts.into_iter()) { + if let (Some(x), Some(y)) = (v1_opt, v2_opt) { + xs.push(x); + ys.push(y); + } + } + + let n = xs.len(); + if n < 2 { + return CalcResult::new_error( + Error::DIV, + cell, + "COVARIANCE.S requires at least two paired data points".to_string(), + ); + } + + let n_f = n as f64; + + let mut sum_x = 0.0; + let mut sum_y = 0.0; + for i in 0..n { + sum_x += xs[i]; + sum_y += ys[i]; + } + let mean_x = sum_x / n_f; + let mean_y = sum_y / n_f; + + let mut sum_prod = 0.0; + for i in 0..n { + let dx = xs[i] - mean_x; + let dy = ys[i] - mean_y; + sum_prod += dx * dy; + } + + let cov = sum_prod / (n_f - 1.0); + + CalcResult::Number(cov) + } +} diff --git a/base/src/functions/statistical/devsq.rs 
b/base/src/functions/statistical/devsq.rs new file mode 100644 index 0000000..13d666e --- /dev/null +++ b/base/src/functions/statistical/devsq.rs @@ -0,0 +1,135 @@ +use crate::constants::{LAST_COLUMN, LAST_ROW}; +use crate::expressions::parser::ArrayNode; +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // DEVSQ(number1, [number2], ...) + pub(crate) fn fn_devsq(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + // tiny helper so we don't repeat ourselves + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in 
column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // We ignore booleans and strings + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // We ignore booleans and strings + } + } + } + } + } + error @ CalcResult::Error { .. } => return error, + _ => { + // We ignore booleans and strings + } + }; + } + + if count == 0 { + // No numeric data at all + return CalcResult::new_error( + Error::DIV, + cell, + "DEVSQ with no numeric data".to_string(), + ); + } + + let n = count as f64; + let mut result = sumsq - (sum * sum) / n; + + // Numerical noise can make result slightly negative when it should be 0 + if result < 0.0 && result > -1e-12 { + result = 0.0; + } + + CalcResult::Number(result) + } +} diff --git a/base/src/functions/statistical/exponential.rs b/base/src/functions/statistical/exponential.rs new file mode 100644 index 0000000..4e33d3a --- /dev/null +++ b/base/src/functions/statistical/exponential.rs @@ -0,0 +1,54 @@ +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_expon_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // EXPON.DIST(x, lambda, cumulative) + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let lambda = match 
self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[2], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if x < 0.0 || lambda <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for EXPON.DIST".to_string(), + }; + } + + let result = if cumulative { + // CDF + 1.0 - (-lambda * x).exp() + } else { + // PDF + lambda * (-lambda * x).exp() + }; + + if result.is_nan() || result.is_infinite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for EXPON.DIST".to_string(), + }; + } + + CalcResult::Number(result) + } +} diff --git a/base/src/functions/statistical/fisher.rs b/base/src/functions/statistical/fisher.rs new file mode 100644 index 0000000..72fa41b --- /dev/null +++ b/base/src/functions/statistical/fisher.rs @@ -0,0 +1,299 @@ +use statrs::distribution::{Continuous, ContinuousCDF, FisherSnedecor}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // FISHER(x) = 0.5 * ln((1 + x) / (1 - x)) + pub(crate) fn fn_fisher(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 1 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + if x <= -1.0 || x >= 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "x must be between -1 and 1 (exclusive) in FISHER".to_string(), + }; + } + + let ratio = (1.0 + x) / (1.0 - x); + let result = 0.5 * ratio.ln(); + + if result.is_nan() || result.is_infinite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for FISHER".to_string(), + }; + } + + CalcResult::Number(result) + } + + // FISHERINV(y) = (e^(2y) - 1) / 
(e^(2y) + 1) = tanh(y) + pub(crate) fn fn_fisher_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 1 { + return CalcResult::new_args_number_error(cell); + } + + let y = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // Use tanh directly to avoid overflow from exp(2y) + let result = y.tanh(); + + if result.is_nan() || result.is_infinite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for FISHERINV".to_string(), + }; + } + + CalcResult::Number(result) + } + + // F.DIST(x, deg_freedom1, deg_freedom2, cumulative) + pub(crate) fn fn_f_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df1 = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + let df2 = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[3], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + // Excel domain checks + if x < 0.0 { + return CalcResult::new_error(Error::NUM, cell, "x must be >= 0 in F.DIST".to_string()); + } + if df1 < 1.0 || df2 < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in F.DIST".to_string(), + ); + } + + let dist = match FisherSnedecor::new(df1, df2) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for F distribution".to_string(), + ) + } + }; + + let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; + + if result.is_nan() || result.is_infinite() { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for F.DIST".to_string(), + ); + } + + CalcResult::Number(result) + } 
+ + pub(crate) fn fn_f_dist_rt(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // F.DIST.RT(x, deg_freedom1, deg_freedom2) + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df1 = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + let df2 = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + if x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "x must be >= 0 in F.DIST.RT".to_string(), + ); + } + if df1 < 1.0 || df2 < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in F.DIST.RT".to_string(), + ); + } + + let dist = match FisherSnedecor::new(df1, df2) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for F distribution".to_string(), + ) + } + }; + + // Right-tail probability: P(F > x) = 1 - CDF(x) + let result = 1.0 - dist.cdf(x); + + if result.is_nan() || result.is_infinite() || result < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for F.DIST.RT".to_string(), + ); + } + + CalcResult::Number(result) + } + + // F.INV(probability, deg_freedom1, deg_freedom2) + pub(crate) fn fn_f_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let df1 = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + let df2 = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // probability < 0 or > 1 → #NUM! 
+ if !(0.0..=1.0).contains(&p) { + return CalcResult::new_error( + Error::NUM, + cell, + "probability must be in [0,1] in F.INV".to_string(), + ); + } + if df1 < 1.0 || df2 < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in F.INV".to_string(), + ); + } + + let dist = match FisherSnedecor::new(df1, df2) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for F distribution".to_string(), + ) + } + }; + + let x = dist.inverse_cdf(p); + if x.is_nan() || x.is_infinite() || x < 0.0 { + return CalcResult::new_error(Error::NUM, cell, "Invalid result for F.INV".to_string()); + } + + CalcResult::Number(x) + } + + // F.INV.RT(probability, deg_freedom1, deg_freedom2) + pub(crate) fn fn_f_inv_rt(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let df1 = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + let df2 = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + if p <= 0.0 || p > 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "probability must be in (0,1] in F.INV.RT".to_string(), + ); + } + if df1 < 1.0 || df2 < 1.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "degrees of freedom must be >= 1 in F.INV.RT".to_string(), + ); + } + + let dist = match FisherSnedecor::new(df1, df2) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for F distribution".to_string(), + ) + } + }; + + // p is right-tail: p = P(F > x) = 1 - CDF(x) + let x = dist.inverse_cdf(1.0 - p); + if x.is_nan() || x.is_infinite() || x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for 
F.INV.RT".to_string(), + ); + } + + CalcResult::Number(x) + } +} diff --git a/base/src/functions/statistical/gamma.rs b/base/src/functions/statistical/gamma.rs new file mode 100644 index 0000000..1180bd0 --- /dev/null +++ b/base/src/functions/statistical/gamma.rs @@ -0,0 +1,194 @@ +use statrs::distribution::{Continuous, ContinuousCDF, Gamma}; +use statrs::function::gamma::{gamma, ln_gamma}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_gamma(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 1 { + return CalcResult::new_args_number_error(cell); + } + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(s) => return s, + }; + if x < 0.0 && x.floor() == x { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for Gamma function".to_string(), + }; + } + let result = gamma(x); + if result.is_nan() || result.is_infinite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for Gamma function".to_string(), + }; + } + CalcResult::Number(result) + } + + pub(crate) fn fn_gamma_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // GAMMA.DIST(x, alpha, beta, cumulative) + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let alpha = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let beta_scale = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[3], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "x must be >= 0 in 
GAMMA.DIST".to_string(), + ); + } + if alpha <= 0.0 || beta_scale <= 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "alpha and beta must be > 0 in GAMMA.DIST".to_string(), + ); + } + + let rate = 1.0 / beta_scale; + + let dist = match Gamma::new(alpha, rate) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Gamma distribution".to_string(), + ) + } + }; + + let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; + + if result.is_nan() || result.is_infinite() { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for GAMMA.DIST".to_string(), + ); + } + + CalcResult::Number(result) + } + + pub(crate) fn fn_gamma_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // GAMMA.INV(probability, alpha, beta) + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let alpha = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let beta_scale = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + if !(0.0..=1.0).contains(&p) { + return CalcResult::new_error( + Error::NUM, + cell, + "probability must be in [0,1] in GAMMA.INV".to_string(), + ); + } + + if alpha <= 0.0 || beta_scale <= 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "alpha and beta must be > 0 in GAMMA.INV".to_string(), + ); + } + + let rate = 1.0 / beta_scale; + + let dist = match Gamma::new(alpha, rate) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Gamma distribution".to_string(), + ) + } + }; + + let x = dist.inverse_cdf(p); + if x.is_nan() || x.is_infinite() || x < 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid result for GAMMA.INV".to_string(), + ); + } + + CalcResult::Number(x) + } 
+ + pub(crate) fn fn_gamma_ln(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 1 { + return CalcResult::new_args_number_error(cell); + } + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(s) => return s, + }; + if x < 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for Gamma function".to_string(), + }; + } + let result = ln_gamma(x); + if result.is_nan() || result.is_infinite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for Gamma Ln function".to_string(), + }; + } + CalcResult::Number(result) + } + + pub(crate) fn fn_gamma_ln_precise( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + self.fn_gamma_ln(args, cell) + } +} diff --git a/base/src/functions/statistical/geomean.rs b/base/src/functions/statistical/geomean.rs new file mode 100644 index 0000000..b1ba362 --- /dev/null +++ b/base/src/functions/statistical/geomean.rs @@ -0,0 +1,87 @@ +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_geomean(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + let mut count = 0.0; + let mut product = 1.0; + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + count += 1.0; + product *= value; + } + CalcResult::Boolean(b) => { + if let Node::ReferenceKind { .. 
} = arg { + } else { + product *= if b { 1.0 } else { 0.0 }; + count += 1.0; + } + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + for row in left.row..(right.row + 1) { + for column in left.column..(right.column + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + count += 1.0; + product *= value; + } + error @ CalcResult::Error { .. } => return error, + CalcResult::Range { .. } => { + return CalcResult::new_error( + Error::ERROR, + cell, + "Unexpected Range".to_string(), + ); + } + _ => {} + } + } + } + } + error @ CalcResult::Error { .. } => return error, + CalcResult::String(s) => { + if let Node::ReferenceKind { .. } = arg { + // Do nothing + } else if let Ok(t) = s.parse::() { + product *= t; + count += 1.0; + } else { + return CalcResult::Error { + error: Error::VALUE, + origin: cell, + message: "Argument cannot be cast into number".to_string(), + }; + } + } + _ => { + // Ignore everything else + } + }; + } + if count == 0.0 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "Division by Zero".to_string(), + }; + } + CalcResult::Number(product.powf(1.0 / count)) + } +} diff --git a/base/src/functions/statistical/hypegeom.rs b/base/src/functions/statistical/hypegeom.rs new file mode 100644 index 0000000..1b7027a --- /dev/null +++ b/base/src/functions/statistical/hypegeom.rs @@ -0,0 +1,108 @@ +use statrs::distribution::{Discrete, DiscreteCDF, Hypergeometric}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // =HYPGEOM.DIST(sample_s, number_sample, population_s, number_pop, cumulative) + pub(crate) fn fn_hyp_geom_dist( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + 
) -> CalcResult { + if args.len() != 5 { + return CalcResult::new_args_number_error(cell); + } + + // sample_s (number of successes in the sample) + let sample_s = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // number_sample (sample size) + let number_sample = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // population_s (number of successes in the population) + let population_s = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // number_pop (population size) + let number_pop = match self.get_number_no_bools(&args[3], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[4], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if sample_s < 0.0 || sample_s > f64::min(number_sample, population_s) { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for HYPGEOM.DIST".to_string(), + }; + } + + if sample_s < f64::max(0.0, number_sample + population_s - number_pop) { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for HYPGEOM.DIST".to_string(), + }; + } + + if number_sample <= 0.0 || number_sample > number_pop { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for HYPGEOM.DIST".to_string(), + }; + } + + if population_s <= 0.0 || population_s > number_pop { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for HYPGEOM.DIST".to_string(), + }; + } + + let n_pop = number_pop as u64; + let k_pop = population_s as u64; + let n_sample = number_sample as u64; + let k = sample_s as u64; + + let dist = match Hypergeometric::new(n_pop, k_pop, n_sample) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for hypergeometric 
distribution".to_string(), + ) + } + }; + + let prob = if cumulative { dist.cdf(k) } else { dist.pmf(k) }; + + if !prob.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for HYPGEOM.DIST".to_string(), + }; + } + + CalcResult::Number(prob) + } +} diff --git a/base/src/functions/statistical/if_ifs.rs b/base/src/functions/statistical/if_ifs.rs new file mode 100644 index 0000000..79bb89c --- /dev/null +++ b/base/src/functions/statistical/if_ifs.rs @@ -0,0 +1,337 @@ +use crate::constants::{LAST_COLUMN, LAST_ROW}; +use crate::expressions::types::CellReferenceIndex; +use crate::functions::util::build_criteria; +use crate::{ + calc_result::{CalcResult, Range}, + expressions::parser::Node, + expressions::token::Error, + model::Model, +}; + +impl Model { + pub(crate) fn fn_countif(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() == 2 { + let arguments = vec![args[0].clone(), args[1].clone()]; + self.fn_countifs(&arguments, cell) + } else { + CalcResult::new_args_number_error(cell) + } + } + + /// AVERAGEIF(criteria_range, criteria, [average_range]) + /// if average_rage is missing then criteria_range will be used + pub(crate) fn fn_averageif(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() == 2 { + let arguments = vec![args[0].clone(), args[0].clone(), args[1].clone()]; + self.fn_averageifs(&arguments, cell) + } else if args.len() == 3 { + let arguments = vec![args[2].clone(), args[0].clone(), args[1].clone()]; + self.fn_averageifs(&arguments, cell) + } else { + CalcResult::new_args_number_error(cell) + } + } + + // FIXME: This function shares a lot of code with apply_ifs. Can we merge them? 
+ pub(crate) fn fn_countifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + let args_count = args.len(); + if args_count < 2 || !args_count.is_multiple_of(2) { + return CalcResult::new_args_number_error(cell); + } + + let case_count = args_count / 2; + // NB: this is a beautiful example of the borrow checker + // The order of these two definitions cannot be swapped. + let mut criteria = Vec::new(); + let mut fn_criteria = Vec::new(); + let ranges = &mut Vec::new(); + for case_index in 0..case_count { + let criterion = self.evaluate_node_in_context(&args[case_index * 2 + 1], cell); + criteria.push(criterion); + // NB: We cannot do: + // fn_criteria.push(build_criteria(&criterion)); + // because criterion doesn't live long enough + let result = self.evaluate_node_in_context(&args[case_index * 2], cell); + if result.is_error() { + return result; + } + if let CalcResult::Range { left, right } = result { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + // TODO test ranges are of the same size as sum_range + ranges.push(Range { left, right }); + } else { + return CalcResult::new_error(Error::VALUE, cell, "Expected a range".to_string()); + } + } + for criterion in criteria.iter() { + fn_criteria.push(build_criteria(criterion)); + } + + let mut total = 0.0; + let first_range = &ranges[0]; + let left_row = first_range.left.row; + let left_column = first_range.left.column; + let right_row = first_range.right.row; + let right_column = first_range.right.column; + + let dimension = match self.workbook.worksheet(first_range.left.sheet) { + Ok(s) => s.dimension(), + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", first_range.left.sheet), + ) + } + }; + let max_row = dimension.max_row; + let max_column = dimension.max_column; + + let open_row = left_row == 1 && right_row == LAST_ROW; + let 
open_column = left_column == 1 && right_column == LAST_COLUMN; + + for row in left_row..right_row + 1 { + if open_row && row > max_row { + // If the row is larger than the max row in the sheet then all cells are empty. + // We compute it only once + let mut is_true = true; + for fn_criterion in fn_criteria.iter() { + if !fn_criterion(&CalcResult::EmptyCell) { + is_true = false; + break; + } + } + if is_true { + total += ((LAST_ROW - max_row) * (right_column - left_column + 1)) as f64; + } + break; + } + for column in left_column..right_column + 1 { + if open_column && column > max_column { + // If the column is larger than the max column in the sheet then all cells are empty. + // We compute it only once + let mut is_true = true; + for fn_criterion in fn_criteria.iter() { + if !fn_criterion(&CalcResult::EmptyCell) { + is_true = false; + break; + } + } + if is_true { + total += (LAST_COLUMN - max_column) as f64; + } + break; + } + let mut is_true = true; + for case_index in 0..case_count { + // We check if value in range n meets criterion n + let range = &ranges[case_index]; + let fn_criterion = &fn_criteria[case_index]; + let value = self.evaluate_cell(CellReferenceIndex { + sheet: range.left.sheet, + row: range.left.row + row - first_range.left.row, + column: range.left.column + column - first_range.left.column, + }); + if !fn_criterion(&value) { + is_true = false; + break; + } + } + if is_true { + total += 1.0; + } + } + } + CalcResult::Number(total) + } + + pub(crate) fn apply_ifs( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + mut apply: F, + ) -> Result<(), CalcResult> + where + F: FnMut(f64), + { + let args_count = args.len(); + if args_count < 3 || args_count.is_multiple_of(2) { + return Err(CalcResult::new_args_number_error(cell)); + } + let arg_0 = self.evaluate_node_in_context(&args[0], cell); + if arg_0.is_error() { + return Err(arg_0); + } + let sum_range = if let CalcResult::Range { left, right } = arg_0 { + if left.sheet != right.sheet { 
+ return Err(CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + )); + } + Range { left, right } + } else { + return Err(CalcResult::new_error( + Error::VALUE, + cell, + "Expected a range".to_string(), + )); + }; + + let case_count = (args_count - 1) / 2; + // NB: this is a beautiful example of the borrow checker + // The order of these two definitions cannot be swapped. + let mut criteria = Vec::new(); + let mut fn_criteria = Vec::new(); + let ranges = &mut Vec::new(); + for case_index in 1..=case_count { + let criterion = self.evaluate_node_in_context(&args[case_index * 2], cell); + // NB: criterion might be an error. That's ok + criteria.push(criterion); + // NB: We cannot do: + // fn_criteria.push(build_criteria(&criterion)); + // because criterion doesn't live long enough + let result = self.evaluate_node_in_context(&args[case_index * 2 - 1], cell); + if result.is_error() { + return Err(result); + } + if let CalcResult::Range { left, right } = result { + if left.sheet != right.sheet { + return Err(CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + )); + } + // TODO test ranges are of the same size as sum_range + ranges.push(Range { left, right }); + } else { + return Err(CalcResult::new_error( + Error::VALUE, + cell, + "Expected a range".to_string(), + )); + } + } + for criterion in criteria.iter() { + fn_criteria.push(build_criteria(criterion)); + } + + let left_row = sum_range.left.row; + let left_column = sum_range.left.column; + let mut right_row = sum_range.right.row; + let mut right_column = sum_range.right.column; + + if left_row == 1 && right_row == LAST_ROW { + right_row = match self.workbook.worksheet(sum_range.left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return Err(CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", sum_range.left.sheet), + )); + } + }; + } + if left_column == 1 && right_column == 
LAST_COLUMN { + right_column = match self.workbook.worksheet(sum_range.left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return Err(CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", sum_range.left.sheet), + )); + } + }; + } + + for row in left_row..right_row + 1 { + for column in left_column..right_column + 1 { + let mut is_true = true; + for case_index in 0..case_count { + // We check if value in range n meets criterion n + let range = &ranges[case_index]; + let fn_criterion = &fn_criteria[case_index]; + let value = self.evaluate_cell(CellReferenceIndex { + sheet: range.left.sheet, + row: range.left.row + row - sum_range.left.row, + column: range.left.column + column - sum_range.left.column, + }); + if !fn_criterion(&value) { + is_true = false; + break; + } + } + if is_true { + let v = self.evaluate_cell(CellReferenceIndex { + sheet: sum_range.left.sheet, + row, + column, + }); + match v { + CalcResult::Number(n) => apply(n), + CalcResult::Error { .. 
} => return Err(v), + _ => {} + } + } + } + } + Ok(()) + } + + pub(crate) fn fn_averageifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + let mut total = 0.0; + let mut count = 0.0; + + let average = |value: f64| { + total += value; + count += 1.0; + }; + if let Err(e) = self.apply_ifs(args, cell, average) { + return e; + } + + if count == 0.0 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "division by 0".to_string(), + }; + } + CalcResult::Number(total / count) + } + + pub(crate) fn fn_minifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + let mut min = f64::INFINITY; + let apply_min = |value: f64| min = value.min(min); + if let Err(e) = self.apply_ifs(args, cell, apply_min) { + return e; + } + + if min.is_infinite() { + min = 0.0; + } + CalcResult::Number(min) + } + + pub(crate) fn fn_maxifs(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + let mut max = -f64::INFINITY; + let apply_max = |value: f64| max = value.max(max); + if let Err(e) = self.apply_ifs(args, cell, apply_max) { + return e; + } + if max.is_infinite() { + max = 0.0; + } + CalcResult::Number(max) + } +} diff --git a/base/src/functions/statistical/log_normal.rs b/base/src/functions/statistical/log_normal.rs new file mode 100644 index 0000000..5a17e1e --- /dev/null +++ b/base/src/functions/statistical/log_normal.rs @@ -0,0 +1,124 @@ +use statrs::distribution::{Continuous, ContinuousCDF, LogNormal}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_log_norm_dist( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let mean = match 
self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let std_dev = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let cumulative = match self.get_boolean(&args[3], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + // Excel domain checks + if x <= 0.0 || std_dev <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for LOGNORM.DIST".to_string(), + }; + } + + let dist = match LogNormal::new(mean, std_dev) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for LOGNORM.DIST".to_string(), + } + } + }; + + let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for LOGNORM.DIST".to_string(), + }; + } + + CalcResult::Number(result) + } + + pub(crate) fn fn_log_norm_inv( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + use statrs::distribution::{ContinuousCDF, LogNormal}; + + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let mean = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let std_dev = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // Excel domain checks + if p <= 0.0 || p >= 1.0 || std_dev <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for LOGNORM.INV".to_string(), + }; + } + + let dist = match LogNormal::new(mean, std_dev) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for LOGNORM.INV".to_string(), + } + } + }; + + let result = 
dist.inverse_cdf(p); + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameter for LOGNORM.INV".to_string(), + }; + } + + CalcResult::Number(result) + } +} diff --git a/base/src/functions/statistical/mod.rs b/base/src/functions/statistical/mod.rs new file mode 100644 index 0000000..cb08021 --- /dev/null +++ b/base/src/functions/statistical/mod.rs @@ -0,0 +1,23 @@ +mod beta; +mod binom; +mod chisq; +mod count_and_average; +mod covariance; +mod devsq; +mod exponential; +mod fisher; +mod gamma; +mod geomean; +mod hypegeom; +mod if_ifs; +mod log_normal; +mod normal; +mod pearson; +mod phi; +mod poisson; +mod standard_dev; +mod standardize; +mod t_dist; +mod variance; +mod weibull; +mod z_test; diff --git a/base/src/functions/statistical/normal.rs b/base/src/functions/statistical/normal.rs new file mode 100644 index 0000000..d49347c --- /dev/null +++ b/base/src/functions/statistical/normal.rs @@ -0,0 +1,325 @@ +use statrs::distribution::{Continuous, ContinuousCDF, Normal, StudentsT}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // NORM.DIST(x, mean, standard_dev, cumulative) + pub(crate) fn fn_norm_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let mean = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let std_dev = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let cumulative = match self.get_boolean(&args[3], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + // Excel: standard_dev must be > 0 + if std_dev <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + 
origin: cell, + message: "standard_dev must be > 0 in NORM.DIST".to_string(), + }; + } + + let dist = match Normal::new(mean, std_dev) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for NORM.DIST".to_string(), + } + } + }; + + let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for NORM.DIST".to_string(), + }; + } + + CalcResult::Number(result) + } + + // NORM.INV(probability, mean, standard_dev) + pub(crate) fn fn_norm_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let mean = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let std_dev = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + if p <= 0.0 || p >= 1.0 || std_dev <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for NORM.INV".to_string(), + }; + } + + let dist = match Normal::new(mean, std_dev) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for NORM.INV".to_string(), + } + } + }; + + let x = dist.inverse_cdf(p); + + if !x.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for NORM.INV".to_string(), + }; + } + + CalcResult::Number(x) + } + + // NORM.S.DIST(z, cumulative) + pub(crate) fn fn_norm_s_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let z = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + 
Err(e) => return e, + }; + let cumulative = match self.get_boolean(&args[1], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + let dist = match Normal::new(0.0, 1.0) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::ERROR, + origin: cell, + message: "Failed to construct standard normal distribution".to_string(), + } + } + }; + + let result = if cumulative { dist.cdf(z) } else { dist.pdf(z) }; + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for NORM.S.DIST".to_string(), + }; + } + + CalcResult::Number(result) + } + + // NORM.S.INV(probability) + pub(crate) fn fn_norm_s_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 1 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + if p <= 0.0 || p >= 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "probability must be in (0,1) in NORM.S.INV".to_string(), + }; + } + + let dist = match Normal::new(0.0, 1.0) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::ERROR, + origin: cell, + message: "Failed to construct standard normal distribution".to_string(), + } + } + }; + + let z = dist.inverse_cdf(p); + + if !z.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for NORM.S.INV".to_string(), + }; + } + + CalcResult::Number(z) + } + + pub(crate) fn fn_confidence_norm( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let alpha = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let std_dev = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let size = match 
self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.floor(), + Err(e) => return e, + }; + + if alpha <= 0.0 || alpha >= 1.0 || std_dev <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for CONFIDENCE.NORM".to_string(), + }; + } + if size < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Sample size must be at least 1".to_string(), + }; + } + + let normal = match Normal::new(0.0, 1.0) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + "Failed to construct normal distribution".to_string(), + ) + } + }; + + let quantile = normal.inverse_cdf(1.0 - alpha / 2.0); + if !quantile.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid quantile for CONFIDENCE.NORM".to_string(), + }; + } + + let margin = quantile * std_dev / size.sqrt(); + CalcResult::Number(margin) + } + + pub(crate) fn fn_confidence_t( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let alpha = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let std_dev = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let size = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // Domain checks + if alpha <= 0.0 || alpha >= 1.0 || std_dev <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for CONFIDENCE.T".to_string(), + }; + } + + // Need at least 2 observations so df = n - 1 > 0 + if size < 2.0 { + return CalcResult::Error { + error: Error::DIV, + origin: cell, + message: "Sample size must be at least 2".to_string(), + }; + } + + let df = size - 1.0; + + let t_dist = match StudentsT::new(0.0, 1.0, df) { + Ok(d) => d, + Err(_) => { + return 
CalcResult::new_error( + Error::ERROR, + cell, + "Failed to construct Student's t distribution".to_string(), + ) + } + }; + + // Two-sided CI => use 1 - alpha/2 + let t_crit = t_dist.inverse_cdf(1.0 - alpha / 2.0); + if !t_crit.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid quantile for CONFIDENCE.T".to_string(), + }; + } + + let margin = t_crit * std_dev / size.sqrt(); + CalcResult::Number(margin) + } +} diff --git a/base/src/functions/statistical/pearson.rs b/base/src/functions/statistical/pearson.rs new file mode 100644 index 0000000..5b3ebd3 --- /dev/null +++ b/base/src/functions/statistical/pearson.rs @@ -0,0 +1,235 @@ +use crate::expressions::types::CellReferenceIndex; +use crate::functions::statistical::chisq::is_same_shape_or_1d; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // PEARSON(array1, array2) + pub(crate) fn fn_pearson(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let left_arg = self.evaluate_node_in_context(&args[0], cell); + let right_arg = self.evaluate_node_in_context(&args[1], cell); + + let (values_left, values_right) = match (left_arg, right_arg) { + ( + CalcResult::Range { + left: l1, + right: r1, + }, + CalcResult::Range { + left: l2, + right: r2, + }, + ) => { + if l1.sheet != l2.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let rows1 = r1.row - l1.row + 1; + let cols1 = r1.column - l1.column + 1; + let rows2 = r2.row - l2.row + 1; + let cols2 = r2.column - l2.column + 1; + + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges must be of the same shape".to_string(), + ); + } + + let values_left = match self.values_from_range(l1, r1) { + Err(error) => return 
error, + Ok(v) => v, + }; + let values_right = match self.values_from_range(l2, r2) { + Err(error) => return error, + Ok(v) => v, + }; + + (values_left, values_right) + } + ( + CalcResult::Array(left), + CalcResult::Range { + left: l2, + right: r2, + }, + ) => { + let rows2 = r2.row - l2.row + 1; + let cols2 = r2.column - l2.column + 1; + + let rows1 = left.len() as i32; + let cols1 = if rows1 > 0 { left[0].len() as i32 } else { 0 }; + + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Array and range must be of the same shape".to_string(), + ); + } + + let values_left = match self.values_from_array(left) { + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in first array: {:?}", error), + ); + } + Ok(v) => v, + }; + let values_right = match self.values_from_range(l2, r2) { + Err(error) => return error, + Ok(v) => v, + }; + + (values_left, values_right) + } + ( + CalcResult::Range { + left: l1, + right: r1, + }, + CalcResult::Array(right), + ) => { + let rows1 = r1.row - l1.row + 1; + let cols1 = r1.column - l1.column + 1; + + let rows2 = right.len() as i32; + let cols2 = if rows2 > 0 { right[0].len() as i32 } else { 0 }; + + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Range and array must be of the same shape".to_string(), + ); + } + + let values_left = match self.values_from_range(l1, r1) { + Err(error) => return error, + Ok(v) => v, + }; + let values_right = match self.values_from_array(right) { + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in second array: {:?}", error), + ); + } + Ok(v) => v, + }; + + (values_left, values_right) + } + (CalcResult::Array(left), CalcResult::Array(right)) => { + let rows1 = left.len() as i32; + let rows2 = right.len() as i32; + let cols1 = if rows1 > 0 { left[0].len() as i32 } else { 0 }; + let cols2 = if rows2 > 0 
{ right[0].len() as i32 } else { 0 }; + + if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { + return CalcResult::new_error( + Error::VALUE, + cell, + "Arrays must be of the same shape".to_string(), + ); + } + + let values_left = match self.values_from_array(left) { + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in first array: {:?}", error), + ); + } + Ok(v) => v, + }; + let values_right = match self.values_from_array(right) { + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in second array: {:?}", error), + ); + } + Ok(v) => v, + }; + + (values_left, values_right) + } + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Both arguments must be ranges or arrays".to_string(), + ); + } + }; + + // Flatten into (x, y) pairs, skipping non-numeric entries (None) + let mut n: f64 = 0.0; + let mut sum_x = 0.0; + let mut sum_y = 0.0; + let mut sum_x2 = 0.0; + let mut sum_y2 = 0.0; + let mut sum_xy = 0.0; + + let len = values_left.len().min(values_right.len()); + for i in 0..len { + match (values_left[i], values_right[i]) { + (Some(x), Some(y)) => { + n += 1.0; + sum_x += x; + sum_y += y; + sum_x2 += x * x; + sum_y2 += y * y; + sum_xy += x * y; + } + _ => { + // Ignore pairs where at least one side is non-numeric + } + } + } + + if n < 2.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "PEARSON requires at least two numeric pairs".to_string(), + ); + } + + // Pearson correlation: + // r = [ n*Σxy - (Σx)(Σy) ] / sqrt( [n*Σx² - (Σx)²] [n*Σy² - (Σy)²] ) + let num = n * sum_xy - sum_x * sum_y; + let denom_x = n * sum_x2 - sum_x * sum_x; + let denom_y = n * sum_y2 - sum_y * sum_y; + + if denom_x.abs() < 1e-15 || denom_y.abs() < 1e-15 { + // Zero variance in at least one series + return CalcResult::new_error( + Error::DIV, + cell, + "PEARSON cannot be computed when one series has zero variance".to_string(), + ); + } + + let denom = (denom_x * denom_y).sqrt(); + let r 
= num / denom; + + CalcResult::Number(r) + } +} diff --git a/base/src/functions/statistical/phi.rs b/base/src/functions/statistical/phi.rs new file mode 100644 index 0000000..f4be299 --- /dev/null +++ b/base/src/functions/statistical/phi.rs @@ -0,0 +1,21 @@ +use crate::expressions::types::CellReferenceIndex; +use crate::{calc_result::CalcResult, expressions::parser::Node, model::Model}; + +impl Model { + // PHI(x) = standard normal PDF at x + pub(crate) fn fn_phi(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 1 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + // Standard normal PDF: (1 / sqrt(2π)) * exp(-x^2 / 2) + let result = (-(x * x) / 2.0).exp() / (2.0 * std::f64::consts::PI).sqrt(); + + CalcResult::Number(result) + } +} diff --git a/base/src/functions/statistical/poisson.rs b/base/src/functions/statistical/poisson.rs new file mode 100644 index 0000000..9388dba --- /dev/null +++ b/base/src/functions/statistical/poisson.rs @@ -0,0 +1,94 @@ +use statrs::distribution::{Discrete, DiscreteCDF, Poisson}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // =POISSON.DIST(x, mean, cumulative) + pub(crate) fn fn_poisson_dist( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + // x + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + // mean (lambda) + let lambda = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[2], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if x < 0.0 || lambda < 0.0 { + return CalcResult::Error { + error: 
Error::NUM, + origin: cell, + message: "Invalid parameters for POISSON.DIST".to_string(), + }; + } + + // Guard against insane k for u64 + if x < 0.0 || x > (u64::MAX as f64) { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for POISSON.DIST".to_string(), + }; + } + + let k = x as u64; + + // Special-case lambda = 0: degenerate distribution at 0 + if lambda == 0.0 { + let result = if cumulative { + // For x >= 0, P(X <= x) = 1 + 1.0 + } else { + // P(X = 0) = 1, P(X = k>0) = 0 + if k == 0 { + 1.0 + } else { + 0.0 + } + }; + return CalcResult::Number(result); + } + + let dist = match Poisson::new(lambda) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for POISSON.DIST".to_string(), + } + } + }; + + let prob = if cumulative { dist.cdf(k) } else { dist.pmf(k) }; + + if !prob.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for POISSON.DIST".to_string(), + }; + } + + CalcResult::Number(prob) + } +} diff --git a/base/src/functions/statistical/standard_dev.rs b/base/src/functions/statistical/standard_dev.rs new file mode 100644 index 0000000..85a87cd --- /dev/null +++ b/base/src/functions/statistical/standard_dev.rs @@ -0,0 +1,519 @@ +use crate::constants::{LAST_COLUMN, LAST_ROW}; +use crate::expressions::parser::ArrayNode; +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_stdev_p(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 
1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric + } + } + } + } + } + error @ CalcResult::Error { .. 
} => return error, + _ => { + // ignore non-numeric + } + } + } + + if count == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "STDEV.P with no numeric data".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / n; + + // clamp tiny negatives from FP noise + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var.sqrt()) + } + + pub(crate) fn fn_stdev_s(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + 
CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric + } + } + } + } + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric + } + } + } + + if count <= 1 { + return CalcResult::new_error( + Error::DIV, + cell, + "STDEV.S requires at least two numeric values".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / (n - 1.0); + + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var.sqrt()) + } + + pub(crate) fn fn_stdeva(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return 
CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::String(_) => { + accumulate(&mut sum, &mut sumsq, &mut count, 0.0); + } + CalcResult::Boolean(value) => { + let val = if value { 1.0 } else { 0.0 }; + accumulate(&mut sum, &mut sumsq, &mut count, val); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric for now + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric for now + } + } + } + } + } + error @ CalcResult::Error { .. 
} => return error, + _ => { + // ignore non-numeric for now + } + } + } + + if count <= 1 { + return CalcResult::new_error( + Error::DIV, + cell, + "STDEVA requires at least two numeric values".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / (n - 1.0); + + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var.sqrt()) + } + + pub(crate) fn fn_stdevpa(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + 
CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::String(_) => { + accumulate(&mut sum, &mut sumsq, &mut count, 0.0); + } + CalcResult::Boolean(value) => { + let val = if value { 1.0 } else { 0.0 }; + accumulate(&mut sum, &mut sumsq, &mut count, val); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric for now + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric for now + } + } + } + } + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric for now + } + } + } + + if count == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "STDEVPA with no numeric data".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / n; + + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var.sqrt()) + } +} diff --git a/base/src/functions/statistical/standardize.rs b/base/src/functions/statistical/standardize.rs new file mode 100644 index 0000000..447a8bf --- /dev/null +++ b/base/src/functions/statistical/standardize.rs @@ -0,0 +1,38 @@ +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_standardize(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // STANDARDIZE(x, mean, standard_dev) + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + let mean = match self.get_number_no_bools(&args[1], cell) { + 
Ok(f) => f, + Err(e) => return e, + }; + let std_dev = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + if std_dev <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "standard_dev must be > 0 in STANDARDIZE".to_string(), + }; + } + + let z = (x - mean) / std_dev; + + CalcResult::Number(z) + } +} diff --git a/base/src/functions/statistical/t_dist.rs b/base/src/functions/statistical/t_dist.rs new file mode 100644 index 0000000..beb9b44 --- /dev/null +++ b/base/src/functions/statistical/t_dist.rs @@ -0,0 +1,576 @@ +use statrs::distribution::{Continuous, ContinuousCDF, StudentsT}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +fn mean(xs: &[f64]) -> f64 { + let n = xs.len(); + if n == 0 { + return 0.0; + } + let mut s = 0.0; + for &x in xs { + s += x; + } + s / (n as f64) +} + +fn sample_var(xs: &[f64]) -> f64 { + let n = xs.len(); + if n < 2 { + return 0.0; + } + let m = mean(xs); + let mut s = 0.0; + for &x in xs { + let d = x - m; + s += d * d; + } + s / ((n - 1) as f64) +} + +enum TTestType { + Paired, + TwoSampleEqualVar, + TwoSampleUnequalVar, +} + +enum TTestTails { + OneTailed, + TwoTailed, +} + +impl Model { + // T.DIST(x, deg_freedom, cumulative) + pub(crate) fn fn_t_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 3 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[2], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if df < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "deg_freedom must be >= 1 in 
T.DIST".to_string(), + }; + } + + let dist = match StudentsT::new(0.0, 1.0, df) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for T.DIST".to_string(), + } + } + }; + + let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for T.DIST".to_string(), + }; + } + + CalcResult::Number(result) + } + + // T.DIST.2T(x, deg_freedom) + pub(crate) fn fn_t_dist_2t(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + if x < 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "x must be >= 0 in T.DIST.2T".to_string(), + }; + } + + if df < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "deg_freedom must be >= 1 in T.DIST.2T".to_string(), + }; + } + + let dist = match StudentsT::new(0.0, 1.0, df) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for T.DIST.2T".to_string(), + } + } + }; + + let upper_tail = 1.0 - dist.cdf(x); + let mut result = 2.0 * upper_tail; + + result = result.clamp(0.0, 1.0); + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for T.DIST.2T".to_string(), + }; + } + + CalcResult::Number(result) + } + + // T.DIST.RT(x, deg_freedom) + pub(crate) fn fn_t_dist_rt(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let x = match 
self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + if df < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "deg_freedom must be >= 1 in T.DIST.RT".to_string(), + }; + } + + let dist = match StudentsT::new(0.0, 1.0, df) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for T.DIST.RT".to_string(), + } + } + }; + + let result = 1.0 - dist.cdf(x); + + if !result.is_finite() || result < 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for T.DIST.RT".to_string(), + }; + } + + CalcResult::Number(result) + } + + // T.INV(probability, deg_freedom) + pub(crate) fn fn_t_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + if p <= 0.0 || p >= 1.0 || df < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for T.INV".to_string(), + }; + } + + let dist = match StudentsT::new(0.0, 1.0, df) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for T.INV".to_string(), + } + } + }; + + let x = dist.inverse_cdf(p); + + if !x.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for T.INV".to_string(), + }; + } + + CalcResult::Number(x) + } + + // T.INV.2T(probability, deg_freedom) + pub(crate) fn fn_t_inv_2t(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 2 { + 
return CalcResult::new_args_number_error(cell); + } + + let p = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let df = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f.trunc(), + Err(e) => return e, + }; + + if p <= 0.0 || p > 1.0 || df < 1.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for T.INV.2T".to_string(), + }; + } + + let dist = match StudentsT::new(0.0, 1.0, df) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for T.INV.2T".to_string(), + } + } + }; + + // Two-sided: F(x) = 1 - p/2 + let target_cdf = 1.0 - p / 2.0; + let x = dist.inverse_cdf(target_cdf); + + if !x.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for T.INV.2T".to_string(), + }; + } + + CalcResult::Number(x.abs()) + } + + // T.TEST(array1, array2, tails, type) + pub(crate) fn fn_t_test(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + let values1_opts = match self.evaluate_node_in_context(&args[0], cell) { + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(error) => return error, + }, + CalcResult::Array(a) => match self.values_from_array(a) { + Ok(v) => v, + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in first array: {:?}", error), + ); + } + }, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "First argument must be a range or array".to_string(), + ); + } + }; + + let values2_opts = match self.evaluate_node_in_context(&args[1], cell) { + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + Ok(v) => v, + Err(error) => return error, + }, + CalcResult::Array(a) => match self.values_from_array(a) { + Ok(v) 
=> v, + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in second array: {:?}", error), + ); + } + }, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Second argument must be a range or array".to_string(), + ); + } + }; + + let tails = match self.get_number(&args[2], cell) { + Ok(f) => { + let tf = f.trunc(); + if tf == 1.0 { + TTestTails::OneTailed + } else if tf == 2.0 { + TTestTails::TwoTailed + } else { + return CalcResult::new_error( + Error::NUM, + cell, + "tails must be 1 or 2".to_string(), + ); + } + } + Err(e) => return e, + }; + let test_type = match self.get_number(&args[3], cell) { + Ok(f) => { + let tf = f.trunc(); + match tf { + 1.0 => TTestType::Paired, + 2.0 => TTestType::TwoSampleEqualVar, + 3.0 => TTestType::TwoSampleUnequalVar, + _ => { + return CalcResult::new_error( + Error::NUM, + cell, + "type must be 1, 2, or 3".to_string(), + ); + } + } + } + Err(e) => return e, + }; + + // keep only numeric entries, ignore non-numeric (Option::None) + let values1: Vec = values1_opts.into_iter().flatten().collect(); + let values2: Vec = values2_opts.into_iter().flatten().collect(); + + let n1 = values1.len(); + let n2 = values2.len(); + + if n1 == 0 || n2 == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "T.TEST requires non-empty samples".to_string(), + ); + } + + let (t_stat, df) = match test_type { + TTestType::Paired => { + if n1 != n2 { + return CalcResult::new_error( + Error::NA, + cell, + "For paired T.TEST, both samples must have the same length".to_string(), + ); + } + if n1 < 2 { + return CalcResult::new_error( + Error::DIV, + cell, + "Paired T.TEST requires at least two pairs".to_string(), + ); + } + + let mut diffs = Vec::with_capacity(n1); + for i in 0..n1 { + diffs.push(values1[i] - values2[i]); + } + + let nd = diffs.len(); + let md = mean(&diffs); + let vd = sample_var(&diffs); + if vd <= 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Zero variance in 
paired T.TEST".to_string(), + ); + } + let sd = vd.sqrt(); + let t_stat = md / (sd / (nd as f64).sqrt()); + let df = (nd - 1) as f64; + (t_stat, df) + } + + // 2: two-sample, equal variance (homoscedastic) + TTestType::TwoSampleEqualVar => { + if n1 < 2 || n2 < 2 { + return CalcResult::new_error( + Error::DIV, + cell, + "Two-sample T.TEST type 2 requires at least two values in each sample" + .to_string(), + ); + } + + let m1 = mean(&values1); + let m2 = mean(&values2); + let v1 = sample_var(&values1); + let v2 = sample_var(&values2); + + let df_i = (n1 + n2 - 2) as i32; + if df_i <= 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Degrees of freedom must be positive in T.TEST type 2".to_string(), + ); + } + let df = df_i as f64; + + let sp2 = (((n1 - 1) as f64) * v1 + ((n2 - 1) as f64) * v2) / df; // pooled variance + + if sp2 <= 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Zero pooled variance in T.TEST type 2".to_string(), + ); + } + + let denom = (sp2 * (1.0 / (n1 as f64) + 1.0 / (n2 as f64))).sqrt(); + if denom == 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Zero denominator in T.TEST type 2".to_string(), + ); + } + + let t_stat = (m1 - m2) / denom; + (t_stat, df) + } + + // two-sample, unequal variance (Welch) + TTestType::TwoSampleUnequalVar => { + if n1 < 2 || n2 < 2 { + return CalcResult::new_error( + Error::DIV, + cell, + "Two-sample T.TEST type 3 requires at least two values in each sample" + .to_string(), + ); + } + + let m1 = mean(&values1); + let m2 = mean(&values2); + let v1 = sample_var(&values1); + let v2 = sample_var(&values2); + + let s1n = v1 / (n1 as f64); + let s2n = v2 / (n2 as f64); + let denom = (s1n + s2n).sqrt(); + if denom == 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Zero denominator in T.TEST type 3".to_string(), + ); + } + + let t_stat = (m1 - m2) / denom; + + let num_df = (s1n + s2n).powi(2); + let den_df = (s1n * s1n) / ((n1 - 1) as f64) + (s2n * s2n) / ((n2 - 1) as 
f64); + if den_df == 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Invalid degrees of freedom in T.TEST type 3".to_string(), + ); + } + let df = num_df / den_df; + (t_stat, df) + } + }; + + if df <= 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Degrees of freedom must be positive in T.TEST".to_string(), + ); + } + + let dist = match StudentsT::new(0.0, 1.0, df) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Invalid parameters for Student's t distribution".to_string(), + ); + } + }; + + let t_abs = t_stat.abs(); + let cdf = dist.cdf(t_abs); + + let mut p = match tails { + TTestTails::OneTailed => 1.0 - cdf, + TTestTails::TwoTailed => 2.0 * (1.0 - cdf), + }; + + // clamp tiny fp noise + if p < 0.0 && p > -1e-15 { + p = 0.0; + } + if p > 1.0 && p < 1.0 + 1e-15 { + p = 1.0; + } + + CalcResult::Number(p) + } +} diff --git a/base/src/functions/statistical/variance.rs b/base/src/functions/statistical/variance.rs new file mode 100644 index 0000000..564b9dc --- /dev/null +++ b/base/src/functions/statistical/variance.rs @@ -0,0 +1,518 @@ +use crate::constants::{LAST_COLUMN, LAST_ROW}; +use crate::expressions::parser::ArrayNode; +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + pub(crate) fn fn_var_p(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if 
left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric + } + } + } + } + } + error @ CalcResult::Error { .. 
} => return error, + _ => { + // ignore non-numeric + } + } + } + + if count == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "VAR.P with no numeric data".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / n; + + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var) + } + + pub(crate) fn fn_var_s(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut 
sumsq, &mut count, value); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric + } + } + } + } + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric + } + } + } + + if count <= 1 { + return CalcResult::new_error( + Error::DIV, + cell, + "VAR.S requires at least two numeric values".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / (n - 1.0); + + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var) + } + + pub(crate) fn fn_vara(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid 
worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..=row2 { + for column in column1..=column2 { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::String(_) => { + accumulate(&mut sum, &mut sumsq, &mut count, 0.0); + } + CalcResult::Boolean(value) => { + let val = if value { 1.0 } else { 0.0 }; + accumulate(&mut sum, &mut sumsq, &mut count, val); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric for now (A semantics to be added) + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric for now (A semantics to be added) + } + } + } + } + } + error @ CalcResult::Error { .. 
} => return error, + _ => { + // ignore non-numeric for now (A semantics to be added) + } + } + } + + if count <= 1 { + return CalcResult::new_error( + Error::DIV, + cell, + "VARA requires at least two numeric values".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / (n - 1.0); + + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var) + } + + pub(crate) fn fn_varpa(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + if args.is_empty() { + return CalcResult::new_args_number_error(cell); + } + + let mut sum = 0.0; + let mut sumsq = 0.0; + let mut count: u64 = 0; + + #[inline] + fn accumulate(sum: &mut f64, sumsq: &mut f64, count: &mut u64, value: f64) { + *sum += value; + *sumsq += value * value; + *count += 1; + } + + for arg in args { + match self.evaluate_node_in_context(arg, cell) { + CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::Range { left, right } => { + if left.sheet != right.sheet { + return CalcResult::new_error( + Error::VALUE, + cell, + "Ranges are in different sheets".to_string(), + ); + } + + let row1 = left.row; + let mut row2 = right.row; + let column1 = left.column; + let mut column2 = right.column; + + if row1 == 1 && row2 == LAST_ROW { + row2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_row, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + if column1 == 1 && column2 == LAST_COLUMN { + column2 = match self.workbook.worksheet(left.sheet) { + Ok(s) => s.dimension().max_column, + Err(_) => { + return CalcResult::new_error( + Error::ERROR, + cell, + format!("Invalid worksheet index: '{}'", left.sheet), + ); + } + }; + } + + for row in row1..row2 + 1 { + for column in column1..(column2 + 1) { + match self.evaluate_cell(CellReferenceIndex { + sheet: left.sheet, + row, + column, + }) { + 
CalcResult::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + CalcResult::String(_) => { + accumulate(&mut sum, &mut sumsq, &mut count, 0.0); + } + CalcResult::Boolean(value) => { + let val = if value { 1.0 } else { 0.0 }; + accumulate(&mut sum, &mut sumsq, &mut count, val); + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric for now + } + } + } + } + } + CalcResult::Array(array) => { + for row in array { + for value in row { + match value { + ArrayNode::Number(value) => { + accumulate(&mut sum, &mut sumsq, &mut count, value); + } + ArrayNode::Error(error) => { + return CalcResult::Error { + error, + origin: cell, + message: "Error in array".to_string(), + } + } + _ => { + // ignore non-numeric for now + } + } + } + } + } + error @ CalcResult::Error { .. } => return error, + _ => { + // ignore non-numeric for now + } + } + } + + if count == 0 { + return CalcResult::new_error( + Error::DIV, + cell, + "VARPA with no numeric data".to_string(), + ); + } + + let n = count as f64; + let mut var = (sumsq - (sum * sum) / n) / n; + + if var < 0.0 && var > -1e-12 { + var = 0.0; + } + + CalcResult::Number(var) + } +} diff --git a/base/src/functions/statistical/weibull.rs b/base/src/functions/statistical/weibull.rs new file mode 100644 index 0000000..95947ec --- /dev/null +++ b/base/src/functions/statistical/weibull.rs @@ -0,0 +1,71 @@ +use statrs::distribution::{Continuous, ContinuousCDF, Weibull}; + +use crate::expressions::types::CellReferenceIndex; +use crate::{ + calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, +}; + +impl Model { + // WEIBULL.DIST(x, alpha, beta, cumulative) + pub(crate) fn fn_weibull_dist( + &mut self, + args: &[Node], + cell: CellReferenceIndex, + ) -> CalcResult { + if args.len() != 4 { + return CalcResult::new_args_number_error(cell); + } + + let x = match self.get_number_no_bools(&args[0], cell) { + Ok(f) => f, + Err(e) => return e, + }; + 
+ let alpha = match self.get_number_no_bools(&args[1], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let beta = match self.get_number_no_bools(&args[2], cell) { + Ok(f) => f, + Err(e) => return e, + }; + + let cumulative = match self.get_boolean(&args[3], cell) { + Ok(b) => b, + Err(e) => return e, + }; + + if x < 0.0 || alpha <= 0.0 || beta <= 0.0 { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for WEIBULL.DIST".to_string(), + }; + } + + // statrs::Weibull: shape = k (alpha), scale = lambda (beta) + let dist = match Weibull::new(alpha, beta) { + Ok(d) => d, + Err(_) => { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid parameters for WEIBULL.DIST".to_string(), + } + } + }; + + let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; + + if !result.is_finite() { + return CalcResult::Error { + error: Error::NUM, + origin: cell, + message: "Invalid result for WEIBULL.DIST".to_string(), + }; + } + + CalcResult::Number(result) + } +} diff --git a/base/src/functions/statistical/z_test.rs b/base/src/functions/statistical/z_test.rs new file mode 100644 index 0000000..c1c1f87 --- /dev/null +++ b/base/src/functions/statistical/z_test.rs @@ -0,0 +1,171 @@ +use statrs::distribution::{ContinuousCDF, Normal}; + +use crate::expressions::token::Error; +use crate::expressions::types::CellReferenceIndex; +use crate::{calc_result::CalcResult, expressions::parser::Node, model::Model}; + +impl Model { + // Z.TEST(array, x, [sigma]) + pub(crate) fn fn_z_test(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { + // 2 or 3 arguments + if args.len() < 2 || args.len() > 3 { + return CalcResult::new_args_number_error(cell); + } + + let array_arg = self.evaluate_node_in_context(&args[0], cell); + + // Flatten first argument into Vec> (numeric / non-numeric) + let values = match array_arg { + CalcResult::Range { left, right } => match self.values_from_range(left, right) { + 
Ok(v) => v, + Err(error) => return error, + }, + CalcResult::Array(array) => match self.values_from_array(array) { + Ok(v) => v, + Err(error) => { + return CalcResult::new_error( + Error::VALUE, + cell, + format!("Error in array argument: {:?}", error), + ); + } + }, + CalcResult::Number(v) => vec![Some(v)], + error @ CalcResult::Error { .. } => return error, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Z.TEST first argument must be a range or array".to_string(), + ); + } + }; + + // Collect basic stats on numeric entries + let mut sum = 0.0; + let mut count: u64 = 0; + + for x in values.iter().flatten() { + sum += x; + count += 1; + } + + // Excel: if array has no numeric values -> #N/A + if count == 0 { + return CalcResult::new_error( + Error::NA, + cell, + "Z.TEST array has no numeric data".to_string(), + ); + } + + let n = count as f64; + let mean = sum / n; + + // x argument (hypothesized population mean) + let x_value = match self.evaluate_node_in_context(&args[1], cell) { + CalcResult::Number(v) => v, + error @ CalcResult::Error { .. } => return error, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Z.TEST second argument (x) must be numeric".to_string(), + ); + } + }; + + // Optional sigma + let mut sigma: Option = None; + if args.len() == 3 { + match self.evaluate_node_in_context(&args[2], cell) { + CalcResult::Number(v) => { + if v == 0.0 { + return CalcResult::new_error( + Error::NUM, + cell, + "Z.TEST sigma cannot be zero".to_string(), + ); + } + sigma = Some(v); + } + error @ CalcResult::Error { .. } => return error, + _ => { + return CalcResult::new_error( + Error::VALUE, + cell, + "Z.TEST sigma (third argument) must be numeric".to_string(), + ); + } + } + } + + // If sigma omitted, use sample standard deviation STDEV(array) + let sigma_value = if let Some(s) = sigma { + s + } else { + // Excel: if only one numeric value and sigma omitted -> #DIV/0! 
+ if count <= 1 { + return CalcResult::new_error( + Error::DIV, + cell, + "Z.TEST requires at least two values when sigma is omitted".to_string(), + ); + } + + // Compute sum of squared deviations + let mut sumsq_dev = 0.0; + for x in values.iter().flatten() { + let d = x - mean; + sumsq_dev += d * d; + } + + let var = sumsq_dev / (n - 1.0); + if var <= 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Z.TEST standard deviation is zero".to_string(), + ); + } + + var.sqrt() + }; + + // Compute z statistic: (mean - x) / (sigma / sqrt(n)) + let denom = sigma_value / n.sqrt(); + if denom == 0.0 { + return CalcResult::new_error( + Error::DIV, + cell, + "Z.TEST denominator is zero".to_string(), + ); + } + + let z = (mean - x_value) / denom; + + // Standard normal CDF + let dist = match Normal::new(0.0, 1.0) { + Ok(d) => d, + Err(_) => { + return CalcResult::new_error( + Error::NUM, + cell, + "Cannot create standard normal distribution in Z.TEST".to_string(), + ); + } + }; + + let mut p = 1.0 - dist.cdf(z); + + // clamp tiny FP noise + if p < 0.0 && p > -1e-15 { + p = 0.0; + } + if p > 1.0 && p < 1.0 + 1e-15 { + p = 1.0; + } + + CalcResult::Number(p) + } +} diff --git a/base/src/test/mod.rs b/base/src/test/mod.rs index 2e41417..3e80ace 100644 --- a/base/src/test/mod.rs +++ b/base/src/test/mod.rs @@ -55,12 +55,17 @@ mod test_yearfrac_basis; pub(crate) mod util; mod engineering; +mod statistical; mod test_fn_offset; mod test_number_format; mod test_arrays; +mod test_combin_combina; mod test_escape_quotes; +mod test_even_odd; +mod test_exp_sign; mod test_extend; +mod test_fn_datevalue_timevalue; mod test_fn_fv; mod test_fn_round; mod test_fn_type; @@ -80,5 +85,6 @@ mod test_percentage; mod test_set_functions_error_handling; mod test_sheet_names; mod test_today; +mod test_trigonometric_reciprocals; mod test_types; mod user_model; diff --git a/base/src/test/statistical/mod.rs b/base/src/test/statistical/mod.rs new file mode 100644 index 0000000..a96151a --- 
/// AVEDEV over literal arguments.
///
/// Values 10, 12, 23, 23, 16, 23, 21 have mean 128/7; the absolute deviations
/// sum to 236/7, so the average deviation is 236/49 = 4.8163265306...
#[test]
fn smoke_test() {
    let mut model = new_empty_model();
    model._set("A1", "=AVEDEV(10, 12, 23, 23, 16, 23, 21)");
    model.evaluate();

    assert_eq!(model._get_text("A1"), *"4.816326531");
}

/// AVEDEV over a range: booleans and text (including the quoted '23) are
/// ignored, leaving ten numbers with mean 29.25 and average deviation 7.25.
#[test]
fn numbers() {
    let mut model = new_empty_model();

    model._set("A2", "24");
    model._set("A3", "25");
    model._set("A4", "27");
    model._set("A5", "23");
    model._set("A6", "45");
    model._set("A7", "23.5");
    model._set("A8", "34");
    model._set("A9", "23");
    model._set("A10", "23");
    model._set("A11", "TRUE");
    model._set("A12", "'23");
    model._set("A13", "Text");
    model._set("A14", "FALSE");
    model._set("A15", "45");

    model._set("B1", "=AVEDEV(A2:A15)");
    model.evaluate();

    assert_eq!(model._get_text("B1"), *"7.25");
}
/// BINOM.DIST: cumulative and point probabilities plus argument-count errors.
#[test]
fn test_fn_binom_dist_smoke() {
    // (cell, formula, expected text)
    let cases = [
        // P(X <= 6) for X ~ Bin(10, 0.5) = 0.828125
        ("A1", "=BINOM.DIST(6, 10, 0.5, TRUE)", "0.828125"),
        // P(X = 6) for X ~ Bin(10, 0.5) = 0.205078125
        ("A2", "=BINOM.DIST(6, 10, 0.5, FALSE)", "0.205078125"),
        // too few args
        ("A3", "=BINOM.DIST(6, 10, 0.5)", "#ERROR!"),
        // too many args
        ("A4", "=BINOM.DIST(6, 10, 0.5, TRUE, FALSE)", "#ERROR!"),
    ];

    let mut model = new_empty_model();
    for &(cell, formula, _) in cases.iter() {
        model._set(cell, formula);
    }
    model.evaluate();

    for &(cell, _, expected) in cases.iter() {
        assert_eq!(model._get_text(cell), *expected);
    }
}

/// BINOM.DIST.RANGE: single value, interval, and domain errors.
#[test]
fn test_fn_binom_dist_range_smoke() {
    let cases = [
        ("A1", "=BINOM.DIST.RANGE(60, 0.75, 48)", "0.083974967"),
        ("A2", "=BINOM.DIST.RANGE(60, 0.75, 45, 50)", "0.523629793"),
        // p > 1 -> #NUM!
        ("A3", "=BINOM.DIST.RANGE(60, 1.2, 45, 50)", "#NUM!"),
        // lower > upper -> #NUM!
        ("A4", "=BINOM.DIST.RANGE(60, 0.75, 50, 45)", "#NUM!"),
    ];

    let mut model = new_empty_model();
    for &(cell, formula, _) in cases.iter() {
        model._set(cell, formula);
    }
    model.evaluate();

    for &(cell, _, expected) in cases.iter() {
        assert_eq!(model._get_text(cell), *expected);
    }
}

/// BINOM.INV: a valid call, domain errors, and an argument-count error.
#[test]
fn test_fn_binom_inv_smoke() {
    let cases = [
        ("A1", "=BINOM.INV(6, 0.5, 0.75)", "4"),
        // alpha < 0 -> #NUM!
        ("A2", "=BINOM.INV(6, 0.5, -0.1)", "#NUM!"),
        // p > 1 -> #NUM!
        ("A3", "=BINOM.INV(6, 1.2, 0.75)", "#NUM!"),
        // args error
        ("A4", "=BINOM.INV(6, 0.5)", "#ERROR!"),
    ];

    let mut model = new_empty_model();
    for &(cell, formula, _) in cases.iter() {
        model._set(cell, formula);
    }
    model.evaluate();

    for &(cell, _, expected) in cases.iter() {
        assert_eq!(model._get_text(cell), *expected);
    }
}

/// NEGBINOM.DIST: PMF, CDF, argument-count errors and domain errors.
#[test]
fn test_fn_negbinom_dist_smoke() {
    let cases = [
        // Valid: PMF (non-cumulative) and CDF (cumulative)
        ("A1", "=NEGBINOM.DIST(10, 5, 0.25, FALSE)", "0.05504866"),
        ("A2", "=NEGBINOM.DIST(10, 5, 0.25, TRUE)", "0.313514058"),
        // Wrong number of arguments -> #ERROR!
        ("A3", "=NEGBINOM.DIST(10, 5, 0.25)", "#ERROR!"),
        ("A4", "=NEGBINOM.DIST(10, 5, 0.25, TRUE, FALSE)", "#ERROR!"),
        // p < 0 or p > 1 -> #NUM!
        ("A5", "=NEGBINOM.DIST(10, 5, 1.5, TRUE)", "#NUM!"),
        // number_f < 0 -> #NUM!
        ("A6", "=NEGBINOM.DIST(-1, 5, 0.25, TRUE)", "#NUM!"),
        // number_s < 1 -> #NUM!
        ("A7", "=NEGBINOM.DIST(10, 0, 0.25, TRUE)", "#NUM!"),
    ];

    let mut model = new_empty_model();
    for &(cell, formula, _) in cases.iter() {
        model._set(cell, formula);
    }
    model.evaluate();

    for &(cell, _, expected) in cases.iter() {
        assert_eq!(model._get_text(cell), *expected);
    }
}
+ model._set("A4", "=CHISQ.DIST(0.5, 4)"); + model._set("A5", "=CHISQ.DIST(0.5, 4, TRUE, FALSE)"); + + // Domain errors + // x < 0 -> #NUM! + model._set("A6", "=CHISQ.DIST(-1, 4, TRUE)"); + // deg_freedom < 1 -> #NUM! + model._set("A7", "=CHISQ.DIST(0.5, 0, TRUE)"); + + model.evaluate(); + + // Values for df = 4 + // CDF(0.5) ≈ 0.026499021, PDF(0.5) ≈ 0.097350098 + assert_eq!(model._get_text("A1"), *"0.026499021"); + assert_eq!(model._get_text("A2"), *"0.097350098"); + assert_eq!(model._get_text("A3"), *"0.026499021"); + + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#ERROR!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A7"), *"#NUM!"); +} + +#[test] +fn test_fn_chisq_dist_rt_smoke() { + let mut model = new_empty_model(); + + // Valid calls + model._set("A1", "=CHISQ.DIST.RT(0.5, 4)"); + model._set("A2", "=CHISQ.DIST.RT(5, 4)"); + + // Too few / too many args -> #ERROR! + model._set("A3", "=CHISQ.DIST.RT(0.5)"); + model._set("A4", "=CHISQ.DIST.RT(0.5, 4, 1)"); + + // Domain errors + // x < 0 -> #NUM! + model._set("A5", "=CHISQ.DIST.RT(-1, 4)"); + // deg_freedom < 1 -> #NUM! + model._set("A6", "=CHISQ.DIST.RT(0.5, 0)"); + + model.evaluate(); + + // For df = 4: + // right tail at 0.5 ≈ 0.973500979 + // right tail at 5.0 ≈ 0.287297495 + assert_eq!(model._get_text("A1"), *"0.973500979"); + assert_eq!(model._get_text("A2"), *"0.287297495"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} + +#[test] +fn test_fn_chisq_inv_smoke() { + let mut model = new_empty_model(); + + // Valid calls + model._set("A1", "=CHISQ.INV(0.95, 4)"); + model._set("A2", "=CHISQ.INV(0.1, 10)"); + + // Wrong number of args -> #ERROR! + model._set("A3", "=CHISQ.INV(0.95)"); + model._set("A4", "=CHISQ.INV(0.95, 4, 1)"); + + // Domain errors + // probability < 0 or > 1 -> #NUM! 
+ model._set("A5", "=CHISQ.INV(-0.1, 4)"); + model._set("A6", "=CHISQ.INV(1.1, 4)"); + // deg_freedom < 1 -> #NUM! + model._set("A7", "=CHISQ.INV(0.5, 0)"); + + model.evaluate(); + + // Standard critical values: + // CHISQ.INV(0.95, 4) ≈ 9.487729037 + // CHISQ.INV(0.1, 10) ≈ 4.865182052 + assert_eq!(model._get_text("A1"), *"9.487729037"); + assert_eq!(model._get_text("A2"), *"4.865182052"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A7"), *"#NUM!"); +} + +#[test] +fn test_fn_chisq_inv_rt_smoke() { + let mut model = new_empty_model(); + + // Valid calls + model._set("A1", "=CHISQ.INV.RT(0.05, 4)"); + model._set("A2", "=CHISQ.INV.RT(0.9, 10)"); + + // Wrong number of args -> #ERROR! + model._set("A3", "=CHISQ.INV.RT(0.05)"); + model._set("A4", "=CHISQ.INV.RT(0.05, 4, 1)"); + + // Domain errors + // probability < 0 or > 1 -> #NUM! + model._set("A5", "=CHISQ.INV.RT(-0.1, 4)"); + model._set("A6", "=CHISQ.INV.RT(1.1, 4)"); + // deg_freedom < 1 -> #NUM! 
+ model._set("A7", "=CHISQ.INV.RT(0.5, 0)"); + + model.evaluate(); + + // For chi-square: + // CHISQ.INV.RT(0.05, 4) = CHISQ.INV(0.95, 4) ≈ 9.487729037 + // CHISQ.INV.RT(0.9, 10) = CHISQ.INV(0.1, 10) ≈ 4.865182052 + assert_eq!(model._get_text("A1"), *"9.487729037"); + assert_eq!(model._get_text("A2"), *"4.865182052"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A7"), *"#NUM!"); +} diff --git a/base/src/test/statistical/test_fn_chisq_test.rs b/base/src/test/statistical/test_fn_chisq_test.rs new file mode 100644 index 0000000..c6b14db --- /dev/null +++ b/base/src/test/statistical/test_fn_chisq_test.rs @@ -0,0 +1,127 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_chisq_test_smoke() { + let mut model = new_empty_model(); + model._set("A2", "48"); + model._set("A3", "32"); + model._set("A4", "12"); + model._set("A5", "1"); + model._set("A6", "'13"); + model._set("A7", "TRUE"); + model._set("A8", "1"); + model._set("A9", "13"); + model._set("A10", "15"); + + model._set("B2", "55"); + model._set("B3", "34"); + model._set("B4", "13"); + model._set("B5", "blah"); + model._set("B6", "13"); + model._set("B7", "1"); + model._set("B8", "TRUE"); + model._set("B9", "'14"); + model._set("B10", "16"); + + model._set("C1", "=CHISQ.TEST(A2:A10, B2:B10)"); + model.evaluate(); + assert_eq!(model._get_text("C1"), *"0.997129538"); +} + +#[test] +fn arrays() { + let mut model = new_empty_model(); + model._set("A2", "TRUE"); + model._set("A3", "4"); + model._set("A4", "'3"); + model._set("B2", "2"); + model._set("B3", "2"); + model._set("B4", "2"); + model._set("C1", "=CHISQ.TEST(A2:A4, B2:B4)"); + + model._set("G5", "=CHISQ.TEST({TRUE,4,\"3\"}, {2,2,2})"); + + // 1D arrays with different shapes + model._set("G6", "=CHISQ.TEST({1,2,3}, {3;3;4})"); + + // 2D 
array + model._set("G7", "=CHISQ.TEST({1,2;3,4},{2,3;2,2})"); + + // 1D arrays with same shape + model._set("G8", "=CHISQ.TEST({1,2,3,4}, {2,3,4,5})"); + + model.evaluate(); + assert_eq!(model._get_text("C1"), *"0.367879441"); + assert_eq!(model._get_text("G5"), *"0.367879441"); + + assert_eq!(model._get_text("G6"), *"0.383531573"); + + assert_eq!(model._get_text("G7"), *"0.067889155"); + + assert_eq!(model._get_text("G8"), *"0.733094495"); +} + +#[test] +fn more_arrays() { + let mut model = new_empty_model(); + model._set("V20", "2"); + model._set("V21", "4"); + model._set("W20", "3"); + model._set("W21", "5"); + model._set("C1", "=CHISQ.TEST({1,2;3,4},V20:W21)"); + model._set("C2", "=CHISQ.TEST({1,2;3,4}, {2,3;4,5})"); + model.evaluate(); + assert_eq!(model._get_text("C1"), *"0.257280177"); + assert_eq!(model._get_text("C2"), *"0.257280177"); +} + +#[test] +fn array_ranges() { + let mut model = new_empty_model(); + model._set("A2", "TRUE"); + model._set("A3", "4"); + model._set("A4", "'3"); + model._set("B2", "2"); + model._set("B3", "2"); + model._set("B4", "2"); + model._set("C1", "=CHISQ.TEST(A2:A4, {2;2;2})"); + + model._set("G5", "=CHISQ.TEST({TRUE;4;\"3\"}, B2:B4)"); + model.evaluate(); + + assert_eq!(model._get_text("C1"), *"0.367879441"); + assert_eq!(model._get_text("G5"), *"0.367879441"); +} + +#[test] +fn array_2d_ranges() { + let mut model = new_empty_model(); + model._set("A2", "2"); + model._set("B2", "3"); + model._set("C2", "4"); + model._set("A3", "5"); + model._set("B3", "6"); + model._set("C3", "7"); + model._set("G1", "=CHISQ.TEST({1,2,3;4,2,6}, A2:C3)"); + model.evaluate(); + assert_eq!(model._get_text("G1"), *"0.129195493"); +} + +#[test] +fn ranges_1d() { + let mut model = new_empty_model(); + model._set("A2", "1"); + model._set("A3", "2"); + model._set("A4", "3"); + model._set("B2", "4"); + model._set("C2", "5"); + model._set("D2", "6"); + model._set("G1", "=CHISQ.TEST(A2:A4, B2:D2)"); + model._set("G2", "=CHISQ.TEST(B2:D2, A2:A4)"); + + 
model.evaluate(); + assert_eq!(model._get_text("G1"), *"0.062349477"); + assert_eq!(model._get_text("G2"), *"0.000261259"); +} diff --git a/base/src/test/statistical/test_fn_confidence.rs b/base/src/test/statistical/test_fn_confidence.rs new file mode 100644 index 0000000..7835516 --- /dev/null +++ b/base/src/test/statistical/test_fn_confidence.rs @@ -0,0 +1,51 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_confidence_norm_smoke() { + let mut model = new_empty_model(); + + model._set("A1", "=CONFIDENCE.NORM(0.05, 2.5, 50)"); + + // Some edge/error cases + model._set("A2", "=CONFIDENCE.NORM(0, 2.5, 50)"); // alpha <= 0 -> #NUM! + model._set("A3", "=CONFIDENCE.NORM(1, 2.5, 50)"); // alpha >= 1 -> #NUM! + model._set("A4", "=CONFIDENCE.NORM(0.05, -1, 50)"); // std_dev <=0 -> #NUM! + model._set("A5", "=CONFIDENCE.NORM(0.05, 2.5, 1)"); + model._set("A6", "=CONFIDENCE.NORM(0.05, 2.5, 0.99)"); // size < 1 -> #NUM! + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.692951912"); + + assert_eq!(model._get_text("A2"), *"#NUM!"); + assert_eq!(model._get_text("A3"), *"#NUM!"); + assert_eq!(model._get_text("A4"), *"#NUM!"); + assert_eq!(model._get_text("A5"), *"4.899909961"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} + +#[test] +fn test_fn_confidence_t_smoke() { + let mut model = new_empty_model(); + + model._set("A1", "=CONFIDENCE.T(0.05, 50000, 100)"); + + // Some edge/error cases + model._set("A2", "=CONFIDENCE.T(0, 50000, 100)"); // alpha <= 0 -> #NUM! + model._set("A3", "=CONFIDENCE.T(1, 50000, 100)"); // alpha >= 1 -> #NUM! 
+ model._set("A4", "=CONFIDENCE.T(0.05, -1, 100)"); + model._set("A5", "=CONFIDENCE.T(0.05, 50000, 1)"); + model._set("A6", "=CONFIDENCE.T(0.05, 50000, 1.7)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"9921.08475793"); + + assert_eq!(model._get_text("A2"), *"#NUM!"); + assert_eq!(model._get_text("A3"), *"#NUM!"); + assert_eq!(model._get_text("A4"), *"#NUM!"); + assert_eq!(model._get_text("A5"), *"#DIV/0!"); + assert_eq!(model._get_text("A6"), *"#DIV/0!"); +} diff --git a/base/src/test/statistical/test_fn_covariance.rs b/base/src/test/statistical/test_fn_covariance.rs new file mode 100644 index 0000000..edcefd7 --- /dev/null +++ b/base/src/test/statistical/test_fn_covariance.rs @@ -0,0 +1,57 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_covariance_smoke() { + let mut model = new_empty_model(); + model._set("A1", "3"); + model._set("A2", "9"); + model._set("A3", "2"); + model._set("A4", "7"); + model._set("A5", "4"); + model._set("A6", "12"); + + model._set("B1", "5"); + model._set("B2", "15"); + model._set("B3", "6"); + model._set("B4", "17"); + model._set("B5", "8"); + model._set("B6", "20"); + + model._set("C1", "=COVARIANCE.P(A1:A6, B1:B6)"); + model._set("C2", "=COVARIANCE.S(A1:A6, B1:B6)"); + model.evaluate(); + + assert_eq!(model._get_text("C1"), *"19.194444444"); + assert_eq!(model._get_text("C2"), *"23.033333333"); +} + +#[test] +fn arrays_mixed() { + let mut model = new_empty_model(); + + model._set("A2", "2"); + model._set("A3", "4"); + model._set("A4", "6"); + model._set("A5", "8"); + + model._set("B2", "1"); + model._set("B3", "3"); + model._set("B4", "5"); + model._set("B5", "7"); + + model._set("C1", "=COVARIANCE.P(A2:A5, {1,3,5,7})"); + model._set("C2", "=COVARIANCE.S(A2:A5, {1,3,5,7})"); + model._set("C3", "=COVARIANCE.P(A2:A5, B2:B5)"); + model._set("C4", "=COVARIANCE.S(A2:A5, B2:B5)"); + model._set("C5", "=COVARIANCE.P({2,4,6,8}, B2:B5)"); + model._set("C6", 
"=COVARIANCE.S({2,4,6,8}, B2:B5)"); + model._set("C7", "=COVARIANCE.P({2,4,6,8}, {1,3,5,7})"); + model._set("C8", "=COVARIANCE.S({2,4,6,8}, {1,3,5,7})"); + + model.evaluate(); + + assert_eq!(model._get_text("C1"), *"5"); + assert_eq!(model._get_text("C2"), *"6.666666667"); +} diff --git a/base/src/test/statistical/test_fn_devsq.rs b/base/src/test/statistical/test_fn_devsq.rs new file mode 100644 index 0000000..a2a4fd6 --- /dev/null +++ b/base/src/test/statistical/test_fn_devsq.rs @@ -0,0 +1,50 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn arguments_smoke_test() { + let mut model = new_empty_model(); + model._set("A1", "=DEVSQ()"); + model._set("A2", "=DEVSQ(1, 2, 3)"); + model._set("A3", "=DEVSQ(1, )"); + model._set("A4", "=DEVSQ(1, , 3)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"#ERROR!"); + assert_eq!(model._get_text("A2"), *"2"); + assert_eq!(model._get_text("A3"), *"0"); + assert_eq!(model._get_text("A4"), *"2"); +} + +#[test] +fn ranges() { + let mut model = new_empty_model(); + model._set("A1", "=DEVSQ(A2:A8)"); + model._set("A2", "4"); + model._set("A3", "5"); + model._set("A4", "8"); + model._set("A5", "7"); + model._set("A6", "11"); + model._set("A7", "4"); + model._set("A8", "3"); + model.evaluate(); + assert_eq!(model._get_text("A1"), *"48"); +} + +#[test] +fn arrays() { + let mut model = new_empty_model(); + model._set("A1", "=DEVSQ({1, 2, 3})"); + model._set("A2", "=DEVSQ({1; 2; 3})"); + model._set("A3", "=DEVSQ({1, 2; 3, 4})"); + model._set("A4", "=DEVSQ({1, 2; 3, 4; 5, 6})"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"2"); + assert_eq!(model._get_text("A2"), *"2"); + assert_eq!(model._get_text("A3"), *"5"); + assert_eq!(model._get_text("A4"), *"17.5"); +} diff --git a/base/src/test/statistical/test_fn_expon_dist.rs b/base/src/test/statistical/test_fn_expon_dist.rs new file mode 100644 index 0000000..2d2305f --- /dev/null +++ 
b/base/src/test/statistical/test_fn_expon_dist.rs @@ -0,0 +1,32 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_expon_dist_smoke() { + let mut model = new_empty_model(); + + // λ = 1, x = 0.5 + // CDF = 1 - e^-0.5 ≈ 0.393469340 + // PDF = e^-0.5 ≈ 0.606530660 + model._set("A1", "=EXPON.DIST(0.5, 1, TRUE)"); + model._set("A2", "=EXPON.DIST(0.5, 1, FALSE)"); + + // Wrong number of args + model._set("A3", "=EXPON.DIST(0.5, 1)"); + model._set("A4", "=EXPON.DIST(0.5, 1, TRUE, FALSE)"); + + // Domain errors + model._set("A5", "=EXPON.DIST(-1, 1, TRUE)"); // x < 0 + model._set("A6", "=EXPON.DIST(0.5, 0, TRUE)"); // lambda <= 0 + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.39346934"); + assert_eq!(model._get_text("A2"), *"0.60653066"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} diff --git a/base/src/test/statistical/test_fn_f.rs b/base/src/test/statistical/test_fn_f.rs new file mode 100644 index 0000000..2621ecb --- /dev/null +++ b/base/src/test/statistical/test_fn_f.rs @@ -0,0 +1,75 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_f_dist_sanity() { + let mut model = new_empty_model(); + model._set("A1", "=F.DIST(15, 6, 4, TRUE)"); + model._set("A2", "=F.DIST(15, 6, 4, FALSE)"); + model._set("A3", "=F.DIST(15, 6, 4)"); + model._set("A4", "=F.DIST(15, 6, 4, TRUE, FALSE)"); + model.evaluate(); + assert_eq!(model._get_text("A1"), *"0.989741952"); + assert_eq!(model._get_text("A2"), *"0.001271447"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); +} + +#[test] +fn test_fn_f_dist_rt_sanity() { + let mut model = new_empty_model(); + + // Valid call + model._set("A1", "=F.DIST.RT(15, 6, 4)"); + // Too few args + model._set("A2", "=F.DIST.RT(15, 6)"); + // Too 
many args + model._set("A3", "=F.DIST.RT(15, 6, 4, 1)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.010258048"); + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); +} + +#[test] +fn test_fn_f_inv_sanity() { + let mut model = new_empty_model(); + + // Valid call: left-tail inverse + model._set("A1", "=F.INV(0.9897419523940, 6, 4)"); + + // Too many args + model._set("A2", "=F.INV(0.5, 6, 4, 2)"); + + // Too few args + model._set("A3", "=F.INV(0.5, 6)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"15"); + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); +} + +#[test] +fn test_fn_f_inv_rt_sanity() { + let mut model = new_empty_model(); + + // Valid call: right-tail inverse + model._set("A1", "=F.INV.RT(0.0102580476059808, 6, 4)"); + + // Too many args + model._set("A2", "=F.INV.RT(0.5, 6, 4, 2)"); + + // Too few args + model._set("A3", "=F.INV.RT(0.5, 6)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"15"); + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); +} diff --git a/base/src/test/statistical/test_fn_fisher.rs b/base/src/test/statistical/test_fn_fisher.rs new file mode 100644 index 0000000..d411b8d --- /dev/null +++ b/base/src/test/statistical/test_fn_fisher.rs @@ -0,0 +1,53 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; +#[test] +fn test_fn_fisher_smoke() { + let mut model = new_empty_model(); + + // Valid inputs + model._set("A1", "=FISHER(0.1)"); + model._set("A2", "=FISHER(-0.5)"); + model._set("A3", "=FISHER(0.8)"); + + // Domain errors: x <= -1 or x >= 1 -> #NUM! + model._set("A4", "=FISHER(1)"); + model._set("A5", "=FISHER(-1)"); + model._set("A6", "=FISHER(2)"); + + // Wrong number of arguments -> #ERROR!
+ model._set("A7", "=FISHER(0.1, 2)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.100335348"); + assert_eq!(model._get_text("A2"), *"-0.549306144"); + assert_eq!(model._get_text("A3"), *"1.098612289"); + + assert_eq!(model._get_text("A4"), *"#NUM!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + + assert_eq!(model._get_text("A7"), *"#ERROR!"); +} + +#[test] +fn test_fn_fisher_inv_smoke() { + let mut model = new_empty_model(); + + // Valid inputs + model._set("A1", "=FISHERINV(-1.5)"); + model._set("A2", "=FISHERINV(0.5)"); + model._set("A3", "=FISHERINV(2)"); + + // Wrong number of arguments -> #ERROR! + model._set("A4", "=FISHERINV(0.5, 1)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"-0.905148254"); + assert_eq!(model._get_text("A2"), *"0.462117157"); + assert_eq!(model._get_text("A3"), *"0.96402758"); + + assert_eq!(model._get_text("A4"), *"#ERROR!"); +} diff --git a/base/src/test/statistical/test_fn_hyp_geom_dist.rs b/base/src/test/statistical/test_fn_hyp_geom_dist.rs new file mode 100644 index 0000000..1608a41 --- /dev/null +++ b/base/src/test/statistical/test_fn_hyp_geom_dist.rs @@ -0,0 +1,42 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_hyp_geom_dist_smoke() { + let mut model = new_empty_model(); + + // Valid: PDF (non-cumulative) + model._set("A1", "=HYPGEOM.DIST(1, 4, 12, 20, FALSE)"); + + // Valid: CDF (cumulative) + model._set("A2", "=HYPGEOM.DIST(1, 4, 12, 20, TRUE)"); + + // Wrong number of arguments -> #ERROR! + model._set("A3", "=HYPGEOM.DIST(1, 4, 12, 20)"); + model._set("A4", "=HYPGEOM.DIST(1, 4, 12, 20, TRUE, FALSE)"); + + // Domain errors: + // sample_s > number_sample -> #NUM! + model._set("A5", "=HYPGEOM.DIST(5, 4, 12, 20, TRUE)"); + + // population_s > number_pop -> #NUM! + model._set("A6", "=HYPGEOM.DIST(1, 4, 25, 20, TRUE)"); + + // number_sample > number_pop -> #NUM! 
+ model._set("A7", "=HYPGEOM.DIST(1, 25, 12, 20, TRUE)"); + + model.evaluate(); + + // PDF: P(X = 1) + assert_eq!(model._get_text("A1"), *"0.13869969"); + + // CDF: P(X <= 1) + assert_eq!(model._get_text("A2"), *"0.153147575"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A7"), *"#NUM!"); +} diff --git a/base/src/test/statistical/test_fn_log_norm.rs b/base/src/test/statistical/test_fn_log_norm.rs new file mode 100644 index 0000000..43ab704 --- /dev/null +++ b/base/src/test/statistical/test_fn_log_norm.rs @@ -0,0 +1,61 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_log_norm_dist_smoke() { + let mut model = new_empty_model(); + + // Valid: CDF and PDF + model._set("A1", "=LOGNORM.DIST(4, 3.5, 1.2, TRUE)"); + model._set("A2", "=LOGNORM.DIST(4, 3.5, 1.2, FALSE)"); + + // Wrong number of arguments -> #ERROR! + model._set("A3", "=LOGNORM.DIST(4, 3.5, 1.2)"); + model._set("A4", "=LOGNORM.DIST(4, 3.5, 1.2, TRUE, FALSE)"); + + // Domain errors: + // x <= 0 -> #NUM! + model._set("A5", "=LOGNORM.DIST(0, 3.5, 1.2, TRUE)"); + // std_dev <= 0 -> #NUM! + model._set("A6", "=LOGNORM.DIST(4, 3.5, 0, TRUE)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.039083556"); + assert_eq!(model._get_text("A2"), *"0.017617597"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} + +#[test] +fn test_fn_log_norm_inv_smoke() { + let mut model = new_empty_model(); + + // Valid call + model._set("A1", "=LOGNORM.INV(0.5, 3.5, 1.2)"); + + // Wrong number of arguments -> #ERROR! 
+ model._set("A2", "=LOGNORM.INV(0.5, 3.5)"); + model._set("A3", "=LOGNORM.INV(0.5, 3.5, 1.2, 0)"); + + // Domain errors: + // probability <= 0 or >= 1 -> #NUM! + model._set("A4", "=LOGNORM.INV(0, 3.5, 1.2)"); + model._set("A5", "=LOGNORM.INV(1, 3.5, 1.2)"); + // std_dev <= 0 -> #NUM! + model._set("A6", "=LOGNORM.INV(0.5, 3.5, 0)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"33.115451959"); + + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#NUM!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} diff --git a/base/src/test/statistical/test_fn_norm_dist.rs b/base/src/test/statistical/test_fn_norm_dist.rs new file mode 100644 index 0000000..0b5d5f3 --- /dev/null +++ b/base/src/test/statistical/test_fn_norm_dist.rs @@ -0,0 +1,119 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_norm_dist_smoke() { + let mut model = new_empty_model(); + + // Valid: standard normal as a special case + model._set("A1", "=NORM.DIST(1, 0, 1, TRUE)"); + model._set("A2", "=NORM.DIST(1, 0, 1, FALSE)"); + + // Wrong number of arguments -> #ERROR! + model._set("A3", "=NORM.DIST(1, 0, 1)"); + model._set("A4", "=NORM.DIST(1, 0, 1, TRUE, FALSE)"); + + // Domain errors: standard_dev <= 0 -> #NUM! 
+ model._set("A5", "=NORM.DIST(1, 0, 0, TRUE)"); + model._set("A6", "=NORM.DIST(1, 0, -1, TRUE)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.841344746"); + assert_eq!(model._get_text("A2"), *"0.241970725"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} + +#[test] +fn test_fn_norm_inv_smoke() { + let mut model = new_empty_model(); + + // Valid: median of standard normal + model._set("A1", "=NORM.INV(0.5, 0, 1)"); + + // Wrong number of arguments -> #ERROR! + model._set("A2", "=NORM.INV(0.5, 0)"); + model._set("A3", "=NORM.INV(0.5, 0, 1, 0)"); + + // Domain errors: + // probability <= 0 or >= 1 -> #NUM! + model._set("A4", "=NORM.INV(0, 0, 1)"); + model._set("A5", "=NORM.INV(1, 0, 1)"); + // standard_dev <= 0 -> #NUM! + model._set("A6", "=NORM.INV(0.5, 0, 0)"); + + model._set("A7", "=NORM.INV(0.7, 0.2, 1)"); + model._set("A8", "=NORM.INV(0.7, 0.2, 5)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0"); + + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#NUM!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A7"), *"0.724400513"); + assert_eq!(model._get_text("A8"), *"2.822002564"); +} + +#[test] +fn test_fn_norm_s_dist_smoke() { + let mut model = new_empty_model(); + + // Valid: CDF and PDF at z = 0 + model._set("A1", "=NORM.S.DIST(0, TRUE)"); + model._set("A2", "=NORM.S.DIST(0, FALSE)"); + + // Wrong number of arguments -> #ERROR! 
+ model._set("A3", "=NORM.S.DIST(0)"); + model._set("A4", "=NORM.S.DIST(0, TRUE, FALSE)"); + + model._set("A5", "=NORM.S.DIST(0.2, FALSE)"); + model._set("A6", "=NORM.S.DIST(2.2, TRUE)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.5"); + assert_eq!(model._get_text("A2"), *"0.39894228"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + + assert_eq!(model._get_text("A5"), *"0.391042694"); + assert_eq!(model._get_text("A6"), *"0.986096552"); +} + +#[test] +fn test_fn_norm_s_inv_smoke() { + let mut model = new_empty_model(); + + // Valid: symmetric points + model._set("A1", "=NORM.S.INV(0.5)"); + model._set("A2", "=NORM.S.INV(0.841344746)"); + + // Wrong number of arguments -> #ERROR! + model._set("A3", "=NORM.S.INV()"); + model._set("A4", "=NORM.S.INV(0.5, 0)"); + + // Domain errors: probability <= 0 or >= 1 -> #NUM! + model._set("A5", "=NORM.S.INV(0)"); + model._set("A6", "=NORM.S.INV(1)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0"); + // Approximately 1 + assert_eq!(model._get_text("A2"), *"1"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} diff --git a/base/src/test/statistical/test_fn_pearson.rs b/base/src/test/statistical/test_fn_pearson.rs new file mode 100644 index 0000000..8b56641 --- /dev/null +++ b/base/src/test/statistical/test_fn_pearson.rs @@ -0,0 +1,31 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_chisq_test_smoke() { + let mut model = new_empty_model(); + model._set("A2", "48"); + model._set("A3", "32"); + model._set("A4", "12"); + model._set("A5", "1"); + model._set("A6", "'13"); + model._set("A7", "TRUE"); + model._set("A8", "1"); + model._set("A9", "13"); + model._set("A10", "15"); + + model._set("B2", "55"); + model._set("B3", "34"); + model._set("B4",
"13"); + model._set("B5", "blah"); + model._set("B6", "13"); + model._set("B7", "1"); + model._set("B8", "TRUE"); + model._set("B9", "'14"); + model._set("B10", "16"); + + model._set("C1", "=PEARSON(A2:A10, B2:B10)"); + model.evaluate(); + assert_eq!(model._get_text("C1"), *"0.998381439"); +} diff --git a/base/src/test/statistical/test_fn_phi.rs b/base/src/test/statistical/test_fn_phi.rs new file mode 100644 index 0000000..1e466f8 --- /dev/null +++ b/base/src/test/statistical/test_fn_phi.rs @@ -0,0 +1,26 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_phi_smoke() { + let mut model = new_empty_model(); + + model._set("A1", "=PHI(0)"); + model._set("A2", "=PHI(1)"); + model._set("A3", "=PHI(-1)"); + + // Wrong number of arguments -> #ERROR! + model._set("A4", "=PHI()"); + model._set("A5", "=PHI(0, 1)"); + + model.evaluate(); + + // Standard values + assert_eq!(model._get_text("A1"), *"0.39894228"); + assert_eq!(model._get_text("A2"), *"0.241970725"); + assert_eq!(model._get_text("A3"), *"0.241970725"); + + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#ERROR!"); +} diff --git a/base/src/test/statistical/test_fn_poisson.rs b/base/src/test/statistical/test_fn_poisson.rs new file mode 100644 index 0000000..5fb455d --- /dev/null +++ b/base/src/test/statistical/test_fn_poisson.rs @@ -0,0 +1,41 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_poisson_dist_smoke() { + let mut model = new_empty_model(); + + // λ = 2, x = 3 + // P(X = 3) ≈ 0.180447044 + // P(X <= 3) ≈ 0.857123460 + model._set("A1", "=POISSON.DIST(3, 2, FALSE)"); + model._set("A2", "=POISSON.DIST(3, 2, TRUE)"); + + // Wrong arg count + model._set("A3", "=POISSON.DIST(3, 2)"); + model._set("A4", "=POISSON.DIST(3, 2, TRUE, FALSE)"); + + // Domain errors + model._set("A5", "=POISSON.DIST(-1, 2, TRUE)"); // x < 0 + model._set("A6", "=POISSON.DIST(3, -2, TRUE)"); // mean <
0 + + // λ = 0 special cases + model._set("A7", "=POISSON.DIST(0, 0, FALSE)"); // 1 + model._set("A8", "=POISSON.DIST(1, 0, FALSE)"); // 0 + model._set("A9", "=POISSON.DIST(5, 0, TRUE)"); // 1 + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.180447044"); + assert_eq!(model._get_text("A2"), *"0.85712346"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + + assert_eq!(model._get_text("A7"), *"1"); + assert_eq!(model._get_text("A8"), *"0"); + assert_eq!(model._get_text("A9"), *"1"); +} diff --git a/base/src/test/statistical/test_fn_stdev.rs b/base/src/test/statistical/test_fn_stdev.rs new file mode 100644 index 0000000..a23d031 --- /dev/null +++ b/base/src/test/statistical/test_fn_stdev.rs @@ -0,0 +1,46 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn smoke_test() { + let mut model = new_empty_model(); + model._set("A1", "=STDEV.P(10, 12, 23, 23, 16, 23, 21)"); + model._set("A2", "=STDEV.S(10, 12, 23, 23, 16, 23, 21)"); + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"5.174505793"); + + assert_eq!(model._get_text("A2"), *"5.589105048"); +} + +#[test] +fn numbers() { + let mut model = new_empty_model(); + + model._set("A2", "24"); + model._set("A3", "25"); + model._set("A4", "27"); + model._set("A5", "23"); + model._set("A6", "45"); + model._set("A7", "23.5"); + model._set("A8", "34"); + model._set("A9", "23"); + model._set("A10", "23"); + model._set("A11", "TRUE"); + model._set("A12", "'23"); + model._set("A13", "Text"); + model._set("A14", "FALSE"); + model._set("A15", "45"); + + model._set("B1", "=STDEV.P(A2:A15)"); + model._set("B2", "=STDEV.S(A2:A15)"); + model._set("B3", "=STDEVA(A2:A15)"); + model._set("B4", "=STDEVPA(A2:A15)"); + model.evaluate(); + + assert_eq!(model._get_text("B1"), *"8.483071378"); + assert_eq!(model._get_text("B2"), 
*"8.941942369"); + assert_eq!(model._get_text("B3"), *"15.499955689"); + assert_eq!(model._get_text("B4"), *"14.936131032"); +} diff --git a/base/src/test/statistical/test_fn_t_dist.rs b/base/src/test/statistical/test_fn_t_dist.rs new file mode 100644 index 0000000..eef8c2f --- /dev/null +++ b/base/src/test/statistical/test_fn_t_dist.rs @@ -0,0 +1,160 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_t_dist_smoke() { + let mut model = new_empty_model(); + + // Valid: cumulative (left-tail CDF) + model._set("A1", "=T.DIST(2, 10, TRUE)"); + // Valid: probability density function (PDF) + model._set("B1", "=T.DIST(2, 10, FALSE)"); + + // Wrong number of arguments + model._set("A2", "=T.DIST(2, 10)"); + model._set("A3", "=T.DIST(2, 10, TRUE, FALSE)"); + + // Domain error: df < 1 -> #NUM! + model._set("A4", "=T.DIST(2, 0, TRUE)"); + model._set("A5", "=T.DIST(2, -1, TRUE)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.963305983"); + assert_eq!(model._get_text("B1"), *"0.061145766"); + + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#NUM!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); +} + +#[test] +fn test_fn_t_dist_rt_smoke() { + let mut model = new_empty_model(); + + // Valid: right tail probability + model._set("A1", "=T.DIST.RT(2, 10)"); + + // Wrong number of arguments + model._set("A2", "=T.DIST.RT(2)"); + model._set("A3", "=T.DIST.RT(2, 10, TRUE)"); + + // Domain error: df < 1 + model._set("A4", "=T.DIST.RT(2, 0)"); + model._set("A5", "=T.DIST.RT(2, -1)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.036694017"); + + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#NUM!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); +} + +#[test] +fn test_fn_t_dist_2t_smoke() { + let mut model = new_empty_model(); + 
+ // Valid: two-tailed probability + model._set("A1", "=T.DIST.2T(2, 10)"); + + // In the limit case of x = 0, the two-tailed probability is 1.0 + model._set("A4", "=T.DIST.2T(0, 10)"); + + // Wrong number of arguments + model._set("A2", "=T.DIST.2T(2)"); + model._set("A3", "=T.DIST.2T(2, 10, TRUE)"); + + // Domain errors: + // x < 0 -> #NUM! + model._set("A5", "=T.DIST.2T(-0.001, 10)"); + // df < 1 -> #NUM! + model._set("A6", "=T.DIST.2T(2, 0)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"0.073388035"); + assert_eq!(model._get_text("A4"), *"1"); + + assert_eq!(model._get_text("A2"), *"#ERROR!"); + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); +} + +#[test] +fn test_fn_t_inv_smoke() { + let mut model = new_empty_model(); + + // Valid: upper and lower tail + model._set("A1", "=T.INV(0.95, 10)"); + model._set("A2", "=T.INV(0.05, 10)"); + // limit case: + model._set("B2", "=T.INV(0.95, 1)"); + + // Wrong number of arguments + model._set("A3", "=T.INV(0.95)"); + model._set("A4", "=T.INV(0.95, 10, 1)"); + + // Domain errors: + // p <= 0 or >= 1 + model._set("A5", "=T.INV(0, 10)"); + model._set("A6", "=T.INV(1, 10)"); + // df < 1 + model._set("A7", "=T.INV(0.95, 0)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"1.812461123"); + assert_eq!(model._get_text("A2"), *"-1.812461123"); + assert_eq!(model._get_text("B2"), *"6.313751515"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A7"), *"#NUM!"); +} + +#[test] +fn test_fn_t_inv_2t_smoke() { + let mut model = new_empty_model(); + + // Valid: two-tailed critical values + model._set("A1", "=T.INV.2T(0.1, 10)"); + model._set("A2", "=T.INV.2T(0.05, 10)"); + + // p = 1 should give t = 0 (both tails outside are 1.0, so cut at 
the mean) + model._set("A3", "=T.INV.2T(1, 10)"); + + model._set("A7", "=T.INV.2T(1.5, 10)"); + + // Wrong number of arguments + model._set("A4", "=T.INV.2T(0.1)"); + model._set("A5", "=T.INV.2T(0.1, 10, 1)"); + + // Domain errors: + // p <= 0 or p > 1 + model._set("A6", "=T.INV.2T(0, 10)"); + // df < 1 + model._set("A8", "=T.INV.2T(0.1, 0)"); + + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"1.812461123"); + assert_eq!(model._get_text("A2"), *"2.228138852"); + assert_eq!(model._get_text("A3"), *"0"); + + // NB: Excel returns -0.699812061 for T.INV.2T(1.5, 10) + // which seems inconsistent with its documented behavior + assert_eq!(model._get_text("A7"), *"#NUM!"); + + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#ERROR!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A8"), *"#NUM!"); +} diff --git a/base/src/test/statistical/test_fn_t_test.rs b/base/src/test/statistical/test_fn_t_test.rs new file mode 100644 index 0000000..dee103d --- /dev/null +++ b/base/src/test/statistical/test_fn_t_test.rs @@ -0,0 +1,41 @@ +#![allow(clippy::unwrap_used)] +use crate::test::util::new_empty_model; +#[test] +fn test_fn_t_test_smoke() { + let mut model = new_empty_model(); + model._set("A2", "3"); + model._set("A3", "4"); + model._set("A4", "5"); + model._set("A5", "6"); + model._set("A6", "10"); + model._set("A7", "3"); + model._set("A8", "2"); + model._set("A9", "4"); + model._set("A10", "7"); + + model._set("B2", "6"); + model._set("B3", "19"); + model._set("B4", "3"); + model._set("B5", "2"); + model._set("B6", "13"); + model._set("B7", "4"); + model._set("B8", "5"); + model._set("B9", "17"); + model._set("B10", "3"); + + model._set("C1", "=T.TEST(A2:A10, B2:B10, 1, 1)"); + model._set("C2", "=T.TEST(A2:A10, B2:B10, 1, 2)"); + model._set("C3", "=T.TEST(A2:A10, B2:B10, 1, 3)"); + model._set("C4", "=T.TEST(A2:A10, B2:B10, 2, 1)"); + model._set("C5", "=T.TEST(A2:A10, B2:B10, 2, 2)"); + 
model._set("C6", "=T.TEST(A2:A10, B2:B10, 2, 3)"); + + model.evaluate(); + + assert_eq!(model._get_text("C1"), *"0.103836888"); + assert_eq!(model._get_text("C2"), *"0.100244599"); + assert_eq!(model._get_text("C3"), *"0.105360319"); + assert_eq!(model._get_text("C4"), *"0.207673777"); + assert_eq!(model._get_text("C5"), *"0.200489197"); + assert_eq!(model._get_text("C6"), *"0.210720639"); +} diff --git a/base/src/test/statistical/test_fn_var.rs b/base/src/test/statistical/test_fn_var.rs new file mode 100644 index 0000000..7daeac3 --- /dev/null +++ b/base/src/test/statistical/test_fn_var.rs @@ -0,0 +1,46 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn smoke_test() { + let mut model = new_empty_model(); + model._set("A1", "=STDEV.P(10, 12, 23, 23, 16, 23, 21)"); + model._set("A2", "=STDEV.S(10, 12, 23, 23, 16, 23, 21)"); + model.evaluate(); + + assert_eq!(model._get_text("A1"), *"5.174505793"); + + assert_eq!(model._get_text("A2"), *"5.589105048"); +} + +#[test] +fn numbers() { + let mut model = new_empty_model(); + + model._set("A2", "24"); + model._set("A3", "25"); + model._set("A4", "27"); + model._set("A5", "23"); + model._set("A6", "45"); + model._set("A7", "23.5"); + model._set("A8", "34"); + model._set("A9", "23"); + model._set("A10", "23"); + model._set("A11", "TRUE"); + model._set("A12", "'23"); + model._set("A13", "Text"); + model._set("A14", "FALSE"); + model._set("A15", "45"); + + model._set("B1", "=VAR.P(A2:A15)"); + model._set("B2", "=VAR.S(A2:A15)"); + model._set("B3", "=VARA(A2:A15)"); + model._set("B4", "=VARPA(A2:A15)"); + model.evaluate(); + + assert_eq!(model._get_text("B1"), *"71.9625"); + assert_eq!(model._get_text("B2"), *"79.958333333"); + assert_eq!(model._get_text("B3"), *"240.248626374"); + assert_eq!(model._get_text("B4"), *"223.088010204"); +} diff --git a/base/src/test/statistical/test_fn_weibull.rs b/base/src/test/statistical/test_fn_weibull.rs new file mode 100644 index 0000000..9ca0b66 --- 
/dev/null +++ b/base/src/test/statistical/test_fn_weibull.rs @@ -0,0 +1,41 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_weibull_dist_smoke() { + let mut model = new_empty_model(); + + // Valid: CDF and PDF for x = 1, alpha = 2, beta = 1 + model._set("A1", "=WEIBULL.DIST(1, 2, 1, TRUE)"); + model._set("A2", "=WEIBULL.DIST(1, 2, 1, FALSE)"); + + // Wrong number of arguments -> #ERROR! + model._set("A3", "=WEIBULL.DIST(1, 2, 1)"); + model._set("A4", "=WEIBULL.DIST(1, 2, 1, TRUE, FALSE)"); + + // Domain errors: + // x < 0 -> #NUM! + model._set("A5", "=WEIBULL.DIST(-1, 2, 1, TRUE)"); + // alpha <= 0 -> #NUM! + model._set("A6", "=WEIBULL.DIST(1, 0, 1, TRUE)"); + model._set("A7", "=WEIBULL.DIST(1, -1, 1, TRUE)"); + // beta <= 0 -> #NUM! + model._set("A8", "=WEIBULL.DIST(1, 2, 0, TRUE)"); + model._set("A9", "=WEIBULL.DIST(1, 2, -1, TRUE)"); + + model.evaluate(); + + // 1 - e^-1 + assert_eq!(model._get_text("A1"), *"0.632120559"); + // 2 * e^-1 + assert_eq!(model._get_text("A2"), *"0.735758882"); + + assert_eq!(model._get_text("A3"), *"#ERROR!"); + assert_eq!(model._get_text("A4"), *"#ERROR!"); + assert_eq!(model._get_text("A5"), *"#NUM!"); + assert_eq!(model._get_text("A6"), *"#NUM!"); + assert_eq!(model._get_text("A7"), *"#NUM!"); + assert_eq!(model._get_text("A8"), *"#NUM!"); + assert_eq!(model._get_text("A9"), *"#NUM!"); +} diff --git a/base/src/test/statistical/test_fn_z_test.rs b/base/src/test/statistical/test_fn_z_test.rs new file mode 100644 index 0000000..864f544 --- /dev/null +++ b/base/src/test/statistical/test_fn_z_test.rs @@ -0,0 +1,36 @@ +#![allow(clippy::unwrap_used)] + +use crate::test::util::new_empty_model; + +#[test] +fn test_fn_z_test_smoke() { + let mut model = new_empty_model(); + model._set("A2", "3"); + model._set("A3", "6"); + model._set("A4", "7"); + model._set("A5", "8"); + model._set("A6", "6"); + model._set("A7", "5"); + model._set("A8", "4"); + model._set("A9", "2"); + model._set("A10", 
"1"); + model._set("A11", "9"); + + model._set("G1", "=Z.TEST(A2:A11, 4)"); + model._set("G2", "=Z.TEST(A2:A11, 6)"); + model.evaluate(); + + assert_eq!(model._get_text("G1"), *"0.090574197"); + assert_eq!(model._get_text("G2"), *"0.863043389"); +} + +#[test] +fn arrays() { + let mut model = new_empty_model(); + model._set("D1", "=Z.TEST({5,2,3,4}, 4, 123)"); + model._set("D2", "=Z.TEST({5,2,3,4}, 4)"); + model.evaluate(); + + assert_eq!(model._get_text("D1"), *"0.503243397"); + assert_eq!(model._get_text("D2"), *"0.780710987"); +} diff --git a/base/src/test/test_combin_combina b/base/src/test/test_combin_combina.rs similarity index 89% rename from base/src/test/test_combin_combina rename to base/src/test/test_combin_combina.rs index 3876299..9cae2ba 100644 --- a/base/src/test/test_combin_combina +++ b/base/src/test/test_combin_combina.rs @@ -11,8 +11,8 @@ fn arguments() { model._set("A4", "=COMBINA()"); model._set("A5", "=COMBIN(2)"); model._set("A6", "=COMBINA(2)"); - model._set("A5", "=COMBIN(1, 2, 3)"); - model._set("A6", "=COMBINA(1, 2, 3)"); + model._set("A7", "=COMBIN(1, 2, 3)"); + model._set("A8", "=COMBINA(1, 2, 3)"); model.evaluate(); @@ -24,4 +24,4 @@ fn arguments() { assert_eq!(model._get_text("A6"), *"#ERROR!"); assert_eq!(model._get_text("A7"), *"#ERROR!"); assert_eq!(model._get_text("A8"), *"#ERROR!"); -} \ No newline at end of file +} diff --git a/base/src/test/test_even_odd b/base/src/test/test_even_odd.rs similarity index 100% rename from base/src/test/test_even_odd rename to base/src/test/test_even_odd.rs diff --git a/base/src/test/test_fn_datevalue_timevalue.rs b/base/src/test/test_fn_datevalue_timevalue.rs index 039fc8e..795bca4 100644 --- a/base/src/test/test_fn_datevalue_timevalue.rs +++ b/base/src/test/test_fn_datevalue_timevalue.rs @@ -7,8 +7,8 @@ fn datevalue_timevalue_arguments() { let mut model = new_empty_model(); model._set("A1", "=DATEVALUE()"); model._set("A2", "=TIMEVALUE()"); - model._set("A3", "=DATEVALUE("2000-01-01")") - 
model._set("A4", "=TIMEVALUE("12:00:00")") + model._set("A3", "=DATEVALUE(\"2000-01-01\")"); + model._set("A4", "=TIMEVALUE(\"12:00:00\")"); model._set("A5", "=DATEVALUE(1,2)"); model._set("A6", "=TIMEVALUE(1,2)"); model.evaluate(); @@ -20,5 +20,3 @@ fn datevalue_timevalue_arguments() { assert_eq!(model._get_text("A5"), *"#ERROR!"); assert_eq!(model._get_text("A6"), *"#ERROR!"); } - - diff --git a/xlsx/tests/calc_tests/BETA_GAMMA.xlsx b/xlsx/tests/calc_tests/BETA_GAMMA.xlsx new file mode 100644 index 0000000..77b0dfb Binary files /dev/null and b/xlsx/tests/calc_tests/BETA_GAMMA.xlsx differ