use statrs::distribution::{ChiSquared, Continuous, ContinuousCDF}; use crate::expressions::parser::ArrayNode; use crate::expressions::types::CellReferenceIndex; use crate::{ calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model, }; // Helper to check if two shapes are the same or compatible 1D shapes pub(crate) fn is_same_shape_or_1d(rows1: i32, cols1: i32, rows2: i32, cols2: i32) -> bool { (rows1 == rows2 && cols1 == cols2) || (rows1 == 1 && cols2 == 1 && cols1 == rows2) || (rows2 == 1 && cols1 == 1 && cols2 == rows1) } impl Model { // CHISQ.DIST(x, deg_freedom, cumulative) pub(crate) fn fn_chisq_dist(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { if args.len() != 3 { return CalcResult::new_args_number_error(cell); } let x = match self.get_number_no_bools(&args[0], cell) { Ok(f) => f, Err(e) => return e, }; let df = match self.get_number_no_bools(&args[1], cell) { Ok(f) => f.trunc(), Err(e) => return e, }; let cumulative = match self.get_boolean(&args[2], cell) { Ok(b) => b, Err(e) => return e, }; if x < 0.0 { return CalcResult::new_error( Error::NUM, cell, "x must be >= 0 in CHISQ.DIST".to_string(), ); } if df < 1.0 { return CalcResult::new_error( Error::NUM, cell, "degrees of freedom must be >= 1 in CHISQ.DIST".to_string(), ); } let dist = match ChiSquared::new(df) { Ok(d) => d, Err(_) => { return CalcResult::new_error( Error::NUM, cell, "Invalid parameters for Chi-squared distribution".to_string(), ) } }; let result = if cumulative { dist.cdf(x) } else { dist.pdf(x) }; if result.is_nan() || result.is_infinite() { return CalcResult::new_error( Error::NUM, cell, "Invalid result for CHISQ.DIST".to_string(), ); } CalcResult::Number(result) } // CHISQ.DIST.RT(x, deg_freedom) pub(crate) fn fn_chisq_dist_rt( &mut self, args: &[Node], cell: CellReferenceIndex, ) -> CalcResult { if args.len() != 2 { return CalcResult::new_args_number_error(cell); } let x = match self.get_number_no_bools(&args[0], cell) { Ok(f) => f, Err(e) => return e, }; let df_raw = match self.get_number_no_bools(&args[1], cell) { Ok(f) => f, Err(e) => return e, }; let df = df_raw.trunc(); if x < 0.0 { return CalcResult::new_error( Error::NUM, cell, "x must be >= 0 in CHISQ.DIST.RT".to_string(), ); } if df < 1.0 { return CalcResult::new_error( Error::NUM, cell, "degrees of freedom must be >= 1 in CHISQ.DIST.RT".to_string(), ); } let dist = match ChiSquared::new(df) { Ok(d) => d, Err(_) => { return CalcResult::new_error( Error::NUM, cell, "Invalid parameters for Chi-squared distribution".to_string(), ) } }; // Right-tail probability: P(X > x). // Use sf(x) directly for better numerical properties than 1 - cdf(x). let result = dist.sf(x); if result.is_nan() || result.is_infinite() || result < 0.0 { return CalcResult::new_error( Error::NUM, cell, "Invalid result for CHISQ.DIST.RT".to_string(), ); } CalcResult::Number(result) } // CHISQ.INV(probability, deg_freedom) pub(crate) fn fn_chisq_inv(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { if args.len() != 2 { return CalcResult::new_args_number_error(cell); } let p = match self.get_number_no_bools(&args[0], cell) { Ok(f) => f, Err(e) => return e, }; let df = match self.get_number_no_bools(&args[1], cell) { Ok(f) => f.trunc(), Err(e) => return e, }; // if probability < 0 or > 1 → #NUM! if !(0.0..=1.0).contains(&p) { return CalcResult::new_error( Error::NUM, cell, "probability must be in [0,1] in CHISQ.INV".to_string(), ); } if df < 1.0 { return CalcResult::new_error( Error::NUM, cell, "degrees of freedom must be >= 1 in CHISQ.INV".to_string(), ); } let dist = match ChiSquared::new(df) { Ok(d) => d, Err(_) => { return CalcResult::new_error( Error::NUM, cell, "Invalid parameters for Chi-squared distribution".to_string(), ) } }; let x = dist.inverse_cdf(p); if x.is_nan() || x.is_infinite() || x < 0.0 { return CalcResult::new_error( Error::NUM, cell, "Invalid result for CHISQ.INV".to_string(), ); } CalcResult::Number(x) } // CHISQ.INV.RT(probability, deg_freedom) pub(crate) fn fn_chisq_inv_rt( &mut self, args: &[Node], cell: CellReferenceIndex, ) -> CalcResult { if args.len() != 2 { return CalcResult::new_args_number_error(cell); } let p = match self.get_number_no_bools(&args[0], cell) { Ok(f) => f, Err(e) => return e, }; let df_raw = match self.get_number_no_bools(&args[1], cell) { Ok(f) => f, Err(e) => return e, }; let df = df_raw.trunc(); // if probability < 0 or > 1 → #NUM! if !(0.0..=1.0).contains(&p) { return CalcResult::new_error( Error::NUM, cell, "probability must be in [0,1] in CHISQ.INV.RT".to_string(), ); } if df < 1.0 { return CalcResult::new_error( Error::NUM, cell, "degrees of freedom must be >= 1 in CHISQ.INV.RT".to_string(), ); } let dist = match ChiSquared::new(df) { Ok(d) => d, Err(_) => { return CalcResult::new_error( Error::NUM, cell, "Invalid parameters for Chi-squared distribution".to_string(), ) } }; // Right-tail inverse: p = P(X > x) = SF(x) = 1 - CDF(x) // So x = inverse_cdf(1 - p). let x = dist.inverse_cdf(1.0 - p); if x.is_nan() || x.is_infinite() || x < 0.0 { return CalcResult::new_error( Error::NUM, cell, "Invalid result for CHISQ.INV.RT".to_string(), ); } CalcResult::Number(x) } pub(crate) fn values_from_range( &mut self, left: CellReferenceIndex, right: CellReferenceIndex, ) -> Result>, CalcResult> { let mut values = Vec::new(); for row_offset in 0..=(right.row - left.row) { for col_offset in 0..=(right.column - left.column) { let cell_ref = CellReferenceIndex { sheet: left.sheet, row: left.row + row_offset, column: left.column + col_offset, }; let cell_value = self.evaluate_cell(cell_ref); match cell_value { CalcResult::Number(v) => { values.push(Some(v)); } error @ CalcResult::Error { .. } => return Err(error), _ => { values.push(None); } } } } Ok(values) } pub(crate) fn values_from_array( &mut self, array: Vec>, ) -> Result>, Error> { let mut values = Vec::new(); for row in array { for item in row { match item { ArrayNode::Number(f) => { values.push(Some(f)); } ArrayNode::Error(error) => { return Err(error); } _ => { values.push(None); } } } } Ok(values) } // CHISQ.TEST(actual_range, expected_range) pub(crate) fn fn_chisq_test(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult { if args.len() != 2 { return CalcResult::new_args_number_error(cell); } let actual_range = self.evaluate_node_in_context(&args[0], cell); let expected_range = self.evaluate_node_in_context(&args[1], cell); let (width, height, values_left, values_right) = match (actual_range, expected_range) { ( CalcResult::Range { left: l1, right: r1, }, CalcResult::Range { left: l2, right: r2, }, ) => { if l1.sheet != l2.sheet { return CalcResult::new_error( Error::VALUE, cell, "Ranges are in different sheets".to_string(), ); } let rows1 = r1.row - l1.row + 1; let cols1 = r1.column - l1.column + 1; let rows2 = r2.row - l2.row + 1; let cols2 = r2.column - l2.column + 1; if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { return CalcResult::new_error( Error::VALUE, cell, "Ranges must be of the same shape".to_string(), ); } let values_left = match self.values_from_range(l1, r1) { Err(error) => { return error; } Ok(v) => v, }; let values_right = match self.values_from_range(l2, r2) { Err(error) => { return error; } Ok(v) => v, }; (rows1, cols1, values_left, values_right) } ( CalcResult::Array(left), CalcResult::Range { left: l2, right: r2, }, ) => { let rows2 = r2.row - l2.row + 1; let cols2 = r2.column - l2.column + 1; let rows1 = left.len() as i32; let cols1 = if rows1 > 0 { left[0].len() as i32 } else { 0 }; if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { return CalcResult::new_error( Error::VALUE, cell, "Array and range must be of the same shape".to_string(), ); } let values_left = match self.values_from_array(left) { Err(error) => { return CalcResult::new_error( Error::VALUE, cell, format!("Error in first array: {:?}", error), ); } Ok(v) => v, }; let values_right = match self.values_from_range(l2, r2) { Err(error) => { return error; } Ok(v) => v, }; (rows2, cols2, values_left, values_right) } ( CalcResult::Range { left: l1, right: r1, }, CalcResult::Array(right), ) => { let rows1 = r1.row - l1.row + 1; let cols1 = r1.column - l1.column + 1; let rows2 = right.len() as i32; let cols2 = if rows2 > 0 { right[0].len() as i32 } else { 0 }; if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { return CalcResult::new_error( Error::VALUE, cell, "Range and array must be of the same shape".to_string(), ); } let values_left = match self.values_from_range(l1, r1) { Err(error) => { return error; } Ok(v) => v, }; let values_right = match self.values_from_array(right) { Err(error) => { return CalcResult::new_error( Error::VALUE, cell, format!("Error in second array: {:?}", error), ); } Ok(v) => v, }; (rows1, cols1, values_left, values_right) } (CalcResult::Array(left), CalcResult::Array(right)) => { let rows1 = left.len() as i32; let rows2 = right.len() as i32; let cols1 = if rows1 > 0 { left[0].len() as i32 } else { 0 }; let cols2 = if rows2 > 0 { right[0].len() as i32 } else { 0 }; if !is_same_shape_or_1d(rows1, cols1, rows2, cols2) { return CalcResult::new_error( Error::VALUE, cell, "Arrays must be of the same shape".to_string(), ); } let values_left = match self.values_from_array(left) { Err(error) => { return CalcResult::new_error( Error::VALUE, cell, format!("Error in first array: {:?}", error), ); } Ok(v) => v, }; let values_right = match self.values_from_array(right) { Err(error) => { return CalcResult::new_error( Error::VALUE, cell, format!("Error in second array: {:?}", error), ); } Ok(v) => v, }; (rows1, cols1, values_left, values_right) } _ => { return CalcResult::new_error( Error::VALUE, cell, "Both arguments must be ranges or arrays".to_string(), ); } }; let mut values = Vec::with_capacity(values_left.len()); // Now we have: // - values: flattened (observed, expected) // - width, height: shape for i in 0..values_left.len() { match (values_left[i], values_right[i]) { (Some(v1), Some(v2)) => { values.push((v1, v2)); } _ => { values.push((1.0, 1.0)); } } } if width == 0 || height == 0 || values.len() < 2 { return CalcResult::new_error( Error::NUM, cell, "CHISQ.TEST requires at least two data points".to_string(), ); } let mut chi2 = 0.0; for (obs, exp) in &values { if *obs < 0.0 || *exp < 0.0 { return CalcResult::new_error( Error::NUM, cell, "Negative value in CHISQ.TEST data".to_string(), ); } if *exp == 0.0 { return CalcResult::new_error( Error::DIV, cell, "Zero expected value in CHISQ.TEST".to_string(), ); } let diff = obs - exp; chi2 += (diff * diff) / exp; } if chi2 < 0.0 && chi2 > -1e-12 { chi2 = 0.0; } let total = width * height; if total <= 1 { return CalcResult::new_error( Error::NUM, cell, "CHISQ.TEST degrees of freedom is zero".to_string(), ); } let df = if width > 1 && height > 1 { (width - 1) * (height - 1) } else { total - 1 }; let dist = match ChiSquared::new(df as f64) { Ok(d) => d, Err(_) => { return CalcResult::new_error( Error::NUM, cell, "Invalid degrees of freedom in CHISQ.TEST".to_string(), ); } }; let mut p = 1.0 - dist.cdf(chi2); // clamp tiny fp noise if p < 0.0 && p > -1e-15 { p = 0.0; } if p > 1.0 && p < 1.0 + 1e-15 { p = 1.0; } CalcResult::Number(p) } }