diff --git a/base/src/expressions/parser/static_analysis.rs b/base/src/expressions/parser/static_analysis.rs
index eb24277..e3e5ef1 100644
--- a/base/src/expressions/parser/static_analysis.rs
+++ b/base/src/expressions/parser/static_analysis.rs
@@ -990,6 +990,11 @@ fn get_function_args_signature(kind: &Function, arg_count: usize) -> Vec<Signatu
         Function::Sumx2my2 => vec![Signature::Vector; 2],
         Function::Sumx2py2 => vec![Signature::Vector; 2],
         Function::Sumxmy2 => vec![Signature::Vector; 2],
+        Function::Correl => vec![Signature::Vector; 2],
+        Function::Rsq => vec![Signature::Vector; 2],
+        Function::Intercept => vec![Signature::Vector; 2],
+        Function::Slope => vec![Signature::Vector; 2],
+        Function::Steyx => vec![Signature::Vector; 2],
     }
 }
 
@@ -1324,5 +1329,10 @@ fn static_analysis_on_function(kind: &Function, args: &[Node]) -> StaticResult {
         Function::Sumx2my2 => StaticResult::Scalar,
         Function::Sumx2py2 => StaticResult::Scalar,
         Function::Sumxmy2 => StaticResult::Scalar,
+        Function::Correl => StaticResult::Scalar,
+        Function::Rsq => StaticResult::Scalar,
+        Function::Intercept => StaticResult::Scalar,
+        Function::Slope => StaticResult::Scalar,
+        Function::Steyx => StaticResult::Scalar,
     }
 }
diff --git a/base/src/functions/mod.rs b/base/src/functions/mod.rs
index a522330..a66f7d9 100644
--- a/base/src/functions/mod.rs
+++ b/base/src/functions/mod.rs
@@ -421,10 +421,16 @@ pub enum Function {
     Dvar,
     Dvarp,
     Dstdevp,
+
+    Correl,
+    Rsq,
+    Intercept,
+    Slope,
+    Steyx,
 }
 
 impl Function {
-    pub fn into_iter() -> IntoIter {
+    pub fn into_iter() -> IntoIter {
         [
             Function::And,
             Function::False,
@@ -754,6 +760,11 @@ impl Function {
             Function::VarA,
             Function::WeibullDist,
             Function::ZTest,
+            Function::Correl,
+            Function::Rsq,
+            Function::Intercept,
+            Function::Slope,
+            Function::Steyx,
         ]
         .into_iter()
     }
@@ -1234,6 +1245,11 @@ impl Function {
             "SUMX2MY2" => Some(Function::Sumx2my2),
             "SUMX2PY2" => Some(Function::Sumx2py2),
             "SUMXMY2" => Some(Function::Sumxmy2),
+            "CORREL" => Some(Function::Correl),
+            "RSQ" => Some(Function::Rsq),
+            "INTERCEPT" => Some(Function::Intercept),
+            "SLOPE" => Some(Function::Slope),
+            "STEYX" => Some(Function::Steyx),
 
             _ => None,
         }
@@ -1573,6 +1589,11 @@ impl fmt::Display for Function {
             Function::Sumx2my2 => write!(f, "SUMX2MY2"),
             Function::Sumx2py2 => write!(f, "SUMX2PY2"),
             Function::Sumxmy2 => write!(f, "SUMXMY2"),
+            Function::Correl => write!(f, "CORREL"),
+            Function::Rsq => write!(f, "RSQ"),
+            Function::Intercept => write!(f, "INTERCEPT"),
+            Function::Slope => write!(f, "SLOPE"),
+            Function::Steyx => write!(f, "STEYX"),
         }
     }
 }
@@ -1929,6 +1950,11 @@ impl Model {
             Function::Sumx2my2 => self.fn_sumx2my2(args, cell),
             Function::Sumx2py2 => self.fn_sumx2py2(args, cell),
             Function::Sumxmy2 => self.fn_sumxmy2(args, cell),
+            Function::Correl => self.fn_correl(args, cell),
+            Function::Rsq => self.fn_rsq(args, cell),
+            Function::Intercept => self.fn_intercept(args, cell),
+            Function::Slope => self.fn_slope(args, cell),
+            Function::Steyx => self.fn_steyx(args, cell),
         }
     }
 }
diff --git a/base/src/functions/statistical/correl.rs b/base/src/functions/statistical/correl.rs
new file mode 100644
index 0000000..44aa456
--- /dev/null
+++ b/base/src/functions/statistical/correl.rs
@@ -0,0 +1,246 @@
+use crate::expressions::types::CellReferenceIndex;
+use crate::{
+    calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model,
+};
+
+// Mean-centred summary of the numeric (x, y) pairs shared by CORREL, RSQ, SLOPE,
+// INTERCEPT and STEYX.
+//
+// The sums of squares are built from deviations about the mean instead of the
+// raw-sum form n * Σx² - (Σx)². The raw form subtracts two nearly equal numbers
+// when the data sit far from zero and can lose every significant digit.
+pub(super) struct PairedSample {
+    // (x - mean_x, y - mean_y) for every usable pair
+    deviations: Vec<(f64, f64)>,
+    mean_x: f64,
+    mean_y: f64,
+    // Σ (x - mean_x)²
+    sxx: f64,
+    // Σ (y - mean_y)²
+    syy: f64,
+    // Σ (x - mean_x)(y - mean_y)
+    sxy: f64,
+}
+
+impl PairedSample {
+    // Pairs `xs` with `ys` position by position and keeps the positions where both
+    // entries are numbers. Pairing stops at the end of the shorter input.
+    pub(super) fn new(
+        xs: impl IntoIterator<Item = Option<f64>>,
+        ys: impl IntoIterator<Item = Option<f64>>,
+    ) -> Self {
+        let pairs: Vec<(f64, f64)> = xs
+            .into_iter()
+            .zip(ys)
+            .filter_map(|pair| match pair {
+                (Some(x), Some(y)) => Some((x, y)),
+                _ => None,
+            })
+            .collect();
+
+        // Shift by the first pair before averaging: constant data then gives
+        // deviations of exactly zero, and a large common offset cancels early.
+        let (x0, y0) = pairs.first().copied().unwrap_or((0.0, 0.0));
+        let mut sum_dx = 0.0;
+        let mut sum_dy = 0.0;
+        for &(x, y) in &pairs {
+            sum_dx += x - x0;
+            sum_dy += y - y0;
+        }
+        let (shift_x, shift_y) = if pairs.is_empty() {
+            (0.0, 0.0)
+        } else {
+            let n = pairs.len() as f64;
+            (sum_dx / n, sum_dy / n)
+        };
+
+        let deviations: Vec<(f64, f64)> = pairs
+            .iter()
+            .map(|&(x, y)| ((x - x0) - shift_x, (y - y0) - shift_y))
+            .collect();
+
+        let mut sxx = 0.0;
+        let mut syy = 0.0;
+        let mut sxy = 0.0;
+        for &(dx, dy) in &deviations {
+            sxx += dx * dx;
+            syy += dy * dy;
+            sxy += dx * dy;
+        }
+
+        PairedSample {
+            deviations,
+            mean_x: x0 + shift_x,
+            mean_y: y0 + shift_y,
+            sxx,
+            syy,
+            sxy,
+        }
+    }
+
+    // Number of pairs in which both values are numeric
+    pub(super) fn count(&self) -> usize {
+        self.deviations.len()
+    }
+
+    // Pearson correlation coefficient; None when either variable has zero spread
+    // or the sums are not finite
+    pub(super) fn correlation(&self) -> Option<f64> {
+        // Multiply the roots instead of rooting the product, so two large (or two
+        // tiny) sums cannot overflow (or underflow) before the root is taken.
+        let denom = self.sxx.sqrt() * self.syy.sqrt();
+        if denom == 0.0 || !denom.is_finite() {
+            None
+        } else {
+            Some(self.sxy / denom)
+        }
+    }
+
+    // Least-squares slope of y on x; None when x has zero spread or its sum of
+    // squares is not finite
+    fn slope(&self) -> Option<f64> {
+        if self.sxx == 0.0 || !self.sxx.is_finite() {
+            None
+        } else {
+            Some(self.sxy / self.sxx)
+        }
+    }
+
+    // Value of the fitted line at x = 0; None whenever `slope` is None
+    fn intercept(&self) -> Option<f64> {
+        self.slope().map(|slope| self.mean_y - slope * self.mean_x)
+    }
+
+    // Standard error of the predicted y values with count - 2 degrees of freedom;
+    // None whenever `slope` is None. Callers must check for at least three pairs.
+    fn standard_error(&self) -> Option<f64> {
+        let slope = self.slope()?;
+        let sse: f64 = self
+            .deviations
+            .iter()
+            .map(|&(dx, dy)| {
+                let residual = dy - slope * dx;
+                residual * residual
+            })
+            .sum();
+        let dof = self.count() as f64 - 2.0;
+        Some((sse / dof).sqrt())
+    }
+}
+
+impl Model {
+    // Evaluates both arguments and summarises their paired numeric values, taking
+    // the first argument as y and the second as x. Errors from
+    // `fn_get_two_matrices` are returned unchanged.
+    pub(super) fn paired_sample(
+        &mut self,
+        args: &[Node],
+        cell: CellReferenceIndex,
+    ) -> Result<PairedSample, CalcResult> {
+        let (_rows, _cols, values_y, values_x) = self.fn_get_two_matrices(args, cell)?;
+        Ok(PairedSample::new(values_x, values_y))
+    }
+
+    // CORREL(array1, array2) - Returns the correlation coefficient of two data sets
+    pub(crate) fn fn_correl(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
+        let sample = match self.paired_sample(args, cell) {
+            Ok(s) => s,
+            Err(e) => return e,
+        };
+
+        // Need at least 2 valid pairs
+        if sample.count() < 2 {
+            return CalcResult::new_error(
+                Error::DIV,
+                cell,
+                "CORREL requires at least two numeric data points in each range".to_string(),
+            );
+        }
+
+        match sample.correlation() {
+            Some(r) => CalcResult::Number(r),
+            None => {
+                CalcResult::new_error(Error::DIV, cell, "Division by zero in CORREL".to_string())
+            }
+        }
+    }
+
+    // SLOPE(known_y's, known_x's) - Returns the slope of the linear regression line
+    pub(crate) fn fn_slope(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
+        let sample = match self.paired_sample(args, cell) {
+            Ok(s) => s,
+            Err(e) => return e,
+        };
+
+        if sample.count() < 2 {
+            return CalcResult::new_error(
+                Error::DIV,
+                cell,
+                "SLOPE requires at least two numeric data points".to_string(),
+            );
+        }
+
+        match sample.slope() {
+            Some(slope) => CalcResult::Number(slope),
+            None => {
+                CalcResult::new_error(Error::DIV, cell, "Division by zero in SLOPE".to_string())
+            }
+        }
+    }
+
+    // INTERCEPT(known_y's, known_x's) - Returns the y-intercept of the linear regression line
+    pub(crate) fn fn_intercept(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
+        let sample = match self.paired_sample(args, cell) {
+            Ok(s) => s,
+            Err(e) => return e,
+        };
+
+        if sample.count() < 2 {
+            return CalcResult::new_error(
+                Error::DIV,
+                cell,
+                "INTERCEPT requires at least two numeric data points".to_string(),
+            );
+        }
+
+        match sample.intercept() {
+            Some(intercept) => CalcResult::Number(intercept),
+            None => {
+                CalcResult::new_error(Error::DIV, cell, "Division by zero in INTERCEPT".to_string())
+            }
+        }
+    }
+
+    // STEYX(known_y's, known_x's) - Returns the standard error of the predicted y-values
+    pub(crate) fn fn_steyx(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
+        let sample = match self.paired_sample(args, cell) {
+            Ok(s) => s,
+            Err(e) => return e,
+        };
+
+        // Need at least 3 points for STEYX (n - 2 in denominator)
+        if sample.count() < 3 {
+            return CalcResult::new_error(
+                Error::DIV,
+                cell,
+                "STEYX requires at least three numeric data points".to_string(),
+            );
+        }
+
+        let sey = match sample.standard_error() {
+            Some(value) => value,
+            None => {
+                return CalcResult::new_error(
+                    Error::DIV,
+                    cell,
+                    "Division by zero in STEYX".to_string(),
+                );
+            }
+        };
+        if !sey.is_finite() {
+            return CalcResult::new_error(Error::DIV, cell, "Numerical error in STEYX".to_string());
+        }
+
+        CalcResult::Number(sey)
+    }
+}
diff --git a/base/src/functions/statistical/mod.rs b/base/src/functions/statistical/mod.rs
index cb08021..6e31366 100644
--- a/base/src/functions/statistical/mod.rs
+++ b/base/src/functions/statistical/mod.rs
@@ -1,6 +1,7 @@
 mod beta;
 mod binom;
 mod chisq;
+mod correl;
 mod count_and_average;
 mod covariance;
 mod devsq;
diff --git a/base/src/functions/statistical/pearson.rs b/base/src/functions/statistical/pearson.rs
index a72da17..fd523e0 100644
--- a/base/src/functions/statistical/pearson.rs
+++ b/base/src/functions/statistical/pearson.rs
@@ -63,4 +63,24 @@ impl Model {
 
         CalcResult::Number(num / denom)
     }
+
+    // RSQ(array1, array2) = CORREL(array1, array2)^2
+    pub(crate) fn fn_rsq(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
+        // Built on the same paired sample as CORREL so the two always agree
+        let sample = match self.paired_sample(args, cell) {
+            Ok(s) => s,
+            Err(e) => return e,
+        };
+        if sample.count() < 2 {
+            return CalcResult::new_error(
+                Error::DIV,
+                cell,
+                "RSQ requires at least two numeric data points in each range".to_string(),
+            );
+        }
+        match sample.correlation() {
+            Some(r) => CalcResult::Number(r * r),
+            None => CalcResult::new_error(Error::DIV, cell, "Division by zero in RSQ".to_string()),
+        }
+    }
 }
diff --git a/xlsx/tests/statistical/CORREL_SLOPE_INTERCEPT_RSQ_STEYX.xlsx b/xlsx/tests/statistical/CORREL_SLOPE_INTERCEPT_RSQ_STEYX.xlsx
new file mode 100644
index 0000000..7e745bb
Binary files /dev/null and b/xlsx/tests/statistical/CORREL_SLOPE_INTERCEPT_RSQ_STEYX.xlsx differ