UPDATE: Implements CORREL, SLOPE, INTERCEPT, RSQ and STEYX

These functions all follow a very similar code path
This commit is contained in:
Nicolás Hatcher
2025-11-26 21:45:38 +01:00
committed by Nicolás Hatcher Andrés
parent 01b19b9c35
commit 8597d14a4e
6 changed files with 312 additions and 1 deletions

View File

@@ -990,6 +990,11 @@ fn get_function_args_signature(kind: &Function, arg_count: usize) -> Vec<Signatu
Function::Sumx2my2 => vec![Signature::Vector; 2],
Function::Sumx2py2 => vec![Signature::Vector; 2],
Function::Sumxmy2 => vec![Signature::Vector; 2],
Function::Correl => vec![Signature::Vector; 2],
Function::Rsq => vec![Signature::Vector; 2],
Function::Intercept => vec![Signature::Vector; 2],
Function::Slope => vec![Signature::Vector; 2],
Function::Steyx => vec![Signature::Vector; 2],
}
}
@@ -1324,5 +1329,10 @@ fn static_analysis_on_function(kind: &Function, args: &[Node]) -> StaticResult {
Function::Sumx2my2 => StaticResult::Scalar,
Function::Sumx2py2 => StaticResult::Scalar,
Function::Sumxmy2 => StaticResult::Scalar,
Function::Correl => StaticResult::Scalar,
Function::Rsq => StaticResult::Scalar,
Function::Intercept => StaticResult::Scalar,
Function::Slope => StaticResult::Scalar,
Function::Steyx => StaticResult::Scalar,
}
}

View File

@@ -421,10 +421,16 @@ pub enum Function {
Dvar,
Dvarp,
Dstdevp,
Correl,
Rsq,
Intercept,
Slope,
Steyx,
}
impl Function {
pub fn into_iter() -> IntoIter<Function, 328> {
pub fn into_iter() -> IntoIter<Function, 333> {
[
Function::And,
Function::False,
@@ -754,6 +760,11 @@ impl Function {
Function::VarA,
Function::WeibullDist,
Function::ZTest,
Function::Correl,
Function::Rsq,
Function::Intercept,
Function::Slope,
Function::Steyx,
]
.into_iter()
}
@@ -1234,6 +1245,11 @@ impl Function {
"SUMX2MY2" => Some(Function::Sumx2my2),
"SUMX2PY2" => Some(Function::Sumx2py2),
"SUMXMY2" => Some(Function::Sumxmy2),
"CORREL" => Some(Function::Correl),
"RSQ" => Some(Function::Rsq),
"INTERCEPT" => Some(Function::Intercept),
"SLOPE" => Some(Function::Slope),
"STEYX" => Some(Function::Steyx),
_ => None,
}
@@ -1573,6 +1589,11 @@ impl fmt::Display for Function {
Function::Sumx2my2 => write!(f, "SUMX2MY2"),
Function::Sumx2py2 => write!(f, "SUMX2PY2"),
Function::Sumxmy2 => write!(f, "SUMXMY2"),
Function::Correl => write!(f, "CORREL"),
Function::Rsq => write!(f, "RSQ"),
Function::Intercept => write!(f, "INTERCEPT"),
Function::Slope => write!(f, "SLOPE"),
Function::Steyx => write!(f, "STEYX"),
}
}
}
@@ -1929,6 +1950,11 @@ impl Model {
Function::Sumx2my2 => self.fn_sumx2my2(args, cell),
Function::Sumx2py2 => self.fn_sumx2py2(args, cell),
Function::Sumxmy2 => self.fn_sumxmy2(args, cell),
Function::Correl => self.fn_correl(args, cell),
Function::Rsq => self.fn_rsq(args, cell),
Function::Intercept => self.fn_intercept(args, cell),
Function::Slope => self.fn_slope(args, cell),
Function::Steyx => self.fn_steyx(args, cell),
}
}
}

View File

@@ -0,0 +1,227 @@
use crate::expressions::types::CellReferenceIndex;
use crate::{
calc_result::CalcResult, expressions::parser::Node, expressions::token::Error, model::Model,
};
impl Model {
// CORREL(array1, array2) - Returns the correlation coefficient of two data sets
pub(crate) fn fn_correl(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
let (_, _, values_left, values_right) = match self.fn_get_two_matrices(args, cell) {
Ok(s) => s,
Err(e) => return e,
};
let mut n = 0.0;
let mut sum_x = 0.0;
let mut sum_y = 0.0;
let mut sum_x2 = 0.0;
let mut sum_y2 = 0.0;
let mut sum_xy = 0.0;
for (x_opt, y_opt) in values_left.into_iter().zip(values_right.into_iter()) {
if let (Some(x), Some(y)) = (x_opt, y_opt) {
n += 1.0;
sum_x += x;
sum_y += y;
sum_x2 += x * x;
sum_y2 += y * y;
sum_xy += x * y;
}
}
// Need at least 2 valid pairs
if n < 2.0 {
return CalcResult::new_error(
Error::DIV,
cell,
"CORREL requires at least two numeric data points in each range".to_string(),
);
}
let num = n * sum_xy - sum_x * sum_y;
let denom_x = n * sum_x2 - sum_x * sum_x;
let denom_y = n * sum_y2 - sum_y * sum_y;
let denom = (denom_x * denom_y).sqrt();
if denom == 0.0 || !denom.is_finite() {
return CalcResult::new_error(
Error::DIV,
cell,
"Division by zero in CORREL".to_string(),
);
}
let r = num / denom;
CalcResult::Number(r)
}
// SLOPE(known_y's, known_x's) - Returns the slope of the linear regression line
pub(crate) fn fn_slope(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
let (_rows, _cols, values_y, values_x) = match self.fn_get_two_matrices(args, cell) {
Ok(s) => s,
Err(e) => return e,
};
let mut n = 0.0;
let mut sum_x = 0.0;
let mut sum_y = 0.0;
let mut sum_x2 = 0.0;
let mut sum_xy = 0.0;
let len = values_y.len().min(values_x.len());
for i in 0..len {
if let (Some(y), Some(x)) = (values_y[i], values_x[i]) {
n += 1.0;
sum_x += x;
sum_y += y;
sum_x2 += x * x;
sum_xy += x * y;
}
}
if n < 2.0 {
return CalcResult::new_error(
Error::DIV,
cell,
"SLOPE requires at least two numeric data points".to_string(),
);
}
let denom = n * sum_x2 - sum_x * sum_x;
if denom == 0.0 || !denom.is_finite() {
return CalcResult::new_error(
Error::DIV,
cell,
"Division by zero in SLOPE".to_string(),
);
}
let num = n * sum_xy - sum_x * sum_y;
let slope = num / denom;
CalcResult::Number(slope)
}
// INTERCEPT(known_y's, known_x's) - Returns the y-intercept of the linear regression line
pub(crate) fn fn_intercept(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
let (_rows, _cols, values_y, values_x) = match self.fn_get_two_matrices(args, cell) {
Ok(s) => s,
Err(e) => return e,
};
let mut n = 0.0;
let mut sum_x = 0.0;
let mut sum_y = 0.0;
let mut sum_x2 = 0.0;
let mut sum_xy = 0.0;
let len = values_y.len().min(values_x.len());
for i in 0..len {
if let (Some(y), Some(x)) = (values_y[i], values_x[i]) {
n += 1.0;
sum_x += x;
sum_y += y;
sum_x2 += x * x;
sum_xy += x * y;
}
}
if n < 2.0 {
return CalcResult::new_error(
Error::DIV,
cell,
"INTERCEPT requires at least two numeric data points".to_string(),
);
}
let denom = n * sum_x2 - sum_x * sum_x;
if denom == 0.0 || !denom.is_finite() {
return CalcResult::new_error(
Error::DIV,
cell,
"Division by zero in INTERCEPT".to_string(),
);
}
let num = n * sum_xy - sum_x * sum_y;
let slope = num / denom;
let intercept = (sum_y - slope * sum_x) / n;
CalcResult::Number(intercept)
}
// STEYX(known_y's, known_x's) - Returns the standard error of the predicted y-values
pub(crate) fn fn_steyx(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
let (_rows, _cols, values_y, values_x) = match self.fn_get_two_matrices(args, cell) {
Ok(s) => s,
Err(e) => return e,
};
let mut n = 0.0;
let mut sum_x = 0.0;
let mut sum_y = 0.0;
let mut sum_x2 = 0.0;
let mut sum_xy = 0.0;
// We need the actual pairs again later for residuals
let mut pairs: Vec<(f64, f64)> = Vec::new();
let len = values_y.len().min(values_x.len());
for i in 0..len {
if let (Some(y), Some(x)) = (values_y[i], values_x[i]) {
n += 1.0;
sum_x += x;
sum_y += y;
sum_x2 += x * x;
sum_xy += x * y;
pairs.push((x, y));
}
}
// Need at least 3 points for STEYX (n - 2 in denominator)
if n < 3.0 {
return CalcResult::new_error(
Error::DIV,
cell,
"STEYX requires at least three numeric data points".to_string(),
);
}
let denom = n * sum_x2 - sum_x * sum_x;
if denom == 0.0 || !denom.is_finite() {
return CalcResult::new_error(
Error::DIV,
cell,
"Division by zero in STEYX".to_string(),
);
}
let num = n * sum_xy - sum_x * sum_y;
let slope = num / denom;
let intercept = (sum_y - slope * sum_x) / n;
// Sum of squared residuals: Σ (y - ŷ)^2, ŷ = intercept + slope * x
let mut sse = 0.0;
for (x, y) in pairs {
let y_hat = intercept + slope * x;
let diff = y - y_hat;
sse += diff * diff;
}
let dof = n - 2.0;
if dof <= 0.0 {
return CalcResult::new_error(
Error::DIV,
cell,
"STEYX has non-positive degrees of freedom".to_string(),
);
}
let sey = (sse / dof).sqrt();
if !sey.is_finite() {
return CalcResult::new_error(Error::DIV, cell, "Numerical error in STEYX".to_string());
}
CalcResult::Number(sey)
}
}

View File

@@ -1,6 +1,7 @@
mod beta;
mod binom;
mod chisq;
mod correl;
mod count_and_average;
mod covariance;
mod devsq;

View File

@@ -63,4 +63,51 @@ impl Model {
CalcResult::Number(num / denom)
}
// RSQ(array1, array2) = CORREL(array1, array2)^2
pub(crate) fn fn_rsq(&mut self, args: &[Node], cell: CellReferenceIndex) -> CalcResult {
let (_rows, _cols, values1, values2) = match self.fn_get_two_matrices(args, cell) {
Ok(s) => s,
Err(e) => return e,
};
let mut n = 0.0_f64;
let mut sum_x = 0.0_f64;
let mut sum_y = 0.0_f64;
let mut sum_x2 = 0.0_f64;
let mut sum_y2 = 0.0_f64;
let mut sum_xy = 0.0_f64;
let len = values1.len().min(values2.len());
for i in 0..len {
if let (Some(x), Some(y)) = (values1[i], values2[i]) {
n += 1.0;
sum_x += x;
sum_y += y;
sum_x2 += x * x;
sum_y2 += y * y;
sum_xy += x * y;
}
}
if n < 2.0 {
return CalcResult::new_error(
Error::DIV,
cell,
"RSQ requires at least two numeric data points in each range".to_string(),
);
}
let num = n * sum_xy - sum_x * sum_y;
let denom_x = n * sum_x2 - sum_x * sum_x;
let denom_y = n * sum_y2 - sum_y * sum_y;
let denom = (denom_x * denom_y).sqrt();
if denom == 0.0 || !denom.is_finite() {
return CalcResult::new_error(Error::DIV, cell, "Division by zero in RSQ".to_string());
}
let r = num / denom;
CalcResult::Number(r * r)
}
}