UPDATE: Dump of initial files

This commit is contained in:
Nicolás Hatcher
2023-11-18 21:26:18 +01:00
commit c5b8efd83d
279 changed files with 42654 additions and 0 deletions

257
xlsx/src/import/colors.rs Normal file
View File

@@ -0,0 +1,257 @@
use core::cmp::max;
use core::cmp::min;
// https://gist.github.com/emanuel-sanabria-developer/5793377
// https://github.com/ClosedXML/ClosedXML/wiki/Excel-Indexed-Colors
// Warning: Excel uses a weird normalization for HSL colors (0, 255)
// We use a more standard one but our HSL numbers will not coincide with Excel's
/// Parses a `#RRGGBB` string into its `[red, green, blue]` components.
///
/// The first byte (the `#`) is skipped and each following pair of hexadecimal
/// digits becomes one channel.
///
/// # Panics
/// Panics if `h` is shorter than seven bytes, if a slice boundary falls inside
/// a multi-byte character, or if a channel is not valid hexadecimal.
pub(crate) fn hex_to_rgb(h: &str) -> [i32; 3] {
    // Every channel is two hex digits starting at the given byte offset.
    let channel = |start: usize| i32::from_str_radix(&h[start..start + 2], 16).unwrap();
    [channel(1), channel(3), channel(5)]
}
/// Formats `[red, green, blue]` as an upper-case `#RRGGBB` string.
///
/// Each channel is written as hexadecimal, zero-padded to at least two digits.
pub(crate) fn rgb_to_hex(rgb: [i32; 3]) -> String {
    let [red, green, blue] = rgb;
    format!("#{:02X}{:02X}{:02X}", red, green, blue)
}
/// Converts `[red, green, blue]` (channels in `0..=255`) to
/// `[hue, saturation, luminosity]`.
///
/// Hue is returned in degrees, saturation and luminosity as percentages; all
/// three are rounded to the nearest integer. Grey colours (all channels equal)
/// are reported with hue and saturation `0`.
pub(crate) fn rgb_to_hsl(rgb: [i32; 3]) -> [i32; 3] {
    let [r, g, b] = rgb;
    let percent = |fraction: f64| (fraction * 100.0).round() as i32;

    let red = r as f64 / 255.0;
    let green = g as f64 / 255.0;
    let blue = b as f64 / 255.0;

    let max_color = max(max(r, g), b);
    let min_color = min(min(r, g), b);
    let chroma = (max_color - min_color) as f64 / 255.0;
    if chroma == 0.0 {
        // Achromatic: every channel is the same, so any of them is the lightness
        return [0, 0, percent(red)];
    }

    let luminosity = (max_color + min_color) as f64 / (255.0 * 2.0);
    let saturation = if luminosity > 0.5 {
        0.5 * chroma / (1.0 - luminosity)
    } else {
        0.5 * chroma / luminosity
    };

    // The hue sector depends on which channel is the dominant one
    let hue = if max_color == r {
        if green >= blue {
            60.0 * (green - blue) / chroma
        } else {
            ((green - blue) / chroma + 6.0) * 60.0
        }
    } else if max_color == g {
        ((blue - red) / chroma + 2.0) * 60.0
    } else {
        ((red - green) / chroma + 4.0) * 60.0
    };

    [hue.round() as i32, percent(saturation), percent(luminosity)]
}
/// Computes one RGB channel (as a fraction in `0.0..=1.0`) for the HSL to RGB
/// conversion.
///
/// `p` and `q` are the two intermediate lightness values computed by the
/// caller and `t` is the hue (as a fraction of a full turn) shifted by the
/// channel offset. `t` may fall slightly outside `[0, 1]`; it is wrapped back
/// by one turn before being used.
fn hue_to_rgb(p: f64, q: f64, t: f64) -> f64 {
    // Wrap the shifted hue back into a single turn
    let mut c = t;
    if c < 0.0 {
        c += 1.0;
    }
    if c > 1.0 {
        c -= 1.0;
    }
    // NOTE: the interpolations below must use the wrapped value `c`, not `t`,
    // otherwise hues whose shifted value overflows past 1.0 are extrapolated
    // far outside the [p, q] range.
    if c < 1.0 / 6.0 {
        // Rising edge
        return p + (q - p) * 6.0 * c;
    }
    if c < 0.5 {
        // Plateau
        return q;
    }
    if c < 2.0 / 3.0 {
        // Falling edge
        return p + (q - p) * (2.0 / 3.0 - c) * 6.0;
    }
    p
}
/// Converts `[hue, saturation, luminosity]` back to `[red, green, blue]`.
///
/// Hue is read in degrees, saturation and luminosity as percentages. The
/// resulting channels are scaled by 255 and rounded to the nearest integer.
pub(crate) fn hsl_to_rgb(hsl: [i32; 3]) -> [i32; 3] {
    let [h, s, l] = hsl;
    let hue = (h as f64) / 360.0;
    let saturation = (s as f64) / 100.0;
    let luminosity = (l as f64) / 100.0;

    let (red, green, blue) = if saturation == 0.0 {
        // achromatic: all channels share the lightness
        let grey = luminosity * 255.0;
        (grey, grey, grey)
    } else {
        let q = if luminosity < 0.5 {
            luminosity * (1.0 + saturation)
        } else {
            luminosity + saturation - luminosity * saturation
        };
        let p = 2.0 * luminosity - q;
        // Red, green and blue are a third of a turn apart on the hue wheel
        (
            255.0 * hue_to_rgb(p, q, hue + 1.0 / 3.0),
            255.0 * hue_to_rgb(p, q, hue),
            255.0 * hue_to_rgb(p, q, hue - 1.0 / 3.0),
        )
    };

    let to_channel = |value: f64| value.round() as i32;
    [to_channel(red), to_channel(green), to_channel(blue)]
}
/// Applies a tint to a `#RRGGBB` colour and returns the result as `#RRGGBB`.
///
/// This follows the tint algorithm of section 18.8.3 (bgColor): the colour is
/// converted to HSL, its luminosity is adjusted and the result is converted
/// back. A tint of exactly `0.0` returns `hex` unchanged.
///
/// Note that HLSMAX here is 100 whereas Excel uses 255, so results may differ
/// from Excel's by rounding.
fn hex_with_tint_to_rgb(hex: &str, tint: f64) -> String {
    if tint == 0.0 {
        return hex.to_string();
    }
    let [hue, saturation, lum] = rgb_to_hsl(hex_to_rgb(hex));
    let lum = lum as f64;
    let tinted = if tint < 0.0 {
        // Darken: Lum' = Lum * (1.0 + tint)
        lum * (1.0 + tint)
    } else {
        // Lighten: Lum' = Lum * (1.0 - tint) + (HLSMAX - HLSMAX * (1.0 - tint))
        // which is the same as Lum + (HLSMAX - Lum) * tint
        lum + (100.0 - lum) * tint
    };
    rgb_to_hex(hsl_to_rgb([hue, saturation, tinted.round() as i32]))
}
/// Returns the `#RRGGBB` colour for a theme index with the given tint applied.
///
/// `theme` indexes a hard-coded palette of twelve colours. The index is read
/// from the XML file being imported, so it cannot be trusted: a negative or
/// too large index falls back to black (`#000000`) instead of panicking,
/// mirroring the fallback used for indexed colours.
pub fn get_themed_color(theme: i32, tint: f64) -> String {
    let color_theme = [
        "#FFFFFF", "#000000", // "window",
        "#E7E6E6", "#44546A", "#4472C4", "#ED7D31", "#A5A5A5", "#FFC000", "#5B9BD5", "#70AD47",
        "#0563C1", "#954F72",
    ];
    let base = if theme < 0 {
        None
    } else {
        color_theme.get(theme as usize).copied()
    };
    hex_with_tint_to_rgb(base.unwrap_or("#000000"), tint)
}
/// Returns the `#RRGGBB` colour of the legacy indexed palette.
///
/// The palette has 64 entries (indices `0..=63`). Any index outside that
/// range, including negative ones, yields the first entry (`#000000`).
pub fn get_indexed_color(index: i32) -> String {
    let color_list = [
        "#000000", "#FFFFFF", "#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF",
        "#000000", "#FFFFFF", "#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF",
        "#800000", "#008000", "#000080", "#808000", "#800080", "#008080", "#C0C0C0", "#808080",
        "#9999FF", "#993366", "#FFFFCC", "#CCFFFF", "#660066", "#FF8080", "#0066CC", "#CCCCFF",
        "#000080", "#FF00FF", "#FFFF00", "#00FFFF", "#800080", "#800000", "#008080", "#0000FF",
        "#00CCFF", "#CCFFFF", "#CCFFCC", "#FFFF99", "#99CCFF", "#FF99CC", "#CC99FF", "#FFCC99",
        "#3366FF", "#33CCCC", "#99CC00", "#FFCC00", "#FF9900", "#FF6600", "#666699", "#969696",
        "#003366", "#339966", "#003300", "#333300", "#993300", "#993366", "#333399",
        "#333333",
        // (64 would be "Transparent", which is not part of this list)
    ];
    // The index comes from the imported file: guard both ends of the range
    // instead of only the upper one so a negative value cannot panic.
    let color = if index < 0 {
        None
    } else {
        color_list.get(index as usize).copied()
    };
    color.unwrap_or(color_list[0]).to_string()
}
// Unit tests for the colour helpers of this module.
#[cfg(test)]
mod tests {
use crate::import::colors::*;
// Checks a few tinted theme colours against hard-coded expected values.
// Where a comment quotes Excel's value, the asserted value differs from it.
#[test]
fn test_known_colors() {
let color1 = get_themed_color(0, -0.05);
assert_eq!(color1, "#F2F2F2");
let color2 = get_themed_color(5, -0.25);
// Excel returns "#C65911" (rounding error)
assert_eq!(color2, "#C55911");
let color3 = get_themed_color(4, 0.6);
// Excel returns "#b4c6e7" (rounding error)
assert_eq!(color3, "#B5C8E8");
}
// Checks hex <-> rgb and rgb -> hsl on a table of colours, and that
// hsl -> rgb gets back close to the original rgb.
#[test]
fn test_rgb_hex() {
// One row of the table: the same colour in its three representations
struct ColorTest {
hex: String,
rgb: [i32; 3],
hsl: [i32; 3],
}
let color_tests = [
ColorTest {
hex: "#FFFFFF".to_string(),
rgb: [255, 255, 255],
hsl: [0, 0, 100],
},
ColorTest {
hex: "#000000".to_string(),
rgb: [0, 0, 0],
hsl: [0, 0, 0],
},
ColorTest {
hex: "#44546A".to_string(),
rgb: [68, 84, 106],
hsl: [215, 22, 34],
},
ColorTest {
hex: "#E7E6E6".to_string(),
rgb: [231, 230, 230],
hsl: [0, 2, 90],
},
ColorTest {
hex: "#4472C4".to_string(),
rgb: [68, 114, 196],
hsl: [218, 52, 52],
},
ColorTest {
hex: "#ED7D31".to_string(),
rgb: [237, 125, 49],
hsl: [24, 84, 56],
},
ColorTest {
hex: "#A5A5A5".to_string(),
rgb: [165, 165, 165],
hsl: [0, 0, 65],
},
ColorTest {
hex: "#FFC000".to_string(),
rgb: [255, 192, 0],
hsl: [45, 100, 50],
},
ColorTest {
hex: "#5B9BD5".to_string(),
rgb: [91, 155, 213],
hsl: [209, 59, 60],
},
ColorTest {
hex: "#70AD47".to_string(),
rgb: [112, 173, 71],
hsl: [96, 42, 48],
},
ColorTest {
hex: "#0563C1".to_string(),
rgb: [5, 99, 193],
hsl: [210, 95, 39],
},
ColorTest {
hex: "#954F72".to_string(),
rgb: [149, 79, 114],
hsl: [330, 31, 45],
},
];
for color in color_tests.iter() {
let rgb = color.rgb;
let hsl = color.hsl;
assert_eq!(rgb, hex_to_rgb(&color.hex));
assert_eq!(hsl, rgb_to_hsl(rgb));
assert_eq!(rgb_to_hex(rgb), color.hex);
// The round trip has rounding errors
// FIXME: We could also hardcode the hsl21 in the testcase
let rgb2 = hsl_to_rgb(hsl);
// Sum of the absolute per-channel differences after the round trip
let diff =
(rgb2[0] - rgb[0]).abs() + (rgb2[1] - rgb[1]).abs() + (rgb2[2] - rgb[2]).abs();
assert!(diff < 4);
}
}
}

View File

@@ -0,0 +1,81 @@
use std::io::Read;
use ironcalc_base::types::Metadata;
use crate::error::XlsxError;
use super::util::get_value_or_default;
/// Application properties read from `docProps/app.xml`.
struct AppData {
// Text of the `Application` element ("Unknown application" if missing)
application: String,
// Text of the `AppVersion` element (empty if missing)
app_version: String,
}
/// Core document properties read from `docProps/core.xml`.
struct CoreData {
// Text of the `creator` element ("Anonymous User" if missing)
creator: String,
// Text of the `lastModifiedBy` element ("Anonymous User" if missing)
last_modified_by: String,
// Text of the `created` element (empty if missing)
created: String,
// Text of the `modified` element (empty if missing)
last_modified: String,
}
/// Reads the core properties (`docProps/core.xml`) from the archive.
///
/// Missing elements are replaced by defaults: "Anonymous User" for the
/// creator and last modifier, the empty string for both dates.
///
/// # Errors
/// Fails if the file is not in the archive, cannot be read, is not valid XML
/// or has no node below the document root.
fn load_core<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
) -> Result<CoreData, XlsxError> {
    let mut xml = String::new();
    archive
        .by_name("docProps/core.xml")?
        .read_to_string(&mut xml)?;
    let doc = roxmltree::Document::parse(&xml)?;
    let core_data = match doc.root().first_child() {
        Some(node) => node,
        None => return Err(XlsxError::Xml("Corrupt XML structure".to_string())),
    };
    Ok(CoreData {
        // Note the namespace should be "http://purl.org/dc/elements/1.1/"
        creator: get_value_or_default(&core_data, "creator", "Anonymous User"),
        // Note namespace is "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
        last_modified_by: get_value_or_default(&core_data, "lastModifiedBy", "Anonymous User"),
        // In these two cases the namespace is "http://purl.org/dc/terms/"
        created: get_value_or_default(&core_data, "created", ""),
        last_modified: get_value_or_default(&core_data, "modified", ""),
    })
}
/// Reads the application properties (`docProps/app.xml`) from the archive.
///
/// A missing `Application` element becomes "Unknown application" and a
/// missing `AppVersion` becomes the empty string.
///
/// # Errors
/// Fails if the file is not in the archive, cannot be read, is not valid XML
/// or has no node below the document root.
fn load_app<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
) -> Result<AppData, XlsxError> {
    let mut xml = String::new();
    archive
        .by_name("docProps/app.xml")?
        .read_to_string(&mut xml)?;
    let doc = roxmltree::Document::parse(&xml)?;
    let app_data = match doc.root().first_child() {
        Some(node) => node,
        None => return Err(XlsxError::Xml("Corrupt XML structure".to_string())),
    };
    Ok(AppData {
        application: get_value_or_default(&app_data, "Application", "Unknown application"),
        app_version: get_value_or_default(&app_data, "AppVersion", ""),
    })
}
pub(super) fn load_metadata<R: Read + std::io::Seek>(
archive: &mut zip::read::ZipArchive<R>,
) -> Result<Metadata, XlsxError> {
let app_data = load_app(archive)?;
let core_data = load_core(archive)?;
Ok(Metadata {
application: app_data.application,
app_version: app_data.app_version,
creator: core_data.creator,
last_modified_by: core_data.last_modified_by,
created: core_data.created,
last_modified: core_data.last_modified,
})
}

124
xlsx/src/import/mod.rs Normal file
View File

@@ -0,0 +1,124 @@
mod colors;
mod metadata;
mod shared_strings;
mod styles;
mod tables;
mod util;
mod workbook;
mod worksheets;
use std::{
collections::HashMap,
fs,
io::{BufReader, Read},
};
use roxmltree::Node;
use ironcalc_base::{
model::Model,
types::{Metadata, Workbook, WorkbookSettings},
};
use crate::error::XlsxError;
use shared_strings::read_shared_strings;
use metadata::load_metadata;
use styles::load_styles;
use util::get_attribute;
use workbook::load_workbook;
use worksheets::{load_sheets, Relationship};
/// Reads `xl/_rels/workbook.xml.rels` and returns its relationships keyed by
/// their `Id` attribute.
///
/// # Errors
/// Fails if the file is missing, unreadable or not valid XML, or if a
/// `Relationship` element lacks its `Id`, `Type` or `Target` attribute.
fn load_relationships<R: Read + std::io::Seek>(
    archive: &mut zip::ZipArchive<R>,
) -> Result<HashMap<String, Relationship>, XlsxError> {
    let mut xml = String::new();
    archive
        .by_name("xl/_rels/workbook.xml.rels")?
        .read_to_string(&mut xml)?;
    let doc = roxmltree::Document::parse(&xml)?;

    let mut rels = HashMap::new();
    let relationship_nodes = doc
        .descendants()
        .filter(|n: &Node| n.has_tag_name("Relationship"));
    for node in relationship_nodes {
        let id = get_attribute(&node, "Id")?.to_string();
        let rel_type = get_attribute(&node, "Type")?.to_string();
        let target = get_attribute(&node, "Target")?.to_string();
        rels.insert(id, Relationship { rel_type, target });
    }
    Ok(rels)
}
/// Reads a whole xlsx archive from `reader` into a [`Workbook`].
///
/// `name` becomes the workbook name; `locale` and `tz` are stored in the
/// workbook settings. The parts are read in this order: shared strings,
/// workbook, relationships, worksheets (which also fill the tables), styles
/// and finally the metadata. If the metadata cannot be loaded a placeholder
/// is used instead of failing.
///
/// # Errors
/// Propagates the first error from opening the archive or from loading any
/// part other than the metadata.
fn load_xlsx_from_reader<R: Read + std::io::Seek>(
    name: String,
    reader: R,
    locale: &str,
    tz: &str,
) -> Result<Workbook, XlsxError> {
    let mut zip_archive = zip::ZipArchive::new(reader)?;
    let mut strings = read_shared_strings(&mut zip_archive)?;
    let workbook_xml = load_workbook(&mut zip_archive)?;
    let relationships = load_relationships(&mut zip_archive)?;
    let mut tables = HashMap::new();
    let worksheets = load_sheets(
        &mut zip_archive,
        &relationships,
        &workbook_xml,
        &mut tables,
        &mut strings,
    )?;
    let styles = load_styles(&mut zip_archive)?;
    // In case there is no metadata, add some
    let metadata = load_metadata(&mut zip_archive).unwrap_or_else(|_| Metadata {
        application: "Unknown application".to_string(),
        app_version: "".to_string(),
        creator: "".to_string(),
        last_modified_by: "".to_string(),
        created: "".to_string(),
        last_modified: "".to_string(),
    });
    let settings = WorkbookSettings {
        tz: tz.to_string(),
        locale: locale.to_string(),
    };
    Ok(Workbook {
        shared_strings: strings,
        defined_names: workbook_xml.defined_names,
        worksheets,
        styles,
        name,
        settings,
        metadata,
        tables,
    })
}
// Public methods
/// Imports an xlsx file from disk into the internal [`Workbook`]
/// representation.
///
/// The workbook is named after the file stem of `file_name`; `locale` and
/// `tz` are stored in the workbook settings.
///
/// # Errors
/// Fails if the file cannot be opened, if no file stem can be extracted from
/// `file_name`, or if the archive cannot be imported.
pub fn load_from_excel(file_name: &str, locale: &str, tz: &str) -> Result<Workbook, XlsxError> {
    let file_path = std::path::Path::new(file_name);
    let reader = BufReader::new(fs::File::open(file_path)?);
    let name = match file_path.file_stem() {
        Some(stem) => stem.to_string_lossy().to_string(),
        None => {
            return Err(XlsxError::IO(
                "Could not extract workbook name".to_string(),
            ))
        }
    };
    load_xlsx_from_reader(name, reader, locale, tz)
}
/// Imports an xlsx file from disk and builds a [`Model`] from it.
///
/// # Errors
/// Returns the import error if the file cannot be loaded, or
/// `XlsxError::Workbook` if the model cannot be built from the workbook.
pub fn load_model_from_xlsx(file_name: &str, locale: &str, tz: &str) -> Result<Model, XlsxError> {
    load_from_excel(file_name, locale, tz)
        .and_then(|workbook| Model::from_workbook(workbook).map_err(XlsxError::Workbook))
}

View File

@@ -0,0 +1,80 @@
use std::io::Read;
use roxmltree::Node;
use crate::error::XlsxError;
/// Reads the list of shared strings in an Excel workbook.
///
/// Note that in IronCalc we lose the _internal_ styling of a string.
/// See Section 18.4.
///
/// If `xl/sharedStrings.xml` cannot be opened from the archive an empty list
/// is returned.
///
/// # Errors
/// Fails if the file exists but cannot be read or is not valid XML.
pub(crate) fn read_shared_strings<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
) -> Result<Vec<String>, XlsxError> {
    let mut file = match archive.by_name("xl/sharedStrings.xml") {
        Ok(file) => file,
        // No shared strings part: nothing to read
        Err(_) => return Ok(Vec::new()),
    };
    let mut xml = String::new();
    file.read_to_string(&mut xml)?;
    read_shared_strings_from_string(&xml)
}
/// Parses the content of `xl/sharedStrings.xml` and returns one string per
/// `<si>` (string item) element, in document order.
///
/// The text of a string item is the concatenation of all its `<t>` elements,
/// which flattens rich text runs (`<r>`) into plain text. Text inside
/// phonetic runs (`<rPh>`) is a reading hint, not part of the string, and is
/// therefore skipped.
///
/// # Errors
/// Fails if `text` is not valid XML.
fn read_shared_strings_from_string(text: &str) -> Result<Vec<String>, XlsxError> {
    let doc = roxmltree::Document::parse(text)?;
    let shared_strings: Vec<String> = doc
        .descendants()
        .filter(|n: &Node| n.has_tag_name("si"))
        .map(|si| {
            si.descendants()
                .filter(|n| n.has_tag_name("t"))
                // Skip the phonetic hints: they would pollute the cell text
                .filter(|t| !t.ancestors().any(|a| a.has_tag_name("rPh")))
                .filter_map(|t| t.text())
                .collect::<String>()
        })
        .collect();
    Ok(shared_strings)
}
// Unit tests for the shared strings reader.
#[cfg(test)]
mod tests {
use super::*;
// Two plain strings plus one rich text string made of two runs; the runs
// must be flattened into a single string and their formatting dropped.
#[test]
fn test_shared_strings() {
let xml_string = r#"
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="3" uniqueCount="3">
<si>
<t>A string</t>
</si>
<si>
<t>A second String</t>
</si>
<si>
<r>
<t>Hello</t>
</r>
<r>
<rPr>
<b/>
<sz val="11"/>
<color rgb="FFFF0000"/>
<rFont val="Calibri"/>
<family val="2"/>
<scheme val="minor"/>
</rPr>
<t xml:space="preserve"> World</t>
</r>
</si>
</sst>"#;
// The literal starts with a newline, hence the trim before parsing
let shared_strings = read_shared_strings_from_string(xml_string.trim()).unwrap();
assert_eq!(
shared_strings,
[
"A string".to_string(),
"A second String".to_string(),
"Hello World".to_string()
]
);
}
}

386
xlsx/src/import/styles.rs Normal file
View File

@@ -0,0 +1,386 @@
use std::{collections::HashMap, io::Read};
use ironcalc_base::types::{
Alignment, Border, BorderItem, BorderStyle, CellStyleXfs, CellStyles, CellXfs, Fill, Font,
FontScheme, HorizontalAlignment, NumFmt, Styles, VerticalAlignment,
};
use roxmltree::Node;
use crate::error::XlsxError;
use super::util::{get_attribute, get_bool, get_bool_false, get_color, get_number};
/// Reads the border edge called `name` (`left`, `right`, `top`, `bottom`,
/// `diagonal`) from a `<border>` element.
///
/// Returns `Ok(None)` when there is no border on that edge: the edge element
/// is missing (or duplicated), it has no `style` attribute, or its style is
/// `none`. The colour is only read when the edge has exactly one `<color>`
/// child.
///
/// # Errors
/// Propagates the error from reading a malformed `<color>` element.
fn get_border(node: Node, name: &str) -> Result<Option<BorderItem>, XlsxError> {
    let mut edges = node.children().filter(|n| n.has_tag_name(name));
    let border = match (edges.next(), edges.next()) {
        (Some(border), None) => border,
        _ => return Ok(None),
    };
    // The style names are case sensitive and camelCase in the schema
    // (ST_BorderStyle). The all-lowercase spellings are kept as well in case
    // some producer writes them.
    let style = match border.attribute("style") {
        // Explicitly no border
        None | Some("none") => return Ok(None),
        Some("thin") => BorderStyle::Thin,
        Some("medium") => BorderStyle::Medium,
        Some("thick") => BorderStyle::Thick,
        Some("double") => BorderStyle::Double,
        Some("slantDashDot") | Some("slantdashdot") => BorderStyle::SlantDashDot,
        Some("mediumDashed") | Some("mediumdashed") => BorderStyle::MediumDashed,
        Some("mediumDashDot") | Some("mediumdashdot") => BorderStyle::MediumDashDot,
        Some("mediumDashDotDot") | Some("mediumdashdotdot") => BorderStyle::MediumDashDotDot,
        // TODO: Should we fail in this case or set the border to None?
        Some(_) => BorderStyle::Thin,
    };
    let mut color_nodes = border.children().filter(|n| n.has_tag_name("color"));
    let color = match (color_nodes.next(), color_nodes.next()) {
        (Some(color_node), None) => get_color(color_node)?,
        _ => None,
    };
    Ok(Some(BorderItem { style, color }))
}
pub(super) fn load_styles<R: Read + std::io::Seek>(
archive: &mut zip::read::ZipArchive<R>,
) -> Result<Styles, XlsxError> {
let mut file = archive.by_name("xl/styles.xml")?;
let mut text = String::new();
file.read_to_string(&mut text)?;
let doc = roxmltree::Document::parse(&text)?;
let style_sheet = doc
.root()
.first_child()
.ok_or_else(|| XlsxError::Xml("Corrupt XML structure".to_string()))?;
let mut num_fmts = Vec::new();
let num_fmts_nodes = style_sheet
.children()
.filter(|n| n.has_tag_name("numFmts"))
.collect::<Vec<Node>>();
if num_fmts_nodes.len() == 1 {
for num_fmt in num_fmts_nodes[0].children() {
let num_fmt_id = get_number(num_fmt, "numFmtId");
let format_code = num_fmt.attribute("formatCode").unwrap_or("").to_string();
num_fmts.push(NumFmt {
num_fmt_id,
format_code,
});
}
}
let mut fonts = Vec::new();
let font_nodes = style_sheet
.children()
.filter(|n| n.has_tag_name("fonts"))
.collect::<Vec<Node>>()[0];
for font in font_nodes.children() {
let mut sz = 11;
let mut name = "Calibri".to_string();
// NOTE: In Excel you can have simple underline or double underline
// In IronCalc convert double underline to simple
// This in excel is u with a value of "double"
let mut u = false;
let mut b = false;
let mut i = false;
let mut strike = false;
let mut color = Some("FFFFFF00".to_string());
let mut family = 2;
let mut scheme = FontScheme::default();
for feature in font.children() {
match feature.tag_name().name() {
"sz" => {
sz = feature
.attribute("val")
.unwrap_or("11")
.parse::<i32>()
.unwrap_or(11);
}
"color" => {
color = get_color(feature)?;
}
"u" => {
u = true;
}
"b" => {
b = true;
}
"i" => {
i = true;
}
"strike" => {
strike = true;
}
"name" => name = feature.attribute("val").unwrap_or("Calibri").to_string(),
// If there is a theme the font scheme and family overrides other properties like the name
"family" => {
family = feature
.attribute("val")
.unwrap_or("2")
.parse::<i32>()
.unwrap_or(2);
}
"scheme" => {
scheme = match feature.attribute("val") {
None => FontScheme::default(),
Some("minor") => FontScheme::Minor,
Some("major") => FontScheme::Major,
Some("none") => FontScheme::None,
// TODO: Should we fail?
Some(_) => FontScheme::default(),
}
}
"charset" => {}
_ => {
println!("Unexpected feature {:?}", feature);
}
}
}
fonts.push(Font {
strike,
u,
b,
i,
sz,
color,
name,
family,
scheme,
});
}
let mut fills = Vec::new();
let fill_nodes = style_sheet
.children()
.filter(|n| n.has_tag_name("fills"))
.collect::<Vec<Node>>()[0];
for fill in fill_nodes.children() {
let pattern_fill = fill
.children()
.filter(|n| n.has_tag_name("patternFill"))
.collect::<Vec<Node>>();
if pattern_fill.len() != 1 {
// safety belt
// Some fills do not have a patternFill, but they have gradientFill
fills.push(Fill {
pattern_type: "solid".to_string(),
fg_color: None,
bg_color: None,
});
continue;
}
let pattern_fill = pattern_fill[0];
let pattern_type = pattern_fill
.attribute("patternType")
.unwrap_or("none")
.to_string();
let mut fg_color = None;
let mut bg_color = None;
for feature in pattern_fill.children() {
match feature.tag_name().name() {
"fgColor" => {
fg_color = get_color(feature)?;
}
"bgColor" => {
bg_color = get_color(feature)?;
}
_ => {
println!("Unexpected pattern");
dbg!(feature);
}
}
}
fills.push(Fill {
pattern_type,
fg_color,
bg_color,
})
}
let mut borders = Vec::new();
let border_nodes = style_sheet
.children()
.filter(|n| n.has_tag_name("borders"))
.collect::<Vec<Node>>()[0];
for border in border_nodes.children() {
let diagonal_up = get_bool_false(border, "diagonal_up");
let diagonal_down = get_bool_false(border, "diagonal_down");
let left = get_border(border, "left")?;
let right = get_border(border, "right")?;
let top = get_border(border, "top")?;
let bottom = get_border(border, "bottom")?;
let diagonal = get_border(border, "diagonal")?;
borders.push(Border {
diagonal_up,
diagonal_down,
left,
right,
top,
bottom,
diagonal,
});
}
let mut cell_style_xfs = Vec::new();
let cell_style_xfs_nodes = style_sheet
.children()
.filter(|n| n.has_tag_name("cellStyleXfs"))
.collect::<Vec<Node>>()[0];
for xfs in cell_style_xfs_nodes.children() {
let num_fmt_id = get_number(xfs, "numFmtId");
let font_id = get_number(xfs, "fontId");
let fill_id = get_number(xfs, "fillId");
let border_id = get_number(xfs, "borderId");
let apply_number_format = get_bool(xfs, "applyNumberFormat");
let apply_border = get_bool(xfs, "applyBorder");
let apply_alignment = get_bool(xfs, "applyAlignment");
let apply_protection = get_bool(xfs, "applyProtection");
let apply_font = get_bool(xfs, "applyFont");
let apply_fill = get_bool(xfs, "applyFill");
cell_style_xfs.push(CellStyleXfs {
num_fmt_id,
font_id,
fill_id,
border_id,
apply_number_format,
apply_border,
apply_alignment,
apply_protection,
apply_font,
apply_fill,
});
}
let mut cell_styles = Vec::new();
let mut style_names = HashMap::new();
let cell_style_nodes = style_sheet
.children()
.filter(|n| n.has_tag_name("cellStyles"))
.collect::<Vec<Node>>()[0];
for cell_style in cell_style_nodes.children() {
let name = get_attribute(&cell_style, "name")?.to_string();
let xf_id = get_number(cell_style, "xfId");
let builtin_id = get_number(cell_style, "builtinId");
style_names.insert(xf_id, name.clone());
cell_styles.push(CellStyles {
name,
xf_id,
builtin_id,
})
}
let mut cell_xfs = Vec::new();
let cell_xfs_nodes = style_sheet
.children()
.filter(|n| n.has_tag_name("cellXfs"))
.collect::<Vec<Node>>()[0];
for xfs in cell_xfs_nodes.children() {
let xf_id = get_attribute(&xfs, "xfId")?.parse::<i32>()?;
let num_fmt_id = get_number(xfs, "numFmtId");
let font_id = get_number(xfs, "fontId");
let fill_id = get_number(xfs, "fillId");
let border_id = get_number(xfs, "borderId");
let apply_number_format = get_bool_false(xfs, "applyNumberFormat");
let apply_border = get_bool_false(xfs, "applyBorder");
let apply_alignment = get_bool_false(xfs, "applyAlignment");
let apply_protection = get_bool_false(xfs, "applyProtection");
let apply_font = get_bool_false(xfs, "applyFont");
let apply_fill = get_bool_false(xfs, "applyFill");
let quote_prefix = get_bool_false(xfs, "quotePrefix");
// TODO: Pivot Tables
// let pivotButton = get_bool(xfs, "pivotButton");
let alignment_nodes = xfs
.children()
.filter(|n| n.has_tag_name("alignment"))
.collect::<Vec<Node>>();
let alignment = if alignment_nodes.len() == 1 {
let alignment_node = alignment_nodes[0];
let wrap_text = get_bool_false(alignment_node, "wrapText");
let horizontal = match alignment_node.attribute("horizontal") {
Some("center") => HorizontalAlignment::Center,
Some("centerContinuous") => HorizontalAlignment::CenterContinuous,
Some("distributed") => HorizontalAlignment::Distributed,
Some("fill") => HorizontalAlignment::Fill,
Some("general") => HorizontalAlignment::General,
Some("justify") => HorizontalAlignment::Justify,
Some("left") => HorizontalAlignment::Left,
Some("right") => HorizontalAlignment::Right,
// TODO: Should we fail in this case or set the alignment to default?
Some(_) => HorizontalAlignment::default(),
None => HorizontalAlignment::default(),
};
let vertical = match alignment_node.attribute("vertical") {
Some("bottom") => VerticalAlignment::Bottom,
Some("center") => VerticalAlignment::Center,
Some("distributed") => VerticalAlignment::Distributed,
Some("justify") => VerticalAlignment::Justify,
Some("top") => VerticalAlignment::Top,
// TODO: Should we fail in this case or set the alignment to default?
Some(_) => VerticalAlignment::default(),
None => VerticalAlignment::default(),
};
Some(Alignment {
horizontal,
vertical,
wrap_text,
})
} else {
None
};
cell_xfs.push(CellXfs {
xf_id,
num_fmt_id,
font_id,
fill_id,
border_id,
apply_number_format,
apply_border,
apply_alignment,
apply_protection,
apply_font,
apply_fill,
quote_prefix,
alignment,
});
}
// TODO
// let mut dxfs = Vec::new();
// let mut tableStyles = Vec::new();
// let mut colors = Vec::new();
// <colors>
// <mruColors>
// <color rgb="FFB1BB4D"/>
// <color rgb="FFFF99CC"/>
// <color rgb="FF6C56DC"/>
// <color rgb="FFFF66CC"/>
// </mruColors>
// </colors>
Ok(Styles {
num_fmts,
fonts,
fills,
borders,
cell_style_xfs,
cell_xfs,
cell_styles,
})
}

215
xlsx/src/import/tables.rs Normal file
View File

@@ -0,0 +1,215 @@
use std::io::Read;
use ironcalc_base::types::{Table, TableColumn, TableStyleInfo};
use roxmltree::Node;
use crate::error::XlsxError;
use super::util::{get_bool, get_bool_false};
// <table name="Table" displayName="Table" totalsRowCount ref="A1:D6">
// <autoFilter ref="A1:D6">
// <filterColumn colId="0">
// <customFilters><customFilter operator="greaterThan" val=20></customFilter></customFilters>
// </filterColumn>
// </autoFilter>
// <tableColumns count="5">
// <tableColumn name="Monday" totalsRowFunction="sum" />
// ...
// </tableColumns>
// <tableStyleInfo name="TableStyle5"/>
// </table>
/// Parses the optional attribute `name` of `node` as a `u32`.
/// Returns `None` if the attribute is absent or is not a valid number.
fn parse_optional_u32(node: Node, name: &str) -> Option<u32> {
    node.attribute(name)
        .and_then(|value| value.parse::<u32>().ok())
}

/// Parses the attribute `name` of `node` as a `u32`, using `default` when the
/// attribute is absent and failing when it is present but not a number.
fn parse_count(node: Node, name: &str, default: u32) -> Result<u32, XlsxError> {
    match node.attribute(name) {
        Some(value) => value
            .parse::<u32>()
            .map_err(|_| XlsxError::Xml(format!("Invalid {}", name))),
        None => Ok(default),
    }
}

/// Reads a table in an Excel workbook
///
/// `path` is the location of the table part inside the archive and
/// `sheet_name` the name of the sheet the table belongs to; it is stored in
/// the returned table as is.
///
/// # Errors
/// Fails if the part is missing, unreadable or not valid XML, if the table
/// has neither a `name` nor a `displayName`, if it has no `ref`, if a count
/// attribute is not a number, or if a column lacks its `name` or a numeric
/// `id`.
pub(crate) fn load_table<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
    path: &str,
    sheet_name: &str,
) -> Result<Table, XlsxError> {
    let mut file = archive.by_name(path)?;
    let mut text = String::new();
    file.read_to_string(&mut text)?;
    let document = roxmltree::Document::parse(&text)?;
    // table
    let table = document
        .root()
        .first_child()
        .ok_or_else(|| XlsxError::Xml("Corrupt XML structure".to_string()))?;
    // Name and display name are normally the same and are unique in a workbook
    // They also need to be different from any defined name
    // If only one of the two is present it is used for both.
    let name_attribute = table.attribute("name");
    let display_name_attribute = table.attribute("displayName");
    let name = name_attribute
        .or(display_name_attribute)
        .ok_or_else(|| XlsxError::Xml("Missing table name".to_string()))?
        .to_string();
    let display_name = display_name_attribute
        .or(name_attribute)
        .ok_or_else(|| XlsxError::Xml("Missing table display name".to_string()))?
        .to_string();
    // Range of the table, including the totals if any and headers.
    let reference = table
        .attribute("ref")
        .ok_or_else(|| XlsxError::Xml("Missing table ref".to_string()))?
        .to_string();
    // Either 0 or 1, indicates if the table has a formula for totals at the bottom of the table
    let totals_row_count = parse_count(table, "totalsRowCount", 0)?;
    // Either 0 or 1, indicates if the table has headers at the top of the table
    let header_row_count = parse_count(table, "headerRowCount", 1)?;
    // style index of the header row of the table
    let header_row_dxf_id = parse_optional_u32(table, "headerRowDxfId");
    // style index of the data area of the table
    let data_dxf_id = parse_optional_u32(table, "dataDxfId");
    // style index of the totals row of the table
    let totals_row_dxf_id = parse_optional_u32(table, "totalsRowDxfId");
    // Missing in Calc: styles can also be defined via a name:
    // headerRowCellStyle, dataCellStyle, totalsRowCellStyle
    // Missing in Calc: styles can also be applied to the borders:
    // headerRowBorderDxfId, tableBorderDxfId, totalsRowBorderDxfId
    // TODO: Conformant implementations should panic if header_row_dxf_id or data_dxf_id are out of bounds.
    // Note that filters are non dynamic
    // The only thing important for us is whether or not it has filters
    let has_filters = table
        .descendants()
        .find(|n| n.has_tag_name("autoFilter"))
        .map_or(false, |filter| filter.children().any(|c| c.is_element()));
    // tableColumn
    let mut columns = Vec::new();
    for table_column in table
        .descendants()
        .filter(|n| n.has_tag_name("tableColumn"))
    {
        let column_name = table_column
            .attribute("name")
            .ok_or_else(|| XlsxError::Xml("Missing column name".to_string()))?;
        let id = table_column
            .attribute("id")
            .ok_or_else(|| XlsxError::Xml("Missing column id".to_string()))?
            .parse::<u32>()
            .map_err(|_| XlsxError::Xml("Invalid id".to_string()))?;
        // NOTE: Same as before, we should panic if indices to differential formatting records are out of bounds
        // Missing in Calc: styles can also be defined via a name:
        // headerRowCellStyle, dataCellStyle, totalsRowCellStyle
        columns.push(TableColumn {
            id,
            name: column_name.to_string(),
            totals_row_label: None,
            // style index of the header row of the table column
            header_row_dxf_id: parse_optional_u32(table_column, "headerRowDxfId"),
            // style index of the data area of the table column
            data_dxf_id: parse_optional_u32(table_column, "dataDxfId"),
            totals_row_function: None,
            // style index of the totals row of the table column
            totals_row_dxf_id: parse_optional_u32(table_column, "totalsRowDxfId"),
        });
    }
    // tableStyleInfo
    let style_info = match table
        .descendants()
        .find(|n| n.has_tag_name("tableStyleInfo"))
    {
        Some(node) => TableStyleInfo {
            name: node.attribute("name").map(|s| s.to_string()),
            show_first_column: get_bool_false(node, "showFirstColumn"),
            show_last_column: get_bool_false(node, "showLastColumn"),
            show_row_stripes: get_bool(node, "showRowStripes"),
            show_column_stripes: get_bool_false(node, "showColumnStripes"),
        },
        None => TableStyleInfo {
            name: None,
            show_first_column: false,
            show_last_column: false,
            show_row_stripes: true,
            show_column_stripes: false,
        },
    };
    Ok(Table {
        name,
        display_name,
        reference,
        totals_row_count,
        header_row_count,
        header_row_dxf_id,
        data_dxf_id,
        totals_row_dxf_id,
        columns,
        style_info,
        has_filters,
        sheet_name: sheet_name.to_string(),
    })
}

78
xlsx/src/import/util.rs Normal file
View File

@@ -0,0 +1,78 @@
use colors::{get_indexed_color, get_themed_color};
use roxmltree::{ExpandedName, Node};
use crate::error::XlsxError;
use super::colors;
/// Reads the attribute `s` of `node` as an `i32`.
///
/// Returns `0` if the attribute is absent or is not a valid `i32`.
pub(crate) fn get_number(node: Node, s: &str) -> i32 {
    match node.attribute(s) {
        Some(raw) => raw.parse::<i32>().unwrap_or(0),
        None => 0,
    }
}
/// Returns the value of the attribute `attr_name` of `node`.
///
/// `attr_name` is anything that converts into an expanded name: a plain
/// string for a local name, or a `(namespace, name)` pair.
///
/// # Errors
/// Returns `XlsxError::Xml` naming the attribute if it is not present.
#[inline]
pub(super) fn get_attribute<'a, 'n, 'm, N>(
    node: &'a Node,
    attr_name: N,
) -> Result<&'a str, XlsxError>
where
    N: Into<ExpandedName<'n, 'm>>,
{
    let expanded = attr_name.into();
    match node.attribute(expanded) {
        Some(value) => Ok(value),
        None => Err(XlsxError::Xml(format!(
            "Missing \"{:?}\" XML attribute",
            expanded
        ))),
    }
}
/// Returns the text of the child of `node` called `tag_name`.
///
/// `default` is returned when `node` does not have exactly one such child,
/// or when that child has no text.
pub(super) fn get_value_or_default(node: &Node, tag_name: &str, default: &str) -> String {
    let mut candidates = node.children().filter(|n| n.has_tag_name(tag_name));
    let value = match (candidates.next(), candidates.next()) {
        // Exactly one match
        (Some(only), None) => only.text().unwrap_or(default),
        _ => default,
    };
    value.to_string()
}
/// Reads a colour element and returns it as a colour string, if it has one.
///
/// The attributes are tried in this order:
/// * `rgb`: an eight character value has its first two characters (normally
///   the alpha channel) dropped and is returned as `#RRGGBB`; any other value
///   is returned unchanged.
/// * `indexed`: looked up in the indexed palette.
/// * `theme`: looked up in the theme palette, applying the optional `tint`
///   (a tint that is not a number counts as `0.0`).
/// * `auto`: no colour.
///
/// Anything else is reported on standard output and yields no colour.
///
/// # Errors
/// Fails if the `indexed` or `theme` attribute is not a valid integer.
pub(super) fn get_color(node: Node) -> Result<Option<String>, XlsxError> {
    // 18.3.1.15 color (Data Bar Color)
    if let Some(rgb) = node.attribute("rgb") {
        // FIXME the two first values is normally the alpha.
        // `get` (rather than slicing) so that a malformed value containing
        // multi-byte characters cannot panic; it is returned as is instead.
        let color = match rgb.get(2..8) {
            Some(rgb_part) if rgb.len() == 8 => format!("#{}", rgb_part),
            _ => rgb.to_string(),
        };
        Ok(Some(color))
    } else if let Some(indexed) = node.attribute("indexed") {
        let index = indexed.parse::<i32>()?;
        Ok(Some(get_indexed_color(index)))
    } else if let Some(theme) = node.attribute("theme") {
        let theme = theme.parse::<i32>()?;
        let tint = node
            .attribute("tint")
            .map_or(0.0, |t| t.parse::<f64>().unwrap_or(0.0));
        Ok(Some(get_themed_color(theme, tint)))
    } else if node.has_attribute("auto") {
        // TODO: Is this correct?
        // A boolean value indicating the color is automatic and system color dependent.
        Ok(None)
    } else {
        println!("Unexpected color node {:?}", node);
        Ok(None)
    }
}
/// Reads the boolean attribute `s` of `node`, defaulting to `true`.
///
/// Only an explicit false value (`0` or `false`, the two lexical forms XML
/// Schema allows for a false boolean) yields `false`; a missing attribute or
/// any other value yields `true`.
pub(super) fn get_bool(node: Node, s: &str) -> bool {
    // defaults to true
    !matches!(node.attribute(s), Some("0") | Some("false"))
}
/// Reads the boolean attribute `s` of `node`, defaulting to `false`.
///
/// Only an explicit true value (`1` or `true`, the two lexical forms XML
/// Schema allows for a true boolean) yields `true`; a missing attribute or
/// any other value yields `false`.
pub(super) fn get_bool_false(node: Node, s: &str) -> bool {
    // defaults to false
    matches!(node.attribute(s), Some("1") | Some("true"))
}

View File

@@ -0,0 +1,79 @@
use std::io::Read;
use ironcalc_base::types::{DefinedName, SheetState};
use roxmltree::Node;
use crate::error::XlsxError;
use super::{
util::get_attribute,
worksheets::{Sheet, WorkbookXML},
};
/// Reads `xl/workbook.xml` and returns the list of sheets and the defined
/// names of the workbook.
///
/// # Errors
/// Fails if the file is missing, unreadable or not valid XML, if a sheet
/// lacks its `name`, `sheetId` or relationship `id`, if `sheetId` is not a
/// number, if the sheet `state` is unknown, if a defined name has no `name`,
/// or if its `localSheetId` is not the index of one of the sheets.
pub(super) fn load_workbook<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
) -> Result<WorkbookXML, XlsxError> {
    let mut file = archive.by_name("xl/workbook.xml")?;
    let mut text = String::new();
    file.read_to_string(&mut text)?;
    let doc = roxmltree::Document::parse(&text)?;
    let mut defined_names = Vec::new();
    let mut sheets = Vec::new();
    // Get the sheets
    let sheet_nodes: Vec<Node> = doc
        .descendants()
        .filter(|n| n.has_tag_name("sheet"))
        .collect();
    for sheet in sheet_nodes {
        let name = get_attribute(&sheet, "name")?.to_string();
        let sheet_id = get_attribute(&sheet, "sheetId")?.parse::<u32>()?;
        // The relationship id lives in the relationships namespace (`r:id`)
        let id = get_attribute(
            &sheet,
            (
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
                "id",
            ),
        )?
        .to_string();
        let state = match sheet.attribute("state") {
            Some("visible") | None => SheetState::Visible,
            Some("hidden") => SheetState::Hidden,
            Some("veryHidden") => SheetState::VeryHidden,
            Some(state) => return Err(XlsxError::Xml(format!("Unknown sheet state: {}", state))),
        };
        sheets.push(Sheet {
            name,
            sheet_id,
            id,
            state,
        });
    }
    // Get the defined names
    let name_nodes: Vec<Node> = doc
        .descendants()
        .filter(|n| n.has_tag_name("definedName"))
        .collect();
    for node in name_nodes {
        let name = get_attribute(&node, "name")?.to_string();
        let formula = node.text().unwrap_or("").to_string();
        // NOTE: In Excel the `localSheetId` is just the index of the worksheet and unrelated to the sheetId
        let sheet_id = match node.attribute("localSheetId") {
            Some(s) => {
                let index = s.parse::<usize>()?;
                // The index comes from the file: report a bad one instead of panicking
                let sheet = sheets
                    .get(index)
                    .ok_or_else(|| XlsxError::Xml(format!("Invalid localSheetId: {}", s)))?;
                Some(sheet.sheet_id)
            }
            None => None,
        };
        defined_names.push(DefinedName {
            name,
            formula,
            sheet_id,
        })
    }
    Ok(WorkbookXML {
        worksheets: sheets,
        defined_names,
    })
}

View File

@@ -0,0 +1,925 @@
use std::{collections::HashMap, io::Read, num::ParseIntError};
use ironcalc_base::{
expressions::{
parser::{stringify::to_rc_format, Parser},
token::{get_error_by_english_name, Error},
types::CellReferenceRC,
utils::column_to_number,
},
types::{Cell, Col, Comment, DefinedName, Row, SheetData, SheetState, Table, Worksheet},
};
use roxmltree::Node;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::error::XlsxError;
use super::{
tables::load_table,
util::{get_attribute, get_color, get_number},
};
/// One `<sheet>` entry of `xl/workbook.xml`.
#[derive(Serialize, Deserialize, Debug)]
pub(crate) struct Sheet {
/// Value of the `name` attribute.
pub(crate) name: String,
/// Value of the `sheetId` attribute.
pub(crate) sheet_id: u32,
/// Relationship id (the `id` attribute in the relationships namespace); used as a key into the workbook relationships.
pub(crate) id: String,
/// Visibility taken from the `state` attribute (visible when absent).
pub(crate) state: SheetState,
}
/// The information extracted from `xl/workbook.xml`.
#[derive(Serialize, Deserialize, Debug)]
pub(crate) struct WorkbookXML {
/// The `<sheet>` entries in document order.
pub(crate) worksheets: Vec<Sheet>,
/// The `<definedName>` entries in document order.
pub(crate) defined_names: Vec<DefinedName>,
}
/// A workbook relationship: where a related part lives and what kind of part it is.
#[derive(Serialize, Deserialize, Debug)]
pub(crate) struct Relationship {
/// Location of the related part; a leading `/` is treated as archive-absolute, anything else as relative to `xl/`.
pub(crate) target: String,
/// Relationship type; worksheet relationships are recognised by the suffix `worksheet`.
pub(crate) rel_type: String,
}
/// Strips every ASCII digit from a cell reference, leaving the column part
/// (`"AB12"` becomes `"AB"`). All other characters are kept as they are.
fn get_column_from_ref(s: &str) -> String {
    s.chars().filter(|ch| !ch.is_ascii_digit()).collect()
}
fn load_dimension(ws: Node) -> String {
// <dimension ref="A1:O18"/>
let application_nodes = ws
.children()
.filter(|n| n.has_tag_name("dimension"))
.collect::<Vec<Node>>();
if application_nodes.len() == 1 {
application_nodes[0]
.attribute("ref")
.unwrap_or("A1")
.to_string()
} else {
"A1".to_string()
}
}
/// Reads the column definitions from the worksheet's `<cols>` element.
///
/// Returns an empty list when the worksheet does not have exactly one `<cols>`
/// child.
///
/// # Errors
///
/// Returns an error if a `<col>` element lacks `min`, `max` or `width`, or if
/// any of those cannot be parsed as a number.
fn load_columns(ws: Node) -> Result<Vec<Col>, XlsxError> {
    // cols
    // <cols>
    //     <col min="5" max="5" width="38.26953125" customWidth="1"/>
    //     <col min="6" max="6" width="9.1796875" style="1"/>
    //     <col min="8" max="8" width="4" customWidth="1"/>
    // </cols>
    let mut cols = Vec::new();
    let columns = ws
        .children()
        .filter(|n| n.has_tag_name("cols"))
        .collect::<Vec<Node>>();
    if columns.len() == 1 {
        // `children()` also yields text and comment nodes (e.g. the whitespace
        // of pretty-printed XML); only `<col>` elements carry the attributes.
        for col in columns[0].children().filter(|n| n.has_tag_name("col")) {
            let min = get_attribute(&col, "min")?.parse::<i32>()?;
            let max = get_attribute(&col, "max")?.parse::<i32>()?;
            let width = get_attribute(&col, "width")?.parse::<f64>()?;
            let custom_width = matches!(col.attribute("customWidth"), Some("1"));
            // An unparsable style index falls back to the default style 0.
            let style = col
                .attribute("style")
                .map(|s| s.parse::<i32>().unwrap_or(0));
            cols.push(Col {
                min,
                max,
                width,
                custom_width,
                style,
            })
        }
    }
    Ok(cols)
}
/// Reads the merged ranges (`ref` attributes) from the worksheet's
/// `<mergeCells>` element.
///
/// Returns an empty list when the worksheet does not have exactly one
/// `<mergeCells>` child.
///
/// # Errors
///
/// Returns an error if a `<mergeCell>` element has no `ref` attribute.
fn load_merge_cells(ws: Node) -> Result<Vec<String>, XlsxError> {
    // 18.3.1.55 Merge Cells
    // <mergeCells count="1">
    //    <mergeCell ref="K7:L10"/>
    // </mergeCells>
    let merge_cells_nodes = ws
        .children()
        .filter(|n| n.has_tag_name("mergeCells"))
        .collect::<Vec<Node>>();
    if merge_cells_nodes.len() != 1 {
        return Ok(Vec::new());
    }
    // `children()` also yields text and comment nodes (e.g. the whitespace of
    // pretty-printed XML), which have no `ref`; skip everything but `<mergeCell>`.
    merge_cells_nodes[0]
        .children()
        .filter(|n| n.has_tag_name("mergeCell"))
        .map(|merge_cell| Ok(get_attribute(&merge_cell, "ref")?.to_string()))
        .collect()
}
/// Reads the tab color of the worksheet, if any.
///
/// The color is only looked up when the worksheet has exactly one `<sheetPr>`
/// child which in turn has exactly one `<tabColor>` child; otherwise `None` is
/// returned.
fn load_sheet_color(ws: Node) -> Result<Option<String>, XlsxError> {
    // <sheetPr>
    //     <tabColor theme="5" tint="-0.249977111117893"/>
    // </sheetPr>
    let sheet_properties = ws
        .children()
        .filter(|n| n.has_tag_name("sheetPr"))
        .collect::<Vec<Node>>();
    if sheet_properties.len() != 1 {
        return Ok(None);
    }
    let tab_colors = sheet_properties[0]
        .children()
        .filter(|n| n.has_tag_name("tabColor"))
        .collect::<Vec<Node>>();
    if tab_colors.len() != 1 {
        return Ok(None);
    }
    let color = get_color(tab_colors[0])?;
    Ok(color)
}
/// Loads the cell comments stored in the comments part at `path`.
///
/// The text of a comment is the concatenation of all its `<t>` descendants.
/// Returns an empty list when the part does not have exactly one
/// `<commentList>` element.
///
/// # Errors
///
/// Returns an error if the part cannot be read or parsed, or if a `<comment>`
/// element has no `ref` attribute.
fn load_comments<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
    path: &str,
) -> Result<Vec<Comment>, XlsxError> {
    let mut comments = Vec::new();
    let mut file = archive.by_name(path)?;
    let mut xml = String::new();
    file.read_to_string(&mut xml)?;
    let doc = roxmltree::Document::parse(&xml)?;
    let ws = doc
        .root()
        .first_child()
        .ok_or_else(|| XlsxError::Xml("Corrupt XML structure".to_string()))?;
    let comment_list = ws
        .children()
        .filter(|n| n.has_tag_name("commentList"))
        .collect::<Vec<Node>>();
    if comment_list.len() == 1 {
        // `children()` also yields text and comment nodes (e.g. the whitespace
        // of pretty-printed XML); only `<comment>` elements are of interest.
        for comment in comment_list[0]
            .children()
            .filter(|n| n.has_tag_name("comment"))
        {
            // An empty run (`<t/>`) has no text node: treat it as an empty
            // string instead of panicking.
            let text = comment
                .descendants()
                .filter(|n| n.has_tag_name("t"))
                .map(|n| n.text().unwrap_or(""))
                .collect::<String>();
            let cell_ref = get_attribute(&comment, "ref")?.to_string();
            // TODO: Read author_name from the list of authors
            let author_name = "".to_string();
            comments.push(Comment {
                text,
                author_name,
                author_id: None,
                cell_ref,
            });
        }
    }
    Ok(comments)
}
/// Reasons why a qualified cell reference such as `Sheet1!AS23` could not be parsed.
#[derive(Error, Debug, PartialEq, Eq)]
enum ParseReferenceError {
/// The row part of the reference is not a valid integer.
#[error("RowError: {0}")]
RowError(ParseIntError),
/// The column part could not be converted to a column number; carries the message reported by the conversion.
#[error("ColumnError: {0}")]
ColumnError(String),
}
/// Parses a qualified reference such as `Sheet1!AS23` into sheet, column and row.
///
/// The sheet name is everything before the *last* `!` (a cell reference never
/// contains one, a sheet name may). The column is the leading run of ASCII
/// letters of the cell part and the row is whatever follows it.
///
/// # Errors
///
/// * `ParseReferenceError::RowError` if the row part is not a valid integer
///   (this includes input without any `!`).
/// * `ParseReferenceError::ColumnError` if the column letters cannot be
///   converted to a column number.
// FIXME: Does not check that the sheet name is valid.
// There is a similar named function in ironcalc_base. We probably should fix both at the same time.
fn parse_reference(s: &str) -> Result<CellReferenceRC, ParseReferenceError> {
    // Work on `&str` slices rather than casting bytes to `char`, which would
    // corrupt any non-ASCII sheet name.
    let (sheet_name, cell) = s.rsplit_once('!').unwrap_or((s, ""));
    let column_end = cell
        .find(|c: char| !c.is_ascii_alphabetic())
        .unwrap_or(cell.len());
    let (column, row) = cell.split_at(column_end);
    Ok(CellReferenceRC {
        sheet: sheet_name.to_string(),
        row: row.parse::<i32>().map_err(ParseReferenceError::RowError)?,
        column: column_to_number(column).map_err(ParseReferenceError::ColumnError)?,
    })
}
fn from_a1_to_rc(
formula: String,
worksheets: &[String],
context: String,
tables: HashMap<String, Table>,
) -> Result<String, XlsxError> {
let mut parser = Parser::new(worksheets.to_owned(), tables);
let cell_reference =
parse_reference(&context).map_err(|error| XlsxError::Xml(error.to_string()))?;
let t = parser.parse(&formula, &Some(cell_reference));
Ok(to_rc_format(&t))
}
/// Returns the position of the first entry of `shared_formulas` equal to
/// `formula`, or `None` when there is no such entry.
fn get_formula_index(formula: &str, shared_formulas: &[String]) -> Option<i32> {
    shared_formulas
        .iter()
        .position(|candidate| candidate == formula)
        .map(|index| index as i32)
}
// FIXME
#[allow(clippy::too_many_arguments)]
fn get_cell_from_excel(
cell_value: Option<&str>,
value_metadata: Option<&str>,
cell_type: &str,
cell_style: i32,
formula_index: i32,
sheet_name: &str,
cell_ref: &str,
shared_strings: &mut Vec<String>,
) -> Cell {
// Possible cell types:
// 18.18.11 ST_CellType (Cell Type)
// b (Boolean)
// d (Date)
// e (Error)
// inlineStr (Inline String)
// n (Number)
// s (Shared String)
// str (String)
if formula_index == -1 {
match cell_type {
"b" => Cell::BooleanCell {
v: cell_value == Some("1"),
s: cell_style,
},
"n" => Cell::NumberCell {
v: cell_value.unwrap_or("0").parse::<f64>().unwrap_or(0.0),
s: cell_style,
},
"e" => {
// For compatibility reasons Excel does not put the value #SPILL! but adds it as a metadata
// Older engines would just import #VALUE!
let mut error_name = cell_value.unwrap_or("#ERROR!");
if error_name == "#VALUE!" && value_metadata.is_some() {
error_name = match value_metadata {
Some("1") => "#CALC!",
Some("2") => "#SPILL!",
_ => error_name,
}
}
Cell::ErrorCell {
ei: get_error_by_english_name(error_name).unwrap_or(Error::ERROR),
s: cell_style,
}
}
"s" => Cell::SharedString {
si: cell_value.unwrap_or("0").parse::<i32>().unwrap_or(0),
s: cell_style,
},
"str" => {
let s = cell_value.unwrap_or("");
let si = if let Some(i) = shared_strings.iter().position(|r| r == s) {
i
} else {
shared_strings.push(s.to_string());
shared_strings.len() - 1
} as i32;
Cell::SharedString { si, s: cell_style }
}
"d" => {
// Not implemented
println!("Invalid type (d) in {}!{}", sheet_name, cell_ref);
Cell::ErrorCell {
ei: Error::NIMPL,
s: cell_style,
}
}
"inlineStr" => {
// Not implemented
println!("Invalid type (inlineStr) in {}!{}", sheet_name, cell_ref);
Cell::ErrorCell {
ei: Error::NIMPL,
s: cell_style,
}
}
"empty" => Cell::EmptyCell { s: cell_style },
_ => {
// error
println!(
"Unexpected type ({}) in {}!{}",
cell_type, sheet_name, cell_ref
);
Cell::ErrorCell {
ei: Error::ERROR,
s: cell_style,
}
}
}
} else {
match cell_type {
"b" => Cell::CellFormulaBoolean {
f: formula_index,
v: cell_value == Some("1"),
s: cell_style,
},
"n" => Cell::CellFormulaNumber {
f: formula_index,
v: cell_value.unwrap_or("0").parse::<f64>().unwrap_or(0.0),
s: cell_style,
},
"e" => {
// For compatibility reasons Excel does not put the value #SPILL! but adds it as a metadata
// Older engines would just import #VALUE!
let mut error_name = cell_value.unwrap_or("#ERROR!");
if error_name == "#VALUE!" && value_metadata.is_some() {
error_name = match value_metadata {
Some("1") => "#CALC!",
Some("2") => "#SPILL!",
_ => error_name,
}
}
Cell::CellFormulaError {
f: formula_index,
ei: get_error_by_english_name(error_name).unwrap_or(Error::ERROR),
s: cell_style,
o: format!("{}!{}", sheet_name, cell_ref),
m: cell_value.unwrap_or("#ERROR!").to_string(),
}
}
"s" => {
// Not implemented
let o = format!("{}!{}", sheet_name, cell_ref);
let m = Error::NIMPL.to_string();
println!("Invalid type (s) in {}!{}", sheet_name, cell_ref);
Cell::CellFormulaError {
f: formula_index,
ei: Error::NIMPL,
s: cell_style,
o,
m,
}
}
"str" => {
// In Excel and in IronCalc all strings in cells result of a formula are *not* shared strings.
Cell::CellFormulaString {
f: formula_index,
v: cell_value.unwrap_or("").to_string(),
s: cell_style,
}
}
"d" => {
// Not implemented
println!("Invalid type (d) in {}!{}", sheet_name, cell_ref);
let o = format!("{}!{}", sheet_name, cell_ref);
let m = Error::NIMPL.to_string();
Cell::CellFormulaError {
f: formula_index,
ei: Error::NIMPL,
s: cell_style,
o,
m,
}
}
"inlineStr" => {
// Not implemented
let o = format!("{}!{}", sheet_name, cell_ref);
let m = Error::NIMPL.to_string();
println!("Invalid type (inlineStr) in {}!{}", sheet_name, cell_ref);
Cell::CellFormulaError {
f: formula_index,
ei: Error::NIMPL,
s: cell_style,
o,
m,
}
}
_ => {
// error
println!(
"Unexpected type ({}) in {}!{}",
cell_type, sheet_name, cell_ref
);
let o = format!("{}!{}", sheet_name, cell_ref);
let m = Error::ERROR.to_string();
Cell::CellFormulaError {
f: formula_index,
ei: Error::ERROR,
s: cell_style,
o,
m,
}
}
}
}
}
/// Resolves a relationship `Target` against `base_dir`, the directory of the
/// part that owns the relationship.
///
/// A target starting with `/` is absolute inside the archive; otherwise `.`
/// and `..` segments are applied relative to `base_dir`.
fn resolve_relationship_target(base_dir: &str, target: &str) -> String {
    if let Some(absolute) = target.strip_prefix('/') {
        return absolute.to_string();
    }
    let mut segments: Vec<&str> = base_dir.split('/').filter(|s| !s.is_empty()).collect();
    for segment in target.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                segments.pop();
            }
            name => segments.push(name),
        }
    }
    segments.join("/")
}

/// Loads the parts related to the worksheet at `path`: its tables are inserted
/// into `tables` (keyed by table name) and its comments are returned.
///
/// A worksheet without a relationships part has neither, so an empty list is
/// returned in that case.
///
/// # Errors
///
/// Returns an error if the relationships part cannot be read or parsed, if a
/// relationship lacks `Type` or `Target`, or if a referenced comments or table
/// part fails to load.
fn load_sheet_rels<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
    path: &str,
    tables: &mut HashMap<String, Table>,
    sheet_name: &str,
) -> Result<Vec<Comment>, XlsxError> {
    // ...xl/worksheets/sheet6.xml -> xl/worksheets/_rels/sheet6.xml.rels
    let mut comments = Vec::new();
    let (sheet_dir, sheet_file) = path.rsplit_once('/').unwrap_or(("", path));
    let rels_path = if sheet_dir.is_empty() {
        format!("_rels/{}.rels", sheet_file)
    } else {
        format!("{}/_rels/{}.rels", sheet_dir, sheet_file)
    };

    let mut text = String::new();
    match archive.by_name(&rels_path) {
        Ok(mut file) => {
            file.read_to_string(&mut text)?;
        }
        // Best effort: a sheet that has no relationships part has nothing to load.
        Err(_) => return Ok(comments),
    }
    let doc = roxmltree::Document::parse(&text)?;
    let relationships = doc
        .root()
        .first_child()
        .ok_or_else(|| XlsxError::Xml("Corrupt XML structure".to_string()))?;
    // `children()` also yields text and comment nodes (e.g. the whitespace of
    // pretty-printed XML); only `<Relationship>` elements are of interest.
    for rel in relationships
        .children()
        .filter(|n| n.has_tag_name("Relationship"))
    {
        let rel_type = get_attribute(&rel, "Type")?;
        if rel_type.ends_with("comments") {
            // Target="../comments1.xml"
            let target = resolve_relationship_target(sheet_dir, get_attribute(&rel, "Target")?);
            comments = load_comments(archive, &target)?;
        } else if rel_type.ends_with("table") {
            // Target="../tables/table1.xml"
            let target = resolve_relationship_target(sheet_dir, get_attribute(&rel, "Target")?);
            let table = load_table(archive, &target, sheet_name)?;
            tables.insert(table.name.clone(), table);
        }
    }
    Ok(comments)
}
/// Returns `(frozen_rows, frozen_columns)` for the worksheet.
///
/// The values come from the `ySplit` and `xSplit` attributes of the `<pane>`
/// element. They are only read when there is exactly one `<sheetViews>`, one
/// `<sheetView>` and one `<pane>`, and the pane `state` is `frozen`; in every
/// other case the result is `(0, 0)`.
fn get_frozen_rows_and_columns(ws: Node) -> (i32, i32) {
    // <sheetViews>
    //   <sheetView workbookViewId="0">
    //     <selection activeCell="E10" sqref="E10"/>
    //   </sheetView>
    // </sheetViews>
    // <sheetFormatPr defaultRowHeight="14.5" x14ac:dyDescent="0.35"/>

    // If we have frozen rows and columns:

    // <sheetView tabSelected="1" workbookViewId="0">
    //   <pane xSplit="3" ySplit="2" topLeftCell="D3" activePane="bottomRight" state="frozen"/>
    //   <selection pane="topRight" activeCell="D1" sqref="D1"/>
    //   <selection pane="bottomLeft" activeCell="A3" sqref="A3"/>
    //   <selection pane="bottomRight" activeCell="K16" sqref="K16"/>
    // </sheetView>

    // 18.18.52 ST_Pane (Pane Types)
    // bottomLeft, bottomRight, topLeft, topRight
    // NB: bottomLeft is used when only rows are frozen, etc
    // Calc ignores all those.

    // In Calc there can only be one sheetView
    let views_containers = ws
        .children()
        .filter(|n| n.has_tag_name("sheetViews"))
        .collect::<Vec<Node>>();
    if views_containers.len() != 1 {
        return (0, 0);
    }
    let views = views_containers[0]
        .children()
        .filter(|n| n.has_tag_name("sheetView"))
        .collect::<Vec<Node>>();
    if views.len() != 1 {
        return (0, 0);
    }
    let panes = views[0]
        .children()
        .filter(|n| n.has_tag_name("pane"))
        .collect::<Vec<Node>>();
    if panes.len() != 1 {
        return (0, 0);
    }
    let pane = panes[0];

    // 18.18.53 ST_PaneState (Pane State)
    // frozen, frozenSplit, split
    if pane.attribute("state").unwrap_or("split") != "frozen" {
        return (0, 0);
    }
    // TODO: Should we assert that topLeft is consistent?
    // let top_left_cell = pane.attribute("topLeftCell").unwrap_or("A1").to_string();
    let frozen_columns = get_number(pane, "xSplit");
    let frozen_rows = get_number(pane, "ySplit");
    (frozen_rows, frozen_columns)
}
/// Per-sheet information gathered from the workbook before the worksheet part itself is loaded.
pub(super) struct SheetSettings {
/// The `sheetId` of the sheet as declared in the workbook.
pub id: u32,
/// The name of the sheet.
pub name: String,
/// The visibility of the sheet.
pub state: SheetState,
/// The comments already loaded for this sheet from its related parts.
pub comments: Vec<Comment>,
}
/// Loads the worksheet part at `path` into a `Worksheet`.
///
/// * `settings` carries the id, name, state and comments of the sheet.
/// * `worksheets` is the list of all sheet names and `tables` the tables of the
///   workbook; both are needed to parse formulas.
/// * `shared_strings` is the shared strings table; literal `str` cells may
///   append to it.
///
/// All formulas are stored once in `shared_formulas` in R1C1 form and cells
/// refer to them by index.
///
/// # Errors
///
/// Returns an error if the part cannot be read or parsed, if `<sheetData>` is
/// missing, if a required attribute is missing or malformed, or if the sheet
/// uses array or data table formulas (not implemented).
pub(super) fn load_sheet<R: Read + std::io::Seek>(
    archive: &mut zip::read::ZipArchive<R>,
    path: &str,
    settings: SheetSettings,
    worksheets: &[String],
    tables: &HashMap<String, Table>,
    shared_strings: &mut Vec<String>,
) -> Result<Worksheet, XlsxError> {
    let sheet_name = &settings.name;
    let sheet_id = settings.id;
    let state = &settings.state;

    let mut file = archive.by_name(path)?;
    let mut text = String::new();
    file.read_to_string(&mut text)?;
    let doc = roxmltree::Document::parse(&text)?;
    let ws = doc
        .root()
        .first_child()
        .ok_or_else(|| XlsxError::Xml("Corrupt XML structure".to_string()))?;
    let mut shared_formulas: Vec<String> = Vec::new();

    let dimension = load_dimension(ws);

    let (frozen_rows, frozen_columns) = get_frozen_rows_and_columns(ws);

    let cols = load_columns(ws)?;
    let color = load_sheet_color(ws)?;

    // sheetData
    // <row r="1" spans="1:15" x14ac:dyDescent="0.35">
    //     <c r="A1" t="s">
    //         <v>0</v>
    //     </c>
    //     <c r="D1">
    //         <f>C1+1</f>
    //     </c>
    // </row>

    // holds the row heights
    let mut rows = Vec::new();
    let mut sheet_data = SheetData::new();
    // A worksheet without <sheetData> is corrupt: report it instead of panicking.
    let sheet_data_node = ws
        .children()
        .find(|n| n.has_tag_name("sheetData"))
        .ok_or_else(|| XlsxError::Xml("Missing sheetData element".to_string()))?;

    let default_row_height = 14.5;

    // holds a map from the formula index in Excel to the index in IronCalc
    let mut index_map: HashMap<i32, i32> = HashMap::new();
    // `children()` also yields text and comment nodes (e.g. the whitespace of
    // pretty-printed XML); only <row> elements are rows.
    for row in sheet_data_node
        .children()
        .filter(|n| n.has_tag_name("row"))
    {
        // This is the row number 1-indexed
        let row_index = get_attribute(&row, "r")?.parse::<i32>()?;
        // `spans` is not used in IronCalc at the moment (it's an optimization)
        // let spans = row.attribute("spans");
        // This is the height of the row
        let height_attribute = row.attribute("ht");
        let has_height_attribute = height_attribute.is_some();
        let height = height_attribute
            .and_then(|s| s.parse::<f64>().ok())
            .unwrap_or(default_row_height);
        let custom_height = matches!(row.attribute("customHeight"), Some("1"));
        // The height of the row is always the visible height of the row
        // If custom_height is false that means the height was calculated automatically:
        // for example because a cell has many lines or a larger font

        let row_style = row
            .attribute("s")
            .and_then(|s| s.parse::<i32>().ok())
            .unwrap_or(0);
        let custom_format = matches!(row.attribute("customFormat"), Some("1"));
        let hidden = matches!(row.attribute("hidden"), Some("1"));

        if custom_height || custom_format || row_style != 0 || has_height_attribute || hidden {
            rows.push(Row {
                r: row_index,
                height,
                s: row_style,
                custom_height,
                custom_format,
                hidden,
            });
        }

        // Unused attributes:
        // * thickBot, thickTop, ph, collapsed, outlineLevel

        let mut data_row = HashMap::new();

        // 18.3.1.4 c (Cell)
        // Child Elements:
        // * v: Cell value
        // * is: Rich Text Inline (not used in IronCalc)
        // * f: Formula
        // Attributes:
        // r: reference. A1 style
        // s: style index
        // t: cell type
        // Unused attributes
        // cm (cell metadata), ph (Show Phonetic), vm (value metadata)

        // A row may also contain text nodes or an <extLst>; only <c> elements are cells.
        for cell in row.children().filter(|n| n.has_tag_name("c")) {
            let cell_ref = get_attribute(&cell, "r")?;
            let column_letter = get_column_from_ref(cell_ref);
            let column = column_to_number(column_letter.as_str()).map_err(XlsxError::Xml)?;

            let value_metadata = cell.attribute("vm");

            // We check the value "v" child.
            let vs: Vec<Node> = cell.children().filter(|n| n.has_tag_name("v")).collect();
            let cell_value = if vs.len() == 1 {
                Some(vs[0].text().unwrap_or(""))
            } else {
                None
            };

            // type, the default type being "n" for number
            // If the cell does not have a value is an empty cell
            let cell_type = match cell.attribute("t") {
                Some(t) => t,
                None => {
                    if cell_value.is_none() {
                        "empty"
                    } else {
                        "n"
                    }
                }
            };

            // style index, the default style is 0
            let cell_style = cell
                .attribute("s")
                .and_then(|s| s.parse::<i32>().ok())
                .unwrap_or(0);

            // Check for formula
            // In Excel some formulas are shared and some are not, but in IronCalc all formulas are shared
            // A cell with a "non-shared" formula is like:
            // <c r="E3">
            //   <f>C2+1</f>
            //   <v>3</v>
            // </c>
            // A cell with a shared formula will be either a "mother" cell:
            // <c r="D2">
            //   <f t="shared" ref="D2:D3" si="0">C2+1</f>
            //   <v>3</v>
            // </c>
            // Or a "daughter" cell:
            // <c r="D3">
            //   <f t="shared" si="0"/>
            //   <v>4</v>
            // </c>
            // In IronCalc two cells have the same formula iff the R1C1 representation is the same
            // TODO: This algorithm could end up with "repeated" shared formulas
            //       We could solve that with a second transversal.
            let fs: Vec<Node> = cell.children().filter(|n| n.has_tag_name("f")).collect();
            let mut formula_index = -1;
            if fs.len() == 1 {
                // formula types:
                // 18.18.6 ST_CellFormulaType (Formula Type)
                // array (Array Formula) Formula is an array formula.
                // dataTable (Table Formula) Formula is a data table formula.
                // normal (Normal) Formula is a regular cell formula. (Default)
                // shared (Shared Formula) Formula is part of a shared formula.
                let formula_type = fs[0].attribute("t").unwrap_or("normal");
                match formula_type {
                    "shared" => {
                        // We have a shared formula
                        let si = get_attribute(&fs[0], "si")?.parse::<i32>()?;
                        match fs[0].attribute("ref") {
                            Some(_) => {
                                // It's the mother cell. We do not use the ref attribute in IronCalc
                                let formula = fs[0].text().unwrap_or("").to_string();
                                let context = format!("{}!{}", sheet_name, cell_ref);
                                let formula =
                                    from_a1_to_rc(formula, worksheets, context, tables.clone())?;
                                match index_map.get(&si) {
                                    Some(index) => {
                                        // The index for that formula already exists meaning we bumped into a daughter cell first
                                        // TODO: Worth assert the content is a placeholder?
                                        formula_index = *index;
                                        // Overwrite the placeholder in place. `Vec::insert` would
                                        // shift every later formula and invalidate the indices
                                        // already stored in other cells. The index is valid
                                        // because every value of `index_map` was obtained from a
                                        // push to `shared_formulas`.
                                        shared_formulas[formula_index as usize] = formula;
                                    }
                                    None => {
                                        // We haven't met any of the daughter cells
                                        match get_formula_index(&formula, &shared_formulas) {
                                            // The formula is already present, use that index
                                            Some(index) => {
                                                formula_index = index;
                                            }
                                            None => {
                                                shared_formulas.push(formula);
                                                formula_index = shared_formulas.len() as i32 - 1;
                                            }
                                        };
                                        index_map.insert(si, formula_index);
                                    }
                                }
                            }
                            None => {
                                // It's a daughter cell
                                match index_map.get(&si) {
                                    Some(index) => {
                                        formula_index = *index;
                                    }
                                    None => {
                                        // Haven't bumped into the mother cell yet. We insert a placeholder.
                                        // Note that it is perfectly possible that the formula of the mother cell
                                        // is already in the set of shared formulas. This will lead to the
                                        // duplication mentioned above. This is not a problem.
                                        let placeholder = "".to_string();
                                        shared_formulas.push(placeholder);
                                        formula_index = shared_formulas.len() as i32 - 1;
                                        index_map.insert(si, formula_index);
                                    }
                                }
                            }
                        }
                    }
                    "array" => {
                        return Err(XlsxError::NotImplemented("array formulas".to_string()));
                    }
                    "dataTable" => {
                        return Err(XlsxError::NotImplemented("data table formulas".to_string()));
                    }
                    "normal" => {
                        // Its a cell with a simple formula
                        let formula = fs[0].text().unwrap_or("").to_string();
                        let context = format!("{}!{}", sheet_name, cell_ref);
                        let formula = from_a1_to_rc(formula, worksheets, context, tables.clone())?;
                        match get_formula_index(&formula, &shared_formulas) {
                            Some(index) => formula_index = index,
                            None => {
                                shared_formulas.push(formula);
                                formula_index = shared_formulas.len() as i32 - 1;
                            }
                        }
                    }
                    _ => {
                        return Err(XlsxError::Xml(format!(
                            "Invalid formula type {:?}.",
                            formula_type,
                        )));
                    }
                }
            }
            let cell = get_cell_from_excel(
                cell_value,
                value_metadata,
                cell_type,
                cell_style,
                formula_index,
                sheet_name,
                cell_ref,
                shared_strings,
            );
            data_row.insert(column, cell);
        }
        sheet_data.insert(row_index, data_row);
    }

    let merge_cells = load_merge_cells(ws)?;

    // Conditional Formatting
    // <conditionalFormatting sqref="B1:B9">
    //     <cfRule type="colorScale" priority="1">
    //         <colorScale>
    //             <cfvo type="min"/>
    //             <cfvo type="max"/>
    //             <color rgb="FFF8696B"/>
    //             <color rgb="FFFCFCFF"/>
    //         </colorScale>
    //     </cfRule>
    // </conditionalFormatting>
    // pageSetup
    // <pageSetup orientation="portrait" r:id="rId1"/>

    Ok(Worksheet {
        dimension,
        cols,
        rows,
        shared_formulas,
        sheet_data,
        name: sheet_name.to_string(),
        sheet_id,
        state: state.to_owned(),
        color,
        merge_cells,
        comments: settings.comments,
        frozen_rows,
        frozen_columns,
    })
}
pub(super) fn load_sheets<R: Read + std::io::Seek>(
archive: &mut zip::read::ZipArchive<R>,
rels: &HashMap<String, Relationship>,
workbook: &WorkbookXML,
tables: &mut HashMap<String, Table>,
shared_strings: &mut Vec<String>,
) -> Result<Vec<Worksheet>, XlsxError> {
// load comments and tables
let mut comments = HashMap::new();
for sheet in &workbook.worksheets {
let rel = &rels[&sheet.id];
if rel.rel_type.ends_with("worksheet") {
let path = &rel.target;
let path = if let Some(p) = path.strip_prefix('/') {
p.to_string()
} else {
format!("xl/{path}")
};
comments.insert(
&sheet.id,
load_sheet_rels(archive, &path, tables, &sheet.name)?,
);
}
}
// load all sheets
let worksheets: &Vec<String> = &workbook.worksheets.iter().map(|s| s.name.clone()).collect();
let mut sheets = Vec::new();
for sheet in &workbook.worksheets {
let sheet_name = &sheet.name;
let rel_id = &sheet.id;
let state = &sheet.state;
let rel = &rels[rel_id];
if rel.rel_type.ends_with("worksheet") {
let path = &rel.target;
let path = if let Some(p) = path.strip_prefix('/') {
p.to_string()
} else {
format!("xl/{path}")
};
let settings = SheetSettings {
name: sheet_name.to_string(),
id: sheet.sheet_id,
state: state.clone(),
comments: comments.get(rel_id).expect("").to_vec(),
};
sheets.push(load_sheet(
archive,
&path,
settings,
worksheets,
tables,
shared_strings,
)?);
}
}
Ok(sheets)
}