From 196e074ef52984692fbede953d62252e43a76271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Hatcher=20Andr=C3=A9s?= Date: Thu, 11 Apr 2024 23:45:00 +0200 Subject: [PATCH] UPDATE: Serialize/Deserialize with bitcode (#31) * UPDATE: Serialize/Deserialize with bitcode Fixes #12 --- Cargo.lock | 29 ++++++++++ base/Cargo.toml | 1 + base/src/expressions/token.rs | 3 +- base/src/model.rs | 26 ++++----- base/src/test/user_model/test_general.rs | 21 +++++++ .../src/test/user_model/test_to_from_bytes.rs | 2 +- base/src/types.rs | 57 ++++++++++--------- base/src/user_model.rs | 22 ++++--- xlsx/Cargo.toml | 1 + xlsx/src/bin/xlsx_2_icalc.rs | 26 +++++++++ xlsx/tests/test.rs | 2 +- 11 files changed, 139 insertions(+), 51 deletions(-) create mode 100644 xlsx/src/bin/xlsx_2_icalc.rs diff --git a/Cargo.lock b/Cargo.lock index 68dfac0..1f73b63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,6 +55,27 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "bitcode" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48bc1c27654127a24c476d40198746860ef56475f41a601bfa5c4d0f832968f0" +dependencies = [ + "bitcode_derive", + "bytemuck", +] + +[[package]] +name = "bitcode_derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2966755a19aad59ee2aae91e2d48842c667a99d818ec72168efdab07200701cc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -70,6 +91,12 @@ version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +[[package]] +name = "bytemuck" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" + [[package]] name = "byteorder" version = "1.5.0" @@ -317,6 +344,7 @@ dependencies = [ name = "ironcalc" version = "0.1.3" dependencies = [ + "bitcode", "chrono", "ironcalc_base", "itertools", @@ -332,6 +360,7 @@ dependencies = [ name = "ironcalc_base" version = "0.1.3" dependencies = [ + "bitcode", "chrono", "chrono-tz", "js-sys", diff --git a/base/Cargo.toml b/base/Cargo.toml index aa19109..629293a 100644 --- a/base/Cargo.toml +++ b/base/Cargo.toml @@ -19,6 +19,7 @@ chrono = "0.4" chrono-tz = "0.9" regex = "1.0" once_cell = "1.16.0" +bitcode = "0.6.0" [target.'cfg(target_arch = "wasm32")'.dependencies] js-sys = { version = "0.3.69" } diff --git a/base/src/expressions/token.rs b/base/src/expressions/token.rs index ef6abe6..b4e45ac 100644 --- a/base/src/expressions/token.rs +++ b/base/src/expressions/token.rs @@ -1,5 +1,6 @@ use std::fmt; +use bitcode::{Decode, Encode}; use serde::{Deserialize, Serialize}; use serde_repr::{Deserialize_repr, Serialize_repr}; @@ -80,7 +81,7 @@ impl fmt::Display for OpProduct { /// * "#ERROR!" means there was an error processing the formula (for instance "=A1+") /// * "#N/IMPL!" 
means the formula or feature in Excel but has not been implemented in IronCalc /// Note that they are serialized/deserialized by index -#[derive(Serialize_repr, Deserialize_repr, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize_repr, Deserialize_repr, Encode, Decode, Debug, PartialEq, Eq, Clone)] #[repr(u8)] pub enum Error { REF, diff --git a/base/src/model.rs b/base/src/model.rs index c44e3c1..eb0f8a2 100644 --- a/base/src/model.rs +++ b/base/src/model.rs @@ -1,7 +1,5 @@ #![deny(missing_docs)] -use serde_json::json; - use std::collections::HashMap; use std::vec::Vec; @@ -810,7 +808,7 @@ impl Model { /// # fn main() -> Result<(), Box> { /// let mut model = Model::new_empty("model", "en", "UTC")?; /// model.set_user_input(0, 1, 1, "Stella!".to_string()); - /// let model2 = Model::from_json(&model.to_json_str())?; + /// let model2 = Model::from_bytes(&model.to_bytes())?; /// assert_eq!( /// model2.get_cell_value_by_index(0, 1, 1), /// Ok(CellValue::String("Stella!".to_string())) @@ -818,9 +816,9 @@ impl Model { /// # Ok(()) /// # } /// ``` - pub fn from_json(s: &str) -> Result { + pub fn from_bytes(s: &[u8]) -> Result { let workbook: Workbook = - serde_json::from_str(s).map_err(|_| "Error parsing workbook".to_string())?; + bitcode::decode(s).map_err(|_| "Error parsing workbook".to_string())?; Model::from_workbook(workbook) } @@ -1596,6 +1594,14 @@ impl Model { } } + /// Returns the type of a cell. Empty cells are reported as `CellType::Number` + pub fn get_cell_type(&self, sheet: u32, row: i32, column: i32) -> Result { + Ok(match self.workbook.worksheet(sheet)?.cell(row, column) { + Some(c) => c.get_type(), + None => CellType::Number, + }) + } + /// Returns a string with the cell content.
If there is a formula returns the formula /// If the cell is empty returns the empty string /// Raises an error if there is no worksheet @@ -1755,14 +1761,8 @@ impl Model { } - /// Returns a JSON string of the workbook - pub fn to_json_str(&self) -> String { - match serde_json::to_string(&self.workbook) { - Ok(s) => s, - Err(_) => { - // TODO, is this branch possible at all? - json!({"error": "Error stringifying workbook"}).to_string() - } - } + /// Returns the workbook serialized as bytes (bitcode encoded) + pub fn to_bytes(&self) -> Vec { + bitcode::encode(&self.workbook) } /// Returns data about the worksheets diff --git a/base/src/test/user_model/test_general.rs b/base/src/test/user_model/test_general.rs index 7f562e7..d284692 100644 --- a/base/src/test/user_model/test_general.rs +++ b/base/src/test/user_model/test_general.rs @@ -2,6 +2,7 @@ use crate::constants::{LAST_COLUMN, LAST_ROW}; use crate::test::util::new_empty_model; +use crate::types::CellType; use crate::UserModel; #[test] @@ -29,6 +30,26 @@ fn user_model_debug_message() { assert_eq!(s, "UserModel"); } +#[test] +fn cell_type() { + let mut model = UserModel::new_empty("model", "en", "UTC").unwrap(); + model.set_user_input(0, 1, 1, "1").unwrap(); + model.set_user_input(0, 1, 2, "Wish you were here").unwrap(); + model.set_user_input(0, 1, 3, "true").unwrap(); + model.set_user_input(0, 1, 4, "=1/0").unwrap(); + + assert_eq!(model.get_cell_type(0, 1, 1).unwrap(), CellType::Number); + assert_eq!(model.get_cell_type(0, 1, 2).unwrap(), CellType::Text); + assert_eq!( + model.get_cell_type(0, 1, 3).unwrap(), + CellType::LogicalValue + ); + assert_eq!(model.get_cell_type(0, 1, 4).unwrap(), CellType::ErrorValue); + + // empty cells are number type + assert_eq!(model.get_cell_type(0, 40, 40).unwrap(), CellType::Number); +} + #[test] fn insert_remove_rows() { let model = new_empty_model(); diff --git a/base/src/test/user_model/test_to_from_bytes.rs b/base/src/test/user_model/test_to_from_bytes.rs index a973945..637eb15 100644 --- a/base/src/test/user_model/test_to_from_bytes.rs
+++ b/base/src/test/user_model/test_to_from_bytes.rs @@ -22,7 +22,7 @@ fn basic() { #[test] fn errors() { - let model_bytes = "Early in the morning, late in the century, Cricklewood Broadway."; + let model_bytes = "Early in the morning, late in the century, Cricklewood Broadway.".as_bytes(); assert_eq!( &UserModel::from_bytes(model_bytes).unwrap_err(), "Error parsing workbook" diff --git a/base/src/types.rs b/base/src/types.rs index c6069f0..54e73da 100644 --- a/base/src/types.rs +++ b/base/src/types.rs @@ -1,3 +1,4 @@ +use bitcode::{Decode, Encode}; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, fmt::Display}; @@ -33,7 +34,7 @@ fn hashmap_is_empty(h: &HashMap) -> bool { h.values().len() == 0 } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct Metadata { pub application: String, pub app_version: String, @@ -43,13 +44,13 @@ pub struct Metadata { pub last_modified: String, //"2020-11-20T16:24:35" } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct WorkbookSettings { pub tz: String, pub locale: String, } /// An internal representation of an IronCalc Workbook -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Clone)] #[serde(deny_unknown_fields)] pub struct Workbook { pub shared_strings: Vec, @@ -65,7 +66,7 @@ pub struct Workbook { } /// A defined name. 
The `sheet_id` is the sheet index in case the name is local -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct DefinedName { pub name: String, pub formula: String, @@ -79,7 +80,7 @@ pub struct DefinedName { /// * state: /// 18.18.68 ST_SheetState (Sheet Visibility Types) /// hidden, veryHidden, visible -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] #[serde(rename_all = "lowercase")] pub enum SheetState { Visible, @@ -98,7 +99,7 @@ impl Display for SheetState { } /// Internal representation of a worksheet Excel object -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Clone)] pub struct Worksheet { pub dimension: String, pub cols: Vec, @@ -125,7 +126,7 @@ pub struct Worksheet { pub type SheetData = HashMap>; // ECMA-376-1:2016 section 18.3.1.73 -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Clone)] pub struct Row { /// Row index pub r: i32, @@ -139,7 +140,7 @@ pub struct Row { } // ECMA-376-1:2016 section 18.3.1.13 -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Clone)] pub struct Col { // Column definitions are defined on ranges, unlike rows which store unique, per-row entries. /// First column affected by this record. Settings apply to column in \[min, max\] range. 
@@ -164,7 +165,7 @@ pub enum CellType { CompoundData = 128, } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, Clone, PartialEq)] #[serde(tag = "t", deny_unknown_fields)] pub enum Cell { #[serde(rename = "empty")] @@ -208,7 +209,7 @@ impl Default for Cell { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct Comment { pub text: String, pub author_name: String, @@ -218,7 +219,7 @@ pub struct Comment { } // ECMA-376-1:2016 section 18.5.1.2 -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct Table { pub name: String, pub display_name: String, @@ -241,7 +242,7 @@ pub struct Table { // totals_row_label vs totals_row_function might be mutually exclusive. Use an enum? // the totals_row_function is an enum not String methinks -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct TableColumn { pub id: u32, pub name: String, @@ -271,7 +272,7 @@ impl Default for TableColumn { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone, Default)] pub struct TableStyleInfo { #[serde(skip_serializing_if = "Option::is_none")] pub name: Option, @@ -289,7 +290,7 @@ pub struct TableStyleInfo { pub show_column_stripes: bool, } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct Styles { pub num_fmts: Vec, pub fonts: Vec, @@ -314,7 +315,7 @@ impl Default for Styles { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub 
struct Style { #[serde(skip_serializing_if = "Option::is_none")] pub alignment: Option, @@ -325,7 +326,7 @@ pub struct Style { pub quote_prefix: bool, } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct NumFmt { pub num_fmt_id: i32, pub format_code: String, @@ -343,7 +344,7 @@ impl Default for NumFmt { // ST_FontScheme simple type (§18.18.33). // Usually major fonts are used for styles like headings, // and minor fonts are used for body and paragraph text. -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] #[serde(rename_all = "lowercase")] #[derive(Default)] pub enum FontScheme { @@ -363,7 +364,7 @@ impl Display for FontScheme { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct Font { #[serde(default = "default_as_false")] #[serde(skip_serializing_if = "is_false")] @@ -406,7 +407,7 @@ impl Default for Font { } // TODO: Maybe use an enum for the pattern_type values here?
-#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct Fill { pub pattern_type: String, #[serde(skip_serializing_if = "Option::is_none")] @@ -425,7 +426,7 @@ impl Default for Fill { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] #[serde(rename_all = "lowercase")] pub enum HorizontalAlignment { Center, @@ -467,7 +468,7 @@ impl Display for HorizontalAlignment { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] #[serde(rename_all = "lowercase")] pub enum VerticalAlignment { Bottom, @@ -502,7 +503,7 @@ impl Display for VerticalAlignment { } // 1762 -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone, Default)] pub struct Alignment { #[serde(default)] #[serde(skip_serializing_if = "HorizontalAlignment::is_default")] @@ -515,7 +516,7 @@ pub struct Alignment { pub wrap_text: bool, } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct CellStyleXfs { pub num_fmt_id: i32, pub font_id: i32, @@ -558,7 +559,7 @@ impl Default for CellStyleXfs { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone, Default)] pub struct CellXfs { pub xf_id: i32, pub num_fmt_id: i32, @@ -590,7 +591,7 @@ pub struct CellXfs { pub alignment: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct CellStyles { pub name: String, pub xf_id: i32, @@ -607,7 +608,7 @@ impl Default for CellStyles { } } 
-#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] #[serde(rename_all = "lowercase")] pub enum BorderStyle { Thin, @@ -637,13 +638,13 @@ impl Display for BorderStyle { } } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone)] pub struct BorderItem { pub style: BorderStyle, pub color: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Default)] +#[derive(Serialize, Deserialize, Encode, Decode, Debug, PartialEq, Eq, Clone, Default)] pub struct Border { #[serde(default = "default_as_false")] #[serde(skip_serializing_if = "is_false")] diff --git a/base/src/user_model.rs b/base/src/user_model.rs index 4cfc9a5..3bc29e8 100644 --- a/base/src/user_model.rs +++ b/base/src/user_model.rs @@ -12,8 +12,8 @@ use crate::{ }, model::Model, types::{ - Alignment, BorderItem, BorderStyle, Cell, Col, HorizontalAlignment, Row, SheetProperties, - Style, VerticalAlignment, + Alignment, BorderItem, BorderStyle, Cell, CellType, Col, HorizontalAlignment, Row, + SheetProperties, Style, VerticalAlignment, }, utils::is_valid_hex_color, }; @@ -315,9 +315,9 @@ impl UserModel { /// Creates a model from it's internal representation /// /// See also: - /// * [Model::from_json] - pub fn from_bytes(s: &str) -> Result { - let model = Model::from_json(s)?; + /// * [Model::from_bytes] + pub fn from_bytes(s: &[u8]) -> Result { + let model = Model::from_bytes(s)?; Ok(UserModel { model, history: History::default(), @@ -330,8 +330,8 @@ impl UserModel { /// /// See also: - /// * [Model::to_json_str] - pub fn to_bytes(&self) -> String { - self.model.to_json_str() + /// * [Model::to_bytes] + pub fn to_bytes(&self) -> Vec { + self.model.to_bytes() } /// Undoes last change if any, places the change in the redo list and evaluates the model if needed @@ -498,6 +498,14 @@ impl UserModel { self.model.get_formatted_cell_value(sheet, row,
column) } + /// Returns the type of the cell + /// + /// See also + /// * [Model::get_cell_type] + pub fn get_cell_type(&self, sheet: u32, row: i32, column: i32) -> Result { + self.model.get_cell_type(sheet, row, column) + } + /// Adds new sheet /// /// See also: diff --git a/xlsx/Cargo.toml b/xlsx/Cargo.toml index 4a4569e..d357208 100644 --- a/xlsx/Cargo.toml +++ b/xlsx/Cargo.toml @@ -23,6 +23,7 @@ thiserror = "1.0" ironcalc_base = { path = "../base", version = "0.1.0" } itertools = "0.12" chrono = "0.4" +bitcode = "0.6.0" [dev-dependencies] uuid = { version = "1.2.2", features = ["serde", "v4"] } diff --git a/xlsx/src/bin/xlsx_2_icalc.rs b/xlsx/src/bin/xlsx_2_icalc.rs new file mode 100644 index 0000000..7368aba --- /dev/null +++ b/xlsx/src/bin/xlsx_2_icalc.rs @@ -0,0 +1,26 @@ +//! Converts an Excel xlsx file into an IronCalc file. +//! The workbook is loaded with the "en" locale and the "UTC" timezone. +//! Saves an IronCalc version named after the input file, with the `.ic` extension +//! This is primarily for QA internal testing and will be superseded by an official +//! IronCalc CLI. +//! +//!
Usage: xlsx_2_icalc file.xlsx + +use std::path; + +use ironcalc::{export::save_to_json, import::load_model_from_xlsx}; + +fn main() { + let args: Vec<_> = std::env::args().collect(); + if args.len() != 2 { + panic!("Usage: {} ", args[0]); + } + // the xlsx file to convert + let file_name = &args[1]; + + let file_path = path::Path::new(file_name); + let base_name = file_path.file_stem().unwrap().to_str().unwrap(); + let output_file_name = &format!("{base_name}.ic"); + let model = load_model_from_xlsx(file_name, "en", "UTC").unwrap(); + save_to_json(model.workbook, output_file_name); +} diff --git a/xlsx/tests/test.rs b/xlsx/tests/test.rs index c23151a..358f6ff 100644 --- a/xlsx/tests/test.rs +++ b/xlsx/tests/test.rs @@ -161,7 +161,7 @@ fn test_simple_text() { fn test_defined_names_casing() { let test_file_path = "tests/calc_tests/defined_names_for_unit_test.xlsx"; let loaded_workbook = load_from_excel(test_file_path, "en", "UTC").unwrap(); - let mut model = Model::from_json(&serde_json::to_string(&loaded_workbook).unwrap()).unwrap(); + let mut model = Model::from_bytes(&bitcode::encode(&loaded_workbook)).unwrap(); let (row, column) = (2, 13); // B13 let test_cases = [