FIX: Removes the csv-sniffer in favour of a simple guess

This removes 500 KB from the wasm build, so it is worth it.
We were using a very old version of the sniffer; the latest one might not
have this bug, though.
This commit is contained in:
Nicolás Hatcher
2024-11-26 19:44:17 +01:00
committed by Nicolás Hatcher Andrés
parent 1f1fd24334
commit 949eafc97f
5 changed files with 63 additions and 93 deletions

90
Cargo.lock generated
View File

@@ -19,15 +19,6 @@ dependencies = [
"cpufeatures", "cpufeatures",
] ]
[[package]]
name = "aho-corasick"
version = "0.6.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81ce3d38065e618af2d7b77e10c5ad9a069859b4be3c2250f674af3840d9c8a5"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "1.1.3" version = "1.1.3"
@@ -85,12 +76,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "block-buffer" name = "block-buffer"
version = "0.10.4" version = "0.10.4"
@@ -278,19 +263,6 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "csv-sniffer"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b8e952164bb270a505d6cb6136624174c34cfb9abd16e0011f5e53058317f39"
dependencies = [
"bitflags",
"csv",
"csv-core",
"memchr",
"regex 0.2.11",
]
[[package]] [[package]]
name = "deranged" name = "deranged"
version = "0.3.11" version = "0.3.11"
@@ -425,11 +397,10 @@ dependencies = [
"chrono", "chrono",
"chrono-tz", "chrono-tz",
"csv", "csv",
"csv-sniffer",
"js-sys", "js-sys",
"once_cell", "once_cell",
"rand", "rand",
"regex 1.10.4", "regex",
"ryu", "ryu",
"serde", "serde",
"serde_json", "serde_json",
@@ -468,12 +439,6 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.153" version = "0.2.153"
@@ -537,7 +502,7 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41"
dependencies = [ dependencies = [
"regex 1.10.4", "regex",
] ]
[[package]] [[package]]
@@ -746,29 +711,16 @@ dependencies = [
"getrandom", "getrandom",
] ]
[[package]]
name = "regex"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384"
dependencies = [
"aho-corasick 0.6.10",
"memchr",
"regex-syntax 0.5.6",
"thread_local",
"utf8-ranges",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.10.4" version = "1.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
dependencies = [ dependencies = [
"aho-corasick 1.1.3", "aho-corasick",
"memchr", "memchr",
"regex-automata", "regex-automata",
"regex-syntax 0.8.3", "regex-syntax",
] ]
[[package]] [[package]]
@@ -777,18 +729,9 @@ version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
dependencies = [ dependencies = [
"aho-corasick 1.1.3", "aho-corasick",
"memchr", "memchr",
"regex-syntax 0.8.3", "regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7"
dependencies = [
"ucd-util",
] ]
[[package]] [[package]]
@@ -928,15 +871,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
dependencies = [
"lazy_static",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.3.34" version = "0.3.34"
@@ -962,12 +896,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd-util"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd2fc5d32b590614af8b0a20d837f32eca055edd0bbead59a9cfe80858be003"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.12" version = "1.0.12"
@@ -980,12 +908,6 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
[[package]]
name = "utf8-ranges"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba"
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.8.0" version = "1.8.0"

View File

@@ -19,7 +19,6 @@ regex = "1.0"
once_cell = "1.16.0" once_cell = "1.16.0"
bitcode = "0.6.0" bitcode = "0.6.0"
csv = "1.3.0" csv = "1.3.0"
csv-sniffer = "0.1"
[dev-dependencies] [dev-dependencies]
serde_json = "1.0" serde_json = "1.0"

View File

@@ -26,6 +26,27 @@ fn csv_paste() {
); );
} }
// Pastes a single-cell payload that holds only a formula and checks the
// formatted value of the target cell.
#[test]
fn csv_paste_formula() {
    let formula = "=YEAR(TODAY())";
    // One-cell target area at row 1, column 1 of the first sheet.
    let target = Area {
        sheet: 0,
        row: 1,
        column: 1,
        width: 1,
        height: 1,
    };

    let mut model = UserModel::new_empty("model", "en", "UTC").unwrap();
    model.set_selected_cell(1, 1).unwrap();
    model.paste_csv_string(&target, formula).unwrap();

    // NOTE(review): the expected year is hard-coded; presumably the test
    // build pins the clock to a date in 2022 -- confirm.
    let shown = model.get_formatted_cell_value(0, 1, 1);
    assert_eq!(shown, Ok("2022".to_string()));
}
#[test] #[test]
fn tsv_crlf_paste() { fn tsv_crlf_paste() {
let mut model = UserModel::new_empty("model", "en", "UTC").unwrap(); let mut model = UserModel::new_empty("model", "en", "UTC").unwrap();

View File

@@ -3,7 +3,6 @@
use std::{collections::HashMap, fmt::Debug, io::Cursor}; use std::{collections::HashMap, fmt::Debug, io::Cursor};
use csv::{ReaderBuilder, WriterBuilder}; use csv::{ReaderBuilder, WriterBuilder};
use csv_sniffer::Sniffer;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{ use crate::{
@@ -62,6 +61,30 @@ pub struct BorderArea {
r#type: BorderType, r#type: BorderType,
} }
fn guess_delimiter(data: &str) -> char {
let delimiters = [',', ';', '\t', '|', ':'];
let mut best_delim = ',';
let mut max_fields = 0;
for &delim in &delimiters {
let mut fields_per_line = Vec::new();
for line in data.lines() {
let fields = line.split(delim).count();
fields_per_line.push(fields);
}
let first_count = fields_per_line.first().copied().unwrap_or(0);
if fields_per_line.iter().all(|&count| count == first_count) && first_count > max_fields {
max_fields = first_count;
best_delim = delim;
}
}
best_delim
}
fn boolean(value: &str) -> Result<bool, String> { fn boolean(value: &str) -> Result<bool, String> {
match value { match value {
"true" => Ok(true), "true" => Ok(true),
@@ -1509,18 +1532,13 @@ impl UserModel {
let sheet = area.sheet; let sheet = area.sheet;
let mut row = area.row; let mut row = area.row;
let mut column = area.column; let mut column = area.column;
// Create a sniffer with default settings
let mut sniffer = Sniffer::new();
let mut csv_reader = Cursor::new(csv); let mut csv_reader = Cursor::new(csv);
// Sniff the CSV metadata let delimiter = guess_delimiter(csv) as u8;
let metadata = sniffer
.sniff_reader(&mut csv_reader)
.map_err(|_| "Failed")?;
// Reset the cursor to the beginning after sniffing // Reset the cursor to the beginning after sniffing
csv_reader.set_position(0); csv_reader.set_position(0);
let mut reader = ReaderBuilder::new() let mut reader = ReaderBuilder::new()
.delimiter(metadata.dialect.delimiter) .delimiter(delimiter)
.has_headers(false) .has_headers(false)
.from_reader(csv_reader); .from_reader(csv_reader);
for record in reader.records() { for record in reader.records() {
@@ -1876,6 +1894,8 @@ mod tests {
user_model::common::{horizontal, vertical}, user_model::common::{horizontal, vertical},
}; };
use super::guess_delimiter;
#[test] #[test]
fn test_vertical() { fn test_vertical() {
let all = vec![ let all = vec![
@@ -1906,4 +1926,11 @@ mod tests {
assert_eq!(horizontal(&format!("{}", a)), Ok(a)); assert_eq!(horizontal(&format!("{}", a)), Ok(a));
} }
} }
// Table-driven check of the delimiter guess: comma-separated rows,
// tab-separated rows, and a single value with no delimiter at all.
#[test]
fn test_guess_delimiter() {
    let cases = [
        ("1,2,3\n4,5,6", ','),
        ("1\t2\t3\n4\t5\t6", '\t'),
        ("1", ','),
    ];
    for &(input, expected) in &cases {
        assert_eq!(guess_delimiter(input), expected);
    }
}
} }

View File

@@ -248,6 +248,7 @@ const Editor = (options: EditorOptions) => {
maskRef.current.style.top = `-${textareaRef.current.scrollTop}px`; maskRef.current.style.top = `-${textareaRef.current.scrollTop}px`;
} }
}} }}
onPaste={(event) => event.stopPropagation()}
/> />
</div> </div>
); );