From 7df4943a966a6885b77bd160339aa50b6ff78993 Mon Sep 17 00:00:00 2001 From: Wynd Date: Thu, 12 Jun 2025 14:17:25 +0300 Subject: [PATCH] Moved to a locally patched docx-rs and made it so the excluded chars are read from a file --- .gitignore | 2 +- Cargo.lock | 8 +++----- Cargo.toml | 3 ++- src/main.rs | 44 ++++++++++++-------------------------------- 4 files changed, 18 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index 881b3fd..479defb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ /target - +excluded *.docx *.txt diff --git a/Cargo.lock b/Cargo.lock index 35c95cf..b734851 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,9 +22,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "base64" -version = "0.13.1" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bitflags" @@ -82,9 +82,7 @@ dependencies = [ [[package]] name = "docx-rs" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e593b51d4fe95d69d70fd40da4b314b029736302c986c3c760826e842fd27dc3" +version = "0.4.18-rc19" dependencies = [ "base64", "image", diff --git a/Cargo.toml b/Cargo.toml index d929945..e2f30ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,5 +5,6 @@ edition = "2021" [dependencies] anyhow = "1.0.98" -docx-rs = "0.4.17" +docx-rs = { path = "../../Tests/docx-rs/docx-core" } +# docx-rs = "0.4.17" serde_json = "1.0.140" diff --git a/src/main.rs b/src/main.rs index 989febd..a18d993 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use std::{ - env, + env, fs, io::{stdout, Read, Write}, }; @@ -12,50 +12,30 @@ fn main() -> anyhow::Result<()> { let mut lines: Vec = vec![]; - // for docx parse_docx(file_name, &mut lines)?; - // for txt - // for line in fs::read_to_string(file_name).unwrap().lines() { - // if !line.is_empty() { - // lines.push(line.to_string()) - // } - // } + let ascii_upper_limit = u32::from_str_radix("7F", 16).unwrap(); + let mut excluded = vec![]; + for line in fs::read_to_string("excluded")?.lines() { + if let Some(c) = line.chars().next() { + let c = format!("{:x}", c as u32); + let u = u32::from_str_radix(&c, 16)?; + excluded.push(u); + } + } let mut lock = stdout().lock(); - let ascii_upper_limit = u32::from_str_radix("7F", 16).unwrap(); - let excluded = [ - "0000021B", // ț - "00000219", // ș - "00000103", // ă - "000000E2", // â - "000000EE", // î - "0000021A", // Ț - "00000218", // Ș - "00000102", // Ă - "000000C2", // Â - "000000CE", // Î - "0000201E", // „ - "0000201D", // ” - "0000201A", // ‚ - "00002019", // ’ - ]; - let excluded: Vec = excluded - .into_iter() - .map(|c| u32::from_str_radix(c, 16).unwrap()) - .collect(); - for line in lines { for c in line.chars() { let uc = c as u32; if uc > ascii_upper_limit && !excluded.contains(&uc) { - writeln!(lock, "{c} - {line}").unwrap(); + writeln!(lock, "{c} - {line}")?; } } } - lock.flush().unwrap(); + lock.flush()?; Ok(()) }