feat: try to read sdt (#382)
* feat: try to read sdt * fix: reader * impl: toc reader * impl: improve toc reader * fix: add option * improve toc reader * fixmain
parent
26c9bafbac
commit
b8ad56daab
|
@ -4,7 +4,7 @@ use std::fs::File;
|
||||||
use std::io::{Read, Write};
|
use std::io::{Read, Write};
|
||||||
|
|
||||||
pub fn main() {
|
pub fn main() {
|
||||||
let mut file = File::open("./header.docx").unwrap();
|
let mut file = File::open("./toc1.docx").unwrap();
|
||||||
let mut buf = vec![];
|
let mut buf = vec![];
|
||||||
file.read_to_end(&mut buf).unwrap();
|
file.read_to_end(&mut buf).unwrap();
|
||||||
|
|
||||||
|
|
|
@ -1,26 +1,48 @@
|
||||||
|
use serde::ser::{SerializeStruct, Serializer};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::documents::*;
|
use crate::documents::*;
|
||||||
use crate::xml_builder::*;
|
use crate::xml_builder::*;
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub struct InstrText {
|
pub enum InstrText {
|
||||||
pub val: String,
|
TOC(InstrToC),
|
||||||
|
Unsupported(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl InstrText {
|
impl BuildXML for Box<InstrText> {
|
||||||
pub fn new(i: impl Into<String>) -> Self {
|
fn build(&self) -> Vec<u8> {
|
||||||
Self { val: i.into() }
|
let instr = match self.as_ref() {
|
||||||
|
InstrText::TOC(toc) => toc.build(),
|
||||||
|
InstrText::Unsupported(s) => s.as_bytes().to_vec(),
|
||||||
|
};
|
||||||
|
XMLBuilder::new()
|
||||||
|
.open_instr_text()
|
||||||
|
.add_bytes(&instr)
|
||||||
|
.close()
|
||||||
|
.build()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BuildXML for InstrText {
|
impl Serialize for InstrText {
|
||||||
fn build(&self) -> Vec<u8> {
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
XMLBuilder::new()
|
where
|
||||||
.open_instr_text()
|
S: Serializer,
|
||||||
.plain_text(&self.val)
|
{
|
||||||
.close()
|
match *self {
|
||||||
.build()
|
InstrText::TOC(ref s) => {
|
||||||
|
let mut t = serializer.serialize_struct("TOC", 2)?;
|
||||||
|
t.serialize_field("type", "toc")?;
|
||||||
|
t.serialize_field("data", s)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
|
InstrText::Unsupported(ref s) => {
|
||||||
|
let mut t = serializer.serialize_struct("Unsupported", 2)?;
|
||||||
|
t.serialize_field("type", "unsupported")?;
|
||||||
|
t.serialize_field("data", s)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,10 +56,10 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_toc_instr() {
|
fn test_toc_instr() {
|
||||||
let b = InstrText::new(r#"ToC \o "1-3""#).build();
|
let b = Box::new(InstrText::TOC(InstrToC::new().heading_styles_range(1, 3))).build();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
str::from_utf8(&b).unwrap(),
|
str::from_utf8(&b).unwrap(),
|
||||||
r#"<w:instrText>ToC \o "1-3"</w:instrText>"#
|
r#"<w:instrText>TOC \o "1-3"</w:instrText>"#
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,345 @@
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::documents::*;
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, Clone, PartialEq, Default)]
|
||||||
|
pub struct StyleWithLevel(pub (String, usize));
|
||||||
|
|
||||||
|
impl StyleWithLevel {
|
||||||
|
pub fn new(s: impl Into<String>, l: usize) -> Self {
|
||||||
|
Self((s.into(), l))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// https://c-rex.net/projects/samples/ooxml/e1/Part4/OOXML_P4_DOCX_TOCTOC_topic_ID0ELZO1.html
|
||||||
|
#[derive(Serialize, Debug, Clone, PartialEq, Default)]
|
||||||
|
pub struct InstrToC {
|
||||||
|
// \o If no heading range is specified, all heading levels used in the document are listed.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
heading_styles_range: Option<(usize, usize)>,
|
||||||
|
// \l Includes TC fields that assign entries to one of the levels specified by text in this switch's field-argument as a range having the form startLevel-endLevel,
|
||||||
|
// where startLevel and endLevel are integers, and startLevel has a value equal-to or less-than endLevel.
|
||||||
|
// TC fields that assign entries to lower levels are skipped.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
tc_field_level_range: Option<(usize, usize)>,
|
||||||
|
// \n Without field-argument, omits page numbers from the table of contents.
|
||||||
|
// .Page numbers are omitted from all levels unless a range of entry levels is specified by text in this switch's field-argument.
|
||||||
|
// A range is specified as for \l.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
omit_page_numbers_level_range: Option<(usize, usize)>,
|
||||||
|
// \b includes entries only from the portion of the document marked by the bookmark named by text in this switch's field-argument.
|
||||||
|
entry_bookmark_name: Option<String>,
|
||||||
|
// \t Uses paragraphs formatted with styles other than the built-in heading styles.
|
||||||
|
// . text in this switch's field-argument specifies those styles as a set of comma-separated doublets,
|
||||||
|
// with each doublet being a comma-separated set of style name and table of content level. \t can be combined with \o.
|
||||||
|
styles_with_levels: Vec<StyleWithLevel>,
|
||||||
|
// \p text in this switch's field-argument specifies a sequence of characters that separate an entry and its page number.
|
||||||
|
// . The default is a tab with leader dots.
|
||||||
|
entry_and_page_number_separator: Option<String>,
|
||||||
|
// \d
|
||||||
|
sequence_and_page_numbers_separator: Option<String>,
|
||||||
|
// \a
|
||||||
|
caption_label: Option<String>,
|
||||||
|
// \c
|
||||||
|
caption_label_including_numbers: Option<String>,
|
||||||
|
// \s
|
||||||
|
seq_field_identifier_for_prefix: Option<String>,
|
||||||
|
// \f
|
||||||
|
tc_field_identifier: Option<String>,
|
||||||
|
// \h
|
||||||
|
hyperlink: bool,
|
||||||
|
// \w
|
||||||
|
preserve_tab: bool,
|
||||||
|
// \x
|
||||||
|
preserve_new_line: bool,
|
||||||
|
// \u
|
||||||
|
use_applied_paragraph_line_level: bool,
|
||||||
|
// \z Hides tab leader and page numbers in Web layout view.
|
||||||
|
hide_tab_and_page_numbers_in_webview: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InstrToC {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn heading_styles_range(mut self, start: usize, end: usize) -> Self {
|
||||||
|
self.heading_styles_range = Some((start, end));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn tc_field_level_range(mut self, start: usize, end: usize) -> Self {
|
||||||
|
self.tc_field_level_range = Some((start, end));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn tc_field_identifier(mut self, t: impl Into<String>) -> Self {
|
||||||
|
self.tc_field_identifier = Some(t.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn omit_page_numbers_level_range(mut self, start: usize, end: usize) -> Self {
|
||||||
|
self.omit_page_numbers_level_range = Some((start, end));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn entry_and_page_number_separator(mut self, t: impl Into<String>) -> Self {
|
||||||
|
self.entry_and_page_number_separator = Some(t.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn entry_bookmark_name(mut self, t: impl Into<String>) -> Self {
|
||||||
|
self.entry_bookmark_name = Some(t.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn caption_label(mut self, t: impl Into<String>) -> Self {
|
||||||
|
self.caption_label = Some(t.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn caption_label_including_numbers(mut self, t: impl Into<String>) -> Self {
|
||||||
|
self.caption_label_including_numbers = Some(t.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn sequence_and_page_numbers_separator(mut self, t: impl Into<String>) -> Self {
|
||||||
|
self.sequence_and_page_numbers_separator = Some(t.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn seq_field_identifier_for_prefix(mut self, t: impl Into<String>) -> Self {
|
||||||
|
self.seq_field_identifier_for_prefix = Some(t.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn hyperlink(mut self) -> Self {
|
||||||
|
self.hyperlink = true;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn preserve_tab(mut self) -> Self {
|
||||||
|
self.preserve_tab = true;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn preserve_new_line(mut self) -> Self {
|
||||||
|
self.preserve_new_line = true;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn use_applied_paragraph_line_level(mut self) -> Self {
|
||||||
|
self.use_applied_paragraph_line_level = true;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn hide_tab_and_page_numbers_in_webview(mut self) -> Self {
|
||||||
|
self.hide_tab_and_page_numbers_in_webview = true;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_style_with_level(mut self, s: StyleWithLevel) -> Self {
|
||||||
|
self.styles_with_levels.push(s);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BuildXML for InstrToC {
    /// Renders the field instruction text, e.g. `TOC \o "1-3" \h`.
    ///
    /// Only the `\o`, `\p`, `\h` and `\z` switches are written; the remaining
    /// fields of the struct are not emitted by this method.
    fn build(&self) -> Vec<u8> {
        let mut instr = String::from("TOC");

        // Inner quotes are escaped so the literal is well-formed and the
        // output matches the `TOC \o "1-3"` form the tests expect.
        if let Some((start, end)) = self.heading_styles_range {
            instr.push_str(&format!(" \\o \"{}-{}\"", start, end));
        }

        if let Some(ref separator) = self.entry_and_page_number_separator {
            instr.push_str(&format!(" \\p \"{}\"", separator));
        }

        if self.hyperlink {
            instr.push_str(" \\h");
        }

        if self.hide_tab_and_page_numbers_in_webview {
            instr.push_str(" \\z");
        }

        instr.into_bytes()
    }
}
|
||||||
|
|
||||||
|
/// Parses a switch argument of the form `"start-end"` into `(start, end)`.
///
/// Double quotes are stripped first, whether literal or written as the
/// `&quot;` entity. Returns `None` when either bound is missing or is not an
/// unsigned integer. Segments after a second `-` are ignored.
fn parse_level_range(i: &str) -> Option<(usize, usize)> {
    let cleaned = i.replace("&quot;", "").replace('"', "");
    let mut bounds = cleaned.split('-');
    let start = bounds.next()?.parse::<usize>().ok()?;
    let end = bounds.next()?.parse::<usize>().ok()?;
    Some((start, end))
}
|
||||||
|
|
||||||
|
impl std::str::FromStr for InstrToC {
|
||||||
|
type Err = ();
|
||||||
|
|
||||||
|
fn from_str(instr: &str) -> Result<Self, Self::Err> {
|
||||||
|
let mut s = instr.split(' ');
|
||||||
|
let mut toc = InstrToC::new();
|
||||||
|
loop {
|
||||||
|
if let Some(i) = s.next() {
|
||||||
|
match i {
|
||||||
|
"\\a" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
toc = toc.caption_label(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\b" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
toc = toc.entry_bookmark_name(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\c" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
toc = toc.caption_label_including_numbers(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\d" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
toc = toc.sequence_and_page_numbers_separator(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\f" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
toc = toc.tc_field_identifier(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\h" => toc = toc.hyperlink(),
|
||||||
|
"\\l" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
if let Some((s, e)) = parse_level_range(r) {
|
||||||
|
toc = toc.tc_field_level_range(s, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\n" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
if let Some((s, e)) = parse_level_range(r) {
|
||||||
|
toc = toc.omit_page_numbers_level_range(s, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\o" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
if let Some((s, e)) = parse_level_range(r) {
|
||||||
|
toc = toc.heading_styles_range(s, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\p" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
toc = toc.entry_and_page_number_separator(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\s" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
toc = toc.seq_field_identifier_for_prefix(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\t" => {
|
||||||
|
if let Some(r) = s.next() {
|
||||||
|
let r = r.replace(""", "").replace("\"", "");
|
||||||
|
dbg!(&r);
|
||||||
|
let mut r = r.split(',');
|
||||||
|
loop {
|
||||||
|
if let Some(style) = r.next() {
|
||||||
|
if let Some(level) = r
|
||||||
|
.next() {
|
||||||
|
if let Ok(level) = usize::from_str(level) {
|
||||||
|
toc = toc.add_style_with_level(StyleWithLevel((
|
||||||
|
style.to_string(),
|
||||||
|
level,
|
||||||
|
)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"\\u" => toc = toc.use_applied_paragraph_line_level(),
|
||||||
|
"\\w" => toc = toc.preserve_tab(),
|
||||||
|
"\\x" => toc = toc.preserve_new_line(),
|
||||||
|
"\\z" => toc = toc.hide_tab_and_page_numbers_in_webview(),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Ok(toc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {

    use super::*;
    #[cfg(test)]
    use pretty_assertions::assert_eq;
    use std::str;

    /// Building an instruction with only a heading range emits the `\o` switch.
    #[test]
    fn test_toc() {
        let built = InstrToC::new().heading_styles_range(1, 3).build();
        let text = str::from_utf8(&built).unwrap();
        assert_eq!(text, r#"TOC \o "1-3""#);
    }

    /// `\o` with a range plus the `\h` flag.
    #[test]
    fn read_toc_with_o_and_h() {
        let parsed: InstrToC = r#"TOC \o "1-3" \h"#.parse().unwrap();
        let expected = InstrToC::new().heading_styles_range(1, 3).hyperlink();
        assert_eq!(parsed, expected);
    }

    /// `\l` and `\n` ranges alongside `\o` and `\h`.
    #[test]
    fn read_toc_with_l_and_n() {
        let parsed: InstrToC = r#"TOC \o "1-3" \l "4-5" \n "1-4" \h"#.parse().unwrap();
        let expected = InstrToC::new()
            .heading_styles_range(1, 3)
            .hyperlink()
            .omit_page_numbers_level_range(1, 4)
            .tc_field_level_range(4, 5);
        assert_eq!(parsed, expected);
    }

    /// String arguments (`\a`, `\b`) and the style/level doublets of `\t`.
    #[test]
    fn read_toc_with_a_and_b_and_t() {
        let source =
            r#"TOC \a "hoge" \b "test" \o "1-3" \t "MySpectacularStyle,1,MySpectacularStyle2,4""#;
        let parsed: InstrToC = source.parse().unwrap();
        let expected = InstrToC::new()
            .caption_label("hoge")
            .entry_bookmark_name("test")
            .heading_styles_range(1, 3)
            .add_style_with_level(StyleWithLevel::new("MySpectacularStyle", 1))
            .add_style_with_level(StyleWithLevel::new("MySpectacularStyle2", 4));
        assert_eq!(parsed, expected);
    }
}
|
|
@ -18,6 +18,7 @@ mod default_tab_stop;
|
||||||
mod delete;
|
mod delete;
|
||||||
mod delete_text;
|
mod delete_text;
|
||||||
mod div;
|
mod div;
|
||||||
|
mod instr_toc;
|
||||||
mod doc_defaults;
|
mod doc_defaults;
|
||||||
mod doc_grid;
|
mod doc_grid;
|
||||||
mod doc_id;
|
mod doc_id;
|
||||||
|
@ -203,3 +204,4 @@ pub use wp_anchor::*;
|
||||||
pub use wps_shape::*;
|
pub use wps_shape::*;
|
||||||
pub use wps_text_box::*;
|
pub use wps_text_box::*;
|
||||||
pub use zoom::*;
|
pub use zoom::*;
|
||||||
|
pub use instr_toc::*;
|
||||||
|
|
|
@ -33,7 +33,7 @@ pub enum RunChild {
|
||||||
CommentStart(Box<CommentRangeStart>),
|
CommentStart(Box<CommentRangeStart>),
|
||||||
CommentEnd(CommentRangeEnd),
|
CommentEnd(CommentRangeEnd),
|
||||||
FieldChar(FieldChar),
|
FieldChar(FieldChar),
|
||||||
InstrText(InstrText),
|
InstrText(Box<InstrText>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Serialize for RunChild {
|
impl Serialize for RunChild {
|
||||||
|
@ -126,8 +126,8 @@ impl Run {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_instr_text(mut self, i: impl Into<String>) -> Run {
|
pub fn add_instr_text(mut self, i: InstrText) -> Run {
|
||||||
self.children.push(RunChild::InstrText(InstrText::new(i)));
|
self.children.push(RunChild::InstrText(Box::new(i)));
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,12 @@ impl Default for StructuredDataTag {
|
||||||
pub enum StructuredDataTagChild {
|
pub enum StructuredDataTagChild {
|
||||||
Run(Box<Run>),
|
Run(Box<Run>),
|
||||||
Paragraph(Box<Paragraph>),
|
Paragraph(Box<Paragraph>),
|
||||||
|
Table(Box<Table>),
|
||||||
|
BookmarkStart(BookmarkStart),
|
||||||
|
BookmarkEnd(BookmarkEnd),
|
||||||
|
CommentStart(Box<CommentRangeStart>),
|
||||||
|
CommentEnd(CommentRangeEnd),
|
||||||
|
StructuredDataTag(Box<StructuredDataTag>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BuildXML for StructuredDataTagChild {
|
impl BuildXML for StructuredDataTagChild {
|
||||||
|
@ -35,6 +41,12 @@ impl BuildXML for StructuredDataTagChild {
|
||||||
match self {
|
match self {
|
||||||
StructuredDataTagChild::Run(v) => v.build(),
|
StructuredDataTagChild::Run(v) => v.build(),
|
||||||
StructuredDataTagChild::Paragraph(v) => v.build(),
|
StructuredDataTagChild::Paragraph(v) => v.build(),
|
||||||
|
StructuredDataTagChild::Table(v) => v.build(),
|
||||||
|
StructuredDataTagChild::BookmarkStart(v) => v.build(),
|
||||||
|
StructuredDataTagChild::BookmarkEnd(v) => v.build(),
|
||||||
|
StructuredDataTagChild::CommentStart(v) => v.build(),
|
||||||
|
StructuredDataTagChild::CommentEnd(v) => v.build(),
|
||||||
|
StructuredDataTagChild::StructuredDataTag(v) => v.build(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -57,6 +69,42 @@ impl Serialize for StructuredDataTagChild {
|
||||||
t.serialize_field("data", r)?;
|
t.serialize_field("data", r)?;
|
||||||
t.end()
|
t.end()
|
||||||
}
|
}
|
||||||
|
StructuredDataTagChild::Table(ref r) => {
|
||||||
|
let mut t = serializer.serialize_struct("Table", 2)?;
|
||||||
|
t.serialize_field("type", "table")?;
|
||||||
|
t.serialize_field("data", r)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
|
StructuredDataTagChild::BookmarkStart(ref c) => {
|
||||||
|
let mut t = serializer.serialize_struct("BookmarkStart", 2)?;
|
||||||
|
t.serialize_field("type", "bookmarkStart")?;
|
||||||
|
t.serialize_field("data", c)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
|
StructuredDataTagChild::BookmarkEnd(ref c) => {
|
||||||
|
let mut t = serializer.serialize_struct("BookmarkEnd", 2)?;
|
||||||
|
t.serialize_field("type", "bookmarkEnd")?;
|
||||||
|
t.serialize_field("data", c)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
|
StructuredDataTagChild::CommentStart(ref r) => {
|
||||||
|
let mut t = serializer.serialize_struct("CommentRangeStart", 2)?;
|
||||||
|
t.serialize_field("type", "commentRangeStart")?;
|
||||||
|
t.serialize_field("data", r)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
|
StructuredDataTagChild::CommentEnd(ref r) => {
|
||||||
|
let mut t = serializer.serialize_struct("CommentRangeEnd", 2)?;
|
||||||
|
t.serialize_field("type", "commentRangeEnd")?;
|
||||||
|
t.serialize_field("data", r)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
|
StructuredDataTagChild::StructuredDataTag(ref r) => {
|
||||||
|
let mut t = serializer.serialize_struct("StructuredDataTag", 2)?;
|
||||||
|
t.serialize_field("type", "structuredDataTag")?;
|
||||||
|
t.serialize_field("data", r)?;
|
||||||
|
t.end()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,6 +129,15 @@ impl StructuredDataTag {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn add_table(mut self, t: Table) -> Self {
|
||||||
|
if t.has_numbering {
|
||||||
|
self.has_numbering = true
|
||||||
|
}
|
||||||
|
self.children
|
||||||
|
.push(StructuredDataTagChild::Table(Box::new(t)));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn data_binding(mut self, d: DataBinding) -> Self {
|
pub fn data_binding(mut self, d: DataBinding) -> Self {
|
||||||
self.property = self.property.data_binding(d);
|
self.property = self.property.data_binding(d);
|
||||||
self
|
self
|
||||||
|
|
|
@ -2,6 +2,7 @@ use serde::Serialize;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::documents::BuildXML;
|
use crate::documents::BuildXML;
|
||||||
|
// use crate::types::*;
|
||||||
use crate::xml_builder::*;
|
use crate::xml_builder::*;
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
|
@ -13,7 +14,7 @@ pub struct StructuredDataTagProperty {
|
||||||
|
|
||||||
impl Default for StructuredDataTagProperty {
|
impl Default for StructuredDataTagProperty {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
StructuredDataTagProperty {
|
Self {
|
||||||
run_property: RunProperty::new(),
|
run_property: RunProperty::new(),
|
||||||
data_binding: None,
|
data_binding: None,
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,10 +6,7 @@ use crate::xml_builder::*;
|
||||||
|
|
||||||
// https://c-rex.net/projects/samples/ooxml/e1/Part4/OOXML_P4_DOCX_TOCTOC_topic_ID0ELZO1.html
|
// https://c-rex.net/projects/samples/ooxml/e1/Part4/OOXML_P4_DOCX_TOCTOC_topic_ID0ELZO1.html
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq, Default)]
|
#[derive(Serialize, Debug, Clone, PartialEq, Default)]
|
||||||
pub struct TableOfContents {
|
pub struct TableOfContents(pub InstrToC);
|
||||||
// If no heading range is specified, all heading levels used in the document are listed.
|
|
||||||
heading_styles_range: Option<(usize, usize)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TableOfContents {
|
impl TableOfContents {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
|
@ -17,21 +14,9 @@ impl TableOfContents {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn heading_styles_range(mut self, start: usize, end: usize) -> Self {
|
pub fn heading_styles_range(mut self, start: usize, end: usize) -> Self {
|
||||||
self.heading_styles_range = Some((start, end));
|
self.0 = self.0.heading_styles_range(start, end);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_instr_text(&self) -> String {
|
|
||||||
let mut instr = "TOC".to_string();
|
|
||||||
|
|
||||||
if let Some(heading_styles_range) = self.heading_styles_range {
|
|
||||||
instr = format!(
|
|
||||||
"{} \\o "{}-{}"",
|
|
||||||
instr, heading_styles_range.0, heading_styles_range.1
|
|
||||||
);
|
|
||||||
}
|
|
||||||
instr
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BuildXML for TableOfContents {
|
impl BuildXML for TableOfContents {
|
||||||
|
@ -39,7 +24,7 @@ impl BuildXML for TableOfContents {
|
||||||
let p1 = Paragraph::new().add_run(
|
let p1 = Paragraph::new().add_run(
|
||||||
Run::new()
|
Run::new()
|
||||||
.add_field_char(FieldCharType::Begin, true)
|
.add_field_char(FieldCharType::Begin, true)
|
||||||
.add_instr_text(self.build_instr_text())
|
.add_instr_text(InstrText::TOC(self.0.clone()))
|
||||||
.add_field_char(FieldCharType::Separate, false),
|
.add_field_char(FieldCharType::Separate, false),
|
||||||
);
|
);
|
||||||
let p2 = Paragraph::new().add_run(Run::new().add_field_char(FieldCharType::End, false));
|
let p2 = Paragraph::new().add_run(Run::new().add_field_char(FieldCharType::End, false));
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
#![allow(clippy::single_match)]
|
||||||
|
|
||||||
|
use std::io::Read;
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use xml::attribute::OwnedAttribute;
|
||||||
|
use xml::reader::{EventReader, XmlEvent};
|
||||||
|
|
||||||
|
use crate::reader::*;
|
||||||
|
|
||||||
|
impl ElementReader for InstrText {
|
||||||
|
fn read<R: Read>(
|
||||||
|
r: &mut EventReader<R>,
|
||||||
|
_attrs: &[OwnedAttribute],
|
||||||
|
) -> Result<Self, ReaderError> {
|
||||||
|
let mut instr = "".to_owned();
|
||||||
|
loop {
|
||||||
|
let e = r.next();
|
||||||
|
match e {
|
||||||
|
Ok(XmlEvent::Characters(c)) => {
|
||||||
|
instr = c;
|
||||||
|
}
|
||||||
|
Ok(XmlEvent::EndElement { name, .. }) => {
|
||||||
|
let e = XMLElement::from_str(&name.local_name).unwrap();
|
||||||
|
match e {
|
||||||
|
XMLElement::InstrText => {
|
||||||
|
let instr = instr.trim();
|
||||||
|
if instr.is_empty() {
|
||||||
|
return Err(ReaderError::XMLReadError);
|
||||||
|
} else {
|
||||||
|
if instr.starts_with("TOC") {
|
||||||
|
for i in instr.split(' ') {
|
||||||
|
dbg!(i);
|
||||||
|
}
|
||||||
|
return Ok(InstrText::TOC(InstrToC::new()));
|
||||||
|
}
|
||||||
|
return Ok(InstrText::Unsupported(instr.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => return Err(ReaderError::XMLReadError),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -21,6 +21,7 @@ mod from_xml;
|
||||||
mod header;
|
mod header;
|
||||||
mod ignore;
|
mod ignore;
|
||||||
mod insert;
|
mod insert;
|
||||||
|
mod instr_text;
|
||||||
mod level;
|
mod level;
|
||||||
mod level_override;
|
mod level_override;
|
||||||
mod mc_fallback;
|
mod mc_fallback;
|
||||||
|
@ -35,6 +36,7 @@ mod run_property;
|
||||||
mod section_property;
|
mod section_property;
|
||||||
mod settings;
|
mod settings;
|
||||||
mod shading;
|
mod shading;
|
||||||
|
mod structured_data_tag;
|
||||||
mod style;
|
mod style;
|
||||||
mod styles;
|
mod styles;
|
||||||
mod table;
|
mod table;
|
||||||
|
|
|
@ -8,8 +8,8 @@ use xml::reader::{EventReader, XmlEvent};
|
||||||
|
|
||||||
use super::Run;
|
use super::Run;
|
||||||
|
|
||||||
use crate::reader::*;
|
|
||||||
use crate::types::BreakType;
|
use crate::types::BreakType;
|
||||||
|
use crate::{reader::*, FieldCharType};
|
||||||
|
|
||||||
#[derive(PartialEq, Debug)]
|
#[derive(PartialEq, Debug)]
|
||||||
enum TextState {
|
enum TextState {
|
||||||
|
@ -18,6 +18,35 @@ enum TextState {
|
||||||
Delete,
|
Delete,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_field_char(attributes: &[OwnedAttribute]) -> Result<FieldChar, ReaderError> {
|
||||||
|
let mut t: Option<FieldCharType> = None;
|
||||||
|
let mut dirty = false;
|
||||||
|
for a in attributes {
|
||||||
|
let local_name = &a.name.local_name;
|
||||||
|
match local_name.as_str() {
|
||||||
|
"fldCharType" => {
|
||||||
|
if let Ok(ty) = FieldCharType::from_str(&a.value) {
|
||||||
|
t = Some(ty);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"dirty" => {
|
||||||
|
dirty = !is_false(&a.value);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(t) = t {
|
||||||
|
let mut f = FieldChar::new(t);
|
||||||
|
if dirty {
|
||||||
|
f = f.dirty();
|
||||||
|
}
|
||||||
|
Ok(f)
|
||||||
|
} else {
|
||||||
|
Err(ReaderError::XMLReadError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl ElementReader for Run {
|
impl ElementReader for Run {
|
||||||
fn read<R: Read>(
|
fn read<R: Read>(
|
||||||
r: &mut EventReader<R>,
|
r: &mut EventReader<R>,
|
||||||
|
@ -55,9 +84,20 @@ impl ElementReader for Run {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
XMLElement::Drawing => {
|
XMLElement::Drawing => {
|
||||||
let drawing = Drawing::read(r, &attributes)?;
|
if let Ok(drawing) = Drawing::read(r, &attributes) {
|
||||||
run = run.add_drawing(drawing);
|
run = run.add_drawing(drawing);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
XMLElement::FieldChar => {
|
||||||
|
if let Ok(f) = read_field_char(&attributes) {
|
||||||
|
run.children.push(RunChild::FieldChar(f));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
XMLElement::InstrText => {
|
||||||
|
if let Ok(i) = InstrText::read(r, &attributes) {
|
||||||
|
run.children.push(RunChild::InstrText(Box::new(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,95 @@
|
||||||
|
use std::io::Read;
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use xml::attribute::OwnedAttribute;
|
||||||
|
use xml::reader::{EventReader, XmlEvent};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
use super::attributes::*;
|
||||||
|
|
||||||
|
impl ElementReader for StructuredDataTag {
|
||||||
|
fn read<R: Read>(
|
||||||
|
r: &mut EventReader<R>,
|
||||||
|
attrs: &[OwnedAttribute],
|
||||||
|
) -> Result<Self, ReaderError> {
|
||||||
|
let mut sdt = StructuredDataTag::new();
|
||||||
|
loop {
|
||||||
|
let e = r.next();
|
||||||
|
match e {
|
||||||
|
Ok(XmlEvent::StartElement {
|
||||||
|
attributes, name, ..
|
||||||
|
}) => {
|
||||||
|
let e = XMLElement::from_str(&name.local_name).unwrap();
|
||||||
|
|
||||||
|
ignore::ignore_element(e.clone(), XMLElement::ParagraphPropertyChange, r);
|
||||||
|
|
||||||
|
match e {
|
||||||
|
XMLElement::Paragraph => {
|
||||||
|
if let Ok(p) = Paragraph::read(r, &attributes) {
|
||||||
|
sdt.children
|
||||||
|
.push(StructuredDataTagChild::Paragraph(Box::new(p)));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
XMLElement::Table => {
|
||||||
|
if let Ok(t) = Table::read(r, &attributes) {
|
||||||
|
sdt.children
|
||||||
|
.push(StructuredDataTagChild::Table(Box::new(t)));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
XMLElement::BookmarkStart => {
|
||||||
|
if let Ok(s) = BookmarkStart::read(r, &attributes) {
|
||||||
|
sdt.children.push(StructuredDataTagChild::BookmarkStart(s));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
XMLElement::BookmarkEnd => {
|
||||||
|
if let Ok(e) = BookmarkEnd::read(r, &attributes) {
|
||||||
|
sdt.children.push(StructuredDataTagChild::BookmarkEnd(e));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
XMLElement::CommentRangeStart => {
|
||||||
|
if let Some(id) = read(&attributes, "id") {
|
||||||
|
if let Ok(id) = usize::from_str(&id) {
|
||||||
|
let comment = Comment::new(id);
|
||||||
|
sdt.children.push(StructuredDataTagChild::CommentStart(
|
||||||
|
Box::new(CommentRangeStart::new(comment)),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
XMLElement::CommentRangeEnd => {
|
||||||
|
if let Some(id) = read(&attributes, "id") {
|
||||||
|
if let Ok(id) = usize::from_str(&id) {
|
||||||
|
sdt.children.push(StructuredDataTagChild::CommentEnd(
|
||||||
|
CommentRangeEnd::new(id),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
XMLElement::Run => {
|
||||||
|
if let Ok(run) = Run::read(r, attrs) {
|
||||||
|
sdt.children.push(StructuredDataTagChild::Run(Box::new(run)));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(XmlEvent::EndElement { name, .. }) => {
|
||||||
|
let e = XMLElement::from_str(&name.local_name).unwrap();
|
||||||
|
if e == XMLElement::Paragraph {
|
||||||
|
return Ok(sdt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => return Err(ReaderError::XMLReadError),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,7 +17,6 @@ impl FromXML for WebSettings {
|
||||||
attributes, name, ..
|
attributes, name, ..
|
||||||
}) => {
|
}) => {
|
||||||
let e = XMLElement::from_str(&name.local_name).unwrap();
|
let e = XMLElement::from_str(&name.local_name).unwrap();
|
||||||
dbg!(&e);
|
|
||||||
if let XMLElement::Div = e {
|
if let XMLElement::Div = e {
|
||||||
if let Ok(div) = Div::read(&mut parser, &attributes) {
|
if let Ok(div) = Div::read(&mut parser, &attributes) {
|
||||||
settings.divs.push(div);
|
settings.divs.push(div);
|
||||||
|
|
|
@ -24,6 +24,8 @@ pub enum XMLElement {
|
||||||
Italic,
|
Italic,
|
||||||
ItalicCs,
|
ItalicCs,
|
||||||
Text,
|
Text,
|
||||||
|
FieldChar,
|
||||||
|
InstrText,
|
||||||
Highlight,
|
Highlight,
|
||||||
VertAlign,
|
VertAlign,
|
||||||
Bold,
|
Bold,
|
||||||
|
@ -217,6 +219,8 @@ impl FromStr for XMLElement {
|
||||||
"rPrChange" => Ok(XMLElement::RunPropertyChange),
|
"rPrChange" => Ok(XMLElement::RunPropertyChange),
|
||||||
"color" => Ok(XMLElement::Color),
|
"color" => Ok(XMLElement::Color),
|
||||||
"t" => Ok(XMLElement::Text),
|
"t" => Ok(XMLElement::Text),
|
||||||
|
"fldChar" => Ok(XMLElement::FieldChar),
|
||||||
|
"instrText" => Ok(XMLElement::InstrText),
|
||||||
"sz" => Ok(XMLElement::Size),
|
"sz" => Ok(XMLElement::Size),
|
||||||
"szCs" => Ok(XMLElement::SizeCs),
|
"szCs" => Ok(XMLElement::SizeCs),
|
||||||
"u" => Ok(XMLElement::Underline),
|
"u" => Ok(XMLElement::Underline),
|
||||||
|
|
|
@ -86,6 +86,12 @@ impl XMLBuilder {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn add_bytes(mut self, child: &[u8]) -> Self {
|
||||||
|
let text = str::from_utf8(child).unwrap();
|
||||||
|
self.writer.write(text).expect("should write to buf");
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn add_optional_child<T>(mut self, child: &Option<T>) -> Self
|
pub(crate) fn add_optional_child<T>(mut self, child: &Option<T>) -> Self
|
||||||
where
|
where
|
||||||
T: BuildXML,
|
T: BuildXML,
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -81,6 +81,12 @@ describe("reader", () => {
|
||||||
expect(json).toMatchSnapshot();
|
expect(json).toMatchSnapshot();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("should read toc1 docx", () => {
|
||||||
|
const buffer = readFileSync("../fixtures/toc1/toc1.docx");
|
||||||
|
const json = w.readDocx(buffer);
|
||||||
|
expect(json).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
|
||||||
test("should read footer docx", () => {
|
test("should read footer docx", () => {
|
||||||
const buffer = readFileSync("../fixtures/footer/footer.docx");
|
const buffer = readFileSync("../fixtures/footer/footer.docx");
|
||||||
const json = w.readDocx(buffer);
|
const json = w.readDocx(buffer);
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue