2020-04-07 04:24:56 +03:00
|
|
|
mod a_graphic;
|
|
|
|
mod a_graphic_data;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod attributes;
|
2020-09-07 09:46:23 +03:00
|
|
|
mod bookmark_end;
|
|
|
|
mod bookmark_start;
|
2020-12-15 15:33:01 +02:00
|
|
|
mod comment;
|
|
|
|
mod comment_extended;
|
|
|
|
mod comments;
|
|
|
|
mod comments_extended;
|
2021-06-23 12:39:01 +03:00
|
|
|
mod custom_properties;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod delete;
|
2021-04-09 05:30:50 +03:00
|
|
|
mod div;
|
2020-12-14 08:01:23 +02:00
|
|
|
mod doc_defaults;
|
2021-04-09 05:30:50 +03:00
|
|
|
mod doc_grid;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod document;
|
2020-02-12 09:03:11 +02:00
|
|
|
mod document_rels;
|
2020-04-07 04:24:56 +03:00
|
|
|
mod drawing;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod errors;
|
|
|
|
mod from_xml;
|
2020-06-12 11:42:16 +03:00
|
|
|
mod ignore;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod insert;
|
2020-02-12 12:19:38 +02:00
|
|
|
mod level;
|
2020-06-08 07:41:13 +03:00
|
|
|
mod level_override;
|
2020-04-07 04:24:56 +03:00
|
|
|
mod mc_fallback;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod numbering_property;
|
2020-02-12 12:19:38 +02:00
|
|
|
mod numberings;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod paragraph;
|
2020-02-13 09:14:06 +02:00
|
|
|
mod read_zip;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod rels;
|
|
|
|
mod run;
|
2020-06-08 07:41:13 +03:00
|
|
|
mod run_property;
|
2020-10-09 14:30:55 +03:00
|
|
|
mod section_property;
|
2020-09-07 09:46:23 +03:00
|
|
|
mod settings;
|
2021-03-18 12:02:28 +02:00
|
|
|
mod shading;
|
2020-02-12 08:44:53 +02:00
|
|
|
mod style;
|
|
|
|
mod styles;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod table;
|
2020-10-30 13:29:06 +02:00
|
|
|
mod table_borders;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod table_cell;
|
2020-04-27 06:06:19 +03:00
|
|
|
mod table_cell_borders;
|
2021-04-14 07:47:46 +03:00
|
|
|
mod table_cell_margins;
|
2021-04-09 05:30:50 +03:00
|
|
|
mod table_cell_property;
|
2020-10-30 13:29:06 +02:00
|
|
|
mod table_property;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod table_row;
|
2020-04-07 04:24:56 +03:00
|
|
|
mod text_box_content;
|
2021-04-09 05:30:50 +03:00
|
|
|
mod web_settings;
|
2020-04-07 04:24:56 +03:00
|
|
|
mod wp_anchor;
|
|
|
|
mod wps_shape;
|
|
|
|
mod wps_text_box;
|
2020-02-11 10:01:39 +02:00
|
|
|
mod xml_element;
|
|
|
|
|
|
|
|
use std::io::Cursor;
|
|
|
|
|
|
|
|
use crate::documents::*;
|
|
|
|
|
|
|
|
pub use attributes::*;
|
2020-02-12 09:03:11 +02:00
|
|
|
pub use document_rels::*;
|
2020-02-11 10:01:39 +02:00
|
|
|
pub use errors::ReaderError;
|
|
|
|
pub use from_xml::*;
|
2020-04-07 04:24:56 +03:00
|
|
|
pub use mc_fallback::*;
|
2020-02-13 09:14:06 +02:00
|
|
|
pub use read_zip::*;
|
2020-02-11 10:01:39 +02:00
|
|
|
pub use xml_element::*;
|
|
|
|
|
2021-04-09 05:30:50 +03:00
|
|
|
// Relationship type URIs used to locate package parts.
//
// The first group lives under the 2006 `schemas.openxmlformats.org` namespace.

/// Relationship type of the main document part (looked up in the package-level rels).
const DOC_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
/// Relationship type of the custom properties part (looked up in the package-level rels).
const CUSTOM_PROPERTIES_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties";
/// Relationship type of the styles part.
const STYLE_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
/// Relationship type of the numbering part.
const NUMBERING_RELATIONSHIP_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering";
/// Relationship type of the settings part.
const SETTINGS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings";
/// Relationship type of the comments part.
const COMMENTS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
/// Relationship type of the web settings part.
const WEB_SETTINGS_TYPE: &str =
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings";

// The second group lives under the 2011 `schemas.microsoft.com` namespace.

/// Relationship type of the extended comments part.
const COMMENTS_EXTENDED_TYPE: &str =
    "http://schemas.microsoft.com/office/2011/relationships/commentsExtended";
|
2020-02-11 10:01:39 +02:00
|
|
|
|
|
|
|
pub fn read_docx(buf: &[u8]) -> Result<Docx, ReaderError> {
|
2021-06-23 12:39:01 +03:00
|
|
|
let mut docx = Docx::new();
|
2020-02-11 10:01:39 +02:00
|
|
|
let cur = Cursor::new(buf);
|
|
|
|
let mut archive = zip::ZipArchive::new(cur)?;
|
|
|
|
// First, the content type for relationship parts and the Main Document part
|
|
|
|
// (the only required part) must be defined (physically located at /[Content_Types].xml in the package)
|
2020-02-13 09:14:06 +02:00
|
|
|
let _content_types = {
|
|
|
|
let data = read_zip(&mut archive, "[Content_Types].xml")?;
|
|
|
|
ContentTypes::from_xml(&data[..])?
|
|
|
|
};
|
|
|
|
|
2020-02-11 10:01:39 +02:00
|
|
|
// Next, the single required relationship (the package-level relationship to the Main Document part)
|
|
|
|
// must be defined (physically located at /_rels/.rels in the package)
|
2020-02-13 09:14:06 +02:00
|
|
|
let rels = {
|
|
|
|
let data = read_zip(&mut archive, "_rels/.rels")?;
|
|
|
|
Rels::from_xml(&data[..])?
|
|
|
|
};
|
2020-02-11 10:01:39 +02:00
|
|
|
// Finally, the minimum content for the Main Document part must be defined
|
|
|
|
// (physically located at /document.xml in the package):
|
|
|
|
let main_rel = rels
|
|
|
|
.find_target(DOC_RELATIONSHIP_TYPE)
|
2020-10-26 04:23:16 +02:00
|
|
|
.ok_or(ReaderError::DocumentNotFoundError);
|
|
|
|
|
|
|
|
let document_path = if let Ok(rel) = main_rel {
|
|
|
|
rel.2.clone()
|
|
|
|
} else {
|
|
|
|
"word/document.xml".to_owned()
|
|
|
|
};
|
2020-12-15 15:33:01 +02:00
|
|
|
|
2021-06-23 12:39:01 +03:00
|
|
|
if let Some(custom_props) = rels.find_target(CUSTOM_PROPERTIES_TYPE) {
|
|
|
|
let data = read_zip(&mut archive, &custom_props.2);
|
|
|
|
if let Ok(data) = data {
|
|
|
|
if let Ok(custom) = CustomProps::from_xml(&data[..]) {
|
|
|
|
docx.doc_props.custom = custom;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-15 15:33:01 +02:00
|
|
|
let rels = read_document_rels(&mut archive, &document_path)?;
|
|
|
|
|
|
|
|
// Read commentsExtended
|
|
|
|
let comments_extended_path = rels.find_target_path(COMMENTS_EXTENDED_TYPE);
|
|
|
|
let comments_extended = if let Some(comments_extended_path) = comments_extended_path {
|
|
|
|
let data = read_zip(
|
|
|
|
&mut archive,
|
|
|
|
comments_extended_path
|
|
|
|
.to_str()
|
|
|
|
.expect("should have comments extended."),
|
|
|
|
);
|
|
|
|
if let Ok(data) = data {
|
|
|
|
CommentsExtended::from_xml(&data[..])?
|
|
|
|
} else {
|
|
|
|
CommentsExtended::default()
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
CommentsExtended::default()
|
|
|
|
};
|
|
|
|
|
|
|
|
// Read comments
|
|
|
|
let comments_path = rels.find_target_path(COMMENTS_TYPE);
|
|
|
|
let comments = if let Some(comments_path) = comments_path {
|
|
|
|
let data = read_zip(
|
|
|
|
&mut archive,
|
|
|
|
comments_path.to_str().expect("should have comments."),
|
|
|
|
);
|
|
|
|
if let Ok(data) = data {
|
|
|
|
let mut comments = Comments::from_xml(&data[..])?.into_inner();
|
|
|
|
for i in 0..comments.len() {
|
|
|
|
let c = &comments[i];
|
2020-12-21 10:30:42 +02:00
|
|
|
let extended = comments_extended.children.iter().find(|ex| {
|
|
|
|
for child in &c.children {
|
|
|
|
if let CommentChild::Paragraph(p) = child {
|
|
|
|
if ex.paragraph_id == p.id {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
});
|
2020-12-15 15:33:01 +02:00
|
|
|
if let Some(CommentExtended {
|
|
|
|
parent_paragraph_id: Some(parent_paragraph_id),
|
|
|
|
..
|
|
|
|
}) = extended
|
|
|
|
{
|
2020-12-21 10:30:42 +02:00
|
|
|
if let Some(parent_comment) = comments.iter().find(|c| {
|
|
|
|
for child in &c.children {
|
|
|
|
if let CommentChild::Paragraph(p) = child {
|
|
|
|
if &p.id == parent_paragraph_id {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
}) {
|
2020-12-15 15:33:01 +02:00
|
|
|
comments[i].parent_comment_id = Some(parent_comment.id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Comments { comments }
|
|
|
|
} else {
|
|
|
|
Comments::default()
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Comments::default()
|
|
|
|
};
|
|
|
|
|
2020-02-13 09:14:06 +02:00
|
|
|
let document = {
|
2020-10-26 04:23:16 +02:00
|
|
|
let data = read_zip(&mut archive, &document_path)?;
|
2020-02-13 09:14:06 +02:00
|
|
|
Document::from_xml(&data[..])?
|
|
|
|
};
|
2021-06-23 12:39:01 +03:00
|
|
|
docx = docx.document(document);
|
2020-02-12 12:19:38 +02:00
|
|
|
|
2020-12-15 15:33:01 +02:00
|
|
|
// store comments to paragraphs.
|
|
|
|
if !comments.inner().is_empty() {
|
|
|
|
docx.store_comments(comments.inner());
|
|
|
|
docx = docx.comments(comments);
|
|
|
|
docx = docx.comments_extended(comments_extended);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read document relationships
|
2020-02-12 12:19:38 +02:00
|
|
|
// Read styles
|
2020-02-12 19:57:48 +02:00
|
|
|
let style_path = rels.find_target_path(STYLE_RELATIONSHIP_TYPE);
|
|
|
|
if let Some(style_path) = style_path {
|
2020-02-13 09:14:06 +02:00
|
|
|
let data = read_zip(
|
|
|
|
&mut archive,
|
|
|
|
style_path.to_str().expect("should have styles"),
|
|
|
|
)?;
|
|
|
|
let styles = Styles::from_xml(&data[..])?;
|
2020-02-12 19:57:48 +02:00
|
|
|
docx = docx.styles(styles);
|
|
|
|
}
|
2020-02-12 09:03:11 +02:00
|
|
|
|
2020-02-12 12:19:38 +02:00
|
|
|
// Read numberings
|
2020-02-12 19:57:48 +02:00
|
|
|
let num_path = rels.find_target_path(NUMBERING_RELATIONSHIP_TYPE);
|
|
|
|
if let Some(num_path) = num_path {
|
2020-02-13 09:14:06 +02:00
|
|
|
let data = read_zip(
|
|
|
|
&mut archive,
|
|
|
|
num_path.to_str().expect("should have numberings"),
|
|
|
|
)?;
|
|
|
|
let nums = Numberings::from_xml(&data[..])?;
|
2020-02-12 19:57:48 +02:00
|
|
|
docx = docx.numberings(nums);
|
|
|
|
}
|
2020-02-12 12:19:38 +02:00
|
|
|
|
2020-09-07 09:46:23 +03:00
|
|
|
// Read settings
|
|
|
|
let settings_path = rels.find_target_path(SETTINGS_TYPE);
|
|
|
|
if let Some(settings_path) = settings_path {
|
|
|
|
let data = read_zip(
|
|
|
|
&mut archive,
|
|
|
|
settings_path.to_str().expect("should have settings"),
|
|
|
|
)?;
|
|
|
|
let settings = Settings::from_xml(&data[..])?;
|
|
|
|
docx = docx.settings(settings);
|
|
|
|
}
|
|
|
|
|
2021-04-09 05:30:50 +03:00
|
|
|
// Read web settings
|
|
|
|
let web_settings_path = rels.find_target_path(WEB_SETTINGS_TYPE);
|
|
|
|
dbg!(&web_settings_path);
|
|
|
|
if let Some(web_settings_path) = web_settings_path {
|
|
|
|
let data = read_zip(
|
|
|
|
&mut archive,
|
|
|
|
web_settings_path
|
|
|
|
.to_str()
|
|
|
|
.expect("should have web settings"),
|
|
|
|
)?;
|
|
|
|
let web_settings = WebSettings::from_xml(&data[..])?;
|
|
|
|
docx = docx.web_settings(web_settings);
|
|
|
|
}
|
|
|
|
|
2020-02-11 10:01:39 +02:00
|
|
|
Ok(docx)
|
|
|
|
}
|