From 513faa67c1f5e8c39d13a69b44525c6830c6c2b1 Mon Sep 17 00:00:00 2001
From: bokuweb
Date: Wed, 12 Feb 2020 16:03:11 +0900
Subject: [PATCH] Read styles (#37)

* feat: style reader

* feat: Add styles reader

* fix: lint
---
 docx-core/src/documents/mod.rs        |  22 ++++++
 docx-core/src/reader/document.rs      |   1 -
 docx-core/src/reader/document_rels.rs | 103 +++++++++++++++++++++++++++
 docx-core/src/reader/errors.rs        |   4 ++
 docx-core/src/reader/mod.rs           |  19 ++++-
 docx-core/src/reader/styles.rs        |  62 ++++++++++--------
 docx-core/src/reader/xml_element.rs   |   4 ++
 docx-core/src/types/style_type.rs     |   7 +-
 8 files changed, 194 insertions(+), 28 deletions(-)
 create mode 100644 docx-core/src/reader/document_rels.rs

diff --git a/docx-core/src/documents/mod.rs b/docx-core/src/documents/mod.rs
index 7ca826e..ee3013a 100644
--- a/docx-core/src/documents/mod.rs
+++ b/docx-core/src/documents/mod.rs
@@ -79,10 +79,32 @@ impl Docx {
     }
 
     pub fn document(mut self, d: Document) -> Docx {
+        // Scan the incoming document `d`, not the one being replaced, so that
+        // `document_rels.has_numberings` reflects the document that is installed.
+        for child in &d.children {
+            match child {
+                DocumentChild::Paragraph(paragraph) => {
+                    if paragraph.has_numbering {
+                        self.document_rels.has_numberings = true;
+                    }
+                }
+                DocumentChild::Table(table) => {
+                    if table.has_numbering {
+                        self.document_rels.has_numberings = true;
+                    }
+                }
+            }
+        }
         self.document = d;
         self
     }
 
+    /// Replaces the styles of this document with `s`.
+    pub fn styles(mut self, s: Styles) -> Self {
+        self.styles = s;
+        self
+    }
+
     pub fn add_paragraph(mut self, p: Paragraph) -> Docx {
         if p.has_numbering {
             // If this document has numbering, set numberings.xml to document_rels.
diff --git a/docx-core/src/reader/document.rs b/docx-core/src/reader/document.rs
index 70ef4b0..be1a086 100644
--- a/docx-core/src/reader/document.rs
+++ b/docx-core/src/reader/document.rs
@@ -10,7 +10,6 @@ impl FromXML for Document {
     fn from_xml<R: Read>(reader: R) -> Result<Self, ReaderError> {
         let mut parser = EventReader::new(reader);
         let mut doc = Self::default();
-
         loop {
             let e = parser.next();
             match e {
diff --git a/docx-core/src/reader/document_rels.rs b/docx-core/src/reader/document_rels.rs
new file mode 100644
index 0000000..45a5147
--- /dev/null
+++ b/docx-core/src/reader/document_rels.rs
@@ -0,0 +1,103 @@
+use std::collections::HashMap;
+use std::io::{Cursor, Read};
+use std::path::*;
+use std::str::FromStr;
+
+use xml::reader::{EventReader, XmlEvent};
+
+use super::errors::*;
+use super::*;
+
+/// Relationships read from a `.rels` part, keyed by relationship type.
+#[derive(Debug, Clone, PartialEq)]
+pub struct ReadDocumentRels {
+    rels: HashMap<String, PathBuf>,
+}
+
+impl ReadDocumentRels {
+    /// Returns a copy of the path stored for the relationship type `target`, if any.
+    pub fn find_target_path(&self, target: &str) -> Option<PathBuf> {
+        self.rels.get(target).cloned()
+    }
+}
+
+/// Reads the relationships part that belongs to `main_path` from `archive`.
+///
+/// The part name is derived by `find_rels_filename`, and every `Target` found in
+/// it is joined onto the directory that contains `main_path`.
+pub fn read_document_rels(
+    archive: &mut zip::read::ZipArchive<Cursor<&[u8]>>,
+    main_path: impl AsRef<Path>,
+) -> Result<ReadDocumentRels, ReaderError> {
+    let dir = &main_path
+        .as_ref()
+        .parent()
+        .ok_or(ReaderError::DocumentRelsNotFoundError)?;
+    let p = find_rels_filename(&main_path)?;
+    let p = p.to_str().ok_or(ReaderError::DocumentRelsNotFoundError)?;
+    let rels_xml = archive.by_name(&p)?;
+    let rels = read_rels_xml(rels_xml, dir)?;
+    Ok(rels)
+}
+
+/// Collects the `Type` and `Target` attributes of each `Relationship` element in `reader`.
+fn read_rels_xml<R: Read>(
+    reader: R,
+    dir: impl AsRef<Path>,
+) -> Result<ReadDocumentRels, ReaderError> {
+    let mut parser = EventReader::new(reader);
+    let mut rels = ReadDocumentRels {
+        rels: HashMap::new(),
+    };
+    loop {
+        let e = parser.next();
+        match e {
+            Ok(XmlEvent::StartElement {
+                attributes, name, ..
+            }) => {
+                let e = XMLElement::from_str(&name.local_name).unwrap();
+                if let XMLElement::Relationship = e {
+                    let mut rel_type = "".to_owned();
+                    let mut target = PathBuf::default();
+                    for a in attributes {
+                        let local_name = &a.name.local_name;
+                        if local_name == "Type" {
+                            rel_type = a.value.to_owned();
+                        } else if local_name == "Target" {
+                            target = Path::new(dir.as_ref()).join(a.value);
+                        }
+                    }
+                    rels.rels.insert(rel_type, target);
+                    continue;
+                }
+            }
+            Ok(XmlEvent::EndElement { name, .. }) => {
+                let e = XMLElement::from_str(&name.local_name).unwrap();
+                if let XMLElement::Relationships = e {
+                    break;
+                }
+            }
+            // Stop at end of input even when no closing `Relationships` was seen;
+            // otherwise this loop would never terminate.
+            Ok(XmlEvent::EndDocument) => break,
+            Err(_) => return Err(ReaderError::XMLReadError),
+            _ => {}
+        }
+    }
+    Ok(rels)
+}
+
+/// Builds `<parent>/_rels/<file stem>` with the extension set to `xml.rels`.
+fn find_rels_filename(main_path: impl AsRef<Path>) -> Result<PathBuf, ReaderError> {
+    let path = main_path.as_ref();
+    let dir = path
+        .parent()
+        .ok_or(ReaderError::DocumentRelsNotFoundError)?;
+    let base = path
+        .file_stem()
+        .ok_or(ReaderError::DocumentRelsNotFoundError)?;
+    Ok(Path::new(dir)
+        .join("_rels")
+        .join(base)
+        .with_extension("xml.rels"))
+}
diff --git a/docx-core/src/reader/errors.rs b/docx-core/src/reader/errors.rs
index 2fb13e0..d7387dd 100644
--- a/docx-core/src/reader/errors.rs
+++ b/docx-core/src/reader/errors.rs
@@ -12,6 +12,10 @@ pub enum ReaderError {
     XMLReadError,
     #[error("Failed to find document.")]
     DocumentNotFoundError,
+    #[error("Failed to find document rels.")]
+    DocumentRelsNotFoundError,
+    #[error("Failed to find styles.")]
+    DocumentStylesNotFoundError,
     #[error("Unknown error")]
     Unknown,
 }
diff --git a/docx-core/src/reader/mod.rs b/docx-core/src/reader/mod.rs
index 6242fa3..c2233ee 100644
--- a/docx-core/src/reader/mod.rs
+++ b/docx-core/src/reader/mod.rs
@@ -1,6 +1,7 @@
 mod attributes;
 mod delete;
 mod document;
+mod document_rels;
 mod errors;
 mod from_xml;
 mod insert;
@@ -21,12 +22,15 @@ use zip;
 use crate::documents::*;
 
 pub use attributes::*;
+pub use document_rels::*;
 pub use errors::ReaderError;
 pub use from_xml::*;
 pub use xml_element::*;
 
 const DOC_RELATIONSHIP_TYPE: &str =
     "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
+const STYLE_RELATIONSHIP_TYPE: &str =
+    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
 
 pub fn read_docx(buf: &[u8]) -> Result<Docx, ReaderError> {
     let cur = Cursor::new(buf);
@@ -46,6 +50,19 @@ pub fn read_docx(buf: &[u8]) -> Result<Docx, ReaderError> {
         .ok_or(ReaderError::DocumentNotFoundError)?;
     let document_xml = archive.by_name(&main_rel.2)?;
     let document = Document::from_xml(document_xml)?;
-    let docx = Docx::new().document(document);
+
+    // Read document relationships
+    let rels = read_document_rels(&mut archive, &main_rel.2)?;
+    let style_path = rels
+        .find_target_path(STYLE_RELATIONSHIP_TYPE)
+        .ok_or(ReaderError::DocumentStylesNotFoundError)?;
+    // Report a target path that is not valid UTF-8 as an error instead of panicking.
+    let style_path = style_path
+        .to_str()
+        .ok_or(ReaderError::DocumentStylesNotFoundError)?;
+    let styles_xml = archive.by_name(style_path)?;
+    let styles = Styles::from_xml(styles_xml)?;
+
+    let docx = Docx::new().document(document).styles(styles);
     Ok(docx)
 }
diff --git a/docx-core/src/reader/styles.rs b/docx-core/src/reader/styles.rs
index 2231833..4d59d1e 100644
--- a/docx-core/src/reader/styles.rs
+++ b/docx-core/src/reader/styles.rs
@@ -37,28 +37,40 @@ impl FromXML for Styles {
     }
 }
 
-// #[cfg(test)]
-// mod tests {
-//
-//     use super::*;
-//     #[cfg(test)]
-//     use pretty_assertions::assert_eq;
-//
-//     #[test]
-//     fn test_from_xml() {
-//         let xml = r#"
-//
-//
-// "#;
-//         let c = Rels::from_xml(xml.as_bytes()).unwrap();
-//         let mut rels = Vec::new();
-//         rels.push((
-//             "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
-//                 .to_owned(),
-//             "rId1".to_owned(),
-//             "docProps/core.xml".to_owned(),
-//         ));
-//         assert_eq!(Rels { rels }, c);
-//     }
-// }
-//
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use crate::types::*;
+    #[cfg(test)]
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn test_from_xml() {
+        // NOTE(review): the XML fixture below is blank in the reviewed copy of this patch; restore it.
+        let xml =
+            r#"
+
+
+
+
+
+
+
+
+
+
+
+
+
+"#;
+        let s = Styles::from_xml(xml.as_bytes()).unwrap();
+        let mut styles = Styles::new();
+        styles = styles.add_style(
+            Style::new("FootnoteTextChar", StyleType::Character)
+                .name("Footnote Text Char")
+                .size(20),
+        );
+        assert_eq!(s, styles);
+    }
+}
diff --git a/docx-core/src/reader/xml_element.rs b/docx-core/src/reader/xml_element.rs
index 93a68a5..9a3d5bd 100644
--- a/docx-core/src/reader/xml_element.rs
+++ b/docx-core/src/reader/xml_element.rs
@@ -65,6 +65,8 @@ pub enum XMLElement {
     VertAlign,
     Spacing,
     Styles,
+    Relationship,
+    Relationships,
     Unsupported,
 }
 
@@ -129,6 +131,8 @@ impl FromStr for XMLElement {
             "vertAlign" => Ok(XMLElement::VertAlign),
             "spacing" => Ok(XMLElement::Spacing),
             "styles" => Ok(XMLElement::Styles),
+            "Relationships" => Ok(XMLElement::Relationships),
+            "Relationship" => Ok(XMLElement::Relationship),
             _ => Ok(XMLElement::Unsupported),
         }
     }
diff --git a/docx-core/src/types/style_type.rs b/docx-core/src/types/style_type.rs
index 15c8c6b..a9c99f6 100644
--- a/docx-core/src/types/style_type.rs
+++ b/docx-core/src/types/style_type.rs
@@ -11,6 +11,8 @@ use std::str::FromStr;
 pub enum StyleType {
     Paragraph,
     Character,
+    Numbering,
+    Unsupported,
 }
 
 impl fmt::Display for StyleType {
@@ -18,6 +20,8 @@ impl fmt::Display for StyleType {
         match *self {
             StyleType::Paragraph => write!(f, "paragraph"),
             StyleType::Character => write!(f, "character"),
+            StyleType::Numbering => write!(f, "numbering"),
+            StyleType::Unsupported => write!(f, "unsupported"),
         }
     }
 }
@@ -28,7 +32,8 @@ impl FromStr for StyleType {
         match s {
             "paragraph" => Ok(StyleType::Paragraph),
             "character" => Ok(StyleType::Character),
-            _ => Err(errors::TypeError::FromStrError),
+            "numbering" => Ok(StyleType::Numbering),
+            _ => Ok(StyleType::Unsupported),
         }
     }
 }