diff --git a/src/command/import.rs b/src/command/import.rs index eb8d038..4129047 100644 --- a/src/command/import.rs +++ b/src/command/import.rs @@ -16,7 +16,7 @@ use crate::{ dom::{ html_attributes_with_embedding_urls, html_attributes_with_non_embedding_urls, parse_html_document, parse_html_fragment, serialize_html_fragment, serialize_node_contents, - text_content, AttrsRefExt, QualName, QualNameExt, TendrilExt, Traverse, + text_content, AttrsRefExt, BreadthTraverse, QualName, QualNameExt, TendrilExt, }, migrations::run_migrations, path::PostsPath, @@ -92,7 +92,7 @@ async fn fetch_post(url: &str) -> eyre::Result { let response = client.get(url).send().await?; let dom = parse_html_document(&response.bytes().await?)?; let mut base_href = Url::parse(&url)?; - for node in Traverse::elements(dom.document.clone()) { + for node in BreadthTraverse::elements(dom.document.clone()) { let NodeData::Element { name, attrs, .. } = &node.data else { unreachable!() }; @@ -214,7 +214,7 @@ fn process_content( ) -> eyre::Result { let dom = parse_html_fragment(content.as_bytes())?; - for node in Traverse::nodes(dom.document.clone()) { + for node in BreadthTraverse::nodes(dom.document.clone()) { match &node.data { NodeData::Element { name, attrs, .. } => { let mut attrs = attrs.borrow_mut(); @@ -371,12 +371,13 @@ fn mf2_dt(node: Handle, class: &str) -> eyre::Result> { fn mf2_find(node: Handle, class: &str) -> Option { // TODO: handle errors from has_class() - Traverse::elements(node.clone()).find(|node| has_class(node.clone(), class).unwrap_or(false)) + BreadthTraverse::elements(node.clone()) + .find(|node| has_class(node.clone(), class).unwrap_or(false)) } fn mf2_find_all(node: Handle, class: &str) -> Vec { // TODO: handle errors from has_class() - Traverse::elements(node.clone()) + BreadthTraverse::elements(node.clone()) .filter(|node| has_class(node.clone(), class).unwrap_or(false)) .collect() } diff --git a/src/dom.rs b/src/dom.rs index b9ad816..3a3c4b6 100644 --- a/src/dom.rs +++ b/src/dom.rs @@ -133,11 +133,11 @@ static HTML_ATTRIBUTES_WITH_NON_EMBEDDING_URLS: LazyLock, elements_only: bool, } -impl Traverse { +impl BreadthTraverse { pub fn nodes(node: Handle) -> Self { Self { queue: VecDeque::from([node]), @@ -152,7 +152,7 @@ impl Traverse { } } } -impl Iterator for Traverse { +impl Iterator for BreadthTraverse { type Item = Handle; fn next(&mut self) -> Option { @@ -169,6 +169,53 @@ impl Iterator for Traverse { } } +pub struct DepthTraverse { + stack: Vec>, + elements_only: bool, +} +impl DepthTraverse { + pub fn nodes(node: Handle) -> Self { + Self { + stack: vec![VecDeque::from([node])], + elements_only: false, + } + } + + pub fn elements(node: Handle) -> Self { + Self { + stack: vec![VecDeque::from([node])], + elements_only: true, + } + } +} +impl Iterator for DepthTraverse { + type Item = Handle; + + fn next(&mut self) -> Option { + while !self.stack.is_empty() { + while self.stack.last().is_some_and(|queue| queue.is_empty()) { + self.stack.pop(); + } + if let Some(node) = self.stack.last_mut().and_then(|queue| queue.pop_front()) { + let kids = node + .children + .borrow() + .iter() + .cloned() + .collect::>(); + if !kids.is_empty() { + self.stack.push(kids); + } + if !self.elements_only || matches!(node.data, NodeData::Element { .. }) { + return Some(node); + } + } + } + + None + } +} + pub struct Transform(VecDeque); impl Transform { pub fn new(node: Handle) -> Self { @@ -495,7 +542,7 @@ fn test_convert_idl_to_content_attribute() { pub fn text_content(node: Handle) -> eyre::Result { let mut result = vec![]; - for node in Traverse::nodes(node) { + for node in DepthTraverse::nodes(node) { if let NodeData::Text { contents } = &node.data { result.push(contents.borrow().to_str().to_owned()); }