Skip to content

Commit

Permalink
import: fix textContent ordering in p-properties (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
delan committed Oct 26, 2024
1 parent 0c408c5 commit 9783546
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 9 deletions.
11 changes: 6 additions & 5 deletions src/command/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
dom::{
html_attributes_with_embedding_urls, html_attributes_with_non_embedding_urls,
parse_html_document, parse_html_fragment, serialize_html_fragment, serialize_node_contents,
text_content, AttrsRefExt, QualName, QualNameExt, TendrilExt, Traverse,
text_content, AttrsRefExt, BreadthTraverse, QualName, QualNameExt, TendrilExt,
},
migrations::run_migrations,
path::PostsPath,
Expand Down Expand Up @@ -92,7 +92,7 @@ async fn fetch_post(url: &str) -> eyre::Result<FetchPostResult> {
let response = client.get(url).send().await?;
let dom = parse_html_document(&response.bytes().await?)?;
let mut base_href = Url::parse(&url)?;
for node in Traverse::elements(dom.document.clone()) {
for node in BreadthTraverse::elements(dom.document.clone()) {
let NodeData::Element { name, attrs, .. } = &node.data else {
unreachable!()
};
Expand Down Expand Up @@ -214,7 +214,7 @@ fn process_content(
) -> eyre::Result<String> {
let dom = parse_html_fragment(content.as_bytes())?;

for node in Traverse::nodes(dom.document.clone()) {
for node in BreadthTraverse::nodes(dom.document.clone()) {
match &node.data {
NodeData::Element { name, attrs, .. } => {
let mut attrs = attrs.borrow_mut();
Expand Down Expand Up @@ -371,12 +371,13 @@ fn mf2_dt(node: Handle, class: &str) -> eyre::Result<Option<String>> {

/// Returns the first element yielded by the element traversal rooted at `node`
/// for which `has_class(.., class)` succeeds with `true`, or `None` if the
/// traversal ends without such an element.
///
/// A failed `has_class` call is currently treated the same as "no match"
/// (see the TODO below).
fn mf2_find(node: Handle, class: &str) -> Option<Handle> {
// TODO: handle errors from has_class()
Traverse::elements(node.clone()).find(|node| has_class(node.clone(), class).unwrap_or(false))
BreadthTraverse::elements(node.clone())
.find(|node| has_class(node.clone(), class).unwrap_or(false))
}

/// Collects, in traversal order, every element yielded by the element traversal
/// rooted at `node` for which `has_class(.., class)` succeeds with `true`.
///
/// A failed `has_class` call is currently treated the same as "no match"
/// (see the TODO below), so the result may be empty but this function never fails.
fn mf2_find_all(node: Handle, class: &str) -> Vec<Handle> {
// TODO: handle errors from has_class()
Traverse::elements(node.clone())
BreadthTraverse::elements(node.clone())
.filter(|node| has_class(node.clone(), class).unwrap_or(false))
.collect()
}
Expand Down
55 changes: 51 additions & 4 deletions src/dom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,11 @@ static HTML_ATTRIBUTES_WITH_NON_EMBEDDING_URLS: LazyLock<BTreeMap<QualName, BTre
result
});

pub struct Traverse {
pub struct BreadthTraverse {
queue: VecDeque<Handle>,
elements_only: bool,
}
impl Traverse {
impl BreadthTraverse {
pub fn nodes(node: Handle) -> Self {
Self {
queue: VecDeque::from([node]),
Expand All @@ -152,7 +152,7 @@ impl Traverse {
}
}
}
impl Iterator for Traverse {
impl Iterator for BreadthTraverse {
type Item = Handle;

fn next(&mut self) -> Option<Self::Item> {
Expand All @@ -169,6 +169,53 @@ impl Iterator for Traverse {
}
}

/// Iterator over a DOM subtree that yields each node before any of its
/// children, and finishes a node's whole subtree before moving on to that
/// node's next sibling (depth-first, pre-order).
///
/// Construct it with `DepthTraverse::nodes` or `DepthTraverse::elements`.
pub struct DepthTraverse {
/// Nodes still to be visited, one queue per tree level currently being
/// walked; the last queue holds the deepest pending siblings.
stack: Vec<VecDeque<Handle>>,
/// When `true`, only `NodeData::Element` nodes are yielded; other nodes are
/// still walked so that their descendants are reached.
elements_only: bool,
}
impl DepthTraverse {
    /// Creates a traversal of the subtree rooted at `node` that yields every
    /// node, beginning with `node` itself.
    pub fn nodes(node: Handle) -> Self {
        Self::rooted_at(node, false)
    }

    /// Creates a traversal of the subtree rooted at `node` that yields only
    /// element nodes (including `node` itself when it is an element).
    pub fn elements(node: Handle) -> Self {
        Self::rooted_at(node, true)
    }

    /// Builds a traversal whose only pending node is `root`.
    fn rooted_at(root: Handle, elements_only: bool) -> Self {
        let mut top_level = VecDeque::with_capacity(1);
        top_level.push_back(root);
        Self {
            stack: vec![top_level],
            elements_only,
        }
    }
}
impl Iterator for DepthTraverse {
    type Item = Handle;

    /// Yields the next node in depth-first pre-order, or `None` once every
    /// pending node has been visited.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // An empty stack means the whole subtree has been walked.
            let upcoming = self.stack.last_mut()?.pop_front();
            let Some(current) = upcoming else {
                // The deepest level has no siblings left; resume one level up.
                self.stack.pop();
                continue;
            };

            // Queue the children as a new, deeper level so they are visited
            // before the remaining siblings of `current`.
            let children: VecDeque<Handle> = current.children.borrow().iter().cloned().collect();
            if !children.is_empty() {
                self.stack.push(children);
            }

            // Non-element nodes are skipped only when the caller asked for
            // elements; their children were queued above regardless.
            let is_element = matches!(current.data, NodeData::Element { .. });
            if is_element || !self.elements_only {
                return Some(current);
            }
        }
    }
}

pub struct Transform(VecDeque<Handle>);
impl Transform {
pub fn new(node: Handle) -> Self {
Expand Down Expand Up @@ -495,7 +542,7 @@ fn test_convert_idl_to_content_attribute() {

pub fn text_content(node: Handle) -> eyre::Result<String> {
let mut result = vec![];
for node in Traverse::nodes(node) {
for node in DepthTraverse::nodes(node) {
if let NodeData::Text { contents } = &node.data {
result.push(contents.borrow().to_str().to_owned());
}
Expand Down

0 comments on commit 9783546

Please sign in to comment.