Skip to content

Commit

Permalink
Merge pull request #63 from datafuselabs/dev-fix-md
Browse files (browse the repository at this point in the history)
fix: fix section split
  • Loading branch information
BohuTANG authored Apr 23, 2023
2 parents 31a5ca7 + 0a48f26 commit 95631ed
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 14 deletions.
31 changes: 20 additions & 11 deletions app/src/files/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,20 @@ use std::fs;

use anyhow::Result;
use comrak::format_commonmark;
use comrak::nodes::NodeValue;
use comrak::parse_document;
use comrak::Arena;
use comrak::ComrakOptions;

use crate::SnippetFile;
use crate::Parse;
use crate::SnippetFiles;
use crate::{replace_multiple_spaces, LengthWithoutSymbols, Parse};
use crate::{replace_multiple_spaces, SnippetFile};

pub struct Markdown;

impl Parse for Markdown {
fn parse(path: &str) -> Result<SnippetFile> {
let min_section_len = 1024;
let min_section_len = 400;

let content = fs::read_to_string(path)?;
let arena = Arena::new();
Expand All @@ -38,11 +39,21 @@ impl Parse for Markdown {
let mut current_section = String::new();

for node in root.children() {
let mut section_text = vec![];
format_commonmark(node, &ComrakOptions::default(), &mut section_text).unwrap();
let current_section_transformer =
replace_multiple_spaces(std::str::from_utf8(&section_text).unwrap());
current_section.push_str(&current_section_transformer);
match node.data.borrow().value {
NodeValue::Heading(_) => {
if !current_section.is_empty() {
sections.push(current_section);
current_section = String::new();
}
}
_ => {
let mut section_text = vec![];
format_commonmark(node, &ComrakOptions::default(), &mut section_text).unwrap();
let transformer =
replace_multiple_spaces(std::str::from_utf8(&section_text).unwrap());
current_section.push_str(&transformer);
}
}
}

if !current_section.is_empty() {
Expand All @@ -54,9 +65,7 @@ impl Parse for Markdown {
let mut prev_section = String::new();

for section in sections {
if (prev_section.length_without_symbols() + section.length_without_symbols())
< min_section_len
{
if (prev_section.len() + section.len()) < min_section_len {
prev_section.push_str(&section);
} else {
if !prev_section.is_empty() {
Expand Down
13 changes: 10 additions & 3 deletions app/tests/it/files/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,19 @@ use askbend::Parse;
pub fn test_markdown_files() -> Result<()> {
let file = FileOperator::create("tests/testdata/", "md", &[]);
let metas = file.list()?;
let files = metas
.iter()
.map(|x| x.full_path.clone())
.collect::<Vec<String>>();

let markdowns = Markdown::parse_multiple(&[metas[1].full_path.clone()])?;
let markdowns = Markdown::parse_multiple(&files)?;
for markdown in &markdowns.snippet_files {
assert_eq!(markdown.file_path, "tests/testdata/hash.md");
//assert_eq!(markdown.file_path, "tests/testdata/hash.md");
for section in &markdown.code_snippets {
println!("--{:?}", section);
println!(
"*****file:{}, ++++++++++--{:?}",
markdown.file_path, section
);
}
}

Expand Down

1 comment on commit 95631ed

@vercel
Copy link

@vercel vercel bot commented on 95631ed Apr 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.