From c60ec0c98c578f2d062c71258e68dc3eb5c95d54 Mon Sep 17 00:00:00 2001 From: Ko van der Sloot Date: Wed, 4 Dec 2024 20:05:41 +0100 Subject: [PATCH] more adaptations --- src/tokenize.cxx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/tokenize.cxx b/src/tokenize.cxx index cf6c0e3..7a5b0d8 100644 --- a/src/tokenize.cxx +++ b/src/tokenize.cxx @@ -1675,10 +1675,11 @@ namespace Tokenizer { int& sentence_done ){ // a Paragraph may contain both Word and Sentence nodes // Sentences will be handled - vector sv = p->select(false); + vector sv + = p->select(folia::SELECT_FLAGS::LOCAL); if ( sv.empty() ){ // No Sentence, so just text or Words - vector wv = p->select(false); + vector wv = p->select(folia::SELECT_FLAGS::LOCAL); if ( !wv.empty() ){ vector ev( wv.begin(), wv.end() ); // Words found @@ -1778,8 +1779,10 @@ namespace Tokenizer { // maybe
<div> or <note> or such // there may be embedded Paragraph, Word and Sentence nodes // if so, Paragraphs and Sentences should be handled separately - vector sv = e->select(false); - vector pv = e->select(false); + vector sv + = e->select(folia::SELECT_FLAGS::LOCAL); + vector pv + = e->select(folia::SELECT_FLAGS::LOCAL); if ( pv.empty() && sv.empty() ){ // just words or text UnicodeString text = e->unicode( text_policy );