From d181905784f1130ef601f1a80a7f5b8065a4404a Mon Sep 17 00:00:00 2001 From: dhruvil410 Date: Fri, 19 Mar 2021 15:57:36 +0530 Subject: [PATCH] fix #60 --- src/sentences/sentence_splitting.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sentences/sentence_splitting.jl b/src/sentences/sentence_splitting.jl index 0f58147..4ff84d5 100644 --- a/src/sentences/sentence_splitting.jl +++ b/src/sentences/sentence_splitting.jl @@ -120,7 +120,8 @@ function postproc_splits(sentences::AbstractString) sentences = replace(sentences, r"(\bMs\.)\n" => s"\1 ") sentences = replace(sentences, r"(\bMrs\.)\n" => s"\1 ") - + # no sentence break in between two words with no punctuation + sentences=replace(sentences,r"([a-zA-Z0-9])\n([a-zA-Z0-9])"=>s"\1 \2") # possible TODO: filter excessively long / short sentences