From eaaeeea3857aeb107118b74f954e637c7c6b5cdf Mon Sep 17 00:00:00 2001
From: omergreen <71124454+omergreen@users.noreply.github.com>
Date: Tue, 6 Feb 2024 00:35:23 +0200
Subject: [PATCH 1/5] Add spaces before readings

---
 reading.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/reading.py b/reading.py
index 40e4371..647b208 100644
--- a/reading.py
+++ b/reading.py
@@ -143,14 +143,15 @@ def __init__(self, text: str, reading: Optional[str]):
         self.text = text
         self.reading = reading
 
-    def format(self, useRubyTags: bool) -> str:
+    def format(self, useRubyTags: bool, previous_character: str) -> str:
         if self.reading is None:
             return self.text
 
         if useRubyTags:
             return "<ruby>%s<rp>(</rp><rt>%s</rt><rp>)</rp></ruby>" % (self.text, self.reading)
         else:
-            return '%s[%s]' % (self.text, self.reading)
+            add_space = previous_character is not None and isKana(previous_character)
+            return '%s%s[%s]' % (" " if add_space else "", self.text, self.reading)
 
 class RegexDefinition:
     def __init__(self, text: str, regexGroupIndex: Optional[int]):
@@ -276,7 +277,9 @@ def reading(self, expr, ignoreNumbers = True, useRubyTags = False):
                     nodes.append(ReadingNode(definition.text, groupReading))
 
         # Combine our nodes together into a single sentece
-        fin = ''.join(node.format(useRubyTags) for node in nodes)
+        fin = ''
+        for node in nodes:
+            fin += node.format(useRubyTags, fin[-1] if len(fin) > 0 else None)
 
         # Finalize formatting
         fin = fin.replace(ASCII_SPACE_TOKEN, ' ')

From d6a4cb84e293ca9d9688761d99ed8f03c9dd78a7 Mon Sep 17 00:00:00 2001
From: omergreen <71124454+omergreen@users.noreply.github.com>
Date: Tue, 6 Feb 2024 00:47:57 +0200
Subject: [PATCH 2/5] Add a space before each bracket reading unless it starts the output or follows ]

---
 reading.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/reading.py b/reading.py
index 647b208..5074537 100644
--- a/reading.py
+++ b/reading.py
@@ -150,7 +150,7 @@ def format(self, useRubyTags: bool, previous_character: str) -> str:
         if useRubyTags:
             return "<ruby>%s<rp>(</rp><rt>%s</rt><rp>)</rp></ruby>" % (self.text, self.reading)
         else:
-            add_space = previous_character is not None and isKana(previous_character)
+            add_space = previous_character is not None and previous_character != "]"
             return '%s%s[%s]' % (" " if add_space else "", self.text, self.reading)
 
 class RegexDefinition:

From a58460d74ff57d18e4761d4e5c39981881a14f79 Mon Sep 17 00:00:00 2001
From: omergreen <71124454+omergreen@users.noreply.github.com>
Date: Sun, 11 Feb 2024 18:33:07 +0000
Subject: [PATCH 3/5] remove spaces when stripping bracket furigana

---
 utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/utils.py b/utils.py
index 66f3838..73c19d7 100644
--- a/utils.py
+++ b/utils.py
@@ -35,6 +35,10 @@ def removeFurigana(text: str):
         stripped = stripped.replace("<ruby>" + ruby + "</ruby>", body)
 
     # Next, remove the bracket notation
+    # When bracket notation is present, strip every ASCII space (this also removes spaces that were part of the original text)
+    if "[" in stripped:
+        stripped = stripped.replace(" ", "")
+
     stripped, _ = re.subn('\[[^\]]*\]', '', stripped)
 
     # Return the final string

From c1fe7f6b11cdd036b47ce54579a8424b31efc28e Mon Sep 17 00:00:00 2001
From: omergreen <71124454+omergreen@users.noreply.github.com>
Date: Sun, 11 Feb 2024 18:33:18 +0000
Subject: [PATCH 4/5] update tests to reflect new space changes

---
 test/test_reading.py | 36 ++++++++++++++++++------------------
 test/test_utils.py   |  4 ++--
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/test/test_reading.py b/test/test_reading.py
index 2420206..80e09ea 100644
--- a/test/test_reading.py
+++ b/test/test_reading.py
@@ -24,7 +24,7 @@ class TestMecab(unittest.TestCase):
     # sentence should have readings
     def testNormalSentence(self):
         res = reading.mecab.reading("カリン、自分でまいた種は自分で刈り取れ")
-        self.assertEqual(res, "カリン、自分[じぶん]でまいた種[たね]は自分[じぶん]で刈[か]り取[と]れ")
+        self.assertEqual(res, "カリン、 自分[じぶん]でまいた 種[たね]は 自分[じぶん]で 刈[か]り 取[と]れ")
 
     # kanji should have a reading
     def testNormalKanji(self):
@@ -34,12 +34,12 @@ def testNormalKanji(self):
     # punctuation should be ignored
     def testWithPunctuation(self):
         res = reading.mecab.reading("昨日、林檎を2個買った。")
-        self.assertEqual(res, "昨日[きのう]、林檎[りんご]を2個[こ]買[か]った。")
+        self.assertEqual(res, "昨日[きのう]、 林檎[りんご]を2 個[こ]買[か]った。")
 
     # unicode characters should be ignored
     def testUnicodeChar(self):
         res = reading.mecab.reading("真莉、大好きだよん＾＾")
-        self.assertEqual(res, "真[ま]莉、大好[だいす]きだよん＾＾")
+        self.assertEqual(res, "真[ま]莉、 大好[だいす]きだよん＾＾")
 
     # katakana should not be given furigana readings
     def testKatakana(self):
@@ -49,43 +49,43 @@ def testKatakana(self):
     # romanji numbers should not have readings
     def testRomanjiNumbers(self):
         res = reading.mecab.reading("彼２０００万も使った。")
-        self.assertEqual(res, "彼[かれ]２０００万[まん]も使[つか]った。")
+        self.assertEqual(res, "彼[かれ]２０００ 万[まん]も 使[つか]った。")
 
     # kanji numbers should not have readings
     def testKanjiNumber(self):
         res = reading.mecab.reading("彼二千三百六十円も使った。")
-        self.assertEqual(res, "彼[かれ]二千[せん]三百[ひゃく]六十円[えん]も使[つか]った。")
+        self.assertEqual(res, "彼[かれ]二 千[せん]三 百[ひゃく]六十 円[えん]も 使[つか]った。")
 
     # ensure that verbs with okurigana don't produce furigana for the kana portions
     def testOkurigana(self):
         self.assertEqual(reading.mecab.reading("口走る"), "口走[くちばし]る")
-        self.assertEqual(reading.mecab.reading("テスト勉強の息抜きとか　どうしてんの"), "テスト勉強[べんきょう]の息抜[いきぬ]きとか　どうしてんの")
+        self.assertEqual(reading.mecab.reading("テスト勉強の息抜きとか　どうしてんの"), "テスト 勉強[べんきょう]の 息抜[いきぬ]きとか　どうしてんの")
     
     # ensure that a single word that has plain kana appearing before the kanji in
     # the word do not have attached furigana
     def testKanaPrefixes(self):
-        self.assertEqual(reading.mecab.reading("お前"), "お前[まえ]")
-        self.assertEqual(reading.mecab.reading("ローマ字"), "ローマ字[じ]")
-        self.assertEqual(reading.mecab.reading("ローマ帝国"), "ローマ帝国[ていこく]")
+        self.assertEqual(reading.mecab.reading("お前"), "お 前[まえ]")
+        self.assertEqual(reading.mecab.reading("ローマ字"), "ローマ 字[じ]")
+        self.assertEqual(reading.mecab.reading("ローマ帝国"), "ローマ 帝国[ていこく]")
 
     # ensure that a single word that both begins AND ends with kana but contains
     # kanji in the middle only generates furigana for the kanji portion, and not
     # for the kana
     def testKanaPrefixSuffix(self):
         actual = reading.mecab.reading("みじん切り")
-        self.assertEqual(actual, "みじん切[ぎ]り")
+        self.assertEqual(actual, "みじん 切[ぎ]り")
 
     # ensure that for words that have kana in between two kanji, that only the
     # kanji receive furigana readings and the kana does not
     def testKanaBetweenKanji(self):
-        self.assertEqual(reading.mecab.reading("書き込む"), "書[か]き込[こ]む")
-        self.assertEqual(reading.mecab.reading("走り抜く"), "走[はし]り抜[ぬ]く")
-        self.assertEqual(reading.mecab.reading("走り回る"), "走[はし]り回[まわ]る")
+        self.assertEqual(reading.mecab.reading("書き込む"), "書[か]き 込[こ]む")
+        self.assertEqual(reading.mecab.reading("走り抜く"), "走[はし]り 抜[ぬ]く")
+        self.assertEqual(reading.mecab.reading("走り回る"), "走[はし]り 回[まわ]る")
 
     # ensure that any regular ASCII space characters (0x20) that are in the original
     # string are found in the resultant string as well
     def testSpacesRetained(self):
-        self.assertEqual(reading.mecab.reading("この文に 空白が あります"), "この文[ぶん]に 空白[くうはく]が あります")
+        self.assertEqual(reading.mecab.reading("この文に 空白が あります"), "この 文[ぶん]に  空白[くうはく]が あります")
         self.assertEqual(reading.mecab.reading("hello world"), "hello world")
 
     # some kana characters will have different readings when used in readings
@@ -93,17 +93,17 @@ def testSpacesRetained(self):
     def testKanaWithAdditionalReadings(self):
         # Check that ヵ (small) stands in for か (large) in readings
         # This should generate furigana for the small ヵ
-        self.assertEqual(reading.mecab.reading("彼はトルコを2ヵ月間訪問するつもりです"), "彼[かれ]はトルコを2ヵ[か]月[げつ]間[かん]訪問[ほうもん]するつもりです")
+        self.assertEqual(reading.mecab.reading("彼はトルコを2ヵ月間訪問するつもりです"), "彼[かれ]はトルコを2 ヵ[か]月[げつ]間[かん]訪問[ほうもん]するつもりです")
 
         # Check that ヶ *also* stands in for か in readings
         # This should generate furigana for the small ヶ
-        self.assertEqual(reading.mecab.reading("彼はトルコを2ヶ月間訪問するつもりです"), "彼[かれ]はトルコを2ヶ[か]月[げつ]間[かん]訪問[ほうもん]するつもりです")
+        self.assertEqual(reading.mecab.reading("彼はトルコを2ヶ月間訪問するつもりです"), "彼[かれ]はトルコを2 ヶ[か]月[げつ]間[かん]訪問[ほうもん]するつもりです")
 
         # For the same sentence, also make sure that the full-sized か and カ
         # are also recognized.
         # However, neither of these should generate furigana.
-        self.assertEqual(reading.mecab.reading("彼はトルコを2か月間訪問するつもりです"), "彼[かれ]はトルコを2か月[げつ]間[かん]訪問[ほうもん]するつもりです")
-        self.assertEqual(reading.mecab.reading("彼はトルコを2カ月間訪問するつもりです"), "彼[かれ]はトルコを2カ月[げつ]間[かん]訪問[ほうもん]するつもりです")
+        self.assertEqual(reading.mecab.reading("彼はトルコを2か月間訪問するつもりです"), "彼[かれ]はトルコを2か 月[げつ]間[かん]訪問[ほうもん]するつもりです")
+        self.assertEqual(reading.mecab.reading("彼はトルコを2カ月間訪問するつもりです"), "彼[かれ]はトルコを2カ 月[げつ]間[かん]訪問[ほうもん]するつもりです")
 
         # Finally, ensure that we're not just ALWAYS adding furigana to ヶ and ヵ
         # whenever we encounter them
diff --git a/test/test_utils.py b/test/test_utils.py
index 90e7d65..68a9f7d 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -27,8 +27,8 @@ def testEmptyString(self):
 
     # ensure that bracket notation is correctly removed
     def testRemovesBrackets(self):
-        self.assertEqual(utils.removeFurigana("日本語[にほんご]を勉強[べんきょう]する"), "日本語を勉強する")
-        self.assertEqual(utils.removeFurigana("走[はし]り込[こ]む"), "走り込む")
+        self.assertEqual(utils.removeFurigana("日本語[にほんご]を 勉強[べんきょう]する"), "日本語を勉強する")
+        self.assertEqual(utils.removeFurigana("走[はし]り 込[こ]む"), "走り込む")
 
     # ensure that ruby tags are correctly removed
     def testRemovesRuby(self):

From a888782054ff78544da759bd2da4641e530969ca Mon Sep 17 00:00:00 2001
From: Yohann Leon <yohann@leon.re>
Date: Mon, 12 Feb 2024 23:00:52 +0900
Subject: [PATCH 5/5] Use more modern str.format

---
 reading.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/reading.py b/reading.py
index 5074537..b6d78be 100644
--- a/reading.py
+++ b/reading.py
@@ -148,10 +148,10 @@ def format(self, useRubyTags: bool, previous_character: str) -> str:
             return self.text
 
         if useRubyTags:
-            return "<ruby>%s<rp>(</rp><rt>%s</rt><rp>)</rp></ruby>" % (self.text, self.reading)
+            return "<ruby>{}<rp>(</rp><rt>{}</rt><rp>)</rp></ruby>".format(self.text, self.reading)
         else:
             add_space = previous_character is not None and previous_character != "]"
-            return '%s%s[%s]' % (" " if add_space else "", self.text, self.reading)
+            return '{}{}[{}]'.format(" " if add_space else "", self.text, self.reading)
 
 class RegexDefinition:
     def __init__(self, text: str, regexGroupIndex: Optional[int]):
@@ -206,7 +206,7 @@ def kanjiToRegex(kanji: str):
         definitions.append(RegexDefinition(captureGroup, numCaptureGroups))
         numCaptureGroups += 1
 
-    return ("^%s$" % ''.join(regexPieces), definitions)
+    return ("^{}$".format(str().join(regexPieces)), definitions)
 
 class MecabController(object):
 
@@ -277,7 +277,7 @@ def reading(self, expr, ignoreNumbers = True, useRubyTags = False):
                     nodes.append(ReadingNode(definition.text, groupReading))
 
         # Combine our nodes together into a single sentece
-        fin = ''
+        fin = str()
         for node in nodes:
             fin += node.format(useRubyTags, fin[-1] if len(fin) > 0 else None)