From 008ed81052097b85853da62e8e0dbdd4b1b05327 Mon Sep 17 00:00:00 2001 From: Jeronymous Date: Wed, 17 May 2023 09:01:48 +0200 Subject: [PATCH] glue apostrophes --- punctuation/recasepunc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/punctuation/recasepunc.py b/punctuation/recasepunc.py index 480f139..4865c05 100644 --- a/punctuation/recasepunc.py +++ b/punctuation/recasepunc.py @@ -3,11 +3,10 @@ """recasepunc file.""" import argparse -import collections import os import random import sys -import unicodedata +import re import numpy as np import torch @@ -201,6 +200,8 @@ def generate_predictions(config, line): output += cased_token + punctuation_syms[punc_label] if previous_label == 0: output += '.' + # Glue apostrophes back to words + output = re.sub(r"(\w) *' *(\w)", r"\1'\2", output) return output mapped_punctuation = {