diff --git a/arxiv_latex_cleaner/arxiv_latex_cleaner.py b/arxiv_latex_cleaner/arxiv_latex_cleaner.py index e93d0b9..9858cbf 100644 --- a/arxiv_latex_cleaner/arxiv_latex_cleaner.py +++ b/arxiv_latex_cleaner/arxiv_latex_cleaner.py @@ -157,9 +157,19 @@ def _remove_comments_inline(text): return text +def _strip_tex_contents(lines, end_str): + """Removes everything after end_str.""" + for i in range(len(lines)): + if end_str in lines[i]: + return lines[:i+1] + return lines + + def _read_file_content(filename): with open(filename, 'r', encoding='utf-8') as fp: - return fp.readlines() + lines = fp.readlines() + lines = _strip_tex_contents(lines, '\\end{document}') + return lines def _read_all_tex_contents(tex_files, parameters):