Skip to content

Commit

Permalink
feat: Added a notebook conversion class.
Browse files Browse the repository at this point in the history
  • Loading branch information
fredqi committed Jan 15, 2025
1 parent b331155 commit 16cd5ae
Show file tree
Hide file tree
Showing 4 changed files with 556 additions and 92 deletions.
17 changes: 15 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,21 @@ def setup_package():
description=__descr__,
license=LICENSE,
url=URL,
packages=find_packages(include=('xdufacool',)),
install_requires=['PySocks'],
packages=['xdufacool'],
install_requires=[
'jupyter',
'nbconvert',
'nbformat',
'jinja2',
'pandas',
'numpy',
'ipywidgets',
'jupyter_core',
'pytest',
'pytest-cov',
'pyyaml',
'requests',
],
# scripts
entry_points={'console_scripts':
['xdufacool = xdufacool.homework_manager:check_homeworks',
Expand Down
174 changes: 174 additions & 0 deletions tests/test_notebook_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import os
import pytest
import nbformat
from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell, new_output
from xdufacool.converters import NotebookConverter

# Notebook-level metadata shared by the tests in this module: it is assigned to
# ``nb.metadata`` of every sample notebook built below. It declares a Python 3
# kernel (kernelspec) and the matching language_info section.
global_metadata = {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
}

@pytest.fixture
def setup_test_environment(tmp_path):
    """
    Build an isolated workspace for the notebook-conversion tests.

    The following layout is created under pytest's ``tmp_path``:
    - ``test_files/test_notebook.ipynb``: a sample notebook containing two
      markdown cells (a title and a reference to ``figure1.png``), a code cell
      with stream output, a code cell tagged ``remove_cell`` and a code cell
      with an embedded PNG ``display_data`` output.
    - ``test_files/figure1.png``: the figure referenced by the notebook.
    - ``test_files/output/``: an empty directory for conversion results.

    Returns:
        tuple: ``(test_dir, notebook_file, output_dir, figure_file)`` paths.
    """
    # Local import keeps the fixture self-contained without touching the
    # module's import block.
    import base64

    # Base64 text of a placeholder 1x1 pixel PNG. Notebook outputs store image
    # data as base64 text, while the file on disk must hold the decoded bytes.
    png_base64 = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
    )

    test_dir = tmp_path / "test_files"
    notebook_file = test_dir / "test_notebook.ipynb"
    output_dir = test_dir / "output"
    figure_file = test_dir / "figure1.png"
    test_dir.mkdir()
    output_dir.mkdir()

    # Create a sample notebook with various cell types
    nb = new_notebook()
    nb.cells.append(new_markdown_cell("# Test Notebook\nThis is a test notebook."))

    # Markdown cell referencing the external figure
    nb.cells.append(new_markdown_cell("![Figure 1](figure1.png)"))

    # Code cell with plain text (stream) output
    nb.cells.append(new_code_cell(
        "print('Hello, world!')",
        execution_count=1,
        outputs=[new_output("stream", name="stdout", text="Hello, world!\n")],
    ))

    # Code cell tagged so that the tests can check it is dropped
    nb.cells.append(new_code_cell(
        "# This cell should be removed",
        execution_count=2,
        metadata={"tags": ["remove_cell"]},
        outputs=[],
    ))

    # Code cell whose output embeds the image as base64 text
    nb.cells.append(new_code_cell(
        "from IPython.display import Image\nImage(filename='figure1.png')",
        execution_count=3,
        outputs=[
            new_output("display_data", data={"image/png": png_base64}, metadata={}),
        ],
    ))
    nb.metadata = global_metadata

    with open(notebook_file, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)

    # Write the decoded bytes: dumping the base64 text itself would produce a
    # file that is not a valid PNG.
    with open(figure_file, "wb") as f:
        f.write(base64.b64decode(png_base64))

    return test_dir, notebook_file, output_dir, figure_file

def test_convert_notebook(setup_test_environment):
    """
    Convert the sample notebook with default settings and inspect the result.

    Verifies that:
    - the returned .tex path exists;
    - a ``figures`` directory exists inside the output directory;
    - ``figure1.png`` is present in the output directory;
    - the LaTeX text contains the title and the stream output, does not contain
      the source of the cell tagged ``remove_cell``, and includes the figure
      through an includegraphics command.
    """
    _test_dir, notebook_file, output_dir, _figure_file = setup_test_environment
    print(notebook_file, output_dir)

    tex_file = NotebookConverter().convert_notebook(
        str(notebook_file), str(output_dir), ["figure1.png"]
    )

    # The .tex file, the figures directory and the copied figure must all exist
    expected_paths = (
        tex_file,
        output_dir / "figures",
        output_dir / "figure1.png",
    )
    for expected_path in expected_paths:
        assert os.path.exists(expected_path)

    # Basic checks on the generated LaTeX
    with open(tex_file, "r", encoding="utf-8") as tex_handle:
        latex_source = tex_handle.read()

    assert "Test Notebook" in latex_source  # title
    assert "Hello, world!" in latex_source  # code output
    assert "# This cell should be removed" not in latex_source  # removed cell
    assert "\\includegraphics{figure1.png}" in latex_source  # figure inclusion

def test_convert_notebook_exclude_input_output(setup_test_environment):
    """
    Convert the sample notebook with both inputs and outputs excluded.

    A converter built with ``exclude_input=True`` and ``exclude_output=True``
    must still produce a .tex file, and that file must contain neither the
    source of the ``print`` cell nor the text it printed.
    """
    _test_dir, notebook_file, output_dir, _figure_file = setup_test_environment

    options = {"exclude_input": True, "exclude_output": True}
    converter = NotebookConverter(**options)
    tex_file = converter.convert_notebook(str(notebook_file), str(output_dir), [])

    # The .tex file is created
    assert os.path.exists(tex_file)

    with open(tex_file, "r", encoding="utf-8") as tex_handle:
        latex_source = tex_handle.read()

    # Neither the input cell nor its output may appear
    assert "print('Hello, world!')" not in latex_source
    assert "Hello, world!" not in latex_source

def test_ensure_figures_available(tmp_path):
    """
    Check that ``_ensure_figures_available`` makes a figure available for output.

    A dummy ``figure2.png`` is placed in an assignment directory; after the
    call, a file with the same name must exist in the output directory.
    """
    assignment_dir = tmp_path / "assignment"
    output_dir = tmp_path / "output"
    for directory in (assignment_dir, output_dir):
        directory.mkdir()

    # Create a dummy figure in the assignment directory
    figure_name = "figure2.png"
    with open(assignment_dir / figure_name, "wb") as figure_handle:
        figure_handle.write(b"Dummy figure content")

    NotebookConverter()._ensure_figures_available(
        str(assignment_dir), str(output_dir), [figure_name]
    )

    copied_figure = os.path.join(str(output_dir), figure_name)
    assert os.path.exists(copied_figure)

def test_convert_notebook_long_output(setup_test_environment):
    """
    Check that long stream output is truncated in the generated LaTeX.

    The fixture's notebook file is overwritten with a notebook whose single
    code cell carries 100 lines of output. With ``max_output_lines=32`` the
    .tex file is expected to keep lines 0-15 and 84-99, to drop the lines in
    between, and to contain the `` (output truncated) `` marker.
    """
    _test_dir, notebook_file, output_dir, _figure_file = setup_test_environment

    # 100 lines of stream output: "Line 0" ... "Line 99"
    long_output_text = "".join("Line {}\n".format(i) for i in range(100))
    long_cell = new_code_cell(
        "print('Long output:')\nfor i in range(100):\n print('Line {}'.format(i))",
        execution_count=3,
        outputs=[new_output("stream", name="stdout", text=long_output_text)],
    )

    nb = new_notebook()
    nb.cells.append(long_cell)
    nb.metadata = global_metadata

    with open(notebook_file, "w", encoding="utf-8") as nb_handle:
        nbformat.write(nb, nb_handle)

    converter = NotebookConverter(max_output_lines=32)
    tex_file = converter.convert_notebook(str(notebook_file), str(output_dir), [])
    assert os.path.exists(tex_file)

    with open(tex_file, "r", encoding="utf-8") as tex_handle:
        latex_source = tex_handle.read()

    # Head of the output is kept up to line 15, then cut
    assert "Line 0" in latex_source
    assert "Line 1" in latex_source
    assert "Line 15" in latex_source
    assert "Line 16" not in latex_source
    # A marker replaces the dropped middle section
    assert " (output truncated) " in latex_source
    # Tail of the output resumes at line 84
    assert "Line 83" not in latex_source
    assert "Line 84" in latex_source
    assert "Line 99" in latex_source
153 changes: 63 additions & 90 deletions xdufacool/collect_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@
from datetime import date

try:
from xdufacool.latex_converter import LaTeXConverter
from xdufacool.converters import LaTeXConverter
from xdufacool.converters import NotebookConverter
except ImportError:
from latex_converter import LaTeXConverter
from .converters import LaTeXConverter
from .converters import NotebookConverter

# Configure logging
logging.basicConfig(
Expand All @@ -60,6 +62,9 @@
if stdout_handler:
stdout_handler.setLevel(logging.WARNING)

# Add the parent directory of xdufacool to sys.path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

def temporal_func():
classid = sys.argv[1]

Expand Down Expand Up @@ -282,103 +287,69 @@ def truncate_long_outputs(nb, max_lines=128):

def process_ipynb_submission(ipynb_file, student_name, student_id, assignment_title, assignment_dir, figures):
"""Convert a single ipynb file to LaTeX format, compile to PDF, and ensure figures are available."""

def remove_hidden_folders(directory):
"""Remove hidden folders in the specified directory."""
for item in os.listdir(directory):
item_path = os.path.join(directory, item)
if item.startswith('.') and os.path.isdir(item_path):
shutil.rmtree(item_path) # Remove the hidden directory

try:
# Remove hidden folders in the directory of the input zip file
zip_dir = os.path.dirname(ipynb_file) # Get the directory of the input zip file
# remove_hidden_folders(zip_dir) # Call the function to remove hidden folders
# Initialize NotebookConverter
converter = NotebookConverter()

# Move the unzipped file to the directory containing the input zip file
unzipped_file = os.path.basename(ipynb_file) # Get the basename of the ipynb file
target_path = os.path.join(zip_dir, unzipped_file) # Target path in the zip directory
os.rename(ipynb_file, target_path) # Move the file
zip_dir = os.path.dirname(ipynb_file)
unzipped_file = os.path.basename(ipynb_file)
target_path = os.path.join(zip_dir, unzipped_file)
os.rename(ipynb_file, target_path)

with open(target_path, 'r', encoding='utf-8') as f: # Use the moved file
# Set metadata for the notebook
with open(target_path, 'r', encoding='utf-8') as f:
nb = nbformat.read(f, as_version=4)
if 'metadata' not in nb:
nb.metadata = {}
nb.metadata['title'] = assignment_title
nb.metadata['authors'] = [{"name": f"{student_name} (ID: {student_id})"}]
nb.metadata['date'] = ""
with open(target_path, 'w', encoding='utf-8') as f:
nbformat.write(nb, f)

# Convert to LaTeX
output_dir = os.path.dirname(target_path)
tex_file = converter.convert_notebook(target_path, output_dir, figures)

if tex_file is None:
return None

# Truncate long outputs
truncate_long_outputs(nb)
# Store current directory
original_dir = os.getcwd()

# Set metadata using the provided arguments
if 'metadata' not in nb:
nb.metadata = {}
nb.metadata['title'] = assignment_title
nb.metadata['authors'] = [{"name": f"{student_name} (ID: {student_id})"}]
nb.metadata['date'] = ""
try:
# Change to output directory for compilation
os.chdir(output_dir)
tex_basename = os.path.basename(tex_file)

# Compile to PDF using latexmk
subprocess.run(
['latexmk', '-pdfxe', '-quiet', tex_basename],
check=True,
stdout=subprocess.DEVNULL, # Suppress standard output
stderr=subprocess.DEVNULL # Suppress error output
)

# Configure the LaTeX exporter with custom template
latex_exporter = LatexExporter()
latex_exporter.exclude_input = False
latex_exporter.exclude_output = False

# Convert to LaTeX
(body, resources) = latex_exporter.from_notebook_node(nb)
# Cleanup auxiliary files, excluding checkpoints
for ext in ['.aux', '.log', '.out']:
aux_file = tex_basename.replace('.tex', ext)
if os.path.exists(aux_file):
os.remove(aux_file)

# Return the full path to the generated PDF
pdf_file = tex_basename.replace('.tex', '.pdf')
if os.path.exists(pdf_file):
pathfile = os.path.join(output_dir, pdf_file)
parent_dir = os.path.dirname(output_dir)
relpathfile = os.path.relpath(pathfile, parent_dir)
return relpathfile

finally:
# Always return to original directory
os.chdir(original_dir)

# Ensure output directory exists
output_dir = os.path.dirname(target_path) # Use the new target path
os.makedirs(output_dir, exist_ok=True)

# Ensure required figures are available
ensure_figures_available(assignment_dir, output_dir, figures)

# Create figures directory if it doesn't exist
figures_dir = os.path.join(output_dir, 'figures')
os.makedirs(figures_dir, exist_ok=True)

# Save figures if they exist in resources
if 'outputs' in resources:
for filename, data in resources['outputs'].items():
figure_path = os.path.join(figures_dir, filename)
with open(figure_path, 'wb') as f:
f.write(data)

# Update the figure path in LaTeX content to use relative path
body = body.replace(filename, os.path.join('figures', filename))

# Save LaTeX file
tex_file = target_path.replace('.ipynb', '.tex')
with open(tex_file, 'w', encoding='utf-8') as f:
f.write(body)

# Store current directory
original_dir = os.getcwd()

try:
os.chdir(output_dir)
tex_basename = os.path.basename(tex_file)
subprocess.run(
['latexmk', '-pdfxe', '-quiet', tex_basename],
check=True,
stdout=subprocess.DEVNULL, # Suppress standard output
stderr=subprocess.DEVNULL # Suppress error output
)
# Cleanup auxiliary files, excluding checkpoints
for ext in ['.aux', '.log', '.out']:
aux_file = tex_basename.replace('.tex', ext)
if os.path.exists(aux_file):
os.remove(aux_file)

# Return the full path to the generated PDF
pdf_file = tex_basename.replace('.tex', '.pdf')
if os.path.exists(pdf_file):
pathfile = os.path.join(output_dir, pdf_file)
parent_dir = os.path.dirname(output_dir)
# logging.info(parent_dir)
relpathfile = os.path.relpath(pathfile, parent_dir)
# logging.info("PDF:", relpathfile)
return relpathfile

finally:
# Always return to original directory
os.chdir(original_dir)

except Exception as e:
logging.error(f"Error processing {ipynb_file}: {str(e)}")
return None
Expand Down Expand Up @@ -462,7 +433,9 @@ def process_ipynb_submissions(zip_file, assignment_dir, figures, merge=True):
pdf_files = []
for ipynb_file in ipynb_files:
# Process the notebook with extracted info
pdf_file = process_ipynb_submission(
# Create an instance of IpynbConverter
ipynb_converter = IpynbConverter(latex_converter)
pdf_file = ipynb_converter.convert_to_pdf(
ipynb_file,
student_name,
student_id,
Expand Down
Loading

0 comments on commit 16cd5ae

Please sign in to comment.