feat: Added a notebook conversion class.

fredqi · Jan 15, 2025 · 16cd5ae · 16cd5ae
1 parent b331155
commit 16cd5ae
Show file tree

Hide file tree

Showing 4 changed files with 556 additions and 92 deletions.
diff --git a/setup.py b/setup.py
@@ -40,8 +40,21 @@ def setup_package():
                     description=__descr__,
                     license=LICENSE,
                     url=URL,
-                    packages=find_packages(include=('xdufacool',)),
-                    install_requires=['PySocks'],
+                    packages=['xdufacool'],
+                    install_requires=[
+                        'jupyter',
+                        'nbconvert',
+                        'nbformat',
+                        'jinja2',
+                        'pandas',
+                        'numpy',
+                        'ipywidgets',
+                        'jupyter_core',
+                        'pytest',
+                        'pytest-cov',
+                        'pyyaml',
+                        'requests',
+                    ],
                     # scripts
                     entry_points={'console_scripts':
                                       ['xdufacool = xdufacool.homework_manager:check_homeworks',

diff --git a/tests/test_notebook_converter.py b/tests/test_notebook_converter.py
@@ -0,0 +1,174 @@
+import os
+import pytest
+import nbformat
+from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell, new_output
+from xdufacool.converters import NotebookConverter
+
+# Notebook-level metadata (Python 3 kernelspec and language_info) that the tests below assign to nb.metadata of each sample notebook.
+global_metadata = {
+    "kernelspec": {
+        "display_name": "Python 3",
+        "language": "python",
+        "name": "python3"
+    },
+    "language_info": {
+        "codemirror_mode": {
+            "name": "ipython",
+            "version": 3
+        },
+        "file_extension": ".py",
+        "mimetype": "text/x-python",
+        "name": "python",
+        "nbconvert_exporter": "python",
+        "pygments_lexer": "ipython3",
+        "version": "3.12.2"
+    }
+}
+
+@pytest.fixture
+def setup_test_environment(tmp_path):
+    """
+    Fixture to set up the test environment:
+    - Creates a temporary directory for test files.
+    - Creates a sample .ipynb notebook for testing.
+    - Creates a dummy figure file.
+    """
+    test_dir = tmp_path / "test_files"
+    notebook_file = test_dir / "test_notebook.ipynb"
+    output_dir = test_dir / "output"
+    figure_file = test_dir / "figure1.png"
+    test_dir.mkdir()
+    output_dir.mkdir()
+
+    # Create a sample notebook with various cell types
+    nb = new_notebook()
+    nb.cells.append(new_markdown_cell("# Test Notebook\nThis is a test notebook."))
+
+    # Markdown cell referencing the external figure
+    nb.cells.append(new_markdown_cell("![Figure 1](figure1.png)"))
+
+    nb.cells.append(new_code_cell("print('Hello, world!')", execution_count=1, outputs=[
+        new_output("stream", name="stdout", text="Hello, world!\n")
+    ]))
+    nb.cells.append(new_code_cell("# This cell should be removed", execution_count=2, metadata={"tags": ["remove_cell"]}, outputs=[]))
+    nb.cells.append(new_code_cell("from IPython.display import Image\nImage(filename='figure1.png')", execution_count=3, outputs=[
+        new_output("display_data", data={
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="  # Placeholder for a 1x1 black pixel
+        }, metadata={})
+    ]))
+    nb.metadata = global_metadata
+
+    with open(notebook_file, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+
+    with open(figure_file, "wb") as f:
+        f.write(b"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=")
+
+    return test_dir, notebook_file, output_dir, figure_file
+
+def test_convert_notebook(setup_test_environment):
+    """
+    Test the convert_notebook method:
+    - Converts the sample notebook to LaTeX.
+    - Checks if the output .tex file and figures directory are created.
+    - Checks if the figure is copied to the output directory.
+    - Performs basic checks on the content of the .tex file.
+    """
+    test_dir, notebook_file, output_dir, figure_file = setup_test_environment
+    print(notebook_file, output_dir)
+
+    converter = NotebookConverter()
+    tex_file = converter.convert_notebook(str(notebook_file), str(output_dir), ["figure1.png"])
+
+    # Check if the .tex file is created
+    assert os.path.exists(tex_file)
+
+    # Check if the figures directory is created and contains the figure
+    figures_dir = output_dir / "figures"
+    assert os.path.exists(figures_dir)
+    assert os.path.exists(output_dir / "figure1.png")
+
+    # Check the content of the .tex file (basic checks)
+    with open(tex_file, "r", encoding="utf-8") as f:
+        tex_content = f.read()
+    assert "Test Notebook" in tex_content  # Check for title
+    assert "Hello, world!" in tex_content  # Check for code output
+    assert "# This cell should be removed" not in tex_content  # Check for removed cell
+    assert "\\includegraphics{figure1.png}" in tex_content  # Check for figure inclusion
+
+def test_convert_notebook_exclude_input_output(setup_test_environment):
+    """
+    Test the convert_notebook method with input and output cells excluded:
+    - Converts the notebook with exclude_input=True and exclude_output=True.
+    - Checks if the output .tex file excludes input and output cells.
+    """
+    test_dir, notebook_file, output_dir, figure_file = setup_test_environment
+    converter = NotebookConverter(exclude_input=True, exclude_output=True)
+    tex_file = converter.convert_notebook(str(notebook_file), str(output_dir), [])
+
+    # Check if the .tex file is created
+    assert os.path.exists(tex_file)
+
+    # Check the content of the .tex file
+    with open(tex_file, "r", encoding="utf-8") as f:
+        tex_content = f.read()
+    assert "print('Hello, world!')" not in tex_content  # Check for excluded input cell
+    assert "Hello, world!" not in tex_content  # Check for excluded output
+
+def test_ensure_figures_available(tmp_path):
+    """
+    Test the ensure_figures_available function:
+    - Creates a dummy figure in the assignment directory.
+    - Calls ensure_figures_available to copy the figure to the output directory.
+    - Checks if the figure is copied successfully.
+    """
+    assignment_dir = tmp_path / "assignment"
+    output_dir = tmp_path / "output"
+    assignment_dir.mkdir()
+    output_dir.mkdir()
+    figure_file = assignment_dir / "figure2.png"
+
+    with open(figure_file, "wb") as f:
+        f.write(b"Dummy figure content")  # Create a dummy figure
+
+    converter = NotebookConverter()
+    converter._ensure_figures_available(str(assignment_dir), str(output_dir), ["figure2.png"])
+    assert os.path.exists(os.path.join(str(output_dir), "figure2.png"))
+
+def test_convert_notebook_long_output(setup_test_environment):
+    """
+    Test the convert_notebook method with long output:
+    - Create a sample notebook with long output.
+    - Converts the sample notebook to LaTeX.
+    - Checks if the output .tex file are truncated.
+    """
+    test_dir, notebook_file, output_dir, figure_file = setup_test_environment
+    nb = new_notebook()
+
+    long_output_lines = ['Line {}\n'.format(i) for i in range(100)]
+    long_output_text = ''.join(long_output_lines)
+
+    nb.cells.append(new_code_cell("print('Long output:')\nfor i in range(100):\n    print('Line {}'.format(i))", 
+                                  execution_count=3, 
+                                  outputs=[new_output("stream", name="stdout", text=long_output_text)]))
+
+    nb.metadata = global_metadata
+
+    with open(notebook_file, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+
+    converter = NotebookConverter(max_output_lines=32)
+    tex_file = converter.convert_notebook(str(notebook_file), str(output_dir), [])
+    assert os.path.exists(tex_file)
+    # Check the content of the .tex file (basic checks)
+    with open(tex_file, "r", encoding="utf-8") as f:
+        tex_content = f.read()
+    # Check that the long output is truncated
+    assert "Line 0" in tex_content
+    assert "Line 1" in tex_content
+    assert "Line 15" in tex_content
+    assert "Line 16" not in tex_content
+    assert " (output truncated) " in tex_content
+    assert "Line 83" not in tex_content
+    assert "Line 84" in tex_content
+    assert "Line 99" in tex_content
diff --git a/xdufacool/collect_local.py b/xdufacool/collect_local.py
@@ -35,9 +35,11 @@
 from datetime import date
 
 try:
-    from xdufacool.latex_converter import LaTeXConverter
+    from xdufacool.converters import LaTeXConverter    
+    from xdufacool.converters import NotebookConverter
 except ImportError:
-    from latex_converter import LaTeXConverter
+    from .converters import LaTeXConverter
+    from .converters import NotebookConverter
 
 # Configure logging
 logging.basicConfig(
@@ -60,6 +62,9 @@
 if stdout_handler:
     stdout_handler.setLevel(logging.WARNING)
 
+# Add the parent directory of xdufacool to sys.path. NOTE(review): this runs after the xdufacool.converters imports near the top of the file, so it cannot help those imports resolve - confirm intent.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
 def temporal_func():
     classid = sys.argv[1]
 
@@ -282,103 +287,69 @@ def truncate_long_outputs(nb, max_lines=128):
 
 def process_ipynb_submission(ipynb_file, student_name, student_id, assignment_title, assignment_dir, figures):
     """Convert a single ipynb file to LaTeX format, compile to PDF, and ensure figures are available."""
-
-    def remove_hidden_folders(directory):
-        """Remove hidden folders in the specified directory."""
-        for item in os.listdir(directory):
-            item_path = os.path.join(directory, item)
-            if item.startswith('.') and os.path.isdir(item_path):
-                shutil.rmtree(item_path)  # Remove the hidden directory
 
     try:
-        # Remove hidden folders in the directory of the input zip file
-        zip_dir = os.path.dirname(ipynb_file)  # Get the directory of the input zip file
-        # remove_hidden_folders(zip_dir)  # Call the function to remove hidden folders
+        # Initialize NotebookConverter
+        converter = NotebookConverter()
 
         # Move the unzipped file to the directory containing the input zip file
-        unzipped_file = os.path.basename(ipynb_file)  # Get the basename of the ipynb file
-        target_path = os.path.join(zip_dir, unzipped_file)  # Target path in the zip directory
-        os.rename(ipynb_file, target_path)  # Move the file
+        zip_dir = os.path.dirname(ipynb_file)
+        unzipped_file = os.path.basename(ipynb_file)
+        target_path = os.path.join(zip_dir, unzipped_file)
+        os.rename(ipynb_file, target_path)  # NOTE(review): target_path is rebuilt from ipynb_file's own dirname and basename, so this looks like a no-op - confirm
 
-        with open(target_path, 'r', encoding='utf-8') as f:  # Use the moved file
+        # Set metadata for the notebook
+        with open(target_path, 'r', encoding='utf-8') as f:
             nb = nbformat.read(f, as_version=4)
+        if 'metadata' not in nb:
+            nb.metadata = {}
+        nb.metadata['title'] = assignment_title
+        nb.metadata['authors'] = [{"name": f"{student_name} (ID: {student_id})"}]
+        nb.metadata['date'] = ""
+        with open(target_path, 'w', encoding='utf-8') as f:
+            nbformat.write(nb, f)
+
+        # Convert to LaTeX
+        output_dir = os.path.dirname(target_path)
+        tex_file = converter.convert_notebook(target_path, output_dir, figures)  # NOTE(review): assignment_dir is not referenced by the added code - confirm the converter finds the figures without it
+
+        if tex_file is None:
+            return None
 
-            # Truncate long outputs
-            truncate_long_outputs(nb)
+        # Store current directory
+        original_dir = os.getcwd()
 
-            # Set metadata using the provided arguments
-            if 'metadata' not in nb:
-                nb.metadata = {}
-            nb.metadata['title'] = assignment_title
-            nb.metadata['authors'] = [{"name": f"{student_name} (ID: {student_id})"}]
-            nb.metadata['date'] = ""
+        try:
+            # Change to output directory for compilation
+            os.chdir(output_dir)
+            tex_basename = os.path.basename(tex_file)
+
+            # Compile to PDF using latexmk
+            subprocess.run(
+                ['latexmk', '-pdfxe', '-quiet', tex_basename],
+                check=True,
+                stdout=subprocess.DEVNULL,  # Suppress standard output
+                stderr=subprocess.DEVNULL   # Suppress error output
+            )
 
-            # Configure the LaTeX exporter with custom template
-            latex_exporter = LatexExporter()
-            latex_exporter.exclude_input = False
-            latex_exporter.exclude_output = False
-
-            # Convert to LaTeX
-            (body, resources) = latex_exporter.from_notebook_node(nb)
+            # Cleanup auxiliary files, excluding checkpoints
+            for ext in ['.aux', '.log', '.out']:
+                aux_file = tex_basename.replace('.tex', ext)  # str.replace swaps every '.tex' occurrence in the name, not only the suffix
+                if os.path.exists(aux_file):
+                    os.remove(aux_file)
+
+            # Return the full path to the generated PDF
+            pdf_file = tex_basename.replace('.tex', '.pdf')
+            if os.path.exists(pdf_file):
+                pathfile = os.path.join(output_dir, pdf_file)
+                parent_dir = os.path.dirname(output_dir)
+                relpathfile = os.path.relpath(pathfile, parent_dir)  # path is made relative to the parent of output_dir
+                return relpathfile
+
+        finally:
+            # Always return to original directory
+            os.chdir(original_dir)
 
-            # Ensure output directory exists
-            output_dir = os.path.dirname(target_path)  # Use the new target path
-            os.makedirs(output_dir, exist_ok=True)
-
-            # Ensure required figures are available
-            ensure_figures_available(assignment_dir, output_dir, figures)
-
-            # Create figures directory if it doesn't exist
-            figures_dir = os.path.join(output_dir, 'figures')
-            os.makedirs(figures_dir, exist_ok=True)
-
-            # Save figures if they exist in resources
-            if 'outputs' in resources:
-                for filename, data in resources['outputs'].items():
-                    figure_path = os.path.join(figures_dir, filename)
-                    with open(figure_path, 'wb') as f:
-                        f.write(data)
-
-                    # Update the figure path in LaTeX content to use relative path
-                    body = body.replace(filename, os.path.join('figures', filename))
-
-            # Save LaTeX file
-            tex_file = target_path.replace('.ipynb', '.tex')
-            with open(tex_file, 'w', encoding='utf-8') as f:
-                f.write(body)
-
-            # Store current directory
-            original_dir = os.getcwd()
-
-            try:
-                os.chdir(output_dir)
-                tex_basename = os.path.basename(tex_file)
-                subprocess.run(
-                    ['latexmk', '-pdfxe', '-quiet', tex_basename],
-                    check=True,
-                    stdout=subprocess.DEVNULL,  # Suppress standard output
-                    stderr=subprocess.DEVNULL   # Suppress error output
-                )
-                # Cleanup auxiliary files, excluding checkpoints
-                for ext in ['.aux', '.log', '.out']:
-                    aux_file = tex_basename.replace('.tex', ext)
-                    if os.path.exists(aux_file):
-                        os.remove(aux_file)
-
-                # Return the full path to the generated PDF
-                pdf_file = tex_basename.replace('.tex', '.pdf')                
-                if os.path.exists(pdf_file):
-                    pathfile = os.path.join(output_dir, pdf_file)
-                    parent_dir = os.path.dirname(output_dir)
-                    # logging.info(parent_dir)
-                    relpathfile = os.path.relpath(pathfile, parent_dir)
-                    # logging.info("PDF:", relpathfile)
-                    return relpathfile
-
-            finally:
-                # Always return to original directory
-                os.chdir(original_dir)
-
     except Exception as e:
         logging.error(f"Error processing {ipynb_file}: {str(e)}")
         return None
@@ -462,7 +433,9 @@ def process_ipynb_submissions(zip_file, assignment_dir, figures, merge=True):
     pdf_files = []    
     for ipynb_file in ipynb_files:
         # Process the notebook with extracted info
-        pdf_file = process_ipynb_submission(
+        # Create an instance of IpynbConverter
+        ipynb_converter = IpynbConverter(latex_converter)
+        pdf_file = ipynb_converter.convert_to_pdf(
             ipynb_file,
             student_name,
             student_id,