gitingest/tests/test_notebook_utils.py
Filip Christiansen 58dbe2cb7e
refactor: code cleanup, utils extraction, and test improvements (#141)
* Refactor code for lifespan, template usage, and improved tests

- Move background tasks and rate-limit handler into utils.py
- Reference TEMPLATES from config instead of inline Jinja2Templates
- Adopt Given/When/Then docstrings for test clarity
- Parametrize some tests and consolidate code across query_parser tests
- Add pytest.warns context handler to test_parse_repo_source_with_failed_git_command
2025-01-23 02:46:17 +01:00

294 lines
11 KiB
Python

"""
Tests for the `notebook_utils` module.
These tests validate how notebooks are processed into Python-like output, ensuring that markdown/raw cells are
converted to triple-quoted blocks, code cells remain executable code, and various edge cases (multiple worksheets,
empty cells, outputs, etc.) are handled appropriately.
"""
import pytest
from gitingest.notebook_utils import process_notebook
from tests.conftest import WriteNotebookFunc
def test_process_notebook_all_cells(write_notebook: WriteNotebookFunc) -> None:
    """
    Verify handling of a notebook that mixes markdown, code, and raw cells.

    Given a notebook holding one markdown cell, one code cell, and one raw cell,
    When `process_notebook` is invoked,
    Then the output should contain exactly two triple-quoted blocks (for the markdown and raw cells),
    and the code cell should appear without being wrapped in triple quotes.
    """
    code_line = 'print("Hello Code")'
    cells = [
        {"cell_type": "markdown", "source": ["# Markdown cell"]},
        {"cell_type": "code", "source": [code_line]},
        {"cell_type": "raw", "source": ["<raw content>"]},
    ]

    result = process_notebook(write_notebook("all_cells.ipynb", {"cells": cells}))

    # Each non-code cell contributes one opening and one closing delimiter.
    assert result.count('"""') == 4, "Two non-code cells => 2 triple-quoted blocks => 4 total triple quotes."

    # The text of the markdown and raw cells must survive the conversion.
    for text in ("# Markdown cell", "<raw content>"):
        assert text in result

    # The code cell is present, but never wrapped in a triple-quoted block.
    assert code_line in result
    assert '"""\nprint("Hello Code")\n"""' not in result
def test_process_notebook_with_worksheets(write_notebook: WriteNotebookFunc) -> None:
    """
    Test a notebook containing the (as of IPEP-17 deprecated) 'worksheets' key.

    Given a notebook that uses the 'worksheets' key with a single worksheet,
    When `process_notebook` is called,
    Then a `DeprecationWarning` should be raised, and the content should match an equivalent notebook
    that has top-level 'cells'. Processing that equivalent notebook must not emit any warning.
    """
    import warnings  # local import: only this test needs to escalate warnings to errors

    with_worksheets = {
        "worksheets": [
            {
                "cells": [
                    {"cell_type": "markdown", "source": ["# Markdown cell"]},
                    {"cell_type": "code", "source": ['print("Hello Code")']},
                    {"cell_type": "raw", "source": ["<raw content>"]},
                ]
            }
        ]
    }
    without_worksheets = with_worksheets["worksheets"][0]  # same, but no 'worksheets' key

    nb_with = write_notebook("with_worksheets.ipynb", with_worksheets)
    nb_without = write_notebook("without_worksheets.ipynb", without_worksheets)

    # `match` is applied as a regular expression, so the trailing dot is escaped to match literally.
    with pytest.warns(DeprecationWarning, match=r"Worksheets are deprecated as of IPEP-17\."):
        result_with = process_notebook(nb_with)

    # Should not raise a warning: turn any warning into an exception so a stray one fails the test
    # instead of passing silently.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        result_without = process_notebook(nb_without)

    assert result_with == result_without, "Content from the single worksheet should match the top-level equivalent."
def test_process_notebook_multiple_worksheets(write_notebook: WriteNotebookFunc) -> None:
    """
    Test a notebook containing multiple 'worksheets'.

    Given a notebook with two worksheets:
      - First with a markdown cell
      - Second with a code cell
    When `process_notebook` is called,
    Then a warning about multiple worksheets should be raised, and the second worksheet's content should appear
    in the final output. A notebook with a single worksheet should raise only the deprecation warning.
    """
    # `match` is applied as a regular expression, so literal dots are escaped.
    deprecation_pattern = r"Worksheets are deprecated as of IPEP-17\. Consider updating the notebook\."
    multiple_pattern = r"Multiple worksheets detected\. Combining all worksheets into a single script\."

    multi_worksheets = {
        "worksheets": [
            {"cells": [{"cell_type": "markdown", "source": ["# First Worksheet"]}]},
            {"cells": [{"cell_type": "code", "source": ["# Second Worksheet"]}]},
        ]
    }
    single_worksheet = {
        "worksheets": [
            {"cells": [{"cell_type": "markdown", "source": ["# First Worksheet"]}]},
        ]
    }

    nb_multi = write_notebook("multiple_worksheets.ipynb", multi_worksheets)
    nb_single = write_notebook("single_worksheet.ipynb", single_worksheet)

    # Expect DeprecationWarning + UserWarning
    with pytest.warns(DeprecationWarning, match=deprecation_pattern):
        with pytest.warns(UserWarning, match=multiple_pattern):
            result_multi = process_notebook(nb_multi)

    # Expect DeprecationWarning only: inspect everything that was recorded so that an unexpected
    # UserWarning makes the test fail rather than going unnoticed.
    with pytest.warns(DeprecationWarning, match=deprecation_pattern) as recorded:
        result_single = process_notebook(nb_single)

    assert not any(
        issubclass(warning.category, UserWarning) for warning in recorded
    ), "A single worksheet should not trigger the multiple-worksheets warning."

    assert result_multi != result_single, "Two worksheets should produce more content than one."
    assert len(result_multi) > len(result_single), "The multi-worksheet notebook should have extra code content."

    assert "# First Worksheet" in result_single
    assert "# Second Worksheet" not in result_single

    assert "# First Worksheet" in result_multi
    assert "# Second Worksheet" in result_multi
def test_process_notebook_code_only(write_notebook: WriteNotebookFunc) -> None:
    """
    Check a notebook made up solely of code cells.

    Given a notebook whose cells are all code cells,
    When `process_notebook` is called,
    Then the output should contain no triple quotes, and each cell's source should be present.
    """
    sources = ["print('Code Cell 1')", "x = 42"]
    notebook_content = {"cells": [{"cell_type": "code", "source": [src]} for src in sources]}

    result = process_notebook(write_notebook("code_only.ipynb", notebook_content))

    assert '"""' not in result, "No triple quotes expected when there are only code cells."
    for src in sources:
        assert src in result
def test_process_notebook_markdown_only(write_notebook: WriteNotebookFunc) -> None:
    """
    Check a notebook made up solely of markdown cells.

    Given a notebook with two markdown cells,
    When `process_notebook` is called,
    Then the output should contain four triple-quote delimiters (two per cell) and both cells' text.
    """
    texts = ["# Markdown Header", "Some more markdown."]
    notebook_content = {"cells": [{"cell_type": "markdown", "source": [text]} for text in texts]}

    result = process_notebook(write_notebook("markdown_only.ipynb", notebook_content))

    assert result.count('"""') == 4, "Two markdown cells => 2 blocks => 4 triple quotes total."
    for text in texts:
        assert text in result
def test_process_notebook_raw_only(write_notebook: WriteNotebookFunc) -> None:
    """
    Check a notebook made up solely of raw cells.

    Given a notebook with two raw cells,
    When `process_notebook` is called,
    Then the output should contain four triple-quote delimiters (two per cell) and both cells' text.
    """
    texts = ["Raw content line 1", "Raw content line 2"]
    notebook_content = {"cells": [{"cell_type": "raw", "source": [text]} for text in texts]}

    result = process_notebook(write_notebook("raw_only.ipynb", notebook_content))

    assert result.count('"""') == 4, "Two raw cells => 2 blocks => 4 triple quotes."
    for text in texts:
        assert text in result
def test_process_notebook_empty_cells(write_notebook: WriteNotebookFunc) -> None:
    """
    Check that cells whose 'source' is empty leave no trace in the output.

    Given a notebook with one empty markdown, one empty code, and one empty raw cell,
    followed by a single non-empty markdown cell,
    When `process_notebook` is called,
    Then the output should contain exactly two triple-quote delimiters and the non-empty cell's text.
    """
    empty_cells = [{"cell_type": kind, "source": []} for kind in ("markdown", "code", "raw")]
    filled_cell = {"cell_type": "markdown", "source": ["# Non-empty markdown"]}
    notebook_content = {"cells": [*empty_cells, filled_cell]}

    result = process_notebook(write_notebook("empty_cells.ipynb", notebook_content))

    assert result.count('"""') == 2, "Only one non-empty cell => 1 block => 2 triple quotes"
    assert "# Non-empty markdown" in result
def test_process_notebook_invalid_cell_type(write_notebook: WriteNotebookFunc) -> None:
    """
    Check that an unrecognized cell type is rejected.

    Given a notebook with a valid markdown cell followed by a cell whose `cell_type` is "unknown",
    When `process_notebook` is called,
    Then a ValueError naming the unknown cell type should be raised.
    """
    cells = [
        {"cell_type": "markdown", "source": ["# Valid markdown"]},
        {"cell_type": "unknown", "source": ["Unrecognized cell type"]},
    ]
    nb_path = write_notebook("invalid_cell_type.ipynb", {"cells": cells})

    with pytest.raises(ValueError, match="Unknown cell type: unknown"):
        process_notebook(nb_path)
def test_process_notebook_with_output(write_notebook: WriteNotebookFunc) -> None:
    """
    Check how code-cell outputs are rendered, with and without `include_output`.

    Given a single code cell carrying a stream, an execute_result, and a display_data output,
    When `process_notebook` is called once with `include_output=True` and once with `include_output=False`,
    Then the first result should be the source followed by the outputs as `#`-prefixed comment lines,
    and the second result should be the source alone.
    """
    source_lines = [
        "import matplotlib.pyplot as plt\n",
        "print('my_data')\n",
        "my_data = [1, 2, 3, 4, 5]\n",
        "plt.plot(my_data)\n",
        "my_data",
    ]
    cell_outputs = [
        {"output_type": "stream", "text": ["my_data"]},
        {"output_type": "execute_result", "data": {"text/plain": ["[1, 2, 3, 4, 5]"]}},
        {"output_type": "display_data", "data": {"text/plain": ["<Figure size 640x480 with 1 Axes>"]}},
    ]
    code_cell = {"cell_type": "code", "source": source_lines, "outputs": cell_outputs}
    nb_path = write_notebook("with_output.ipynb", {"cells": [code_cell]})

    # Expected text: the header line, a blank line, then the source with a trailing newline.
    expected_source = "\n".join(
        [
            "# Jupyter notebook converted to Python script.\n",
            "import matplotlib.pyplot as plt",
            "print('my_data')",
            "my_data = [1, 2, 3, 4, 5]",
            "plt.plot(my_data)",
            "my_data\n",
        ]
    )
    # Expected text: every output rendered as a comment line beneath an "# Output:" marker.
    expected_output = "\n".join(
        [
            "# Output:",
            "# my_data",
            "# [1, 2, 3, 4, 5]",
            "# <Figure size 640x480 with 1 Axes>\n",
        ]
    )

    with_output = process_notebook(nb_path, include_output=True)
    without_output = process_notebook(nb_path, include_output=False)

    assert with_output == expected_source + expected_output, "Should include source code and comment-ified output."
    assert without_output == expected_source, "Should include only the source code without output."