mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-29 15:59:29 +00:00
test: add pytest-mock, introduce fixtures & type hints (#290)
* Added pytest-mock to dev dependencies and pre-commit hooks
* Introduced InvalidGitHubTokenError for clearer token-validation failures
* Refactored tests:
  * Replaced ad-hoc mocks with reusable fixtures
  * Parametrised URL/branch matrices to cut duplication
  * Added type hints throughout
* New coverage:
  * validate_github_token (happy & error paths)
  * create_git_command / create_git_auth_header
This commit is contained in:
parent
3869aa32e3
commit
95009bdf15
10 changed files with 578 additions and 461 deletions
|
|
@ -6,62 +6,43 @@ paths.
|
|||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from typing import Callable, List, Optional
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from gitingest.query_parsing import _parse_patterns, _parse_remote_repo, parse_query
|
||||
from gitingest.schemas.ingestion_schema import IngestionQuery
|
||||
from gitingest.utils.ignore_patterns import DEFAULT_IGNORE_PATTERNS
|
||||
from tests.conftest import DEMO_URL
|
||||
|
||||
# HTTPS repository URLs used to parametrise the URL-parsing tests below.
# Every entry points at a "user/repo" path so the shared assertions on
# user_name / repo_name / slug hold for each host.
# NOTE(review): DEMO_URL is imported from tests.conftest; presumably it is
# "https://github.com/user/repo" -- confirm against conftest.
URLS_HTTPS: List[str] = [
    DEMO_URL,
    "https://gitlab.com/user/repo",
    "https://bitbucket.org/user/repo",
    "https://gitea.com/user/repo",
    "https://codeberg.org/user/repo",
    "https://gist.github.com/user/repo",
]

# Plain-HTTP counterparts, derived from the HTTPS list so the two stay in sync.
URLS_HTTP: List[str] = [https_url.replace("https://", "http://") for https_url in URLS_HTTPS]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("url", URLS_HTTPS, ids=lambda u: u)
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_valid_https() -> None:
|
||||
"""
|
||||
Test `_parse_remote_repo` with valid HTTPS URLs.
|
||||
async def test_parse_url_valid_https(url: str) -> None:
|
||||
"""Valid HTTPS URLs parse correctly and `query.url` equals the input."""
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
|
||||
Given various HTTPS URLs on supported platforms:
|
||||
When `_parse_remote_repo` is called,
|
||||
Then user name, repo name, and the URL should be extracted correctly.
|
||||
"""
|
||||
test_cases = [
|
||||
"https://github.com/user/repo",
|
||||
"https://gitlab.com/user/repo",
|
||||
"https://bitbucket.org/user/repo",
|
||||
"https://gitea.com/user/repo",
|
||||
"https://codeberg.org/user/repo",
|
||||
"https://gist.github.com/user/repo",
|
||||
]
|
||||
for url in test_cases:
|
||||
query = await _parse_remote_repo(url)
|
||||
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
assert query.url == url
|
||||
assert query.url == url # HTTPS: canonical URL should equal input
|
||||
|
||||
|
||||
@pytest.mark.parametrize("url", URLS_HTTP, ids=lambda u: u)
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_valid_http() -> None:
|
||||
"""
|
||||
Test `_parse_remote_repo` with valid HTTP URLs.
|
||||
|
||||
Given various HTTP URLs on supported platforms:
|
||||
When `_parse_remote_repo` is called,
|
||||
Then user name, repo name, and the slug should be extracted correctly.
|
||||
"""
|
||||
test_cases = [
|
||||
"http://github.com/user/repo",
|
||||
"http://gitlab.com/user/repo",
|
||||
"http://bitbucket.org/user/repo",
|
||||
"http://gitea.com/user/repo",
|
||||
"http://codeberg.org/user/repo",
|
||||
"http://gist.github.com/user/repo",
|
||||
]
|
||||
for url in test_cases:
|
||||
query = await _parse_remote_repo(url)
|
||||
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
assert query.slug == "user-repo"
|
||||
async def test_parse_url_valid_http(url: str) -> None:
|
||||
"""Valid HTTP URLs parse correctly (slug check only)."""
|
||||
await _assert_basic_repo_fields(url)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -74,13 +55,14 @@ async def test_parse_url_invalid() -> None:
|
|||
Then a ValueError should be raised indicating an invalid repository URL.
|
||||
"""
|
||||
url = "https://github.com"
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid repository URL"):
|
||||
await _parse_remote_repo(url)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("url", ["https://github.com/user/repo", "https://gitlab.com/user/repo"])
|
||||
async def test_parse_query_basic(url):
|
||||
@pytest.mark.parametrize("url", [DEMO_URL, "https://gitlab.com/user/repo"])
|
||||
async def test_parse_query_basic(url: str) -> None:
|
||||
"""
|
||||
Test `parse_query` with a basic valid repository URL.
|
||||
|
||||
|
|
@ -122,8 +104,7 @@ async def test_parse_query_include_pattern() -> None:
|
|||
When `parse_query` is called,
|
||||
Then the include pattern should be set, and default ignore patterns remain applied.
|
||||
"""
|
||||
url = "https://github.com/user/repo"
|
||||
query = await parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py")
|
||||
query = await parse_query(DEMO_URL, max_file_size=50, from_web=True, include_patterns="*.py")
|
||||
|
||||
assert query.include_patterns == {"*.py"}
|
||||
assert query.ignore_patterns == DEFAULT_IGNORE_PATTERNS
|
||||
|
|
@ -138,13 +119,12 @@ async def test_parse_query_invalid_pattern() -> None:
|
|||
When `parse_query` is called,
|
||||
Then a ValueError should be raised indicating invalid characters.
|
||||
"""
|
||||
url = "https://github.com/user/repo"
|
||||
with pytest.raises(ValueError, match="Pattern.*contains invalid characters"):
|
||||
await parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf")
|
||||
await parse_query(DEMO_URL, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_with_subpaths() -> None:
|
||||
async def test_parse_url_with_subpaths(stub_branches: Callable[[List[str]], None]) -> None:
|
||||
"""
|
||||
Test `_parse_remote_repo` with a URL containing branch and subpath.
|
||||
|
||||
|
|
@ -152,19 +132,16 @@ async def test_parse_url_with_subpaths() -> None:
|
|||
When `_parse_remote_repo` is called with remote branch fetching,
|
||||
Then user, repo, branch, and subpath should be identified correctly.
|
||||
"""
|
||||
url = "https://github.com/user/repo/tree/main/subdir/file"
|
||||
with patch("gitingest.utils.git_utils.run_command", new_callable=AsyncMock) as mock_run_command:
|
||||
mock_run_command.return_value = (b"refs/heads/main\nrefs/heads/dev\nrefs/heads/feature-branch\n", b"")
|
||||
with patch(
|
||||
"gitingest.utils.git_utils.fetch_remote_branch_list", new_callable=AsyncMock
|
||||
) as mock_fetch_branches:
|
||||
mock_fetch_branches.return_value = ["main", "dev", "feature-branch"]
|
||||
query = await _parse_remote_repo(url)
|
||||
url = DEMO_URL + "/tree/main/subdir/file"
|
||||
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
assert query.branch == "main"
|
||||
assert query.subpath == "/subdir/file"
|
||||
stub_branches(["main", "dev", "feature-branch"])
|
||||
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
assert query.branch == "main"
|
||||
assert query.subpath == "/subdir/file"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -177,6 +154,7 @@ async def test_parse_url_invalid_repo_structure() -> None:
|
|||
Then a ValueError should be raised indicating an invalid repository URL.
|
||||
"""
|
||||
url = "https://github.com/user"
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid repository URL"):
|
||||
await _parse_remote_repo(url)
|
||||
|
||||
|
|
@ -204,6 +182,7 @@ def test_parse_patterns_invalid_characters() -> None:
|
|||
Then a ValueError should be raised indicating invalid pattern syntax.
|
||||
"""
|
||||
patterns = "*.py;rm -rf"
|
||||
|
||||
with pytest.raises(ValueError, match="Pattern.*contains invalid characters"):
|
||||
_parse_patterns(patterns)
|
||||
|
||||
|
|
@ -217,8 +196,7 @@ async def test_parse_query_with_large_file_size() -> None:
|
|||
When `parse_query` is called,
|
||||
Then `max_file_size` should be set correctly and default ignore patterns remain unchanged.
|
||||
"""
|
||||
url = "https://github.com/user/repo"
|
||||
query = await parse_query(url, max_file_size=10**9, from_web=True)
|
||||
query = await parse_query(DEMO_URL, max_file_size=10**9, from_web=True)
|
||||
|
||||
assert query.max_file_size == 10**9
|
||||
assert query.ignore_patterns == DEFAULT_IGNORE_PATTERNS
|
||||
|
|
@ -233,8 +211,7 @@ async def test_parse_query_empty_patterns() -> None:
|
|||
When `parse_query` is called,
|
||||
Then include_patterns becomes None and default ignore patterns apply.
|
||||
"""
|
||||
url = "https://github.com/user/repo"
|
||||
query = await parse_query(url, max_file_size=50, from_web=True, include_patterns="", ignore_patterns="")
|
||||
query = await parse_query(DEMO_URL, max_file_size=50, from_web=True, include_patterns="", ignore_patterns="")
|
||||
|
||||
assert query.include_patterns is None
|
||||
assert query.ignore_patterns == DEFAULT_IGNORE_PATTERNS
|
||||
|
|
@ -249,9 +226,8 @@ async def test_parse_query_include_and_ignore_overlap() -> None:
|
|||
When `parse_query` is called,
|
||||
Then "*.py" should be removed from ignore patterns.
|
||||
"""
|
||||
url = "https://github.com/user/repo"
|
||||
query = await parse_query(
|
||||
url,
|
||||
DEMO_URL,
|
||||
max_file_size=50,
|
||||
from_web=True,
|
||||
include_patterns="*.py",
|
||||
|
|
@ -308,23 +284,26 @@ async def test_parse_query_empty_source() -> None:
|
|||
When `parse_query` is called,
|
||||
Then a ValueError should be raised indicating an invalid repository URL.
|
||||
"""
|
||||
url = ""
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid repository URL"):
|
||||
await parse_query("", max_file_size=100, from_web=True)
|
||||
await parse_query(url, max_file_size=100, from_web=True)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"url, expected_branch, expected_commit",
|
||||
"path, expected_branch, expected_commit",
|
||||
[
|
||||
("https://github.com/user/repo/tree/main", "main", None),
|
||||
(
|
||||
"https://github.com/user/repo/tree/abcd1234abcd1234abcd1234abcd1234abcd1234",
|
||||
None,
|
||||
"abcd1234abcd1234abcd1234abcd1234abcd1234",
|
||||
),
|
||||
("/tree/main", "main", None),
|
||||
("/tree/abcd1234abcd1234abcd1234abcd1234abcd1234", None, "abcd1234abcd1234abcd1234abcd1234abcd1234"),
|
||||
],
|
||||
)
|
||||
async def test_parse_url_branch_and_commit_distinction(url: str, expected_branch: str, expected_commit: str) -> None:
|
||||
async def test_parse_url_branch_and_commit_distinction(
|
||||
path: str,
|
||||
expected_branch: str,
|
||||
expected_commit: str,
|
||||
stub_branches: Callable[[List[str]], None],
|
||||
) -> None:
|
||||
"""
|
||||
Test `_parse_remote_repo` distinguishing branch vs. commit hash.
|
||||
|
||||
|
|
@ -332,19 +311,13 @@ async def test_parse_url_branch_and_commit_distinction(url: str, expected_branch
|
|||
When `_parse_remote_repo` is called with branch fetching,
|
||||
Then the function should correctly set `branch` or `commit` based on the URL content.
|
||||
"""
|
||||
with patch("gitingest.utils.git_utils.run_command", new_callable=AsyncMock) as mock_run_command:
|
||||
# Mocking the return value to include 'main' and some additional branches
|
||||
mock_run_command.return_value = (b"refs/heads/main\nrefs/heads/dev\nrefs/heads/feature-branch\n", b"")
|
||||
with patch(
|
||||
"gitingest.utils.git_utils.fetch_remote_branch_list", new_callable=AsyncMock
|
||||
) as mock_fetch_branches:
|
||||
mock_fetch_branches.return_value = ["main", "dev", "feature-branch"]
|
||||
stub_branches(["main", "dev", "feature-branch"])
|
||||
|
||||
query = await _parse_remote_repo(url)
|
||||
url = DEMO_URL + path
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
|
||||
# Verify that `branch` and `commit` match our expectations
|
||||
assert query.branch == expected_branch
|
||||
assert query.commit == expected_commit
|
||||
assert query.branch == expected_branch
|
||||
assert query.commit == expected_commit
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -372,12 +345,12 @@ async def test_parse_url_with_query_and_fragment() -> None:
|
|||
When `_parse_remote_repo` is called,
|
||||
Then those parts should be stripped, leaving a clean user/repo URL.
|
||||
"""
|
||||
url = "https://github.com/user/repo?arg=value#fragment"
|
||||
url = DEMO_URL + "?arg=value#fragment"
|
||||
query = await _parse_remote_repo(url)
|
||||
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
assert query.url == "https://github.com/user/repo" # URL should be cleaned
|
||||
assert query.url == DEMO_URL # URL should be cleaned
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -390,6 +363,7 @@ async def test_parse_url_unsupported_host() -> None:
|
|||
Then a ValueError should be raised for the unknown domain.
|
||||
"""
|
||||
url = "https://only-domain.com"
|
||||
|
||||
with pytest.raises(ValueError, match="Unknown domain 'only-domain.com' in URL"):
|
||||
await _parse_remote_repo(url)
|
||||
|
||||
|
|
@ -419,14 +393,19 @@ async def test_parse_query_with_branch() -> None:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"url, expected_branch, expected_subpath",
|
||||
"path, expected_branch, expected_subpath",
|
||||
[
|
||||
("https://github.com/user/repo/tree/main/src", "main", "/src"),
|
||||
("https://github.com/user/repo/tree/fix1", "fix1", "/"),
|
||||
("https://github.com/user/repo/tree/nonexistent-branch/src", "nonexistent-branch", "/src"),
|
||||
("/tree/main/src", "main", "/src"),
|
||||
("/tree/fix1", "fix1", "/"),
|
||||
("/tree/nonexistent-branch/src", "nonexistent-branch", "/src"),
|
||||
],
|
||||
)
|
||||
async def test_parse_repo_source_with_failed_git_command(url, expected_branch, expected_subpath):
|
||||
async def test_parse_repo_source_with_failed_git_command(
|
||||
path: str,
|
||||
expected_branch: str,
|
||||
expected_subpath: str,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test `_parse_remote_repo` when git fetch fails.
|
||||
|
||||
|
|
@ -434,52 +413,62 @@ async def test_parse_repo_source_with_failed_git_command(url, expected_branch, e
|
|||
When `_parse_remote_repo` is called,
|
||||
Then it should fall back to path components for branch identification.
|
||||
"""
|
||||
with patch("gitingest.utils.git_utils.fetch_remote_branch_list", new_callable=AsyncMock) as mock_fetch_branches:
|
||||
mock_fetch_branches.side_effect = Exception("Failed to fetch branch list")
|
||||
url = DEMO_URL + path
|
||||
|
||||
with pytest.warns(
|
||||
RuntimeWarning,
|
||||
match="Warning: Failed to fetch branch list: Command failed: "
|
||||
"git ls-remote --heads https://github.com/user/repo",
|
||||
):
|
||||
mock_fetch_branches = mocker.patch("gitingest.utils.git_utils.fetch_remote_branch_list", new_callable=AsyncMock)
|
||||
mock_fetch_branches.side_effect = Exception("Failed to fetch branch list")
|
||||
|
||||
query = await _parse_remote_repo(url)
|
||||
with pytest.warns(
|
||||
RuntimeWarning,
|
||||
match="Warning: Failed to fetch branch list: Command failed: "
|
||||
"git ls-remote --heads https://github.com/user/repo",
|
||||
):
|
||||
query = await _parse_remote_repo(url)
|
||||
|
||||
assert query.branch == expected_branch
|
||||
assert query.subpath == expected_subpath
|
||||
assert query.branch == expected_branch
|
||||
assert query.subpath == expected_subpath
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"url, expected_branch, expected_subpath",
|
||||
("path", "expected_branch", "expected_subpath"),
|
||||
[
|
||||
("https://github.com/user/repo/tree/feature/fix1/src", "feature/fix1", "/src"),
|
||||
("https://github.com/user/repo/tree/main/src", "main", "/src"),
|
||||
("https://github.com/user/repo", None, "/"), # No
|
||||
("https://github.com/user/repo/tree/nonexistent-branch/src", None, "/"), # Non-existent branch
|
||||
("https://github.com/user/repo/tree/fix", "fix", "/"),
|
||||
("https://github.com/user/repo/blob/fix/page.html", "fix", "/page.html"),
|
||||
("/tree/feature/fix1/src", "feature/fix1", "/src"),
|
||||
("/tree/main/src", "main", "/src"),
|
||||
("", None, "/"),
|
||||
("/tree/nonexistent-branch/src", None, "/"),
|
||||
("/tree/fix", "fix", "/"),
|
||||
("/blob/fix/page.html", "fix", "/page.html"),
|
||||
],
|
||||
)
|
||||
async def test_parse_repo_source_with_various_url_patterns(url, expected_branch, expected_subpath):
|
||||
async def test_parse_repo_source_with_various_url_patterns(
|
||||
path: str,
|
||||
expected_branch: Optional[str],
|
||||
expected_subpath: str,
|
||||
stub_branches: Callable[[List[str]], None],
|
||||
) -> None:
|
||||
"""
|
||||
Test `_parse_remote_repo` with various URL patterns.
|
||||
`_parse_remote_repo` should detect (or reject) a branch and resolve the
|
||||
sub-path for various GitHub-style URL permutations.
|
||||
|
||||
Given multiple branch/blob patterns (including nonexistent branches):
|
||||
When `_parse_remote_repo` is called with remote branch fetching,
|
||||
Then the correct branch/subpath should be set or None if unmatched.
|
||||
Branch discovery is stubbed so that only names passed to `stub_branches` are considered "remote".
|
||||
"""
|
||||
with patch("gitingest.utils.git_utils.run_command", new_callable=AsyncMock) as mock_run_command:
|
||||
with patch(
|
||||
"gitingest.utils.git_utils.fetch_remote_branch_list", new_callable=AsyncMock
|
||||
) as mock_fetch_branches:
|
||||
mock_run_command.return_value = (
|
||||
b"refs/heads/feature/fix1\nrefs/heads/main\nrefs/heads/feature-branch\nrefs/heads/fix\n",
|
||||
b"",
|
||||
)
|
||||
mock_fetch_branches.return_value = ["feature/fix1", "main", "feature-branch"]
|
||||
stub_branches(["feature/fix1", "main", "feature-branch", "fix"])
|
||||
|
||||
query = await _parse_remote_repo(url)
|
||||
url = DEMO_URL + path
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
|
||||
assert query.branch == expected_branch
|
||||
assert query.subpath == expected_subpath
|
||||
assert query.branch == expected_branch
|
||||
assert query.subpath == expected_subpath
|
||||
|
||||
|
||||
async def _assert_basic_repo_fields(url: str) -> IngestionQuery:
    """
    Parse ``url`` with ``_parse_remote_repo`` and check the common repo fields.

    Asserts, in order, that ``user_name`` is ``"user"``, ``repo_name`` is
    ``"repo"`` and ``slug`` is ``"user-repo"``; the first mismatch raises
    ``AssertionError``. The parsed query is returned so callers can make
    additional, test-specific assertions on it.
    """
    parsed = await _parse_remote_repo(url)

    # Checked one field at a time (not as a single tuple comparison) so the
    # failure stops at, and reports, the first field that is wrong.
    expected_fields = (
        ("user_name", "user"),
        ("repo_name", "repo"),
        ("slug", "user-repo"),
    )
    for field_name, expected_value in expected_fields:
        assert getattr(parsed, field_name) == expected_value

    return parsed
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue