mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-28 17:49:33 +00:00
refactor: consistent cloning & pattern-handling (#388)
Some checks are pending
CI / test (macos-latest, 3.10) (push) Waiting to run
CI / test (macos-latest, 3.11) (push) Waiting to run
CI / test (macos-latest, 3.12) (push) Waiting to run
CI / test (macos-latest, 3.13) (push) Waiting to run
CI / test (macos-latest, 3.8) (push) Waiting to run
CI / test (macos-latest, 3.9) (push) Waiting to run
CI / test (true, ubuntu-latest, 3.13) (push) Waiting to run
CI / test (ubuntu-latest, 3.10) (push) Waiting to run
CI / test (ubuntu-latest, 3.11) (push) Waiting to run
CI / test (ubuntu-latest, 3.12) (push) Waiting to run
CI / test (ubuntu-latest, 3.8) (push) Waiting to run
CI / test (ubuntu-latest, 3.9) (push) Waiting to run
CI / test (windows-latest, 3.10) (push) Waiting to run
CI / test (windows-latest, 3.11) (push) Waiting to run
CI / test (windows-latest, 3.12) (push) Waiting to run
CI / test (windows-latest, 3.13) (push) Waiting to run
CI / test (windows-latest, 3.8) (push) Waiting to run
CI / test (windows-latest, 3.9) (push) Waiting to run
CodeQL / Analyze (push) Waiting to run
Build & Push Container / docker-build (push) Waiting to run
release-please / release (push) Waiting to run
OSSF Scorecard / Scorecard analysis (push) Waiting to run
Some checks are pending
CI / test (macos-latest, 3.10) (push) Waiting to run
CI / test (macos-latest, 3.11) (push) Waiting to run
CI / test (macos-latest, 3.12) (push) Waiting to run
CI / test (macos-latest, 3.13) (push) Waiting to run
CI / test (macos-latest, 3.8) (push) Waiting to run
CI / test (macos-latest, 3.9) (push) Waiting to run
CI / test (true, ubuntu-latest, 3.13) (push) Waiting to run
CI / test (ubuntu-latest, 3.10) (push) Waiting to run
CI / test (ubuntu-latest, 3.11) (push) Waiting to run
CI / test (ubuntu-latest, 3.12) (push) Waiting to run
CI / test (ubuntu-latest, 3.8) (push) Waiting to run
CI / test (ubuntu-latest, 3.9) (push) Waiting to run
CI / test (windows-latest, 3.10) (push) Waiting to run
CI / test (windows-latest, 3.11) (push) Waiting to run
CI / test (windows-latest, 3.12) (push) Waiting to run
CI / test (windows-latest, 3.13) (push) Waiting to run
CI / test (windows-latest, 3.8) (push) Waiting to run
CI / test (windows-latest, 3.9) (push) Waiting to run
CodeQL / Analyze (push) Waiting to run
Build & Push Container / docker-build (push) Waiting to run
release-please / release (push) Waiting to run
OSSF Scorecard / Scorecard analysis (push) Waiting to run
Co-authored-by: ix-56h <n.guintini@protonmail.com>
This commit is contained in:
parent
360a38e10e
commit
a99089ab11
37 changed files with 1040 additions and 883 deletions
|
|
@ -4,6 +4,7 @@ These tests cover URL parsing, pattern parsing, and handling of branches/subpath
|
|||
paths.
|
||||
"""
|
||||
|
||||
# pylint: disable=too-many-arguments, too-many-positional-arguments
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
|
@ -11,12 +12,14 @@ from typing import TYPE_CHECKING, Callable
|
|||
|
||||
import pytest
|
||||
|
||||
from gitingest.query_parser import _parse_patterns, _parse_remote_repo, parse_query
|
||||
from gitingest.utils.ignore_patterns import DEFAULT_IGNORE_PATTERNS
|
||||
from gitingest.query_parser import parse_local_dir_path, parse_remote_repo
|
||||
from gitingest.utils.query_parser_utils import _is_valid_git_commit_hash
|
||||
from tests.conftest import DEMO_URL
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gitingest.schemas.ingestion import IngestionQuery
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
from gitingest.schemas import IngestionQuery
|
||||
|
||||
|
||||
URLS_HTTPS: list[str] = [
|
||||
|
|
@ -36,106 +39,85 @@ URLS_HTTP: list[str] = [url.replace("https://", "http://") for url in URLS_HTTPS
|
|||
|
||||
@pytest.mark.parametrize("url", URLS_HTTPS, ids=lambda u: u)
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_valid_https(url: str) -> None:
|
||||
async def test_parse_url_valid_https(url: str, stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Valid HTTPS URLs parse correctly and ``query.url`` equals the input."""
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
query = await _assert_basic_repo_fields(url, stub_resolve_sha["head"])
|
||||
|
||||
assert query.url == url # HTTPS: canonical URL should equal input
|
||||
|
||||
|
||||
@pytest.mark.parametrize("url", URLS_HTTP, ids=lambda u: u)
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_valid_http(url: str) -> None:
|
||||
async def test_parse_url_valid_http(url: str, stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Valid HTTP URLs parse correctly (slug check only)."""
|
||||
await _assert_basic_repo_fields(url)
|
||||
await _assert_basic_repo_fields(url, stub_resolve_sha["head"])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_invalid() -> None:
|
||||
"""Test ``_parse_remote_repo`` with an invalid URL.
|
||||
async def test_parse_url_invalid(stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Test ``parse_remote_repo`` with an invalid URL.
|
||||
|
||||
Given an HTTPS URL lacking a repository structure (e.g., "https://github.com"),
|
||||
When ``_parse_remote_repo`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then a ValueError should be raised indicating an invalid repository URL.
|
||||
"""
|
||||
url = "https://github.com"
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid repository URL"):
|
||||
await _parse_remote_repo(url)
|
||||
await parse_remote_repo(url)
|
||||
|
||||
stub_resolve_sha["head"].assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("url", [DEMO_URL, "https://gitlab.com/user/repo"])
|
||||
async def test_parse_query_basic(url: str) -> None:
|
||||
"""Test ``parse_query`` with a basic valid repository URL.
|
||||
async def test_parse_query_basic(url: str, stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Test ``parse_remote_repo`` with a basic valid repository URL.
|
||||
|
||||
Given an HTTPS URL and ignore_patterns="*.txt":
|
||||
When ``parse_query`` is called,
|
||||
Then user/repo, URL, and ignore patterns should be parsed correctly.
|
||||
Given an HTTPS URL:
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then user/repo, URL should be parsed correctly.
|
||||
"""
|
||||
query = await parse_query(source=url, max_file_size=50, from_web=True, ignore_patterns="*.txt")
|
||||
query = await parse_remote_repo(url)
|
||||
|
||||
stub_resolve_sha["head"].assert_awaited_once()
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
assert query.url == url
|
||||
assert query.ignore_patterns
|
||||
assert "*.txt" in query.ignore_patterns
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_mixed_case() -> None:
|
||||
"""Test ``parse_query`` with mixed-case URLs.
|
||||
async def test_parse_query_mixed_case(stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Test ``parse_remote_repo`` with mixed-case URLs.
|
||||
|
||||
Given a URL with mixed-case parts (e.g. "Https://GitHub.COM/UsEr/rEpO"):
|
||||
When ``parse_query`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then the user and repo names should be normalized to lowercase.
|
||||
"""
|
||||
url = "Https://GitHub.COM/UsEr/rEpO"
|
||||
query = await parse_query(url, max_file_size=50, from_web=True)
|
||||
query = await parse_remote_repo(url)
|
||||
|
||||
stub_resolve_sha["head"].assert_awaited_once()
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_include_pattern() -> None:
|
||||
"""Test ``parse_query`` with a specified include pattern.
|
||||
|
||||
Given a URL and include_patterns="*.py":
|
||||
When ``parse_query`` is called,
|
||||
Then the include pattern should be set, and default ignore patterns remain applied.
|
||||
"""
|
||||
query = await parse_query(DEMO_URL, max_file_size=50, from_web=True, include_patterns="*.py")
|
||||
|
||||
assert query.include_patterns == {"*.py"}
|
||||
assert query.ignore_patterns == DEFAULT_IGNORE_PATTERNS
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_invalid_pattern() -> None:
|
||||
"""Test ``parse_query`` with an invalid pattern.
|
||||
|
||||
Given an include pattern containing special characters (e.g., "*.py;rm -rf"):
|
||||
When ``parse_query`` is called,
|
||||
Then a ValueError should be raised indicating invalid characters.
|
||||
"""
|
||||
with pytest.raises(ValueError, match="Pattern.*contains invalid characters"):
|
||||
await parse_query(DEMO_URL, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_with_subpaths(stub_branches: Callable[[list[str]], None]) -> None:
|
||||
"""Test ``_parse_remote_repo`` with a URL containing branch and subpath.
|
||||
async def test_parse_url_with_subpaths(
|
||||
stub_branches: Callable[[list[str]], None],
|
||||
stub_resolve_sha: dict[str, AsyncMock],
|
||||
) -> None:
|
||||
"""Test ``parse_remote_repo`` with a URL containing branch and subpath.
|
||||
|
||||
Given a URL referencing a branch ("main") and a subdir ("subdir/file"):
|
||||
When ``_parse_remote_repo`` is called with remote branch fetching,
|
||||
When ``parse_remote_repo`` is called with remote branch fetching,
|
||||
Then user, repo, branch, and subpath should be identified correctly.
|
||||
"""
|
||||
url = DEMO_URL + "/tree/main/subdir/file"
|
||||
|
||||
stub_branches(["main", "dev", "feature-branch"])
|
||||
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
query = await _assert_basic_repo_fields(url, stub_resolve_sha["ref"])
|
||||
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
|
|
@ -144,105 +126,30 @@ async def test_parse_url_with_subpaths(stub_branches: Callable[[list[str]], None
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_invalid_repo_structure() -> None:
|
||||
"""Test ``_parse_remote_repo`` with a URL missing a repository name.
|
||||
async def test_parse_url_invalid_repo_structure(stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Test ``parse_remote_repo`` with a URL missing a repository name.
|
||||
|
||||
Given a URL like "https://github.com/user":
|
||||
When ``_parse_remote_repo`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then a ValueError should be raised indicating an invalid repository URL.
|
||||
"""
|
||||
url = "https://github.com/user"
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid repository URL"):
|
||||
await _parse_remote_repo(url)
|
||||
await parse_remote_repo(url)
|
||||
|
||||
stub_resolve_sha["head"].assert_not_awaited()
|
||||
|
||||
|
||||
def test_parse_patterns_valid() -> None:
|
||||
"""Test ``_parse_patterns`` with valid comma-separated patterns.
|
||||
async def test_parse_local_dir_path_local_path() -> None:
|
||||
"""Test ``parse_local_dir_path``.
|
||||
|
||||
Given patterns like "*.py, *.md, docs/*":
|
||||
When ``_parse_patterns`` is called,
|
||||
Then it should return a set of parsed strings.
|
||||
"""
|
||||
patterns = "*.py, *.md, docs/*"
|
||||
parsed_patterns = _parse_patterns(patterns)
|
||||
|
||||
assert parsed_patterns == {"*.py", "*.md", "docs/*"}
|
||||
|
||||
|
||||
def test_parse_patterns_invalid_characters() -> None:
|
||||
"""Test ``_parse_patterns`` with invalid characters.
|
||||
|
||||
Given a pattern string containing special characters (e.g. "*.py;rm -rf"):
|
||||
When ``_parse_patterns`` is called,
|
||||
Then a ValueError should be raised indicating invalid pattern syntax.
|
||||
"""
|
||||
patterns = "*.py;rm -rf"
|
||||
|
||||
with pytest.raises(ValueError, match="Pattern.*contains invalid characters"):
|
||||
_parse_patterns(patterns)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_with_large_file_size() -> None:
|
||||
"""Test ``parse_query`` with a very large file size limit.
|
||||
|
||||
Given a URL and max_file_size=10**9:
|
||||
When ``parse_query`` is called,
|
||||
Then ``max_file_size`` should be set correctly and default ignore patterns remain unchanged.
|
||||
"""
|
||||
query = await parse_query(DEMO_URL, max_file_size=10**9, from_web=True)
|
||||
|
||||
assert query.max_file_size == 10**9
|
||||
assert query.ignore_patterns == DEFAULT_IGNORE_PATTERNS
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_empty_patterns() -> None:
|
||||
"""Test ``parse_query`` with empty patterns.
|
||||
|
||||
Given empty include_patterns and ignore_patterns:
|
||||
When ``parse_query`` is called,
|
||||
Then ``include_patterns`` becomes ``None`` and default ignore patterns apply.
|
||||
"""
|
||||
query = await parse_query(DEMO_URL, max_file_size=50, from_web=True, include_patterns="", ignore_patterns="")
|
||||
|
||||
assert query.include_patterns is None
|
||||
assert query.ignore_patterns == DEFAULT_IGNORE_PATTERNS
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_include_and_ignore_overlap() -> None:
|
||||
"""Test ``parse_query`` with overlapping patterns.
|
||||
|
||||
Given include="*.py" and ignore={"*.py", "*.txt"}:
|
||||
When ``parse_query`` is called,
|
||||
Then "*.py" should be removed from ignore patterns.
|
||||
"""
|
||||
query = await parse_query(
|
||||
DEMO_URL,
|
||||
max_file_size=50,
|
||||
from_web=True,
|
||||
include_patterns="*.py",
|
||||
ignore_patterns={"*.py", "*.txt"},
|
||||
)
|
||||
|
||||
assert query.include_patterns == {"*.py"}
|
||||
assert query.ignore_patterns is not None
|
||||
assert "*.py" not in query.ignore_patterns
|
||||
assert "*.txt" in query.ignore_patterns
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_local_path() -> None:
|
||||
"""Test ``parse_query`` with a local file path.
|
||||
|
||||
Given "/home/user/project" and from_web=False:
|
||||
When ``parse_query`` is called,
|
||||
Given "/home/user/project":
|
||||
When ``parse_local_dir_path`` is called,
|
||||
Then the local path should be set, id generated, and slug formed accordingly.
|
||||
"""
|
||||
path = "/home/user/project"
|
||||
query = await parse_query(path, max_file_size=100, from_web=False)
|
||||
query = parse_local_dir_path(path)
|
||||
tail = Path("home/user/project")
|
||||
|
||||
assert query.local_path.parts[-len(tail.parts) :] == tail.parts
|
||||
|
|
@ -250,16 +157,15 @@ async def test_parse_query_local_path() -> None:
|
|||
assert query.slug == "home/user/project"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_relative_path() -> None:
|
||||
"""Test ``parse_query`` with a relative path.
|
||||
async def test_parse_local_dir_path_relative_path() -> None:
|
||||
"""Test ``parse_local_dir_path`` with a relative path.
|
||||
|
||||
Given "./project" and from_web=False:
|
||||
When ``parse_query`` is called,
|
||||
Given "./project":
|
||||
When ``parse_local_dir_path`` is called,
|
||||
Then ``local_path`` resolves relatively, and ``slug`` ends with "project".
|
||||
"""
|
||||
path = "./project"
|
||||
query = await parse_query(path, max_file_size=100, from_web=False)
|
||||
query = parse_local_dir_path(path)
|
||||
tail = Path("project")
|
||||
|
||||
assert query.local_path.parts[-len(tail.parts) :] == tail.parts
|
||||
|
|
@ -267,103 +173,109 @@ async def test_parse_query_relative_path() -> None:
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_empty_source() -> None:
|
||||
"""Test ``parse_query`` with an empty string.
|
||||
async def test_parse_remote_repo_empty_source(stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Test ``parse_remote_repo`` with an empty string.
|
||||
|
||||
Given an empty source string:
|
||||
When ``parse_query`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then a ValueError should be raised indicating an invalid repository URL.
|
||||
"""
|
||||
url = ""
|
||||
|
||||
with pytest.raises(ValueError, match="Invalid repository URL"):
|
||||
await parse_query(url, max_file_size=100, from_web=True)
|
||||
await parse_remote_repo(url)
|
||||
|
||||
stub_resolve_sha["head"].assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
("path", "expected_branch", "expected_commit"),
|
||||
("path", "expected_branch", "mock_name"),
|
||||
[
|
||||
("/tree/main", "main", None),
|
||||
("/tree/abcd1234abcd1234abcd1234abcd1234abcd1234", None, "abcd1234abcd1234abcd1234abcd1234abcd1234"),
|
||||
("/tree/main", "main", "ref"),
|
||||
("/tree/abcd1234abcd1234abcd1234abcd1234abcd1234", None, "ref"),
|
||||
],
|
||||
)
|
||||
async def test_parse_url_branch_and_commit_distinction(
|
||||
path: str,
|
||||
expected_branch: str,
|
||||
expected_commit: str,
|
||||
stub_branches: Callable[[list[str]], None],
|
||||
stub_resolve_sha: dict[str, AsyncMock],
|
||||
mock_name: str,
|
||||
) -> None:
|
||||
"""Test ``_parse_remote_repo`` distinguishing branch vs. commit hash.
|
||||
"""Test ``parse_remote_repo`` distinguishing branch vs. commit hash.
|
||||
|
||||
Given either a branch URL (e.g., ".../tree/main") or a 40-character commit URL:
|
||||
When ``_parse_remote_repo`` is called with branch fetching,
|
||||
When ``parse_remote_repo`` is called with branch fetching,
|
||||
Then the function should correctly set ``branch`` or ``commit`` based on the URL content.
|
||||
"""
|
||||
stub_branches(["main", "dev", "feature-branch"])
|
||||
|
||||
url = DEMO_URL + path
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
query = await _assert_basic_repo_fields(url, stub_resolve_sha[mock_name])
|
||||
|
||||
assert query.branch == expected_branch
|
||||
assert query.commit == expected_commit
|
||||
assert query.commit is not None
|
||||
assert _is_valid_git_commit_hash(query.commit)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_uuid_uniqueness() -> None:
|
||||
"""Test ``parse_query`` for unique UUID generation.
|
||||
async def test_parse_local_dir_path_uuid_uniqueness() -> None:
|
||||
"""Test ``parse_local_dir_path`` for unique UUID generation.
|
||||
|
||||
Given the same path twice:
|
||||
When ``parse_query`` is called repeatedly,
|
||||
When ``parse_local_dir_path`` is called repeatedly,
|
||||
Then each call should produce a different query id.
|
||||
"""
|
||||
path = "/home/user/project"
|
||||
query_1 = await parse_query(path, max_file_size=100, from_web=False)
|
||||
query_2 = await parse_query(path, max_file_size=100, from_web=False)
|
||||
query_1 = parse_local_dir_path(path)
|
||||
query_2 = parse_local_dir_path(path)
|
||||
|
||||
assert query_1.id != query_2.id
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_with_query_and_fragment() -> None:
|
||||
"""Test ``_parse_remote_repo`` with query parameters and a fragment.
|
||||
async def test_parse_url_with_query_and_fragment(stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Test ``parse_remote_repo`` with query parameters and a fragment.
|
||||
|
||||
Given a URL like "https://github.com/user/repo?arg=value#fragment":
|
||||
When ``_parse_remote_repo`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then those parts should be stripped, leaving a clean user/repo URL.
|
||||
"""
|
||||
url = DEMO_URL + "?arg=value#fragment"
|
||||
query = await _parse_remote_repo(url)
|
||||
query = await parse_remote_repo(url)
|
||||
|
||||
stub_resolve_sha["head"].assert_awaited_once()
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
assert query.url == DEMO_URL # URL should be cleaned
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_url_unsupported_host() -> None:
|
||||
"""Test ``_parse_remote_repo`` with an unsupported host.
|
||||
async def test_parse_url_unsupported_host(stub_resolve_sha: dict[str, AsyncMock]) -> None:
|
||||
"""Test ``parse_remote_repo`` with an unsupported host.
|
||||
|
||||
Given "https://only-domain.com":
|
||||
When ``_parse_remote_repo`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then a ValueError should be raised for the unknown domain.
|
||||
"""
|
||||
url = "https://only-domain.com"
|
||||
|
||||
with pytest.raises(ValueError, match="Unknown domain 'only-domain.com' in URL"):
|
||||
await _parse_remote_repo(url)
|
||||
await parse_remote_repo(url)
|
||||
|
||||
stub_resolve_sha["head"].assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_parse_query_with_branch() -> None:
|
||||
"""Test ``parse_query`` when a branch is specified in a blob path.
|
||||
"""Test ``parse_remote_repo`` when a branch is specified in a blob path.
|
||||
|
||||
Given "https://github.com/pandas-dev/pandas/blob/2.2.x/...":
|
||||
When ``parse_query`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then the branch should be identified, subpath set, and commit remain None.
|
||||
"""
|
||||
url = "https://github.com/pandas-dev/pandas/blob/2.2.x/.github/ISSUE_TEMPLATE/documentation_improvement.yaml"
|
||||
query = await parse_query(url, max_file_size=10**9, from_web=True)
|
||||
query = await parse_remote_repo(url)
|
||||
|
||||
assert query.user_name == "pandas-dev"
|
||||
assert query.repo_name == "pandas"
|
||||
|
|
@ -372,20 +284,21 @@ async def test_parse_query_with_branch() -> None:
|
|||
assert query.id is not None
|
||||
assert query.subpath == "/.github/ISSUE_TEMPLATE/documentation_improvement.yaml"
|
||||
assert query.branch == "2.2.x"
|
||||
assert query.commit is None
|
||||
assert query.commit is not None
|
||||
assert _is_valid_git_commit_hash(query.commit)
|
||||
assert query.type == "blob"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
("path", "expected_branch", "expected_subpath"),
|
||||
("path", "expected_branch", "expected_subpath", "mock_name"),
|
||||
[
|
||||
("/tree/feature/fix1/src", "feature/fix1", "/src"),
|
||||
("/tree/main/src", "main", "/src"),
|
||||
("", None, "/"),
|
||||
("/tree/nonexistent-branch/src", None, "/"),
|
||||
("/tree/fix", "fix", "/"),
|
||||
("/blob/fix/page.html", "fix", "/page.html"),
|
||||
("/tree/feature/fix1/src", "feature/fix1", "/src", "ref"),
|
||||
("/tree/main/src", "main", "/src", "ref"),
|
||||
("", None, "/", "head"),
|
||||
("/tree/nonexistent-branch/src", None, "/", "ref"),
|
||||
("/tree/fix", "fix", "/", "ref"),
|
||||
("/blob/fix/page.html", "fix", "/page.html", "ref"),
|
||||
],
|
||||
)
|
||||
async def test_parse_repo_source_with_various_url_patterns(
|
||||
|
|
@ -393,11 +306,13 @@ async def test_parse_repo_source_with_various_url_patterns(
|
|||
expected_branch: str | None,
|
||||
expected_subpath: str,
|
||||
stub_branches: Callable[[list[str]], None],
|
||||
stub_resolve_sha: dict[str, AsyncMock],
|
||||
mock_name: str,
|
||||
) -> None:
|
||||
"""Test ``_parse_remote_repo`` with various GitHub-style URL permutations.
|
||||
"""Test ``parse_remote_repo`` with various GitHub-style URL permutations.
|
||||
|
||||
Given various GitHub-style URL permutations:
|
||||
When ``_parse_remote_repo`` is called,
|
||||
When ``parse_remote_repo`` is called,
|
||||
Then it should detect (or reject) a branch and resolve the sub-path.
|
||||
|
||||
Branch discovery is stubbed so that only names passed to ``stub_branches`` are considered "remote".
|
||||
|
|
@ -405,15 +320,24 @@ async def test_parse_repo_source_with_various_url_patterns(
|
|||
stub_branches(["feature/fix1", "main", "feature-branch", "fix"])
|
||||
|
||||
url = DEMO_URL + path
|
||||
query = await _assert_basic_repo_fields(url)
|
||||
query = await _assert_basic_repo_fields(url, stub_resolve_sha[mock_name])
|
||||
|
||||
assert query.branch == expected_branch
|
||||
assert query.subpath == expected_subpath
|
||||
|
||||
|
||||
async def _assert_basic_repo_fields(url: str) -> IngestionQuery:
|
||||
"""Run ``_parse_remote_repo`` and assert user, repo and slug are parsed."""
|
||||
query = await _parse_remote_repo(url)
|
||||
@pytest.mark.asyncio
|
||||
async def _assert_basic_repo_fields(url: str, sha_mock: AsyncMock) -> IngestionQuery:
|
||||
"""Run ``parse_remote_repo`` and assert user, repo and slug are parsed."""
|
||||
query = await parse_remote_repo(url)
|
||||
|
||||
assert query.commit is not None
|
||||
assert _is_valid_git_commit_hash(query.commit)
|
||||
|
||||
if query.commit in url:
|
||||
sha_mock.assert_not_awaited()
|
||||
else:
|
||||
sha_mock.assert_awaited_once()
|
||||
|
||||
assert query.user_name == "user"
|
||||
assert query.repo_name == "repo"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue