mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-28 13:59:30 +00:00
Some checks failed
CI / test (macos-latest, 3.10) (push) Has been cancelled
CI / test (macos-latest, 3.11) (push) Has been cancelled
CI / test (macos-latest, 3.12) (push) Has been cancelled
CI / test (macos-latest, 3.13) (push) Has been cancelled
CI / test (macos-latest, 3.8) (push) Has been cancelled
CI / test (macos-latest, 3.9) (push) Has been cancelled
CI / test (ubuntu-latest, 3.10) (push) Has been cancelled
CI / test (ubuntu-latest, 3.11) (push) Has been cancelled
CI / test (ubuntu-latest, 3.12) (push) Has been cancelled
CI / test (ubuntu-latest, 3.13) (push) Has been cancelled
CI / test (ubuntu-latest, 3.8) (push) Has been cancelled
CI / test (ubuntu-latest, 3.9) (push) Has been cancelled
CI / test (windows-latest, 3.10) (push) Has been cancelled
CI / test (windows-latest, 3.11) (push) Has been cancelled
CI / test (windows-latest, 3.12) (push) Has been cancelled
CI / test (windows-latest, 3.13) (push) Has been cancelled
CI / test (windows-latest, 3.8) (push) Has been cancelled
CI / test (windows-latest, 3.9) (push) Has been cancelled
OSSF Scorecard / Scorecard analysis (push) Has been cancelled
* **Pre-commit**: replace `black` & `darglint` with `ruff-check` / `ruff-format`; add `pydoclint` for docstring quality * **Deps**: drop `tomli`; tighten `typing_extensions`; add `eval-type-backport`; remove `black`, `djlint`, `pylint` from `requirements-dev` * **Ignore files**: deprecate TOML-based `.gitingest`; introduce `.gitingestignore` (git-wildmatch, parsed via `_parse_ignore_file`) * **Config**: new unified `[tool.ruff]` (lint + format + isort); delete `[tool.black]`, keep minimal `[tool.isort]` for now * **Refactor/style**: adopt `from __future__ import annotations`, kw-only args, richer types; reorder params & `__all__`; move type-only imports under `if TYPE_CHECKING`; extract `_CLIArgs` `TypedDict`, migrate form data to `pydantic.QueryForm`; deduplicate `cli.main` / `_async_main`; use `pathlib`, avoid file-IO in async; replace magic numbers with constants; delete `is_text_file` (logic now lives in `FileSystemNode.content`) * **Bug fix**: remove silent error in `notebook_utils._process_cell` * **Docs**: refresh README badges * **Tests**: update fixtures & assertions **BREAKING**: new `.gitingestignore` file replaces (now-deprecated) `.gitingest`. No functional API or CLI changes.
72 lines
2.5 KiB
Python
72 lines
2.5 KiB
Python
"""Tests to verify that the query parser is Git host agnostic.
|
|
|
|
These tests confirm that ``parse_query`` correctly identifies user/repo pairs and canonical URLs for GitHub, GitLab,
Bitbucket, Gitea, and Codeberg, even if the host is omitted.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from gitingest.query_parser import parse_query
|
|
from gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS
|
|
|
|
# Repository matrix: (host, user, repo)
|
|
_REPOS: list[tuple[str, str, str]] = [
|
|
("github.com", "tiangolo", "fastapi"),
|
|
("gitlab.com", "gitlab-org", "gitlab-runner"),
|
|
("bitbucket.org", "na-dna", "llm-knowledge-share"),
|
|
("gitea.com", "xorm", "xorm"),
|
|
("codeberg.org", "forgejo", "forgejo"),
|
|
("git.rwth-aachen.de", "medialab", "19squared"),
|
|
("gitlab.alpinelinux.org", "alpine", "apk-tools"),
|
|
]
|
|
|
|
|
|
# Stacking the two ``parametrize`` decorators crosses every repository tuple with every URL variant.
@pytest.mark.parametrize(
    ("host", "user", "repo"),
    _REPOS,
    ids=[f"{repo_host}:{owner}/{name}" for repo_host, owner, name in _REPOS],
)
@pytest.mark.parametrize("variant", ["full", "noscheme", "slug"])
@pytest.mark.asyncio
async def test_parse_query_without_host(
    host: str,
    user: str,
    repo: str,
    variant: str,
) -> None:
    """Check ``parse_query`` against full URLs, scheme-less URLs and bare ``user/repo`` slugs.

    The ``variant`` parameter selects how the input is spelled:

    * ``"full"``     -> ``https://<host>/<user>/<repo>``
    * ``"noscheme"`` -> ``<host>/<user>/<repo>``
    * anything else (``"slug"``) -> ``<user>/<repo>``

    A slug whose host is not listed in ``KNOWN_GIT_HOSTS`` is expected to raise ``ValueError``; every other
    combination must produce a query whose dumped fields match the canonical values built here.
    """
    repo_path = f"{user}/{repo}"
    canonical_url = f"https://{host}/{repo_path}"

    # Spell the parser input according to the requested variant.
    if variant == "full":
        source = canonical_url
    elif variant == "noscheme":
        source = f"{host}/{repo_path}"
    else:  # "slug"
        source = repo_path

    # A bare slug carries no host information, so a host outside KNOWN_GIT_HOSTS must be rejected.
    if variant == "slug" and host not in KNOWN_GIT_HOSTS:
        with pytest.raises(ValueError, match="Could not find a valid repository host"):
            await parse_query(source, max_file_size=50, from_web=True)
        return

    wanted = {
        "user_name": user,
        "repo_name": repo,
        "url": canonical_url,
        "slug": f"{user}-{repo}",
        "subpath": "/",
        "type": None,
        "branch": None,
        "commit": None,
        "max_file_size": 50,
        "include_patterns": None,
    }

    parsed = await parse_query(source, max_file_size=50, from_web=True)

    # ``id``, ``local_path`` and ``ignore_patterns`` are left out of the comparison.
    dumped = parsed.model_dump(exclude={"id", "local_path", "ignore_patterns"})

    assert dumped == wanted
|