gitingest/tests/query_parser/test_git_host_agnostic.py
Filip Christiansen d721b00d03
Replace dict-based query with ParsedQuery dataclass (#133)
- Introduce ParsedQuery dataclass to store query parameters and metadata
- Update ingestion and parser modules to use ParsedQuery instead of dict[str, Any]
- Convert ignore_patterns and include_patterns to sets
- Clean references to max size and pattern handling
- Update tests to reflect new dataclass usage
2025-01-17 16:53:32 +01:00

80 lines
2.4 KiB
Python

""" Tests to verify that the query parser is Git host agnostic. """
import pytest
from gitingest.query_parser import parse_query
@pytest.mark.parametrize(
"urls, expected_user, expected_repo, expected_url",
[
(
[
"https://github.com/tiangolo/fastapi",
"github.com/tiangolo/fastapi",
"tiangolo/fastapi",
],
"tiangolo",
"fastapi",
"https://github.com/tiangolo/fastapi",
),
(
[
"https://gitlab.com/gitlab-org/gitlab-runner",
"gitlab.com/gitlab-org/gitlab-runner",
"gitlab-org/gitlab-runner",
],
"gitlab-org",
"gitlab-runner",
"https://gitlab.com/gitlab-org/gitlab-runner",
),
(
[
"https://bitbucket.org/na-dna/llm-knowledge-share",
"bitbucket.org/na-dna/llm-knowledge-share",
"na-dna/llm-knowledge-share",
],
"na-dna",
"llm-knowledge-share",
"https://bitbucket.org/na-dna/llm-knowledge-share",
),
(
[
"https://gitea.com/xorm/xorm",
"gitea.com/xorm/xorm",
"xorm/xorm",
],
"xorm",
"xorm",
"https://gitea.com/xorm/xorm",
),
(
[
"https://codeberg.org/forgejo/forgejo",
"codeberg.org/forgejo/forgejo",
"forgejo/forgejo",
],
"forgejo",
"forgejo",
"https://codeberg.org/forgejo/forgejo",
),
],
)
@pytest.mark.asyncio
async def test_parse_query_without_host(
urls: list[str],
expected_user: str,
expected_repo: str,
expected_url: str,
) -> None:
for url in urls:
parsed_query = await parse_query(url, max_file_size=50, from_web=True)
assert parsed_query.user_name == expected_user
assert parsed_query.repo_name == expected_repo
assert parsed_query.url == expected_url
assert parsed_query.slug == f"{expected_user}-{expected_repo}"
assert parsed_query.id is not None
assert parsed_query.subpath == "/"
assert parsed_query.branch is None
assert parsed_query.commit is None
assert parsed_query.type is None