gitingest/tests/query_parser/test_git_host_agnostic.py
Filip Christiansen dd8f1e0aac
feat: enhance parser domain-agnostic support (#117)
* feat: make parser domain-agnostic to support multiple Git hosts

- added list of known domains/Git hosts in `query_parser.py`
- fixed bug from [#115](https://github.com/cyclotruc/gitingest/pull/115): corrected case handling for URL components—scheme, domain, username, and repository are case-insensitive, but paths beyond (e.g., file names, branches) are case-sensitive
- implemented `try_domains_for_user_and_repo` in `query_parser.py` to iteratively guess the correct domain until success or supported hosts are exhausted
- added helper functions `_get_user_and_repo_from_path`, `_validate_host`, and `_validate_scheme` in `query_parser.py`
- extended `_parse_repo_source` in `query_parser.py` to be Git host agnostic by using `try_domains_for_user_and_repo`
- added tests `test_parse_url_unsupported_host` and `test_parse_query_with_branch` in `test_query_parser.py`
- created new file `test_git_host_agnostic.py` to verify domain/Git host agnostic behavior
2025-01-13 05:46:29 +01:00

81 lines
2.4 KiB
Python

""" Tests to verify that the query parser is Git host agnostic. """
import pytest
from gitingest.query_parser import parse_query
@pytest.mark.parametrize(
    "urls, expected_user, expected_repo, expected_url",
    [
        (
            [
                "https://github.com/tiangolo/fastapi",
                "github.com/tiangolo/fastapi",
                "tiangolo/fastapi",
            ],
            "tiangolo",
            "fastapi",
            "https://github.com/tiangolo/fastapi",
        ),
        (
            [
                "https://gitlab.com/gitlab-org/gitlab-runner",
                "gitlab.com/gitlab-org/gitlab-runner",
                "gitlab-org/gitlab-runner",
            ],
            "gitlab-org",
            "gitlab-runner",
            "https://gitlab.com/gitlab-org/gitlab-runner",
        ),
        (
            [
                "https://bitbucket.org/na-dna/llm-knowledge-share",
                "bitbucket.org/na-dna/llm-knowledge-share",
                "na-dna/llm-knowledge-share",
            ],
            "na-dna",
            "llm-knowledge-share",
            "https://bitbucket.org/na-dna/llm-knowledge-share",
        ),
        (
            [
                "https://gitea.com/xorm/xorm",
                "gitea.com/xorm/xorm",
                "xorm/xorm",
            ],
            "xorm",
            "xorm",
            "https://gitea.com/xorm/xorm",
        ),
        (
            [
                "https://codeberg.org/forgejo/forgejo",
                "codeberg.org/forgejo/forgejo",
                "forgejo/forgejo",
            ],
            "forgejo",
            "forgejo",
            "https://codeberg.org/forgejo/forgejo",
        ),
    ],
)
@pytest.mark.asyncio
async def test_parse_query_without_host(
    urls: list[str],
    expected_user: str,
    expected_repo: str,
    expected_url: str,
) -> None:
    """Check that every spelling of a repository parses to the same result.

    Each parametrized case lists one repository written three ways: as a full
    ``https://`` URL, as a scheme-less ``host/user/repo`` string, and as a
    bare ``user/repo`` slug. Every spelling is passed to ``parse_query`` and
    must yield the same user, repository, canonical URL and slug, with no
    branch, commit or type and the root subpath.

    Args:
        urls: Equivalent input spellings of one repository.
        expected_user: Expected ``user_name`` in the parsed result.
        expected_repo: Expected ``repo_name`` in the parsed result.
        expected_url: Expected canonical ``url`` in the parsed result.
    """
    # NOTE(review): the bare ``user/repo`` spelling carries no host, so
    # parse_query presumably has to probe candidate hosts, which may need
    # network access -- confirm before running this in an offline CI job.
    expected_slug = f"{expected_user}-{expected_repo}"

    for url in urls:
        result = await parse_query(url, max_file_size=50, from_web=True)

        # Several spellings share one parametrized case, so name the failing
        # input explicitly; otherwise a failure does not say which one broke.
        context = f"while parsing input {url!r}"

        # Common assertions for all cases
        assert result["user_name"] == expected_user, context
        assert result["repo_name"] == expected_repo, context
        assert result["url"] == expected_url, context
        assert result["slug"] == expected_slug, context
        assert result["id"] is not None, context
        assert result["subpath"] == "/", context
        assert result["branch"] is None, context
        assert result["commit"] is None, context
        assert result["type"] is None, context