mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-28 11:41:35 +00:00
feat(parser): relax host validation to support self-hosted GitLab & git.* domains (#314)
• Accept hosts starting with “git.” or “gitlab.” in _looks_like_git_host
• Update docstrings to document the heuristic
• Adjust git-host-agnostic tests: expect ValueError for the slug form with custom hosts; add a real GitLab instance (git.rwth-aachen.de) to the matrix
This commit is contained in:
parent
4ee598c406
commit
e5fadce158
4 changed files with 44 additions and 4 deletions
0
tests/query_parser/__init__.py
Normal file
0
tests/query_parser/__init__.py
Normal file
|
|
@@ -10,6 +10,7 @@ from typing import List, Tuple
|
|||
import pytest
|
||||
|
||||
from gitingest.query_parsing import parse_query
|
||||
from gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS
|
||||
|
||||
# Repository matrix: (host, user, repo)
|
||||
_REPOS: List[Tuple[str, str, str]] = [
|
||||
|
|
@@ -18,6 +19,8 @@ _REPOS: List[Tuple[str, str, str]] = [
|
|||
("bitbucket.org", "na-dna", "llm-knowledge-share"),
|
||||
("gitea.com", "xorm", "xorm"),
|
||||
("codeberg.org", "forgejo", "forgejo"),
|
||||
("git.rwth-aachen.de", "medialab", "19squared"),
|
||||
("gitlab.alpinelinux.org", "alpine", "apk-tools"),
|
||||
]
|
||||
|
||||
|
||||
|
|
@@ -43,6 +46,13 @@ async def test_parse_query_without_host(
|
|||
|
||||
expected_url = f"https://{host}/{user}/{repo}"
|
||||
|
||||
# For slug form with a custom host (not in KNOWN_GIT_HOSTS) we expect a failure,
|
||||
# because the parser cannot guess which domain to use.
|
||||
if variant == "slug" and host not in KNOWN_GIT_HOSTS:
|
||||
with pytest.raises(ValueError):
|
||||
await parse_query(url, max_file_size=50, from_web=True)
|
||||
return
|
||||
|
||||
query = await parse_query(url, max_file_size=50, from_web=True)
|
||||
|
||||
# Compare against the canonical dict while ignoring unpredictable fields.
|
||||
|
|
|
|||
|
|
@@ -24,6 +24,9 @@ URLS_HTTPS: List[str] = [
|
|||
"https://gitea.com/user/repo",
|
||||
"https://codeberg.org/user/repo",
|
||||
"https://gist.github.com/user/repo",
|
||||
"https://git.example.com/user/repo",
|
||||
"https://gitlab.example.com/user/repo",
|
||||
"https://gitlab.example.se/user/repo",
|
||||
]
|
||||
|
||||
URLS_HTTP: List[str] = [url.replace("https://", "http://") for url in URLS_HTTPS]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue