feat(parser): relax host validation to support self-hosted GitLab & git.* domains (#314)

• Accept hosts starting with “git.” or “gitlab.” in _looks_like_git_host
• Update docstrings to document the heuristic
• Adjust git-host-agnostic tests: expect ValueError for slug form with
  custom hosts; add real GitLab instance (git.rwth-aachen.de) to matrix
This commit is contained in:
Filip Christiansen 2025-06-23 20:50:08 +02:00 committed by GitHub
parent 4ee598c406
commit e5fadce158
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 44 additions and 4 deletions

View file

View file

@@ -10,6 +10,7 @@ from typing import List, Tuple
import pytest
from gitingest.query_parsing import parse_query
from gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS
# Repository matrix: (host, user, repo)
_REPOS: List[Tuple[str, str, str]] = [
@@ -18,6 +19,8 @@ _REPOS: List[Tuple[str, str, str]] = [
("bitbucket.org", "na-dna", "llm-knowledge-share"),
("gitea.com", "xorm", "xorm"),
("codeberg.org", "forgejo", "forgejo"),
("git.rwth-aachen.de", "medialab", "19squared"),
("gitlab.alpinelinux.org", "alpine", "apk-tools"),
]
@@ -43,6 +46,13 @@ async def test_parse_query_without_host(
expected_url = f"https://{host}/{user}/{repo}"
# For slug form with a custom host (not in KNOWN_GIT_HOSTS) we expect a failure,
# because the parser cannot guess which domain to use.
if variant == "slug" and host not in KNOWN_GIT_HOSTS:
with pytest.raises(ValueError):
await parse_query(url, max_file_size=50, from_web=True)
return
query = await parse_query(url, max_file_size=50, from_web=True)
# Compare against the canonical dict while ignoring unpredictable fields.

View file

@@ -24,6 +24,9 @@ URLS_HTTPS: List[str] = [
"https://gitea.com/user/repo",
"https://codeberg.org/user/repo",
"https://gist.github.com/user/repo",
"https://git.example.com/user/repo",
"https://gitlab.example.com/user/repo",
"https://gitlab.example.se/user/repo",
]
URLS_HTTP: List[str] = [url.replace("https://", "http://") for url in URLS_HTTPS]