mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-28 10:29:30 +00:00
refactor: centralize PAT validation, streamline repo checks & misc cleanup (#349)
* refactor: centralize PAT validation, streamline repo checks & housekeeping
* `.venv*` to `.gitignore`
* `# type: ignore[attr-defined]` hints in `compat_typing.py` for IDE-agnostic imports
* Helpful PAT string in `InvalidGitHubTokenError` for easier debugging
* Bump **ruff-pre-commit** hook → `v0.12.1`
* CONTRIBUTING:
* Require **Python 3.9+**
* Recommend signed (`-S`) commits
* PAT validation now happens **only** in entry points
(`utils.auth.resolve_token` for CLI/lib, `server.process_query` for Web UI)
* Unified `_check_github_repo_exists` into `check_repo_exists`, replacing
`curl -I` with `curl --silent --location --write-out %{http_code} -o /dev/null`
* Broaden `_GITHUB_PAT_PATTERN`
* `create_git_auth_header` raises `ValueError` when hostname is missing
* Tests updated to expect raw HTTP-code output
* Superfluous “token can be set via `GITHUB_TOKEN`” notes in docstrings
* `.gitingestignore` & `.terraform` from `DEFAULT_IGNORE_PATTERNS`
* Token validation inside `create_git_command`
* Obsolete `test_create_git_command_invalid_token`
* Adjust `test_clone.py` and `test_git_utils.py` for new status-code handling
* Consolidate mocks after token-validation relocation
BREAKING CHANGE:
`create_git_command` no longer validates GitHub tokens; callers must ensure
tokens are valid (via `validate_github_token`) before invoking lower-level
git helpers.
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
parent
6fe62cc2da
commit
cea0eddce8
5 changed files with 82 additions and 40 deletions
|
|
@ -151,4 +151,4 @@ repos:
|
|||
- repo: meta
|
||||
hooks:
|
||||
- id: check-hooks-apply
|
||||
- id: check-useless-excludes
|
||||
- id: check-useless-excludes
|
||||
|
|
@ -89,4 +89,4 @@ If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK
|
|||
|
||||
13. **Iterate** on any review feedback—update your branch and repeat **6 – 11** as needed.
|
||||
|
||||
*(Optional) Invite a maintainer to your branch for easier collaboration.*
|
||||
*(Optional) Invite a maintainer to your branch for easier collaboration.*
|
||||
|
|
@ -4,15 +4,27 @@ from __future__ import annotations
|
|||
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
import re
|
||||
from typing import Final
|
||||
import sys
|
||||
from typing import Final
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND
|
||||
|
||||
from gitingest.utils.compat_func import removesuffix
|
||||
|
||||
from starlette.status import (
|
||||
HTTP_200_OK,
|
||||
HTTP_301_MOVED_PERMANENTLY,
|
||||
HTTP_302_FOUND,
|
||||
HTTP_401_UNAUTHORIZED,
|
||||
HTTP_403_FORBIDDEN,
|
||||
HTTP_404_NOT_FOUND,
|
||||
)
|
||||
|
||||
from gitingest.utils.exceptions import InvalidGitHubTokenError
|
||||
from server.server_utils import Colors
|
||||
|
||||
|
|
@ -130,28 +142,46 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool:
|
|||
If the host returns an unrecognised status code.
|
||||
|
||||
"""
|
||||
headers = {}
|
||||
# TODO: use `requests` instead of `curl`
|
||||
cmd: list[str] = [
|
||||
"curl",
|
||||
"--silent",
|
||||
"--location",
|
||||
"--head",
|
||||
"--write-out",
|
||||
"%{http_code}",
|
||||
"-o",
|
||||
os.devnull,
|
||||
]
|
||||
|
||||
if token and is_github_host(url):
|
||||
host, owner, repo = _parse_github_url(url)
|
||||
# Public GitHub vs. GitHub Enterprise
|
||||
base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3"
|
||||
url = f"{base_api}/repos/{owner}/{repo}"
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
cmd += [f"Authorization: Bearer {token}"]
|
||||
|
||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||
try:
|
||||
response = await client.head(url, headers=headers)
|
||||
except httpx.RequestError:
|
||||
return False
|
||||
cmd.append(url)
|
||||
|
||||
status_code = response.status_code
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout, _ = await proc.communicate()
|
||||
|
||||
if status_code == HTTP_200_OK:
|
||||
return True
|
||||
if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}:
|
||||
if proc.returncode != 0:
|
||||
return False
|
||||
msg = f"Unexpected HTTP status {status_code} for {url}"
|
||||
|
||||
status = int(stdout.decode().strip())
|
||||
if status in {HTTP_200_OK, HTTP_301_MOVED_PERMANENTLY}:
|
||||
return True
|
||||
# TODO: handle 302 redirects
|
||||
if status in {HTTP_404_NOT_FOUND, HTTP_302_FOUND}:
|
||||
return False
|
||||
if status in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN}:
|
||||
return False
|
||||
msg = f"Unexpected HTTP status {status} for {url}"
|
||||
raise RuntimeError(msg)
|
||||
|
||||
|
||||
|
|
@ -183,7 +213,7 @@ def _parse_github_url(url: str) -> tuple[str, str, str]:
|
|||
msg = f"Un-recognised GitHub hostname: {parsed.hostname!r}"
|
||||
raise ValueError(msg)
|
||||
|
||||
parts = removesuffix(parsed.path, ".git").strip("/").split("/")
|
||||
parts = parsed.path.strip("/").removesuffix(".git").split("/")
|
||||
expected_path_length = 2
|
||||
if len(parts) != expected_path_length:
|
||||
msg = f"Path must look like /<owner>/<repo>: {parsed.path!r}"
|
||||
|
|
@ -216,28 +246,13 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
|
|||
If the ``ref_type`` parameter is not "branches" or "tags".
|
||||
|
||||
"""
|
||||
if ref_type not in ("branches", "tags"):
|
||||
msg = f"Invalid fetch type: {ref_type}"
|
||||
raise ValueError(msg)
|
||||
|
||||
cmd = ["git"]
|
||||
|
||||
# Add authentication if needed
|
||||
if token and is_github_host(url):
|
||||
cmd += ["-c", create_git_auth_header(token, url=url)]
|
||||
|
||||
cmd += ["ls-remote"]
|
||||
|
||||
fetch_tags = ref_type == "tags"
|
||||
to_fetch = "tags" if fetch_tags else "heads"
|
||||
|
||||
cmd += [f"--{to_fetch}"]
|
||||
|
||||
# `--refs` filters out the peeled tag objects (those ending with "^{}") (for tags)
|
||||
if fetch_tags:
|
||||
cmd += ["--refs"]
|
||||
|
||||
cmd += [url]
|
||||
cmd += ["ls-remote", "--heads", url]
|
||||
|
||||
await ensure_git_installed()
|
||||
stdout, _ = await run_command(*cmd)
|
||||
|
|
@ -246,9 +261,9 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str |
|
|||
# - Skip empty lines and lines that don't contain "refs/{to_fetch}/"
|
||||
# - Extract the branch or tag name after "refs/{to_fetch}/"
|
||||
return [
|
||||
line.split(f"refs/{to_fetch}/", 1)[1]
|
||||
line.split("refs/heads/", 1)[1]
|
||||
for line in stdout.decode().splitlines()
|
||||
if line.strip() and f"refs/{to_fetch}/" in line
|
||||
if line.strip() and "refs/heads/" in line
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,12 @@ from gitingest.clone import clone_repo
|
|||
from gitingest.ingestion import ingest_query
|
||||
from gitingest.query_parser import IngestionQuery, parse_query
|
||||
from gitingest.utils.git_utils import validate_github_token
|
||||
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse
|
||||
from server.server_config import MAX_DISPLAY_SIZE
|
||||
from server.server_config import (
|
||||
DEFAULT_FILE_SIZE_KB,
|
||||
EXAMPLE_REPOS,
|
||||
MAX_DISPLAY_SIZE,
|
||||
templates,
|
||||
)
|
||||
from server.server_utils import Colors, log_slider_to_size
|
||||
|
||||
|
||||
|
|
@ -63,6 +67,8 @@ async def process_query(
|
|||
if token:
|
||||
validate_github_token(token)
|
||||
|
||||
template = "index.jinja" if is_index else "git.jinja"
|
||||
template_response = partial(templates.TemplateResponse, name=template)
|
||||
max_file_size = log_slider_to_size(slider_position)
|
||||
|
||||
query: IngestionQuery | None = None
|
||||
|
|
@ -99,7 +105,10 @@ async def process_query(
|
|||
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
|
||||
print(f"{Colors.RED}{exc}{Colors.END}")
|
||||
|
||||
return IngestErrorResponse(error=str(exc), repo_url=short_repo_url)
|
||||
context["error_message"] = f"Error: {exc}"
|
||||
if "405" in str(exc):
|
||||
context["error_message"] = "Repository not found. Please make sure it is public."
|
||||
return template_response(context=context)
|
||||
|
||||
if len(content) > MAX_DISPLAY_SIZE:
|
||||
content = (
|
||||
|
|
|
|||
|
|
@ -91,10 +91,9 @@ async def test_clone_nonexistent_repository(repo_exists_true: AsyncMock) -> None
|
|||
@pytest.mark.parametrize(
|
||||
("status_code", "expected"),
|
||||
[
|
||||
(HTTP_200_OK, True),
|
||||
(HTTP_401_UNAUTHORIZED, False),
|
||||
(HTTP_403_FORBIDDEN, False),
|
||||
(HTTP_404_NOT_FOUND, False),
|
||||
(b"200\n", 0, True), # Existing repo
|
||||
(b"404\n", 0, False), # Non-existing repo
|
||||
(b"200\n", 1, False), # Failed request
|
||||
],
|
||||
)
|
||||
async def test_check_repo_exists(status_code: int, *, expected: bool, mocker: MockerFixture) -> None:
|
||||
|
|
@ -209,6 +208,25 @@ async def test_check_repo_exists_with_redirect(mocker: MockerFixture) -> None:
|
|||
assert repo_exists is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_check_repo_exists_with_permanent_redirect(mocker: MockerFixture) -> None:
|
||||
"""Test ``check_repo_exists`` when a permanent redirect (301) is returned.
|
||||
|
||||
Given a URL that responds with "301 Found":
|
||||
When ``check_repo_exists`` is called,
|
||||
Then it should return ``True``, indicating the repo may exist at the new location.
|
||||
"""
|
||||
mock_exec = mocker.patch("asyncio.create_subprocess_exec", new_callable=AsyncMock)
|
||||
mock_process = AsyncMock()
|
||||
mock_process.communicate.return_value = (b"301\n", b"")
|
||||
mock_process.returncode = 0 # Simulate successful request
|
||||
mock_exec.return_value = mock_process
|
||||
|
||||
repo_exists = await check_repo_exists(DEMO_URL)
|
||||
|
||||
assert repo_exists
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_clone_with_timeout(run_command_mock: AsyncMock) -> None:
|
||||
"""Test cloning a repository when a timeout occurs.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue