diff --git a/.codecov.yaml b/.codecov.yaml deleted file mode 100644 index dc46984..0000000 --- a/.codecov.yaml +++ /dev/null @@ -1,3 +0,0 @@ -comment: false -github_checks: - annotations: false diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 73efd68..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,22 +0,0 @@ -version: 2 -updates: - # ─── Python (pip) ───────────────────────────── - - package-ecosystem: "pip" - directory: "/" - schedule: { interval: "weekly" } - labels: [ "dependencies", "pip" ] - groups: # Group patches & minors from dev-only tools - dev-py: - dependency-type: "development" - update-types: ["minor", "patch"] - - # ─── GitHub Actions ─────────────────────────── - - package-ecosystem: "github-actions" - directory: "/" - schedule: { interval: "weekly" } - labels: [ "dependencies", "gh-actions" ] - - - package-ecosystem: docker - directory: / - schedule: - interval: daily diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed49893..3ab7110 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.8", "3.13"] include: - os: ubuntu-latest python-version: "3.13" @@ -32,7 +32,7 @@ jobs: with: egress-policy: audit - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Set up Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 @@ -57,26 +57,11 @@ jobs: if: ${{ matrix.coverage != true }} run: pytest - - name: Run tests and collect coverage + - name: Run tests if: ${{ matrix.coverage == true }} - run: | - pytest \ - --cov=gitingest \ - --cov=server \ - --cov-branch \ - --cov-report=xml \ - --cov-report=term + run: pytest + - - name: Upload coverage to Codecov - if: ${{ matrix.coverage == true }} - uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3 - with: - token: ${{ secrets.CODECOV_TOKEN }} - files: coverage.xml - flags: ${{ matrix.os }}-py${{ matrix.python-version }} - name: codecov-${{ matrix.os }}-${{ matrix.python-version }} - fail_ci_if_error: true - verbose: true - name: Run pre-commit hooks uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 781e541..ba432f1 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -46,11 +46,11 @@ jobs: egress-policy: audit - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@51f77329afa6477de8c49fc9c7046c15b9a4e79d # v3.29.5 + uses: github/codeql-action/init@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -60,7 +60,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@51f77329afa6477de8c49fc9c7046c15b9a4e79d # v3.29.5 + uses: github/codeql-action/autobuild@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -73,6 +73,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@51f77329afa6477de8c49fc9c7046c15b9a4e79d # v3.29.5 + uses: github/codeql-action/analyze@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 71ddc65..adc5366 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -22,6 +22,6 @@ jobs: egress-policy: audit - name: 'Checkout Repository' - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: 'Dependency Review' uses: actions/dependency-review-action@da24556b548a50705dd671f47852072ea4c105d9 # v4.7.1 diff --git a/.github/workflows/deploy-pr.yml b/.github/workflows/deploy-pr.yml index de002b8..fb3400a 100644 --- a/.github/workflows/deploy-pr.yml +++ b/.github/workflows/deploy-pr.yml @@ -32,7 +32,7 @@ jobs: repositories: '${{ env.FLUX_REPO }}' - name: Checkout Flux repo - uses: actions/checkout@v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: repository: '${{ env.FLUX_OWNER }}/${{ env.FLUX_REPO }}' token: '${{ steps.app-token.outputs.token }}' @@ -120,7 +120,7 @@ jobs: repositories: '${{ env.FLUX_REPO }}' - name: Checkout Flux repo - uses: actions/checkout@v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: repository: '${{ env.FLUX_OWNER }}/${{ env.FLUX_REPO }}' token: '${{ steps.app-token.outputs.token }}' diff --git a/.github/workflows/docker-build.ecr.yml b/.github/workflows/docker-build.ecr.yml index 0a819e1..9424837 100644 --- a/.github/workflows/docker-build.ecr.yml +++ b/.github/workflows/docker-build.ecr.yml @@ -32,7 +32,9 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - name: configure aws credentials uses: aws-actions/configure-aws-credentials@v4 @@ -46,6 +48,32 @@ jobs: run: | echo "timestamp=$(date +%s)" >> $GITHUB_OUTPUT echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + echo "sha_full=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT + + - name: Determine version and deployment context + id: version + run: | + REPO_URL="https://github.com/${{ github.repository }}" + + if [[ "${{ github.ref_type }}" == "tag" ]]; then + # Tag deployment - display version, link to release + echo "version=${{ github.ref_name }}" >> $GITHUB_OUTPUT + echo "app_version=${{ github.ref_name }}" >> $GITHUB_OUTPUT + echo "app_version_url=${REPO_URL}/releases/tag/${{ github.ref_name }}" >> $GITHUB_OUTPUT + elif [[ "${{ github.event_name }}" == "pull_request" ]]; then + # PR deployment - display pr-XXX, link to PR commit + PR_NUMBER="${{ github.event.pull_request.number }}" + COMMIT_HASH="${{ steps.vars.outputs.sha_full }}" + echo "version=${PR_NUMBER}/merge-${COMMIT_HASH}" >> $GITHUB_OUTPUT + echo "app_version=pr-${PR_NUMBER}" >> $GITHUB_OUTPUT + echo "app_version_url=${REPO_URL}/pull/${PR_NUMBER}/commits/${COMMIT_HASH}" >> $GITHUB_OUTPUT + else + # Branch deployment - display branch name, link to commit + BRANCH_NAME="${{ github.ref_name }}" + COMMIT_HASH="${{ steps.vars.outputs.sha_full }}" + echo "app_version=${BRANCH_NAME}" >> $GITHUB_OUTPUT + echo "app_version_url=${REPO_URL}/commit/${COMMIT_HASH}" >> $GITHUB_OUTPUT + fi - name: Login to Amazon ECR id: login-ecr @@ -78,5 +106,9 @@ jobs: push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + build-args: | + APP_REPOSITORY=https://github.com/${{ github.repository }} + APP_VERSION=${{ steps.version.outputs.app_version }} + APP_VERSION_URL=${{ steps.version.outputs.app_version_url }} cache-from: type=gha cache-to: type=gha,mode=max diff --git a/.github/workflows/docker-build.ghcr.yml b/.github/workflows/docker-build.ghcr.yml index de72fba..a108293 100644 --- a/.github/workflows/docker-build.ghcr.yml +++ b/.github/workflows/docker-build.ghcr.yml @@ -43,16 +43,44 @@ jobs: with: egress-policy: audit - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - name: Set current timestamp id: vars run: | echo "timestamp=$(date +%s)" >> $GITHUB_OUTPUT echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + echo "sha_full=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT + + - name: Determine version and deployment context + id: version + run: | + REPO_URL="https://github.com/${{ github.repository }}" + + if [[ "${{ github.ref_type }}" == "tag" ]]; then + # Tag deployment - display version, link to release + echo "version=${{ github.ref_name }}" >> $GITHUB_OUTPUT + echo "app_version=${{ github.ref_name }}" >> $GITHUB_OUTPUT + echo "app_version_url=${REPO_URL}/releases/tag/${{ github.ref_name }}" >> $GITHUB_OUTPUT + elif [[ "${{ github.event_name }}" == "pull_request" ]]; then + # PR deployment - display pr-XXX, link to PR commit + PR_NUMBER="${{ github.event.pull_request.number }}" + COMMIT_HASH="${{ steps.vars.outputs.sha_full }}" + echo "version=${PR_NUMBER}/merge-${COMMIT_HASH}" >> $GITHUB_OUTPUT + echo "app_version=pr-${PR_NUMBER}" >> $GITHUB_OUTPUT + echo "app_version_url=${REPO_URL}/pull/${PR_NUMBER}/commits/${COMMIT_HASH}" >> $GITHUB_OUTPUT + else + # Branch deployment - display branch name, link to commit + BRANCH_NAME="${{ github.ref_name }}" + COMMIT_HASH="${{ steps.vars.outputs.sha_full }}" + echo "app_version=${BRANCH_NAME}" >> $GITHUB_OUTPUT + echo "app_version_url=${REPO_URL}/commit/${COMMIT_HASH}" >> $GITHUB_OUTPUT + fi - name: Log in to the Container registry - uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 + uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -60,7 +88,7 @@ jobs: - name: Docker Meta id: meta - uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 + uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0 with: images: | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} @@ -87,6 +115,10 @@ jobs: push: ${{ github.event_name != 'pull_request' || env.PUSH_FROM_PR == 'true' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + build-args: | + APP_REPOSITORY=https://github.com/${{ github.repository }} + APP_VERSION=${{ steps.version.outputs.app_version }} + APP_VERSION_URL=${{ steps.version.outputs.app_version_url }} cache-from: type=gha cache-to: type=gha,mode=max diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index 51a6ff5..cc11f10 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -18,7 +18,7 @@ jobs: with: egress-policy: audit - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Set up Python 3.13 uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 @@ -54,7 +54,7 @@ jobs: with: egress-policy: audit - - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: dist path: dist/ diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 29e0f9b..603a28a 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -12,7 +12,7 @@ jobs: release: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Create GitHub App token uses: actions/create-github-app-token@v2 diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 8025b93..844860b 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -27,18 +27,18 @@ jobs: egress-policy: audit - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: persist-credentials: false - name: Run Scorecard - uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde + uses: ossf/scorecard-action@f35c64557cf912815708bb1126d9948f3e459487 with: results_file: results.sarif results_format: sarif publish_results: true # enables the public badge - name: Upload to code-scanning - uses: github/codeql-action/upload-sarif@51f77329afa6477de8c49fc9c7046c15b9a4e79d # v3.29.5 + uses: github/codeql-action/upload-sarif@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9 with: sarif_file: results.sarif diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8556083..3fcfb61 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -117,6 +117,7 @@ repos: boto3>=1.28.0, click>=8.0.0, 'fastapi[standard]>=0.109.1', + gitpython>=3.1.0, httpx, loguru>=0.7.0, pathspec>=0.12.1, @@ -144,6 +145,7 @@ repos: boto3>=1.28.0, click>=8.0.0, 'fastapi[standard]>=0.109.1', + gitpython>=3.1.0, httpx, loguru>=0.7.0, pathspec>=0.12.1, diff --git a/Dockerfile b/Dockerfile index d686922..0ae5285 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,16 +13,22 @@ COPY src/ ./src/ RUN set -eux; \ pip install --no-cache-dir --upgrade pip; \ - pip install --no-cache-dir --timeout 1000 .[server] + pip install --no-cache-dir --timeout 1000 .[server,mcp] # Stage 2: Runtime image FROM python:3.13.5-slim@sha256:4c2cf9917bd1cbacc5e9b07320025bdb7cdf2df7b0ceaccb55e9dd7e30987419 ARG UID=1000 ARG GID=1000 +ARG APP_REPOSITORY=https://github.com/coderamp-labs/gitingest +ARG APP_VERSION=unknown +ARG APP_VERSION_URL=https://github.com/coderamp-labs/gitingest ENV PYTHONUNBUFFERED=1 \ - PYTHONDONTWRITEBYTECODE=1 + PYTHONDONTWRITEBYTECODE=1 \ + APP_REPOSITORY=${APP_REPOSITORY} \ + APP_VERSION=${APP_VERSION} \ + APP_VERSION_URL=${APP_VERSION_URL} RUN set -eux; \ apt-get update; \ diff --git a/README.md b/README.md index a31c780..f16e612 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@
CI - + Ruff OpenSSF Scorecard
diff --git a/pyproject.toml b/pyproject.toml index aa17bd7..36219fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = {file = "README.md", content-type = "text/markdown" } requires-python = ">= 3.8" dependencies = [ "click>=8.0.0", + "gitpython>=3.1.0", "httpx", "loguru>=0.7.0", "pathspec>=0.12.1", @@ -40,7 +41,6 @@ dev = [ "pre-commit", "pytest", "pytest-asyncio", - "pytest-cov", "pytest-mock", ] diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..5db72dd --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:recommended" + ] +} diff --git a/src/gitingest/clone.py b/src/gitingest/clone.py index d05381b..9999fcd 100644 --- a/src/gitingest/clone.py +++ b/src/gitingest/clone.py @@ -5,16 +5,17 @@ from __future__ import annotations from pathlib import Path from typing import TYPE_CHECKING +import git + from gitingest.config import DEFAULT_TIMEOUT from gitingest.utils.git_utils import ( check_repo_exists, checkout_partial_clone, - create_git_auth_header, - create_git_command, + create_git_repo, ensure_git_installed, + git_auth_context, is_github_host, resolve_commit, - run_command, ) from gitingest.utils.logging_config import get_logger from gitingest.utils.os_utils import ensure_directory_exists_or_create @@ -46,6 +47,8 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None: ------ ValueError If the repository is not found, if the provided URL is invalid, or if the token format is invalid. + RuntimeError + If Git operations fail during the cloning process. """ # Extract and validate query parameters @@ -83,20 +86,34 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None: commit = await resolve_commit(config, token=token) logger.debug("Resolved commit", extra={"commit": commit}) - clone_cmd = ["git"] - if token and is_github_host(url): - clone_cmd += ["-c", create_git_auth_header(token, url=url)] + # Clone the repository using GitPython with proper authentication + logger.info("Executing git clone operation", extra={"url": "", "local_path": local_path}) + try: + clone_kwargs = { + "single_branch": True, + "no_checkout": True, + "depth": 1, + } - clone_cmd += ["clone", "--single-branch", "--no-checkout", "--depth=1"] - if partial_clone: - clone_cmd += ["--filter=blob:none", "--sparse"] + with git_auth_context(url, token) as (git_cmd, auth_url): + if partial_clone: + # For partial clones, use git.Git() with filter and sparse options + cmd_args = ["--single-branch", "--no-checkout", "--depth=1"] + cmd_args.extend(["--filter=blob:none", "--sparse"]) + cmd_args.extend([auth_url, local_path]) + git_cmd.clone(*cmd_args) + elif token and is_github_host(url): + # For authenticated GitHub repos, use git_cmd with auth URL + cmd_args = ["--single-branch", "--no-checkout", "--depth=1", auth_url, local_path] + git_cmd.clone(*cmd_args) + else: + # For non-authenticated repos, use the standard GitPython method + git.Repo.clone_from(url, local_path, **clone_kwargs) - clone_cmd += [url, local_path] - - # Clone the repository - logger.info("Executing git clone command", extra={"command": " ".join([*clone_cmd[:-1], "", local_path])}) - await run_command(*clone_cmd) - logger.info("Git clone completed successfully") + logger.info("Git clone completed successfully") + except git.GitCommandError as exc: + msg = f"Git clone failed: {exc}" + raise RuntimeError(msg) from exc # Checkout the subpath if it is a partial clone if partial_clone: @@ -104,20 +121,56 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None: await checkout_partial_clone(config, token=token) logger.debug("Partial clone setup completed") - git = create_git_command(["git"], local_path, url, token) - - # Ensure the commit is locally available - logger.debug("Fetching specific commit", extra={"commit": commit}) - await run_command(*git, "fetch", "--depth=1", "origin", commit) - - # Write the work-tree at that commit - logger.info("Checking out commit", extra={"commit": commit}) - await run_command(*git, "checkout", commit) - - # Update submodules - if config.include_submodules: - logger.info("Updating submodules") - await run_command(*git, "submodule", "update", "--init", "--recursive", "--depth=1") - logger.debug("Submodules updated successfully") + # Perform post-clone operations + await _perform_post_clone_operations(config, local_path, url, token, commit) logger.info("Git clone operation completed successfully", extra={"local_path": local_path}) + + +async def _perform_post_clone_operations( + config: CloneConfig, + local_path: str, + url: str, + token: str | None, + commit: str, +) -> None: + """Perform post-clone operations like fetching, checkout, and submodule updates. + + Parameters + ---------- + config : CloneConfig + The configuration for cloning the repository. + local_path : str + The local path where the repository was cloned. + url : str + The repository URL. + token : str | None + GitHub personal access token (PAT) for accessing private repositories. + commit : str + The commit SHA to checkout. + + Raises + ------ + RuntimeError + If any Git operation fails. + + """ + try: + repo = create_git_repo(local_path, url, token) + + # Ensure the commit is locally available + logger.debug("Fetching specific commit", extra={"commit": commit}) + repo.git.fetch("--depth=1", "origin", commit) + + # Write the work-tree at that commit + logger.info("Checking out commit", extra={"commit": commit}) + repo.git.checkout(commit) + + # Update submodules + if config.include_submodules: + logger.info("Updating submodules") + repo.git.submodule("update", "--init", "--recursive", "--depth=1") + logger.debug("Submodules updated successfully") + except git.GitCommandError as exc: + msg = f"Git operation failed: {exc}" + raise RuntimeError(msg) from exc diff --git a/src/gitingest/utils/git_utils.py b/src/gitingest/utils/git_utils.py index daf4056..85fbccf 100644 --- a/src/gitingest/utils/git_utils.py +++ b/src/gitingest/utils/git_utils.py @@ -6,12 +6,12 @@ import asyncio import base64 import re import sys +from contextlib import contextmanager from pathlib import Path -from typing import TYPE_CHECKING, Final, Iterable -from urllib.parse import urlparse +from typing import TYPE_CHECKING, Final, Generator, Iterable +from urllib.parse import urlparse, urlunparse -import httpx -from starlette.status import HTTP_200_OK, HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND +import git from gitingest.utils.compat_func import removesuffix from gitingest.utils.exceptions import InvalidGitHubTokenError @@ -50,6 +50,9 @@ def is_github_host(url: str) -> bool: async def run_command(*args: str) -> tuple[bytes, bytes]: """Execute a shell command asynchronously and return (stdout, stderr) bytes. + This function is kept for backward compatibility with non-git commands. + Git operations should use GitPython directly. + Parameters ---------- *args : str @@ -92,21 +95,27 @@ async def ensure_git_installed() -> None: """ try: - await run_command("git", "--version") - except RuntimeError as exc: + # Use GitPython to check git availability + git_cmd = git.Git() + git_cmd.version() + except git.GitCommandError as exc: msg = "Git is not installed or not accessible. Please install Git first." raise RuntimeError(msg) from exc + except Exception as exc: + msg = "Git is not installed or not accessible. Please install Git first." + raise RuntimeError(msg) from exc + if sys.platform == "win32": try: - stdout, _ = await run_command("git", "config", "core.longpaths") - if stdout.decode().strip().lower() != "true": + longpaths_value = git_cmd.config("core.longpaths") + if longpaths_value.lower() != "true": logger.warning( "Git clone may fail on Windows due to long file paths. " "Consider enabling long path support with: 'git config --global core.longpaths true'. " "Note: This command may require administrator privileges.", extra={"platform": "windows", "longpaths_enabled": False}, ) - except RuntimeError: + except git.GitCommandError: # Ignore if checking 'core.longpaths' fails. pass @@ -126,35 +135,15 @@ async def check_repo_exists(url: str, token: str | None = None) -> bool: bool ``True`` if the repository exists, ``False`` otherwise. - Raises - ------ - RuntimeError - If the host returns an unrecognised status code. - """ - headers = {} - - if token and is_github_host(url): - host, owner, repo = _parse_github_url(url) - # Public GitHub vs. GitHub Enterprise - base_api = "https://api.github.com" if host == "github.com" else f"https://{host}/api/v3" - url = f"{base_api}/repos/{owner}/{repo}" - headers["Authorization"] = f"Bearer {token}" - - async with httpx.AsyncClient(follow_redirects=True) as client: - try: - response = await client.head(url, headers=headers) - except httpx.RequestError: - return False - - status_code = response.status_code - - if status_code == HTTP_200_OK: - return True - if status_code in {HTTP_401_UNAUTHORIZED, HTTP_403_FORBIDDEN, HTTP_404_NOT_FOUND}: + try: + # Try to resolve HEAD - if repo exists, this will work + await _resolve_ref_to_sha(url, "HEAD", token=token) + except (ValueError, Exception): + # Repository doesn't exist, is private without proper auth, or other error return False - msg = f"Unexpected HTTP status {status_code} for {url}" - raise RuntimeError(msg) + + return True def _parse_github_url(url: str) -> tuple[str, str, str]: @@ -216,52 +205,51 @@ async def fetch_remote_branches_or_tags(url: str, *, ref_type: str, token: str | ------ ValueError If the ``ref_type`` parameter is not "branches" or "tags". + RuntimeError + If fetching branches or tags from the remote repository fails. """ if ref_type not in ("branches", "tags"): msg = f"Invalid fetch type: {ref_type}" raise ValueError(msg) - cmd = ["git"] - - # Add authentication if needed - if token and is_github_host(url): - cmd += ["-c", create_git_auth_header(token, url=url)] - - cmd += ["ls-remote"] - - fetch_tags = ref_type == "tags" - to_fetch = "tags" if fetch_tags else "heads" - - cmd += [f"--{to_fetch}"] - - # `--refs` filters out the peeled tag objects (those ending with "^{}") (for tags) - if fetch_tags: - cmd += ["--refs"] - - cmd += [url] - await ensure_git_installed() - stdout, _ = await run_command(*cmd) - # For each line in the output: - # - Skip empty lines and lines that don't contain "refs/{to_fetch}/" - # - Extract the branch or tag name after "refs/{to_fetch}/" - return [ - line.split(f"refs/{to_fetch}/", 1)[1] - for line in stdout.decode().splitlines() - if line.strip() and f"refs/{to_fetch}/" in line - ] + + # Use GitPython to get remote references + try: + fetch_tags = ref_type == "tags" + to_fetch = "tags" if fetch_tags else "heads" + + # Build ls-remote command + cmd_args = [f"--{to_fetch}"] + if fetch_tags: + cmd_args.append("--refs") # Filter out peeled tag objects + cmd_args.append(url) + + # Run the command with proper authentication + with git_auth_context(url, token) as (git_cmd, auth_url): + # Replace the URL in cmd_args with the authenticated URL + cmd_args[-1] = auth_url # URL is the last argument + output = git_cmd.ls_remote(*cmd_args) + + # Parse output + return [ + line.split(f"refs/{to_fetch}/", 1)[1] + for line in output.splitlines() + if line.strip() and f"refs/{to_fetch}/" in line + ] + except git.GitCommandError as exc: + msg = f"Failed to fetch {ref_type} from {url}: {exc}" + raise RuntimeError(msg) from exc -def create_git_command(base_cmd: list[str], local_path: str, url: str, token: str | None = None) -> list[str]: - """Create a git command with authentication if needed. +def create_git_repo(local_path: str, url: str, token: str | None = None) -> git.Repo: + """Create a GitPython Repo object with authentication if needed. Parameters ---------- - base_cmd : list[str] - The base git command to start with. local_path : str - The local path where the git command should be executed. + The local path where the git repository is located. url : str The repository URL to check if it's a GitHub repository. token : str | None @@ -269,14 +257,30 @@ def create_git_command(base_cmd: list[str], local_path: str, url: str, token: st Returns ------- - list[str] - The git command with authentication if needed. + git.Repo + A GitPython Repo object configured with authentication. + + Raises + ------ + ValueError + If the local path is not a valid git repository. """ - cmd = [*base_cmd, "-C", local_path] - if token and is_github_host(url): - cmd += ["-c", create_git_auth_header(token, url=url)] - return cmd + try: + repo = git.Repo(local_path) + + # Configure authentication if needed + if token and is_github_host(url): + auth_header = create_git_auth_header(token, url=url) + # Set the auth header in git config for this repo + key, value = auth_header.split("=", 1) + repo.git.config(key, value) + + except git.InvalidGitRepositoryError as exc: + msg = f"Invalid git repository at {local_path}" + raise ValueError(msg) from exc + + return repo def create_git_auth_header(token: str, url: str = "https://github.com") -> str: @@ -310,6 +314,70 @@ def create_git_auth_header(token: str, url: str = "https://github.com") -> str: return f"http.https://{hostname}/.extraheader=Authorization: Basic {basic}" +def create_authenticated_url(url: str, token: str | None = None) -> str: + """Create an authenticated URL for Git operations. + + This is the safest approach for multi-user environments - no global state. + + Parameters + ---------- + url : str + The repository URL. + token : str | None + GitHub personal access token (PAT) for accessing private repositories. + + Returns + ------- + str + The URL with authentication embedded (for GitHub) or original URL. + + """ + if not (token and is_github_host(url)): + return url + + parsed = urlparse(url) + # Add token as username in URL (GitHub supports this) + netloc = f"x-oauth-basic:{token}@{parsed.hostname}" + if parsed.port: + netloc += f":{parsed.port}" + + return urlunparse( + ( + parsed.scheme, + netloc, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ), + ) + + +@contextmanager +def git_auth_context(url: str, token: str | None = None) -> Generator[tuple[git.Git, str]]: + """Context manager that provides Git command and authenticated URL. + + Returns both a Git command object and the authenticated URL to use. + This avoids any global state contamination between users. + + Parameters + ---------- + url : str + The repository URL to check if authentication is needed. + token : str | None + GitHub personal access token (PAT) for accessing private repositories. + + Yields + ------ + Generator[tuple[git.Git, str]] + Tuple of (Git command object, authenticated URL to use). + + """ + git_cmd = git.Git() + auth_url = create_authenticated_url(url, token) + yield git_cmd, auth_url + + def validate_github_token(token: str) -> None: """Validate the format of a GitHub Personal Access Token. @@ -338,13 +406,23 @@ async def checkout_partial_clone(config: CloneConfig, token: str | None) -> None token : str | None GitHub personal access token (PAT) for accessing private repositories. + Raises + ------ + RuntimeError + If the sparse-checkout configuration fails. + """ subpath = config.subpath.lstrip("/") if config.blob: # Remove the file name from the subpath when ingesting from a file url (e.g. blob/branch/path/file.txt) subpath = str(Path(subpath).parent.as_posix()) - checkout_cmd = create_git_command(["git"], config.local_path, config.url, token) - await run_command(*checkout_cmd, "sparse-checkout", "set", subpath) + + try: + repo = create_git_repo(config.local_path, config.url, token) + repo.git.sparse_checkout("set", subpath) + except git.GitCommandError as exc: + msg = f"Failed to configure sparse-checkout: {exc}" + raise RuntimeError(msg) from exc async def resolve_commit(config: CloneConfig, token: str | None) -> str: @@ -400,18 +478,20 @@ async def _resolve_ref_to_sha(url: str, pattern: str, token: str | None = None) If the ref does not exist in the remote repository. """ - # Build: git [-c http./.extraheader=Auth...] ls-remote - cmd: list[str] = ["git"] - if token and is_github_host(url): - cmd += ["-c", create_git_auth_header(token, url=url)] + try: + # Execute ls-remote command with proper authentication + with git_auth_context(url, token) as (git_cmd, auth_url): + output = git_cmd.ls_remote(auth_url, pattern) + lines = output.splitlines() - cmd += ["ls-remote", url, pattern] - stdout, _ = await run_command(*cmd) - lines = stdout.decode().splitlines() - sha = _pick_commit_sha(lines) - if not sha: - msg = f"{pattern!r} not found in {url}" - raise ValueError(msg) + sha = _pick_commit_sha(lines) + if not sha: + msg = f"{pattern!r} not found in {url}" + raise ValueError(msg) + + except git.GitCommandError as exc: + msg = f"Failed to resolve {pattern} in {url}:\n{exc}" + raise ValueError(msg) from exc return sha diff --git a/src/server/main.py b/src/server/main.py index d973c38..f66f674 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -18,7 +18,7 @@ from starlette.middleware.trustedhost import TrustedHostMiddleware from gitingest.utils.logging_config import get_logger from server.metrics_server import start_metrics_server from server.routers import dynamic, index, ingest -from server.server_config import templates +from server.server_config import get_version_info, templates from server.server_utils import limiter, rate_limit_exception_handler # Load environment variables from .env file @@ -169,7 +169,9 @@ async def custom_swagger_ui(request: Request) -> HTMLResponse: - **HTMLResponse**: Custom Swagger UI documentation page """ - return templates.TemplateResponse("swagger_ui.jinja", {"request": request}) + context = {"request": request} + context.update(get_version_info()) + return templates.TemplateResponse("swagger_ui.jinja", context) @app.get("/api", include_in_schema=True) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 03f52f1..f2f2ae9 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -308,7 +308,7 @@ async def process_query( _print_error(query.url, exc, max_file_size, pattern_type, pattern) # Clean up repository even if processing failed _cleanup_repository(clone_config) - return IngestErrorResponse(error=str(exc)) + return IngestErrorResponse(error=f"{exc!s}") if len(content) > MAX_DISPLAY_SIZE: content = ( diff --git a/src/server/routers/dynamic.py b/src/server/routers/dynamic.py index 93b9d68..49fdf1b 100644 --- a/src/server/routers/dynamic.py +++ b/src/server/routers/dynamic.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse -from server.server_config import templates +from server.server_config import get_version_info, templates router = APIRouter() @@ -29,11 +29,11 @@ async def catch_all(request: Request, full_path: str) -> HTMLResponse: and other default parameters such as file size. """ - return templates.TemplateResponse( - "git.jinja", - { - "request": request, - "repo_url": full_path, - "default_max_file_size": 243, - }, - ) + context = { + "request": request, + "repo_url": full_path, + "default_max_file_size": 243, + } + context.update(get_version_info()) + + return templates.TemplateResponse("git.jinja", context) diff --git a/src/server/routers/index.py b/src/server/routers/index.py index af4abd5..e8dfdfd 100644 --- a/src/server/routers/index.py +++ b/src/server/routers/index.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse -from server.server_config import EXAMPLE_REPOS, templates +from server.server_config import EXAMPLE_REPOS, get_version_info, templates router = APIRouter() @@ -27,11 +27,11 @@ async def home(request: Request) -> HTMLResponse: and other default parameters such as file size. """ - return templates.TemplateResponse( - "index.jinja", - { - "request": request, - "examples": EXAMPLE_REPOS, - "default_max_file_size": 243, - }, - ) + context = { + "request": request, + "examples": EXAMPLE_REPOS, + "default_max_file_size": 243, + } + context.update(get_version_info()) + + return templates.TemplateResponse("index.jinja", context) diff --git a/src/server/server_config.py b/src/server/server_config.py index 6918bf2..56b5eb1 100644 --- a/src/server/server_config.py +++ b/src/server/server_config.py @@ -2,6 +2,7 @@ from __future__ import annotations +import os from pathlib import Path from fastapi.templating import Jinja2Templates @@ -14,13 +15,42 @@ MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 mb EXAMPLE_REPOS: list[dict[str, str]] = [ {"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"}, - {"name": "FastAPI", "url": "https://github.com/tiangolo/fastapi"}, + {"name": "FastAPI", "url": "https://github.com/fastapi/fastapi"}, {"name": "Flask", "url": "https://github.com/pallets/flask"}, {"name": "Excalidraw", "url": "https://github.com/excalidraw/excalidraw"}, {"name": "ApiAnalytics", "url": "https://github.com/tom-draper/api-analytics"}, ] +# Version and repository configuration +APP_REPOSITORY = os.getenv("APP_REPOSITORY", "https://github.com/coderamp-labs/gitingest") +APP_VERSION = os.getenv("APP_VERSION", "unknown") +APP_VERSION_URL = os.getenv("APP_VERSION_URL", "https://github.com/coderamp-labs/gitingest") + + +def get_version_info() -> dict[str, str]: + """Get version information including display version and link. + + Returns + ------- + dict[str, str] + Dictionary containing 'version' and 'version_link' keys. + + """ + # Use pre-computed values from GitHub Actions + display_version = APP_VERSION + version_link = APP_VERSION_URL + + # Fallback to repository root if no URL is provided + if version_link == APP_REPOSITORY or not version_link: + version_link = f"{APP_REPOSITORY.rstrip('/')}/tree/main" + + return { + "version": display_version, + "version_link": version_link, + } + + # Use absolute path to templates directory templates_dir = Path(__file__).parent / "templates" templates = Jinja2Templates(directory=templates_dir) diff --git a/src/server/templates/components/footer.jinja b/src/server/templates/components/footer.jinja index 9784dfe..03900e3 100644 --- a/src/server/templates/components/footer.jinja +++ b/src/server/templates/components/footer.jinja @@ -1,7 +1,7 @@ {% from 'components/_macros.jinja' import footer_icon_link %}