fix: handle network errors gracefully in token count estimation (#437)

Co-authored-by: ix-56h <n.guintini@protonmail.com>
Co-authored-by: Zarial <39010759+ix-56h@users.noreply.github.com>
This commit is contained in:
Napuh 2025-07-30 14:49:12 +02:00 committed by GitHub
parent efe5a26861
commit 5fbb445cd8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 9 additions and 1 deletions

View file

@@ -26,6 +26,7 @@ repos:
- id: trailing-whitespace
description: 'Trim trailing whitespace.'
exclude: CHANGELOG.md
- id: check-docstring-first
description: 'Check a common error of defining a docstring after code.'

View file

@@ -2,8 +2,11 @@
from __future__ import annotations
import ssl
import warnings
from typing import TYPE_CHECKING
import requests.exceptions
import tiktoken
from gitingest.schemas import FileSystemNode, FileSystemNodeType
@@ -190,7 +193,11 @@ def _format_token_count(text: str) -> str | None:
encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini
total_tokens = len(encoding.encode(text, disallowed_special=()))
except (ValueError, UnicodeEncodeError) as exc:
print(exc)
warnings.warn(f"Failed to estimate token size: {exc}", RuntimeWarning, stacklevel=3)
return None
except (requests.exceptions.RequestException, ssl.SSLError) as exc:
# If network errors, skip token count estimation instead of erroring out
warnings.warn(f"Failed to download tiktoken model: {exc}", RuntimeWarning, stacklevel=3)
return None
for threshold, suffix in _TOKEN_THRESHOLDS: