fix: handle network errors gracefully in token count estimation (#437)

Co-authored-by: ix-56h <n.guintini@protonmail.com>
Co-authored-by: Zarial <39010759+ix-56h@users.noreply.github.com>
This commit is contained in:
Napuh 2025-07-30 14:49:12 +02:00 committed by GitHub
parent efe5a26861
commit 5fbb445cd8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 9 additions and 1 deletions

View file

@@ -26,6 +26,7 @@ repos:
- id: trailing-whitespace
description: 'Trim trailing whitespace.'
exclude: CHANGELOG.md
- id: check-docstring-first
description: 'Check a common error of defining a docstring after code.'

View file

@@ -2,8 +2,11 @@
from __future__ import annotations
import ssl
import warnings
from typing import TYPE_CHECKING
import requests.exceptions
import tiktoken
from gitingest.schemas import FileSystemNode, FileSystemNodeType
@@ -190,7 +193,11 @@ def _format_token_count(text: str) -> str | None:
encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini
total_tokens = len(encoding.encode(text, disallowed_special=()))
except (ValueError, UnicodeEncodeError) as exc:
print(exc)
warnings.warn(f"Failed to estimate token size: {exc}", RuntimeWarning, stacklevel=3)
return None
except (requests.exceptions.RequestException, ssl.SSLError) as exc:
# If network errors, skip token count estimation instead of erroring out
warnings.warn(f"Failed to download tiktoken model: {exc}", RuntimeWarning, stacklevel=3)
return None
for threshold, suffix in _TOKEN_THRESHOLDS: