fix: remove logarithm conversion from the backend and correctly process max file size in kb (#464)

Co-authored-by: Nicolas Iragne <nicoragne@hotmail.fr>
This commit is contained in:
Zarial 2025-07-27 18:36:47 +02:00 committed by GitHub
parent 05b44d9287
commit 932bfef85d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 58 additions and 47 deletions

View file

@ -41,7 +41,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
tag : str | None
The tag of the repository.
max_file_size : int
The maximum file size to ingest (default: 10 MB).
The maximum file size to ingest in bytes (default: 10 MB).
ignore_patterns : set[str]
The patterns to ignore (default: ``set()``).
include_patterns : set[str] | None

View file

@ -3,14 +3,16 @@
from __future__ import annotations
from enum import Enum
from typing import Union
from typing import TYPE_CHECKING, Union
from pydantic import BaseModel, Field, field_validator
from gitingest.utils.compat_func import removesuffix
from server.server_config import MAX_FILE_SIZE_KB
# needed for type checking (pydantic)
from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import)
if TYPE_CHECKING:
from server.form_types import IntForm, OptStrForm, StrForm
class PatternType(str, Enum):
@ -39,7 +41,7 @@ class IngestRequest(BaseModel):
"""
input_text: str = Field(..., description="Git repository URL or slug to ingest")
max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)")
max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB")
pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering")
pattern: str = Field(default="", description="Glob/regex pattern for file filtering")
token: str | None = Field(default=None, description="GitHub PAT for private repositories")

View file

@ -13,12 +13,12 @@ from gitingest.utils.pattern_utils import process_patterns
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType
from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3
from server.server_config import MAX_DISPLAY_SIZE
from server.server_utils import Colors, log_slider_to_size
from server.server_utils import Colors
async def process_query(
input_text: str,
slider_position: int,
max_file_size: int,
pattern_type: PatternType,
pattern: str,
token: str | None = None,
@ -32,8 +32,8 @@ async def process_query(
----------
input_text : str
Input text provided by the user, typically a Git repository URL or slug.
slider_position : int
Position of the slider, representing the maximum file size in the query.
max_file_size : int
Maximum file size in KB to be included in the digest.
pattern_type : PatternType
Type of pattern to use (either "include" or "exclude")
pattern : str
@ -55,8 +55,6 @@ async def process_query(
if token:
validate_github_token(token)
max_file_size = log_slider_to_size(slider_position)
try:
query = await parse_remote_repo(input_text, token=token)
except Exception as exc:
@ -65,7 +63,7 @@ async def process_query(
return IngestErrorResponse(error=str(exc))
query.url = cast("str", query.url)
query.max_file_size = max_file_size
query.max_file_size = max_file_size * 1024 # Convert to bytes since we currently use KB in higher levels
query.ignore_patterns, query.include_patterns = process_patterns(
exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None,
include_patterns=pattern if pattern_type == PatternType.INCLUDE else None,
@ -142,7 +140,7 @@ async def process_query(
digest_url=digest_url,
tree=tree,
content=content,
default_max_file_size=slider_position,
default_max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
)

View file

@ -11,7 +11,7 @@ from gitingest.config import TMP_BASE_PATH
from server.models import IngestRequest
from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion
from server.s3_utils import is_s3_enabled
from server.server_config import MAX_DISPLAY_SIZE
from server.server_config import DEFAULT_FILE_SIZE_KB
from server.server_utils import limiter
ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"])
@ -58,7 +58,7 @@ async def api_ingest_get(
request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
user: str,
repository: str,
max_file_size: int = MAX_DISPLAY_SIZE,
max_file_size: int = DEFAULT_FILE_SIZE_KB,
pattern_type: str = "exclude",
pattern: str = "",
token: str = "",
@ -74,7 +74,7 @@ async def api_ingest_get(
- **repository** (`str`): GitHub repository name
**Query Parameters**
- **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB)
- **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB)
- **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude")
- **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "")
- **token** (`str`, optional): GitHub personal access token for private repositories (default: "")

View file

@ -33,7 +33,7 @@ async def _perform_ingestion(
result = await process_query(
input_text=input_text,
slider_position=max_file_size,
max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
token=token,

View file

@ -10,8 +10,8 @@ MAX_DISPLAY_SIZE: int = 300_000
DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour)
# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB
MAX_SLIDER_POSITION: int = 500 # Maximum slider position
DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 MB, expressed in KB
MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB, expressed in KB
EXAMPLE_REPOS: list[dict[str, str]] = [
{"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"},

View file

@ -1,7 +1,6 @@
"""Utility functions for the server."""
import asyncio
import math
import shutil
import time
from contextlib import asynccontextmanager, suppress
@ -15,7 +14,7 @@ from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
from gitingest.config import TMP_BASE_PATH
from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION
from server.server_config import DELETE_REPO_AFTER
# Initialize a rate limiter
limiter = Limiter(key_func=get_remote_address)
@ -161,24 +160,6 @@ def _append_line(path: Path, line: str) -> None:
fp.write(f"{line}\n")
def log_slider_to_size(position: int) -> int:
    """Map a slider position onto a maximum file size in bytes.

    The slider is logarithmic: the fraction of the slider travelled is raised
    to the power 1.5 and used to scale ``log(MAX_FILE_SIZE_KB)``, so position
    ``0`` yields 1 KB (1024 bytes) and sizes grow towards ``MAX_FILE_SIZE_KB``
    kilobytes as the position approaches ``MAX_SLIDER_POSITION``.

    Parameters
    ----------
    position : int
        Slider position ranging from 0 to 500.

    Returns
    -------
    int
        File size in bytes corresponding to the slider position.

    """
    log_of_max_kb = math.log(MAX_FILE_SIZE_KB)
    # Bend the linear slider fraction so small sizes get finer resolution.
    curved_fraction = pow(position / MAX_SLIDER_POSITION, 1.5)
    # The exponential gives a size in whole kilobytes; convert to bytes last.
    size_in_kb = round(math.exp(log_of_max_kb * curved_fraction))
    return size_in_kb * 1024
## Color printing utility
class Colors:
"""ANSI color codes."""

View file

@ -76,12 +76,12 @@
</label>
<input type="range"
id="file_size"
name="max_file_size"
min="0"
min="1"
max="500"
required
value="{{ default_max_file_size }}"
class="w-full h-3 bg-[#FAFAFA] bg-no-repeat bg-[length:50%_100%] bg-[#ebdbb7] appearance-none border-[3px] border-gray-900 rounded-sm focus:outline-none bg-gradient-to-r from-[#FE4A60] to-[#FE4A60] [&::-webkit-slider-thumb]:w-5 [&::-webkit-slider-thumb]:h-7 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:bg-white [&::-webkit-slider-thumb]:rounded-sm [&::-webkit-slider-thumb]:cursor-pointer [&::-webkit-slider-thumb]:border-solid [&::-webkit-slider-thumb]:border-[3px] [&::-webkit-slider-thumb]:border-gray-900 [&::-webkit-slider-thumb]:shadow-[3px_3px_0_#000]">
<input type="hidden" id="max_file_size_kb" name="max_file_size" value="">
</div>
<!-- PAT checkbox with PAT field below -->
<div class="flex flex-col items-start w-full sm:col-span-2 lg:col-span-1 lg:row-span-2 lg:pt-3.5">

View file

@ -126,13 +126,13 @@ function collectFormData(form) {
const json_data = {};
const inputText = form.querySelector('[name="input_text"]');
const token = form.querySelector('[name="token"]');
const slider = document.getElementById('file_size');
const hiddenInput = document.getElementById('max_file_size_kb');
const patternType = document.getElementById('pattern_type');
const pattern = document.getElementById('pattern');
if (inputText) {json_data.input_text = inputText.value;}
if (token) {json_data.token = token.value;}
if (slider) {json_data.max_file_size = slider.value;}
if (hiddenInput) {json_data.max_file_size = hiddenInput.value;}
if (patternType) {json_data.pattern_type = patternType.value;}
if (pattern) {json_data.pattern = pattern.value;}
@ -206,6 +206,14 @@ function handleSubmit(event, showLoadingSpinner = false) {
if (!form) {return;}
// Ensure hidden input is updated before collecting form data
const slider = document.getElementById('file_size');
const hiddenInput = document.getElementById('max_file_size_kb');
if (slider && hiddenInput) {
hiddenInput.value = logSliderToSize(slider.value);
}
if (showLoadingSpinner) {
showLoading();
}
@ -226,12 +234,32 @@ function handleSubmit(event, showLoadingSpinner = false) {
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(json_data)
})
.then((response) => response.json())
.then( (data) => {
// Hide loading overlay
.then(async (response) => {
let data;
try {
data = await response.json();
} catch {
data = {};
}
setButtonLoadingState(submitButton, false);
// Handle error
if (!response.ok) {
// Show all error details if present
if (Array.isArray(data.detail)) {
const details = data.detail.map((d) => `<li>${d.msg || JSON.stringify(d)}</li>`).join('');
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'><b>Error(s):</b><ul>${details}</ul></div>`);
return;
}
// Other errors
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error || JSON.stringify(data) || 'An error occurred.'}</div>`);
return;
}
// Handle error in data
if (data.error) {
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error}</div>`);
@ -327,14 +355,16 @@ function logSliderToSize(position) {
function initializeSlider() {
const slider = document.getElementById('file_size');
const sizeValue = document.getElementById('size_value');
const hiddenInput = document.getElementById('max_file_size_kb');
if (!slider || !sizeValue) {return;}
if (!slider || !sizeValue || !hiddenInput) {return;}
function updateSlider() {
const value = logSliderToSize(slider.value);
sizeValue.textContent = formatSize(value);
slider.style.backgroundSize = `${(slider.value / slider.max) * 100}% 100%`;
hiddenInput.value = value; // Set hidden input to KB value
}
// Update on slider change