mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-26 15:40:40 +00:00
fix: remove logarithm conversion from the backend and correctly process max file size in kb (#464)
Co-authored-by: Nicolas Iragne <nicoragne@hotmail.fr>
This commit is contained in:
parent
05b44d9287
commit
932bfef85d
9 changed files with 58 additions and 47 deletions
|
|
@ -41,7 +41,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
|
|||
tag : str | None
|
||||
The tag of the repository.
|
||||
max_file_size : int
|
||||
The maximum file size to ingest (default: 10 MB).
|
||||
The maximum file size to ingest in bytes (default: 10 MB).
|
||||
ignore_patterns : set[str]
|
||||
The patterns to ignore (default: ``set()``).
|
||||
include_patterns : set[str] | None
|
||||
|
|
|
|||
|
|
@ -3,14 +3,16 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import Union
|
||||
from typing import TYPE_CHECKING, Union
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from gitingest.utils.compat_func import removesuffix
|
||||
from server.server_config import MAX_FILE_SIZE_KB
|
||||
|
||||
# needed for type checking (pydantic)
|
||||
from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import)
|
||||
if TYPE_CHECKING:
|
||||
from server.form_types import IntForm, OptStrForm, StrForm
|
||||
|
||||
|
||||
class PatternType(str, Enum):
|
||||
|
|
@ -39,7 +41,7 @@ class IngestRequest(BaseModel):
|
|||
"""
|
||||
|
||||
input_text: str = Field(..., description="Git repository URL or slug to ingest")
|
||||
max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)")
|
||||
max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB")
|
||||
pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering")
|
||||
pattern: str = Field(default="", description="Glob/regex pattern for file filtering")
|
||||
token: str | None = Field(default=None, description="GitHub PAT for private repositories")
|
||||
|
|
|
|||
|
|
@ -13,12 +13,12 @@ from gitingest.utils.pattern_utils import process_patterns
|
|||
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType
|
||||
from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3
|
||||
from server.server_config import MAX_DISPLAY_SIZE
|
||||
from server.server_utils import Colors, log_slider_to_size
|
||||
from server.server_utils import Colors
|
||||
|
||||
|
||||
async def process_query(
|
||||
input_text: str,
|
||||
slider_position: int,
|
||||
max_file_size: int,
|
||||
pattern_type: PatternType,
|
||||
pattern: str,
|
||||
token: str | None = None,
|
||||
|
|
@ -32,8 +32,8 @@ async def process_query(
|
|||
----------
|
||||
input_text : str
|
||||
Input text provided by the user, typically a Git repository URL or slug.
|
||||
slider_position : int
|
||||
Position of the slider, representing the maximum file size in the query.
|
||||
max_file_size : int
|
||||
Max file size in KB to be included in the digest.
|
||||
pattern_type : PatternType
|
||||
Type of pattern to use (either "include" or "exclude")
|
||||
pattern : str
|
||||
|
|
@ -55,8 +55,6 @@ async def process_query(
|
|||
if token:
|
||||
validate_github_token(token)
|
||||
|
||||
max_file_size = log_slider_to_size(slider_position)
|
||||
|
||||
try:
|
||||
query = await parse_remote_repo(input_text, token=token)
|
||||
except Exception as exc:
|
||||
|
|
@ -65,7 +63,7 @@ async def process_query(
|
|||
return IngestErrorResponse(error=str(exc))
|
||||
|
||||
query.url = cast("str", query.url)
|
||||
query.max_file_size = max_file_size
|
||||
query.max_file_size = max_file_size * 1024 # Convert to bytes since we currently use KB in higher levels
|
||||
query.ignore_patterns, query.include_patterns = process_patterns(
|
||||
exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None,
|
||||
include_patterns=pattern if pattern_type == PatternType.INCLUDE else None,
|
||||
|
|
@ -142,7 +140,7 @@ async def process_query(
|
|||
digest_url=digest_url,
|
||||
tree=tree,
|
||||
content=content,
|
||||
default_max_file_size=slider_position,
|
||||
default_max_file_size=max_file_size,
|
||||
pattern_type=pattern_type,
|
||||
pattern=pattern,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from gitingest.config import TMP_BASE_PATH
|
|||
from server.models import IngestRequest
|
||||
from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion
|
||||
from server.s3_utils import is_s3_enabled
|
||||
from server.server_config import MAX_DISPLAY_SIZE
|
||||
from server.server_config import DEFAULT_FILE_SIZE_KB
|
||||
from server.server_utils import limiter
|
||||
|
||||
ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"])
|
||||
|
|
@ -58,7 +58,7 @@ async def api_ingest_get(
|
|||
request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
|
||||
user: str,
|
||||
repository: str,
|
||||
max_file_size: int = MAX_DISPLAY_SIZE,
|
||||
max_file_size: int = DEFAULT_FILE_SIZE_KB,
|
||||
pattern_type: str = "exclude",
|
||||
pattern: str = "",
|
||||
token: str = "",
|
||||
|
|
@ -74,7 +74,7 @@ async def api_ingest_get(
|
|||
- **repository** (`str`): GitHub repository name
|
||||
|
||||
**Query Parameters**
|
||||
- **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB)
|
||||
- **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB)
|
||||
- **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude")
|
||||
- **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "")
|
||||
- **token** (`str`, optional): GitHub personal access token for private repositories (default: "")
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ async def _perform_ingestion(
|
|||
|
||||
result = await process_query(
|
||||
input_text=input_text,
|
||||
slider_position=max_file_size,
|
||||
max_file_size=max_file_size,
|
||||
pattern_type=pattern_type,
|
||||
pattern=pattern,
|
||||
token=token,
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ MAX_DISPLAY_SIZE: int = 300_000
|
|||
DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour)
|
||||
|
||||
# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
|
||||
MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB
|
||||
MAX_SLIDER_POSITION: int = 500 # Maximum slider position
|
||||
DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 MB
|
||||
MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB
|
||||
|
||||
EXAMPLE_REPOS: list[dict[str, str]] = [
|
||||
{"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"},
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
"""Utility functions for the server."""
|
||||
|
||||
import asyncio
|
||||
import math
|
||||
import shutil
|
||||
import time
|
||||
from contextlib import asynccontextmanager, suppress
|
||||
|
|
@ -15,7 +14,7 @@ from slowapi.errors import RateLimitExceeded
|
|||
from slowapi.util import get_remote_address
|
||||
|
||||
from gitingest.config import TMP_BASE_PATH
|
||||
from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION
|
||||
from server.server_config import DELETE_REPO_AFTER
|
||||
|
||||
# Initialize a rate limiter
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
|
|
@ -161,24 +160,6 @@ def _append_line(path: Path, line: str) -> None:
|
|||
fp.write(f"{line}\n")
|
||||
|
||||
|
||||
def log_slider_to_size(position: int) -> int:
|
||||
"""Convert a slider position to a file size in bytes using a logarithmic scale.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
position : int
|
||||
Slider position ranging from 0 to 500.
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
File size in bytes corresponding to the slider position.
|
||||
|
||||
"""
|
||||
maxv = math.log(MAX_FILE_SIZE_KB)
|
||||
return round(math.exp(maxv * pow(position / MAX_SLIDER_POSITION, 1.5))) * 1024
|
||||
|
||||
|
||||
## Color printing utility
|
||||
class Colors:
|
||||
"""ANSI color codes."""
|
||||
|
|
|
|||
|
|
@ -76,12 +76,12 @@
|
|||
</label>
|
||||
<input type="range"
|
||||
id="file_size"
|
||||
name="max_file_size"
|
||||
min="0"
|
||||
min="1"
|
||||
max="500"
|
||||
required
|
||||
value="{{ default_max_file_size }}"
|
||||
class="w-full h-3 bg-[#FAFAFA] bg-no-repeat bg-[length:50%_100%] bg-[#ebdbb7] appearance-none border-[3px] border-gray-900 rounded-sm focus:outline-none bg-gradient-to-r from-[#FE4A60] to-[#FE4A60] [&::-webkit-slider-thumb]:w-5 [&::-webkit-slider-thumb]:h-7 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:bg-white [&::-webkit-slider-thumb]:rounded-sm [&::-webkit-slider-thumb]:cursor-pointer [&::-webkit-slider-thumb]:border-solid [&::-webkit-slider-thumb]:border-[3px] [&::-webkit-slider-thumb]:border-gray-900 [&::-webkit-slider-thumb]:shadow-[3px_3px_0_#000]">
|
||||
<input type="hidden" id="max_file_size_kb" name="max_file_size" value="">
|
||||
</div>
|
||||
<!-- PAT checkbox with PAT field below -->
|
||||
<div class="flex flex-col items-start w-full sm:col-span-2 lg:col-span-1 lg:row-span-2 lg:pt-3.5">
|
||||
|
|
|
|||
|
|
@ -126,13 +126,13 @@ function collectFormData(form) {
|
|||
const json_data = {};
|
||||
const inputText = form.querySelector('[name="input_text"]');
|
||||
const token = form.querySelector('[name="token"]');
|
||||
const slider = document.getElementById('file_size');
|
||||
const hiddenInput = document.getElementById('max_file_size_kb');
|
||||
const patternType = document.getElementById('pattern_type');
|
||||
const pattern = document.getElementById('pattern');
|
||||
|
||||
if (inputText) {json_data.input_text = inputText.value;}
|
||||
if (token) {json_data.token = token.value;}
|
||||
if (slider) {json_data.max_file_size = slider.value;}
|
||||
if (hiddenInput) {json_data.max_file_size = hiddenInput.value;}
|
||||
if (patternType) {json_data.pattern_type = patternType.value;}
|
||||
if (pattern) {json_data.pattern = pattern.value;}
|
||||
|
||||
|
|
@ -206,6 +206,14 @@ function handleSubmit(event, showLoadingSpinner = false) {
|
|||
|
||||
if (!form) {return;}
|
||||
|
||||
// Ensure hidden input is updated before collecting form data
|
||||
const slider = document.getElementById('file_size');
|
||||
const hiddenInput = document.getElementById('max_file_size_kb');
|
||||
|
||||
if (slider && hiddenInput) {
|
||||
hiddenInput.value = logSliderToSize(slider.value);
|
||||
}
|
||||
|
||||
if (showLoadingSpinner) {
|
||||
showLoading();
|
||||
}
|
||||
|
|
@ -226,12 +234,32 @@ function handleSubmit(event, showLoadingSpinner = false) {
|
|||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(json_data)
|
||||
})
|
||||
.then((response) => response.json())
|
||||
.then( (data) => {
|
||||
// Hide loading overlay
|
||||
.then(async (response) => {
|
||||
let data;
|
||||
|
||||
try {
|
||||
data = await response.json();
|
||||
} catch {
|
||||
data = {};
|
||||
}
|
||||
setButtonLoadingState(submitButton, false);
|
||||
|
||||
// Handle error
|
||||
if (!response.ok) {
|
||||
// Show all error details if present
|
||||
if (Array.isArray(data.detail)) {
|
||||
const details = data.detail.map((d) => `<li>${d.msg || JSON.stringify(d)}</li>`).join('');
|
||||
|
||||
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'><b>Error(s):</b><ul>${details}</ul></div>`);
|
||||
|
||||
return;
|
||||
}
|
||||
// Other errors
|
||||
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error || JSON.stringify(data) || 'An error occurred.'}</div>`);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle error in data
|
||||
if (data.error) {
|
||||
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error}</div>`);
|
||||
|
||||
|
|
@ -327,14 +355,16 @@ function logSliderToSize(position) {
|
|||
function initializeSlider() {
|
||||
const slider = document.getElementById('file_size');
|
||||
const sizeValue = document.getElementById('size_value');
|
||||
const hiddenInput = document.getElementById('max_file_size_kb');
|
||||
|
||||
if (!slider || !sizeValue) {return;}
|
||||
if (!slider || !sizeValue || !hiddenInput) {return;}
|
||||
|
||||
function updateSlider() {
|
||||
const value = logSliderToSize(slider.value);
|
||||
|
||||
sizeValue.textContent = formatSize(value);
|
||||
slider.style.backgroundSize = `${(slider.value / slider.max) * 100}% 100%`;
|
||||
hiddenInput.value = value; // Set hidden input to KB value
|
||||
}
|
||||
|
||||
// Update on slider change
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue