fix: make cache aware of subpaths (#481)

This commit is contained in:
Nicolas Iragne 2025-07-31 14:43:54 +02:00 committed by GitHub
parent 4b190fc73d
commit 8b59bef541
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 10 additions and 4 deletions

View file

@ -21,7 +21,7 @@ x-prod-environment: &prod-environment
x-dev-environment: &dev-environment
DEBUG: "true"
LOG_LEVEL: "debug"
LOG_LEVEL: "DEBUG"
RELOAD: "true"
GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-development}
# S3 Configuration for development

View file

@ -90,6 +90,7 @@ async def _check_s3_cache(
user_name=cast("str", query.user_name),
repo_name=cast("str", query.repo_name),
commit=query.commit,
subpath=query.subpath,
include_patterns=query.include_patterns,
ignore_patterns=query.ignore_patterns,
)
@ -168,6 +169,7 @@ def _store_digest_content(
user_name=cast("str", query.user_name),
repo_name=cast("str", query.repo_name),
commit=query.commit,
subpath=query.subpath,
include_patterns=query.include_patterns,
ignore_patterns=query.ignore_patterns,
)

View file

@ -62,6 +62,7 @@ def generate_s3_file_path(
user_name: str,
repo_name: str,
commit: str,
subpath: str,
include_patterns: set[str] | None,
ignore_patterns: set[str],
) -> str:
@ -69,7 +70,7 @@ def generate_s3_file_path(
The file path is formatted as:
[<S3_DIRECTORY_PREFIX>/]ingest/<provider>/<repo-owner>/<repo-name>/<commit-ID>/
<exclude&include hash>/<owner>-<repo-name>.txt
<exclude&include hash>/<owner>-<repo-name>-<subpath-hash>.txt
If S3_DIRECTORY_PREFIX environment variable is set, it will be prefixed to the path.
The commit-ID is always included in the URL.
@ -85,6 +86,8 @@ def generate_s3_file_path(
Repository name.
commit : str
Commit hash.
subpath : str
Subpath of the repository.
include_patterns : set[str] | None
Set of patterns specifying which files to include.
ignore_patterns : set[str]
@ -111,9 +114,10 @@ def generate_s3_file_path(
patterns_str = f"include:{sorted(include_patterns) if include_patterns else []}"
patterns_str += f"exclude:{sorted(ignore_patterns)}"
patterns_hash = hashlib.sha256(patterns_str.encode()).hexdigest()[:16]
subpath_hash = hashlib.sha256(subpath.encode()).hexdigest()[:16]
# Build the base path using hostname directly
base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{user_name}-{repo_name}.txt"
file_name = f"{user_name}-{repo_name}-{subpath_hash}.txt"
base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{file_name}"
# Check for S3_DIRECTORY_PREFIX environment variable
s3_directory_prefix = os.getenv("S3_DIRECTORY_PREFIX")