mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-28 08:29:29 +00:00
refactor: rework how symlinks are processed (no longer resolve) (#248)
Some changes to how we handle symlinks. We no longer resolve them, which should reduce the complexity a fair bit. We also now show the target name in the output. I also added a launch.json file for debugging, because it took me a while to figure out how to get the debugger to work. Yeah, that's it. Please test before merging, because I'm a bit of a dingus sometimes.
This commit is contained in:
parent
8be6f5620f
commit
cdeadf510d
4 changed files with 55 additions and 32 deletions
12
.vscode/launch.json
vendored
Normal file
12
.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Module",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"args": ["server.main:app", "--host", "0.0.0.0", "--port", "8000"],
|
||||
"cwd": "${workspaceFolder}/src"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -9,7 +9,6 @@ from gitingest.output_formatters import format_node
|
|||
from gitingest.query_parsing import IngestionQuery
|
||||
from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats
|
||||
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
|
||||
from gitingest.utils.path_utils import _is_safe_symlink
|
||||
|
||||
try:
|
||||
import tomllib # type: ignore[import]
|
||||
|
|
@ -171,11 +170,6 @@ def _process_node(
|
|||
The parsed query object containing information about the repository and query parameters.
|
||||
stats : FileSystemStats
|
||||
Statistics tracking object for the total file count and size.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If an unexpected error occurs during processing.
|
||||
"""
|
||||
|
||||
if limit_exceeded(stats, node.depth):
|
||||
|
|
@ -183,28 +177,15 @@ def _process_node(
|
|||
|
||||
for sub_path in node.path.iterdir():
|
||||
|
||||
symlink_path = None
|
||||
if sub_path.is_symlink():
|
||||
if not _is_safe_symlink(sub_path, query.local_path):
|
||||
print(f"Skipping unsafe symlink: {sub_path}")
|
||||
continue
|
||||
|
||||
symlink_path = sub_path
|
||||
sub_path = sub_path.resolve()
|
||||
|
||||
if sub_path in stats.visited:
|
||||
print(f"Skipping already visited path: {sub_path}")
|
||||
continue
|
||||
|
||||
stats.visited.add(sub_path)
|
||||
|
||||
if query.ignore_patterns and _should_exclude(sub_path, query.local_path, query.ignore_patterns):
|
||||
continue
|
||||
|
||||
if query.include_patterns and not _should_include(sub_path, query.local_path, query.include_patterns):
|
||||
continue
|
||||
|
||||
if sub_path.is_file():
|
||||
if sub_path.is_symlink():
|
||||
_process_symlink(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
|
||||
elif sub_path.is_file():
|
||||
_process_file(path=sub_path, parent_node=node, stats=stats, local_path=query.local_path)
|
||||
elif sub_path.is_dir():
|
||||
|
||||
|
|
@ -216,11 +197,6 @@ def _process_node(
|
|||
depth=node.depth + 1,
|
||||
)
|
||||
|
||||
# rename the subdir to reflect the symlink name
|
||||
if symlink_path:
|
||||
child_directory_node.name = symlink_path.name
|
||||
child_directory_node.path_str = str(symlink_path)
|
||||
|
||||
_process_node(
|
||||
node=child_directory_node,
|
||||
query=query,
|
||||
|
|
@ -230,13 +206,41 @@ def _process_node(
|
|||
node.size += child_directory_node.size
|
||||
node.file_count += child_directory_node.file_count
|
||||
node.dir_count += 1 + child_directory_node.dir_count
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unexpected error: {sub_path} is neither a file nor a directory")
|
||||
print(f"Warning: {sub_path} is an unknown file type, skipping")
|
||||
|
||||
node.sort_children()
|
||||
|
||||
|
||||
def _process_symlink(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:
    """
    Record a symlink as a child of its parent directory node.

    The link is stored as a ``SYMLINK`` node under ``parent_node`` and counted as one file, both on the parent
    and in the global statistics. The link is not resolved or read here, and this function does not modify any
    size totals.

    Parameters
    ----------
    path : Path
        The full path of the symlink itself (not of its target).
    parent_node : FileSystemNode
        The directory node that receives the new child.
    stats : FileSystemStats
        Statistics tracking object; its ``total_files`` counter is incremented by one.
    local_path : Path
        The base path of the repository or directory being processed; the node's ``path_str`` is ``path``
        expressed relative to it.
    """
    # Relative display path; Path.relative_to raises ValueError if `path` is not located under `local_path`.
    relative_path = path.relative_to(local_path)

    symlink_node = FileSystemNode(
        name=path.name,
        type=FileSystemNodeType.SYMLINK,
        path_str=str(relative_path),
        path=path,
        depth=parent_node.depth + 1,
    )

    # A symlink counts as a single file entry in the totals.
    stats.total_files += 1
    parent_node.children.append(symlink_node)
    parent_node.file_count += 1
|
||||
|
||||
|
||||
def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStats, local_path: Path) -> None:
|
||||
"""
|
||||
Process a file in the file system.
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ def format_node(node: FileSystemNode, query: IngestionQuery) -> Tuple[str, str,
|
|||
|
||||
if node.type == FileSystemNodeType.DIRECTORY:
|
||||
summary += f"Files analyzed: {node.file_count}\n"
|
||||
else:
|
||||
elif node.type == FileSystemNodeType.FILE:
|
||||
summary += f"File: {node.name}\n"
|
||||
summary += f"Lines: {len(node.content.splitlines()):,}\n"
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ def _gather_file_contents(node: FileSystemNode) -> str:
|
|||
str
|
||||
The concatenated content of all files under the given node.
|
||||
"""
|
||||
if node.type == FileSystemNodeType.FILE:
|
||||
if node.type != FileSystemNodeType.DIRECTORY:
|
||||
return node.content_string
|
||||
|
||||
# Recursively gather contents of all files under the current directory
|
||||
|
|
@ -142,6 +142,8 @@ def _create_tree_structure(query: IngestionQuery, node: FileSystemNode, prefix:
|
|||
display_name = node.name
|
||||
if node.type == FileSystemNodeType.DIRECTORY:
|
||||
display_name += "/"
|
||||
elif node.type == FileSystemNodeType.SYMLINK:
|
||||
display_name += " -> " + node.path.readlink().name
|
||||
|
||||
tree_str += f"{prefix}{current_prefix}{display_name}\n"
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ class FileSystemNodeType(Enum):
|
|||
|
||||
DIRECTORY = auto()
|
||||
FILE = auto()
|
||||
SYMLINK = auto()
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -91,7 +92,8 @@ class FileSystemNode: # pylint: disable=too-many-instance-attributes
|
|||
"""
|
||||
parts = [
|
||||
SEPARATOR,
|
||||
f"File: {str(self.path_str).replace(os.sep, '/')}",
|
||||
f"{self.type.name}: {str(self.path_str).replace(os.sep, '/')}"
|
||||
+ (f" -> {self.path.readlink().name}" if self.type == FileSystemNodeType.SYMLINK else ""),
|
||||
SEPARATOR,
|
||||
f"{self.content}",
|
||||
]
|
||||
|
|
@ -116,6 +118,9 @@ class FileSystemNode: # pylint: disable=too-many-instance-attributes
|
|||
if self.type == FileSystemNodeType.DIRECTORY:
|
||||
raise ValueError("Cannot read content of a directory node")
|
||||
|
||||
if self.type == FileSystemNodeType.SYMLINK:
|
||||
return ""
|
||||
|
||||
if not is_text_file(self.path):
|
||||
return "[Non-text file]"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue