#!/usr/bin/env python3
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
"""Scan a project directory for common security issues.

Usage:
    python scan_project.py /path/to/project [--format json|text]

Checks for:
    - Hardcoded secrets and credentials
    - Dangerous function calls (eval, exec, os.system, etc.)
    - Insecure configuration patterns
    - Missing security files (.gitignore, etc.)

The process exits with status 1 when at least one finding is reported and
0 otherwise, so the script can gate a CI job.
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path

# Per-extension rules: each entry is (regex, human-readable description).
# Rules are matched line by line with ``re.search``.
DANGEROUS_FUNCTIONS = {
    ".py": [
        (r"\beval\s*\(", "eval() can execute arbitrary code"),
        (r"\bexec\s*\(", "exec() can execute arbitrary code"),
        (r"\bos\.system\s*\(", "os.system() is vulnerable to command injection"),
        (r"\bos\.popen\s*\(", "os.popen() is vulnerable to command injection"),
        (
            r"subprocess\.\w+\(.*shell\s*=\s*True",
            "subprocess with shell=True is vulnerable to command injection",
        ),
        (
            r"\bpickle\.loads?\s*\(",
            "pickle.load() can execute arbitrary code during deserialization",
        ),
        (
            # The negative lookahead keeps calls that pass SafeLoader or
            # CSafeLoader on the same line from being reported.
            r"\byaml\.load\s*\((?!.*\bC?SafeLoader\b)[^)]*\)",
            "yaml.load() without SafeLoader can execute arbitrary code",
        ),
        (
            r'\.execute\s*\(\s*f["\']',
            "f-string in SQL execute() is vulnerable to SQL injection",
        ),
        (
            r"\.execute\s*\([^)]*%",
            "string formatting in SQL execute() is vulnerable to SQL injection",
        ),
        (
            r"\bmarkupsafe\.Markup\s*\(\s*f",
            "Markup() with f-string is vulnerable to XSS",
        ),
    ],
    ".js": [
        (r"\beval\s*\(", "eval() can execute arbitrary code"),
        (r"\bnew\s+Function\s*\(", "Function constructor can execute arbitrary code"),
        (r"\.innerHTML\s*=", "innerHTML assignment is vulnerable to XSS"),
        (r"\bdocument\.write\s*\(", "document.write() is vulnerable to XSS"),
        (
            r"child_process\.exec\s*\(",
            "child_process.exec() is vulnerable to command injection",
        ),
    ],
    ".ts": [
        (r"\beval\s*\(", "eval() can execute arbitrary code"),
        (r"\bnew\s+Function\s*\(", "Function constructor can execute arbitrary code"),
        (r"\.innerHTML\s*=", "innerHTML assignment is vulnerable to XSS"),
        (r"dangerouslySetInnerHTML", "dangerouslySetInnerHTML is vulnerable to XSS"),
    ],
    ".go": [
        (
            r'exec\.Command\s*\(\s*"sh"',
            "Shell execution is vulnerable to command injection",
        ),
        (
            r'db\.Query\s*\([^)]*\+',
            "String concatenation in SQL query is vulnerable to injection",
        ),
        (
            r'fmt\.Sprintf\s*\("SELECT',
            "String formatting in SQL query is vulnerable to injection",
        ),
    ],
    ".java": [
        (
            r"Runtime\.getRuntime\(\)\.exec\s*\(",
            "Runtime.exec() is vulnerable to command injection",
        ),
        (r"ObjectInputStream.*readObject", "Deserialization can execute arbitrary code"),
        (
            r'Statement.*execute\s*\([^)]*\+',
            "String concatenation in SQL is vulnerable to injection",
        ),
        (
            r"parseExpression\s*\(.*\)\.getValue",
            "SpEL expression evaluation can execute arbitrary code",
        ),
    ],
}

# React markup lives in .jsx/.tsx files, so without these aliases the
# dangerouslySetInnerHTML rule would almost never see the code it targets.
DANGEROUS_FUNCTIONS[".jsx"] = DANGEROUS_FUNCTIONS[".js"] + [
    (r"dangerouslySetInnerHTML", "dangerouslySetInnerHTML is vulnerable to XSS"),
]
DANGEROUS_FUNCTIONS[".tsx"] = DANGEROUS_FUNCTIONS[".ts"]

# Credential formats, checked in every scanned file regardless of extension.
SECRET_PATTERNS = [
    (r"AKIA[0-9A-Z]{16}", "AWS Access Key ID"),
    (r"gh[ps]_[A-Za-z0-9_]{36,}", "GitHub Token"),
    (r"glpat-[A-Za-z0-9\-]{20,}", "GitLab Token"),
    (r"xox[baprs]-[A-Za-z0-9\-]{10,}", "Slack Token"),
    (r"sk_live_[A-Za-z0-9]{24,}", "Stripe Secret Key"),
    (r"SG\.[A-Za-z0-9\-_]{22}\.[A-Za-z0-9\-_]{43}", "SendGrid API Key"),
    (r"AIza[0-9A-Za-z\-_]{35}", "Google API Key"),
    (r"npm_[A-Za-z0-9]{36}", "npm Token"),
    (r"pypi-[A-Za-z0-9\-_]{50,}", "PyPI Token"),
    (r"-----BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY-----", "Private Key"),
]

# Insecure settings, checked in every scanned file regardless of extension.
CONFIG_ISSUES = [
    (r"DEBUG\s*[=:]\s*[Tt]rue", "Debug mode enabled"),
    (r"ALLOWED_HOSTS\s*=\s*\[\s*['\"]?\*['\"]?\s*\]", "Unrestricted ALLOWED_HOSTS"),
    (r"CORS_ALLOW_ALL_ORIGINS\s*=\s*True", "CORS allows all origins"),
    (r"SSL_VERIFY\s*[=:]\s*[Ff]alse", "TLS verification disabled"),
    (r"verify\s*=\s*False", "TLS verification disabled"),
    (r"NODE_TLS_REJECT_UNAUTHORIZED\s*[=:]\s*['\"]?0", "TLS verification disabled"),
]

# Directory names that are never descended into.
SKIP_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    ".env",
    "dist",
    "build",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    "vendor",
    ".bundle",
}

# File extensions (lower-case) whose contents are never read.
SKIP_EXTENSIONS = {
    ".pyc", ".pyo", ".so", ".dylib", ".dll", ".exe",
    ".png", ".jpg", ".jpeg", ".gif", ".ico", ".svg",
    ".woff", ".woff2", ".ttf", ".eot",
    ".mp3", ".mp4",
    ".zip", ".tar", ".gz", ".jar", ".war",
}

MAX_FILE_SIZE = 1_000_000  # 1 MB

# Sort rank used by the text report; unknown severities sort last.
_SEVERITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3}


def should_skip(path: Path, root: Path) -> bool:
    """Return True if path lies in (or is) a directory listed in SKIP_DIRS.

    Only directory components are compared against SKIP_DIRS. The final
    component is included only when ``path`` itself is a directory, so a
    *file* that happens to be called ``.env`` or ``build`` is still scanned.

    Args:
        path: File or directory located under ``root``.
        root: Project root that ``path`` is made relative to.

    Returns:
        True when any relevant path component is a skipped directory name.

    Raises:
        ValueError: If ``path`` is not located under ``root``.
    """
    parts = path.relative_to(root).parts
    if not path.is_dir():
        # Drop the file name: SKIP_DIRS describes directories only.
        parts = parts[:-1]
    return any(part in SKIP_DIRS for part in parts)


def _match_rules(lines, rules, finding_type, severity, filepath,
                 skip_comments=False) -> list:
    """Return one finding per (line, rule) pair whose regex matches the line."""
    matches = []
    for line_num, line in enumerate(lines, 1):
        if skip_comments and line.lstrip().startswith(("#", "//")):
            continue
        for pattern, rule in rules:
            if re.search(pattern, line):
                matches.append({
                    "type": finding_type,
                    "severity": severity,
                    "file": str(filepath),
                    "line": line_num,
                    "rule": rule,
                    "snippet": line.strip()[:120],
                })
    return matches


def scan_file(filepath: Path) -> list:
    """Scan a single file for secrets, dangerous calls, and config issues.

    Files with an extension in SKIP_EXTENSIONS, files larger than
    MAX_FILE_SIZE bytes, and files that cannot be read yield no findings.

    Args:
        filepath: Path to the file to scan.

    Returns:
        List of finding dicts with type, severity, file, line, rule, snippet.
        Secrets come first, then dangerous calls, then config issues; each
        group is ordered by line number.
    """
    extension = filepath.suffix.lower()
    if extension in SKIP_EXTENSIONS:
        return []

    try:
        if filepath.stat().st_size > MAX_FILE_SIZE:
            return []
        # errors="ignore" drops undecodable bytes, so decoding cannot fail.
        content = filepath.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return []

    lines = content.splitlines()
    findings = _match_rules(lines, SECRET_PATTERNS, "secret", "critical", filepath)
    # Commented-out code is not reported as a dangerous call.
    findings += _match_rules(
        lines,
        DANGEROUS_FUNCTIONS.get(extension, []),
        "vulnerability",
        "high",
        filepath,
        skip_comments=True,
    )
    findings += _match_rules(lines, CONFIG_ISSUES, "config", "medium", filepath)
    return findings


def check_project_structure(project_dir: Path) -> list:
    """Check project root for .gitignore and sensitive files.

    Args:
        project_dir: Path to the project root.

    Returns:
        List of finding dicts for missing .gitignore or sensitive files.
    """
    findings = []
    gitignore = project_dir / ".gitignore"

    if not gitignore.is_file():
        findings.append({
            "type": "config",
            "severity": "medium",
            "file": str(project_dir),
            "line": 0,
            "rule": "Missing .gitignore file",
            "snippet": "",
        })
    else:
        try:
            content = gitignore.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Best effort, like scan_file: an unreadable file yields nothing.
            content = None
        if content is not None:
            # Comments and negations ("!pattern") do not ignore anything, so
            # they must not count as coverage for a sensitive pattern.
            entries = [
                entry
                for entry in (raw.strip() for raw in content.splitlines())
                if entry and not entry.startswith(("#", "!"))
            ]
            for sensitive in [".env", "*.pem", "*.key"]:
                if not any(sensitive in entry for entry in entries):
                    findings.append({
                        "type": "config",
                        "severity": "medium",
                        "file": str(gitignore),
                        "line": 0,
                        "rule": f".gitignore missing pattern: {sensitive}",
                        "snippet": "",
                    })

    # Check for sensitive files that should not be committed
    for name in [".env", ".env.local", ".env.production"]:
        target = project_dir / name
        if target.exists():
            findings.append({
                "type": "secret",
                "severity": "high",
                "file": str(target),
                "line": 0,
                "rule": f"Sensitive file present: {name}",
                "snippet": "",
            })

    return findings


def scan_project(project_dir) -> tuple:
    """Scan a project directory for security issues.

    Directories named in SKIP_DIRS are pruned from the walk rather than
    traversed and discarded. Directories and files are visited in sorted
    order so repeated runs report findings in the same order.

    Args:
        project_dir: Path or path string to the project root.

    Returns:
        Tuple of (list of findings, number of files scanned). The count
        includes files that scan_file() declines to read.

    Raises:
        SystemExit: With status 1 when ``project_dir`` is not a directory.
    """
    root = Path(project_dir).resolve()
    if not root.is_dir():
        print(f"Error: {project_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    all_findings = check_project_structure(root)
    scanned = 0

    for dirpath, dirnames, filenames in os.walk(root):
        # Assigning to the slice prunes the walk in place.
        dirnames[:] = sorted(name for name in dirnames if name not in SKIP_DIRS)
        for filename in sorted(filenames):
            filepath = Path(dirpath) / filename
            if not filepath.is_file():
                continue
            all_findings.extend(scan_file(filepath))
            scanned += 1

    return all_findings, scanned


def _count_by_severity(findings) -> dict:
    """Return a {severity: count} dict in first-seen order."""
    counts = {}
    for finding in findings:
        counts[finding["severity"]] = counts.get(finding["severity"], 0) + 1
    return counts


def format_text(findings, scanned):
    """Render findings as a plain-text report, most severe first.

    Args:
        findings: List of finding dicts; it is not modified.
        scanned: Number of files scanned.

    Returns:
        The report as a single newline-joined string.
    """
    # sorted() is stable and leaves the caller's list untouched.
    ordered = sorted(
        findings, key=lambda f: _SEVERITY_ORDER.get(f["severity"], 99)
    )
    counts = _count_by_severity(ordered)

    lines = [
        f"Scanned {scanned} files, found {len(ordered)} issue(s)",
        f" Critical: {counts.get('critical', 0)}",
        f" High: {counts.get('high', 0)}",
        f" Medium: {counts.get('medium', 0)}",
        f" Low: {counts.get('low', 0)}",
        "",
    ]

    for f in ordered:
        # Line 0 marks a project-level finding with no line to point at.
        loc = f"{f['file']}:{f['line']}" if f["line"] else f["file"]
        lines.append(f"[{f['severity'].upper()}] {f['rule']}")
        lines.append(f" Location: {loc}")
        if f["snippet"]:
            lines.append(f" Code: {f['snippet']}")
        lines.append("")

    return "\n".join(lines)


def format_json(findings, scanned):
    """Render findings as an indented JSON document.

    Args:
        findings: List of finding dicts, emitted in the order given.
        scanned: Number of files scanned.

    Returns:
        JSON string with scanned_files, total_findings, summary (count per
        severity) and findings.
    """
    return json.dumps({
        "scanned_files": scanned,
        "total_findings": len(findings),
        "summary": _count_by_severity(findings),
        "findings": findings,
    }, indent=2)


def main():
    """Parse arguments, run the scan, print the report and set the exit code."""
    parser = argparse.ArgumentParser(description="Scan a project for security issues")
    parser.add_argument("path", help="Project directory to scan")
    parser.add_argument(
        "--format", choices=["text", "json"], default="text", help="Output format"
    )
    args = parser.parse_args()

    findings, scanned = scan_project(args.path)

    if args.format == "json":
        print(format_json(findings, scanned))
    else:
        print(format_text(findings, scanned))

    # A non-zero status lets CI fail the build when anything was found.
    sys.exit(1 if findings else 0)


if __name__ == "__main__":
    main()