Compare commits

...

6 commits
2.0.3 ... main

Author SHA1 Message Date
Renan Bernordi
f09a861cd1 novas regras de dominios, issue 2025-03-04 17:51:15 -03:00
Renan Bernordi
7d449b5229 delete sqlite 2025-03-04 17:50:07 -03:00
Renan Bernordi
5ca8403afc função de limpeza de cache 2025-02-28 17:15:10 -03:00
Renan Bernordi
91176050c0 adicionada ferramenta para limpar cache 2025-02-28 11:29:46 -03:00
Renan Bernordi
abb1966b33 ajuste (Package 'sqlite3', required by 'virtual:world', not found) 2025-02-28 11:01:09 -03:00
Renan Bernordi
badd23ba7c migrado do redis para sqlite, no futuro tera rotinas para limpar caches 2025-02-28 10:55:38 -03:00
13 changed files with 433 additions and 151 deletions

1
.gitignore vendored
View file

@ -3,6 +3,7 @@ composer.lock
.env
app/logs/*.log
app/cache/*.gz
app/cache/database/.sqlite
TODO.md
node_modules

View file

@ -10,11 +10,11 @@ RUN apt-get update && apt-get install -y \
zip \
git \
htop \
cron \
libzip-dev \
libhiredis-dev \
&& docker-php-ext-install zip opcache \
&& pecl install redis \
&& docker-php-ext-enable redis opcache \
libsqlite3-dev \
&& docker-php-ext-install zip opcache pdo_sqlite \
&& docker-php-ext-enable opcache \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# Stage 1: Build stage
@ -46,15 +46,21 @@ COPY default.conf /etc/nginx/sites-available/default
# Copy and configure initialization script permissions
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
COPY bin/cleanup /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh \
&& chmod +x /usr/local/bin/cleanup
# Create cache and logs folders
RUN mkdir -p /app/cache /app/logs
# Create cache, database, and logs folders
RUN mkdir -p /app/cache /app/cache/database /app/logs
# Configure base permissions for /app directory
RUN chown -R www-data:www-data /app \
&& chmod -R 755 /app
# Configure Cron
RUN touch /app/logs/cron.log
RUN echo '0 * * * * root php "/app/bin/cleanup" >> /app/logs/cron.log 2>&1' >> /etc/crontab
EXPOSE 80
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

View file

@ -40,5 +40,10 @@ S3_ENDPOINT=
# Selenium Configuration
SELENIUM_HOST=localhost:4444
# Debug Settings
DEBUG=false
# Debug Settings
DEBUG=false
# Cache Cleanup Settings
# Number of days to keep cache files (*.gz)
# If not set, no files will be cleaned
CLEANUP_DAYS=7

0
app/cache/database/.gitkeep vendored Normal file
View file

View file

@ -5,7 +5,8 @@
"php-curl-class/php-curl-class": "^11.0",
"php-webdriver/webdriver": "^1.15",
"monolog/monolog": "^3.8.1",
"nikic/fast-route": "^1.3"
"nikic/fast-route": "^1.3",
"league/climate": "^3.8"
},
"autoload": {
"psr-4": {

View file

@ -30,6 +30,7 @@ try {
define('SITE_NAME', $_ENV['SITE_NAME']);
define('SITE_DESCRIPTION', $_ENV['SITE_DESCRIPTION']);
define('SITE_URL', $_ENV['SITE_URL']);
define('CLEANUP_DAYS', $_ENV['CLEANUP_DAYS'] ?? 0);
// Optional settings with defaults
define('DNS_SERVERS', $_ENV['DNS_SERVERS'] ?? '1.1.1.1, 8.8.8.8');
@ -38,11 +39,6 @@ try {
define('CACHE_DIR', __DIR__ . '/cache');
define('LANGUAGE', $_ENV['LANGUAGE'] ?? 'pt-br');
// Redis connection settings
define('REDIS_HOST', $_ENV['REDIS_HOST'] ?? 'localhost');
define('REDIS_PORT', $_ENV['REDIS_PORT'] ?? 6379);
define('REDIS_PREFIX', $_ENV['REDIS_PREFIX'] ?? 'marreta:');
// Logging configuration
define('LOG_LEVEL', $_ENV['LOG_LEVEL'] ?? 'WARNING'); // DEBUG, INFO, WARNING, ERROR, CRITICAL
define('LOG_DAYS_TO_KEEP', 7);

View file

@ -40,6 +40,12 @@ return [
'removeElementsByTag' => ['style'],
'removeCustomAttr' => ['hidden','data-*']
],
'wired.com' => [
'scriptTagRemove' => ['.js'],
],
'newyorker.com' => [
'scriptTagRemove' => ['.js'],
],
'globo.com' => [
'idElementRemove' => ['cookie-banner-lgpd', 'paywall-cpt', 'mc-read-more-wrapper', 'paywall-cookie-content', 'paywall-cpt'],
'classElementRemove' => ['banner-lgpd', 'article-related-link__title', 'article-related-link__picture', 'paywall-denied', 'banner-subscription'],

View file

@ -5,7 +5,7 @@ namespace Inc;
use Inc\Cache\CacheStorageInterface;
use Inc\Cache\DiskStorage;
use Inc\Cache\S3Storage;
use Inc\Cache\RedisStorage;
use Inc\Cache\SQLiteStorage;
/**
* System cache management with multiple storage backends (disk/S3)
@ -17,18 +17,18 @@ class Cache
/** @var CacheStorageInterface Cache storage implementation */
private $storage;
/** @var RedisStorage Redis instance for file counting */
private $redisStorage;
/** @var SQLiteStorage SQLite instance for file counting */
private $sqliteStorage;
/**
* Initializes storage based on configuration
* Uses S3Storage if configured and enabled
* Defaults to DiskStorage otherwise
* Defaults to SQLiteStorage otherwise (which delegates to DiskStorage)
*/
public function __construct()
{
$this->redisStorage = new RedisStorage(CACHE_DIR);
$this->sqliteStorage = new SQLiteStorage(CACHE_DIR);
if (defined('S3_CACHE_ENABLED') && S3_CACHE_ENABLED === true) {
$this->storage = new S3Storage([
'key' => S3_ACCESS_KEY,
@ -40,14 +40,14 @@ class Cache
'endpoint' => defined('S3_ENDPOINT') ? S3_ENDPOINT : null
]);
} else {
$this->storage = new DiskStorage(CACHE_DIR);
$this->storage = $this->sqliteStorage;
}
}
/** Gets total number of cached files */
public function getCacheFileCount(): int
{
return $this->redisStorage->countCacheFiles();
return $this->sqliteStorage->countCacheFiles();
}
/**

View file

@ -1,128 +0,0 @@
<?php
namespace Inc\Cache;
use Redis;
/**
* Redis-based cache storage implementation
* Provides cache storage and file counting functionality using Redis
*/
class RedisStorage implements CacheStorageInterface
{
/**
* @var \Redis|null Redis client instance
*/
private $redis;
/**
* @var string Cache directory for file counting
*/
private $cacheDir;
/**
* Class constructor
* @param string $cacheDir Base directory for cache storage
*/
public function __construct(string $cacheDir)
{
$this->cacheDir = $cacheDir;
// Try to initialize Redis connection
try {
$this->redis = new \Redis();
$this->redis->connect(REDIS_HOST, REDIS_PORT, 2.5);
$this->redis->setOption(\Redis::OPT_PREFIX, REDIS_PREFIX);
} catch (\Exception $e) {
$this->redis = null;
}
}
/**
* Counts the number of files in the cache directory
* @return int Number of files in the cache directory
*/
public function countCacheFiles(): int
{
$cacheCountKey = 'cache_file_count';
if ($this->redis !== null) {
$cachedCount = $this->redis->get($cacheCountKey);
if ($cachedCount !== false) {
return (int)$cachedCount;
}
}
$fileCount = 0;
$iterator = new \FilesystemIterator($this->cacheDir);
foreach ($iterator as $file) {
if ($file->isFile() && $file->getExtension() === 'gz') {
$fileCount++;
}
}
if ($this->redis !== null) {
$this->redis->set($cacheCountKey, $fileCount);
}
return $fileCount;
}
/**
* Updates the file count in Redis
* @param int $count Number of files
*/
public function updateCacheFileCount(int $count): void
{
if ($this->redis !== null) {
$this->redis->set('cache_file_count', $count);
}
}
/**
* Checks if cache exists for a given ID
* @param string $id Cache ID
* @return bool True if cache exists, false otherwise
*/
public function exists(string $id): bool
{
return $this->redis !== null ? $this->redis->exists($id) : false;
}
/**
* Retrieves cached content
* @param string $id Cache ID
* @return string|null Cached content or null if not found
*/
public function get(string $id): ?string
{
if ($this->redis === null) {
return null;
}
$content = $this->redis->get($id);
return $content === false ? null : $content;
}
/**
* Stores content in cache
* @param string $id Cache ID
* @param string $content Content to be stored
* @return bool True if successful, false otherwise
*/
public function set(string $id, string $content): bool
{
if ($this->redis === null) {
return false;
}
$result = $this->redis->set($id, $content);
if ($result) {
$currentCount = $this->redis->get('cache_file_count') ?: 0;
$this->redis->set('cache_file_count', $currentCount + 1);
}
return $result;
}
}

View file

@ -0,0 +1,177 @@
<?php
namespace Inc\Cache;
use PDO;
use PDOException;
/**
* SQLite-based cache storage implementation
* Provides file counting functionality using SQLite
* Delegates actual cache storage to DiskStorage
*/
class SQLiteStorage implements CacheStorageInterface
{
/**
* @var PDO|null SQLite connection
*/
private $db;
/**
* @var string Cache directory for file counting
*/
private $cacheDir;
/**
* @var string Path to SQLite database file
*/
private $dbPath;
/**
* @var DiskStorage Disk storage for cache entries
*/
private $diskStorage;
/**
* Class constructor
* @param string $cacheDir Base directory for cache storage
*/
public function __construct(string $cacheDir)
{
$this->cacheDir = $cacheDir;
$this->diskStorage = new DiskStorage($cacheDir);
// Ensure database directory exists
$dbDir = $cacheDir . '/database';
if (!is_dir($dbDir)) {
mkdir($dbDir, 0755, true);
}
$this->dbPath = $dbDir . '/.sqlite';
// Try to initialize SQLite connection
try {
$this->db = new PDO('sqlite:' . $this->dbPath);
$this->db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
// Create tables if they don't exist
$this->initDatabase();
// If database file was just created, count cache files
if (!file_exists($this->dbPath) || filesize($this->dbPath) < 1024) {
$this->countCacheFiles();
}
} catch (PDOException $e) {
$this->db = null;
}
}
/**
* Initialize database tables
*/
private function initDatabase(): void
{
$this->db->exec("
CREATE TABLE IF NOT EXISTS stats (
key TEXT PRIMARY KEY,
value INTEGER NOT NULL
)
");
}
/**
* Counts the number of files in the cache directory
* @return int Number of files in the cache directory
*/
public function countCacheFiles(): int
{
if ($this->db !== null) {
try {
$stmt = $this->db->query("SELECT value FROM stats WHERE key = 'count'");
$result = $stmt->fetch(PDO::FETCH_ASSOC);
if ($result) {
return (int)$result['value'];
}
} catch (PDOException $e) {
// Continue to count files if query fails
}
}
$fileCount = 0;
$iterator = new \FilesystemIterator($this->cacheDir);
foreach ($iterator as $file) {
if ($file->isFile() && $file->getExtension() === 'gz') {
$fileCount++;
}
}
if ($this->db !== null) {
$this->updateCacheFileCount($fileCount);
}
return $fileCount;
}
/**
* Updates the file count in SQLite
* @param int $count Number of files
*/
public function updateCacheFileCount(int $count): void
{
if ($this->db !== null) {
try {
$stmt = $this->db->prepare("
INSERT OR REPLACE INTO stats (key, value)
VALUES ('count', :count)
");
$stmt->bindParam(':count', $count, PDO::PARAM_INT);
$stmt->execute();
} catch (PDOException $e) {
// Silently fail if update fails
}
}
}
/**
* Checks if cache exists for a given ID
* Delegates to DiskStorage
* @param string $id Cache ID
* @return bool True if cache exists, false otherwise
*/
public function exists(string $id): bool
{
return $this->diskStorage->exists($id);
}
/**
* Retrieves cached content
* Delegates to DiskStorage
* @param string $id Cache ID
* @return string|null Cached content or null if not found
*/
public function get(string $id): ?string
{
return $this->diskStorage->get($id);
}
/**
* Stores content in cache
* Delegates to DiskStorage and updates file count
* @param string $id Cache ID
* @param string $content Content to be stored
* @return bool True if successful, false otherwise
*/
public function set(string $id, string $content): bool
{
$result = $this->diskStorage->set($id, $content);
if ($result) {
// Increment cache file count
$currentCount = $this->countCacheFiles();
$this->updateCacheFileCount($currentCount + 1);
}
return $result;
}
}

211
bin/cleanup Normal file
View file

@ -0,0 +1,211 @@
#!/usr/bin/env php
<?php
/**
* Cache Cleanup Script
*
* Removes *.gz files from the cache directory that are older than the number
* of days specified in the CLEANUP_DAYS environment variable.
* If CLEANUP_DAYS is not set, no files will be cleaned.
*/
require_once __DIR__ . '/../app/vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
use Aws\S3\S3Client;
use Aws\Exception\AwsException;
$climate = new CLImate();
$climate->bold()->out('Cache Cleanup Tool');
$climate->br();
$cleanupDays = 0;
try {
$dotenv = Dotenv::createImmutable(__DIR__ . '/../app');
$dotenv->load();
$climate->out('Environment variables loaded');
$cleanupDays = $_ENV['CLEANUP_DAYS'];
} catch (\Exception $e) {
$climate->yellow()->out('Warning: ' . $e->getMessage());
exit(0);
}
if (!defined('CACHE_DIR')) {
define('CACHE_DIR', __DIR__ . '/../app/cache');
}
if ($cleanupDays == 0) {
$climate->yellow()->out('CLEANUP_DAYS variable not set or 0. No files will be cleaned.');
exit(0);
}
$cleanupDays = (int)$cleanupDays;
if ($cleanupDays <= 0) {
$climate->red()->out('CLEANUP_DAYS must be a positive integer. No files will be cleaned.');
exit(1);
};
// Calculate the cutoff timestamp
$cutoffTime = time() - ($cleanupDays * 86400);
// Check if S3 cache is enabled
$s3CacheEnabled = isset($_ENV['S3_CACHE_ENABLED']) && filter_var($_ENV['S3_CACHE_ENABLED'], FILTER_VALIDATE_BOOLEAN);
if ($s3CacheEnabled) {
// Clean S3 cache
cleanS3Cache($climate, $cutoffTime, $cleanupDays);
} else {
// Clean local disk cache
cleanDiskCache($climate, $cutoffTime, $cleanupDays);
}
/**
* Clean cache files from S3 bucket
*
* @param CLImate $climate CLImate instance for output
* @param int $cutoffTime Timestamp to use as cutoff for file age
* @param int $cleanupDays Number of days to keep files
*/
function cleanS3Cache($climate, $cutoffTime, $cleanupDays) {
$requiredVars = ['S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_BUCKET'];
foreach ($requiredVars as $var) {
if (!isset($_ENV[$var]) || empty($_ENV[$var])) {
$climate->red()->out("$var environment variable is required for S3 cache cleaning.");
exit(1);
}
}
$climate->out("S3 cache enabled. Cleaning S3 cache files older than {$cleanupDays} days...");
$clientConfig = [
'version' => 'latest',
'region' => $_ENV['S3_REGION'] ?? 'us-east-1',
'credentials' => [
'key' => $_ENV['S3_ACCESS_KEY'],
'secret' => $_ENV['S3_SECRET_KEY'],
]
];
if (!empty($_ENV['S3_ENDPOINT'])) {
$clientConfig['endpoint'] = $_ENV['S3_ENDPOINT'];
$clientConfig['use_path_style_endpoint'] = true;
}
try {
$s3Client = new S3Client($clientConfig);
$bucket = $_ENV['S3_BUCKET'];
$prefix = $_ENV['S3_FOLDER'] ?? 'cache/';
$climate->out("Listing objects in bucket: {$bucket} with prefix: {$prefix}");
$objects = [];
$marker = null;
do {
$params = [
'Bucket' => $bucket,
'Prefix' => $prefix,
'MaxKeys' => 1000
];
if ($marker) {
$params['Marker'] = $marker;
}
$result = $s3Client->listObjects($params);
if (isset($result['Contents'])) {
foreach ($result['Contents'] as $object) {
if (substr($object['Key'], -3) === '.gz') {
$objects[] = $object;
}
}
}
$marker = $result['NextMarker'] ?? ($result['IsTruncated'] ? end($result['Contents'])['Key'] : null);
} while ($marker);
$totalObjects = count($objects);
$climate->out("Found {$totalObjects} .gz objects in S3 bucket.");
if ($totalObjects === 0) {
$climate->out('No .gz objects found in S3 bucket.');
return;
}
$progress = $climate->progress()->total($totalObjects);
$deletedObjects = 0;
foreach ($objects as $index => $object) {
$progress->current($index + 1);
$lastModified = strtotime($object['LastModified']);
if ($lastModified < $cutoffTime) {
try {
$s3Client->deleteObject([
'Bucket' => $bucket,
'Key' => $object['Key']
]);
$deletedObjects++;
} catch (AwsException $e) {
$climate->red()->out("Failed to delete: " . $object['Key'] . " - " . $e->getMessage());
}
}
}
$climate->br();
$climate->green()->out("S3 cleanup complete: {$deletedObjects} objects deleted.");
} catch (AwsException $e) {
$climate->red()->out("AWS Error: " . $e->getMessage());
exit(1);
}
}
/**
* Clean cache files from local disk
*
* @param CLImate $climate CLImate instance for output
* @param int $cutoffTime Timestamp to use as cutoff for file age
* @param int $cleanupDays Number of days to keep files
*/
function cleanDiskCache($climate, $cutoffTime, $cleanupDays) {
$cacheDir = CACHE_DIR;
$climate->out("Cleaning cache files older than {$cleanupDays} days from: {$cacheDir}");
if (!is_dir($cacheDir)) {
$climate->red()->out("Cache directory not found: {$cacheDir}");
exit(1);
}
$gzFiles = glob($cacheDir . '/*.gz');
$totalFiles = count($gzFiles);
$deletedFiles = 0;
if ($totalFiles === 0) {
$climate->out('No .gz files found in cache directory.');
return;
}
$climate->out("Found {$totalFiles} .gz files in cache directory.");
$progress = $climate->progress()->total($totalFiles);
foreach ($gzFiles as $index => $file) {
$progress->current($index + 1);
$fileTime = filemtime($file);
if ($fileTime < $cutoffTime) {
if (unlink($file)) {
$deletedFiles++;
} else {
$climate->red()->out("Failed to delete: " . basename($file));
}
}
}
$climate->br();
$climate->green()->out("Disk cleanup complete: {$deletedFiles} files deleted.");
}

View file

@ -16,6 +16,7 @@ services:
- LANGUAGE=${LANGUAGE:-pt-br}
- LOG_LEVEL=${LOG_LEVEL:-WARNING}
- SELENIUM_HOST=${SELENIUM_HOST:-selenium-hub:4444}
- CLEANUP_DAYS=7 # Optional
restart: unless-stopped
# Selenium
selenium-hub:

View file

@ -54,6 +54,7 @@ log_success "Environment variables configured"
log_info "Adjusting directory permissions..."
mkdir -p /app/cache /app/logs # Ensures directories exist
mkdir -p /app/cache/database
chown -R www-data:www-data /app/cache /app/logs
chmod -R 775 /app/cache /app/logs
@ -108,6 +109,11 @@ nginx -g "daemon off;" &
sleep 3
check_nginx
# Starting Cron
log_info "Starting Cron..."
service cron restart
log_success "Cron started"
echo -e "\n${GREEN}=== Marreta initialized ===${NC}\n"
# Wait for any process to exit