validação de regras e proxy

This commit is contained in:
Renan Bernordi 2025-05-27 23:20:22 -03:00
parent b283965299
commit 3e99e34fa7
8 changed files with 176 additions and 17 deletions

1
.gitignore vendored
View file

@ -4,6 +4,7 @@ composer.lock
app/logs/*.log
app/cache/*.gz
app/cache/database/.sqlite
app/cache/*.json
TODO.md
node_modules

View file

@ -63,7 +63,7 @@ RUN echo '0 * * * * root php "/app/bin/cleanup" >> /app/logs/cleanup.log 2>&1' >
RUN echo '0 * * * * root php "/app/bin/proxy" >> /app/logs/proxy.log 2>&1' >> /etc/crontab
# Run proxy list check
RUN '/app/bin/proxy'
# Shell-form RUN: do not quote the whole command, otherwise the shell looks for
# a single executable literally named "php /app/bin/proxy" and the build fails.
RUN php /app/bin/proxy
EXPOSE 80

View file

@ -47,3 +47,11 @@ DEBUG=false
# Number of days to keep cache files (*.gz)
# If not set, no files will be cleaned
CLEANUP_DAYS=7
# Proxy List Configuration
# URL to download proxy list from (used by bin/proxy script)
# The proxy list should contain proxies in one of these formats:
# 1. http://USER:PASSWORD@HOST:PORT
# 2. IP:PORT:USER:PASSWORD
# Example: PROXY_LIST=https://example.com/proxy-list.txt
PROXY_LIST=

View file

@ -93,6 +93,35 @@ class Rules
return $this->getGlobalRules();
}
/**
 * Checks whether any configured domain rule pattern applies to a domain.
 *
 * A match is reported when the base domain of $domain equals the base domain
 * of a configured pattern, or when one of the parts returned by
 * getDomainParts() equals the base domain of a configured pattern.
 *
 * @param string $domain Target domain
 * @return bool True when at least one domain rule pattern matches, false otherwise
 */
public function hasDomainRules($domain)
{
    $domainParts = $this->getDomainParts($domain);

    // Normalize every configured pattern once, instead of recomputing the
    // base domain on each comparison inside nested loops.
    // NOTE(review): assumes getBaseDomain() has no side effects — confirm.
    $patternBases = [];
    foreach ($this->domainRules as $pattern => $_) {
        $patternBases[] = $this->getBaseDomain($pattern);
    }

    if ($patternBases === []) {
        return false;
    }

    // Check for exact domain match first (strict comparison, like ===)
    if (in_array($this->getBaseDomain($domain), $patternBases, true)) {
        return true;
    }

    // Check for partial domain matches
    foreach ($domainParts as $part) {
        if (in_array($part, $patternBases, true)) {
            return true;
        }
    }

    return false;
}
/**
* Combines domain rules with global configuration
* @param array $rules Domain-specific rules
@ -110,12 +139,14 @@ class Rules
if (isset($excludeGlobalRules[$ruleType])) {
if (is_assoc_array($globalTypeRules)) {
$mergedRules[$ruleType] = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$result = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$mergedRules[$ruleType] = is_array($result) ? $result : [];
} else {
$mergedRules[$ruleType] = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$result = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$mergedRules[$ruleType] = is_array($result) ? $result : [];
}
} else {
$mergedRules[$ruleType] = $globalTypeRules;
$mergedRules[$ruleType] = is_array($globalTypeRules) ? $globalTypeRules : [];
}
}
@ -128,10 +159,13 @@ class Rules
}
if (in_array($ruleType, ['cookies', 'headers'])) {
$mergedRules[$ruleType] = array_merge($mergedRules[$ruleType], $domainTypeRules);
$mergedRules[$ruleType] = array_merge(
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
is_array($domainTypeRules) ? $domainTypeRules : []
);
} else {
$mergedRules[$ruleType] = array_values(array_unique(array_merge(
$mergedRules[$ruleType],
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
(array)$domainTypeRules
)));
}

View file

@ -73,14 +73,19 @@ class URLAnalyzer extends URLAnalyzerBase
$this->error->throwError(self::ERROR_BLOCKED_DOMAIN, '');
}
// Check HTTP status and handle any errors
$redirectInfo = $this->utils->checkStatus($url);
if ($redirectInfo['httpCode'] !== 200) {
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
if ($redirectInfo['httpCode'] === 404) {
$this->error->throwError(self::ERROR_NOT_FOUND, '');
} else {
$this->error->throwError(self::ERROR_HTTP_ERROR, (string)$redirectInfo['httpCode']);
// Check if domain has specific rules by looking for domain-specific configurations
$hasCustomRules = $this->hasDomainRules($host);
// Check HTTP status and handle any errors only if domain doesn't have custom rules
if (!$hasCustomRules) {
$redirectInfo = $this->utils->checkStatus($url);
if ($redirectInfo['httpCode'] !== 200) {
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
if ($redirectInfo['httpCode'] === 404) {
$this->error->throwError(self::ERROR_NOT_FOUND, '');
} else {
$this->error->throwError(self::ERROR_HTTP_ERROR, (string)$redirectInfo['httpCode']);
}
}
}

View file

@ -113,4 +113,14 @@ class URLAnalyzerBase
{
return $this->rules->getDomainRules($domain);
}
/**
 * Check if domain has specific rules
 *
 * Delegates to hasDomainRules() on the rules object held in $this->rules.
 *
 * @param string $domain The domain host to check
 * @return bool True if domain has custom rules, false otherwise
 */
protected function hasDomainRules($domain)
{
return $this->rules->hasDomainRules($domain);
}
}

View file

@ -18,7 +18,17 @@ class URLAnalyzerUtils extends URLAnalyzerBase
$curl->setOpt(CURLOPT_TIMEOUT, 5);
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setOpt(CURLOPT_NOBODY, true);
$curl->setUserAgent($this->getRandomUserAgent());
// Google Public DNS resolvers: primary 8.8.8.8, secondary 8.8.4.4
$curl->setOpt(CURLOPT_DNS_SERVERS, '8.8.8.8,8.8.4.4');
$curl->setHeaders([
'User-Agent' => 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
'Cache-Control' => 'no-cache',
'Pragma' => 'no-cache',
'DNT' => '1',
'X-Forwarded-For' => '66.249.' . rand(64, 95) . '.' . rand(1, 254),
'From' => 'googlebot(at)googlebot.com'
]);
$curl->get($url);
if ($curl->error) {

View file

@ -4,7 +4,7 @@
/**
* Proxy List Cache Updater
*
* Fetches proxy list from the PROXY_LIST environment variable
* Downloads proxy list from the URL specified in the PROXY_LIST environment variable
* and stores it in the cache directory for reuse.
* This script should be run daily via cron to keep the proxy list updated.
*
@ -17,6 +17,7 @@ require_once __DIR__ . '/../app/vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
use Curl\Curl;
$climate = new CLImate();
$climate->bold()->out('Proxy List Cache Updater');
@ -40,9 +41,20 @@ if (!isset($_ENV['PROXY_LIST']) || empty($_ENV['PROXY_LIST'])) {
exit(0);
}
$proxyList = $_ENV['PROXY_LIST'];
$proxyListUrl = $_ENV['PROXY_LIST'];
$proxyCachePath = CACHE_DIR . '/proxy_list.json';
// Download proxy list from URL
$climate->out('Downloading proxy list from: ' . $proxyListUrl);
$proxyList = downloadProxyList($proxyListUrl, $climate);
if ($proxyList === false) {
$climate->red()->out('Failed to download proxy list from URL: ' . $proxyListUrl);
exit(1);
}
$climate->green()->out('Proxy list downloaded successfully (' . strlen($proxyList) . ' bytes)');
if (!is_dir(CACHE_DIR)) {
if (!mkdir(CACHE_DIR, 0755, true)) {
$climate->red()->out('Failed to create cache directory: ' . CACHE_DIR);
@ -103,3 +115,82 @@ function parseProxyList($proxyListString) {
return $proxies;
}
/**
 * Download proxy list from URL using php-curl-class
 *
 * Only an HTTP 200 response is treated as success. Transport errors, any
 * other status code and exceptions make the function return false. Progress
 * and error messages are written to $climate when one is given; without it,
 * transport errors and exceptions are sent to error_log().
 *
 * @param string $url URL to download proxy list from
 * @param CLImate|null $climate CLImate instance for output
 * @return string|false Downloaded content or false on failure
 */
function downloadProxyList($url, $climate = null) {
    $curl = new Curl();

    // Report a failure on the console when available, otherwise in the PHP error log
    $reportError = static function ($message) use ($climate) {
        if ($climate) {
            $climate->red()->out($message);
        } else {
            error_log($message);
        }
    };

    // Configure cURL options
    $curl->setTimeout(30);
    $curl->setConnectTimeout(10);
    $curl->setUserAgent('Marreta Proxy Updater/1.0');
    $curl->setHeader('Accept', 'text/plain, text/html, */*');
    // An empty string makes libcurl advertise every encoding it supports AND
    // decode the body transparently. A hand-written Accept-Encoding header
    // only advertises compression, so a compressed body could be returned raw.
    $curl->setOpt(CURLOPT_ENCODING, '');
    $curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
    $curl->setOpt(CURLOPT_MAXREDIRS, 3);
    // NOTE(review): TLS peer/host verification is disabled, so the downloaded
    // list (which may carry proxy credentials) can be tampered with in transit.
    // Kept as-is for compatibility — consider enabling verification.
    $curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
    $curl->setOpt(CURLOPT_SSL_VERIFYHOST, false);

    try {
        if ($climate) {
            $climate->out('Making HTTP request with php-curl-class...');
        }

        $curl->get($url);

        if ($curl->error) {
            $reportError('cURL request failed: ' . $curl->errorMessage . ' (Code: ' . $curl->errorCode . ')');
            return false;
        }

        $statusCode = $curl->httpStatusCode;
        if ($climate) {
            $climate->out('HTTP Status Code: ' . $statusCode);
        }

        if ($statusCode !== 200) {
            if ($climate) {
                $climate->yellow()->out('Unexpected HTTP status code: ' . $statusCode);
            }
            return false;
        }

        $content = $curl->response;
        if (!is_string($content)) {
            // php-curl-class may decode some content types (e.g. JSON) into
            // non-string values; callers expect the plain text body.
            $content = (string) $curl->rawResponse;
        }

        if ($climate) {
            $contentType = $curl->responseHeaders['Content-Type'] ?? 'unknown';
            $climate->out('Content-Type: ' . $contentType);
            $climate->out('Content-Length: ' . strlen($content) . ' bytes');
        }

        return $content;
    } catch (\Exception $e) {
        $reportError('Unexpected error during download: ' . $e->getMessage());
        return false;
    } finally {
        // Always release the cURL handle, on success and on every failure path
        $curl->close();
    }
}