From 22e836b707886f6848802d7b0d75a8854cf48ddd Mon Sep 17 00:00:00 2001 From: Renan Bernordi Date: Thu, 26 Jun 2025 17:38:05 -0300 Subject: [PATCH] add dmca domains block --- README.en.md | 14 +++++++++++++ README.md | 14 +++++++++++++ app/config.php | 10 +++++++++ app/inc/URLAnalyzer.php | 26 +++++++++++++++++------- app/inc/URLAnalyzer/URLAnalyzerBase.php | 2 ++ app/inc/URLAnalyzer/URLAnalyzerError.php | 13 +++++++++--- app/languages/de-de.php | 4 ++++ app/languages/en.php | 4 ++++ app/languages/es.php | 4 ++++ app/languages/pt-br.php | 4 ++++ app/languages/ru-ru.php | 4 ++++ app/src/URLProcessor.php | 14 +++++++++++++ 12 files changed, 103 insertions(+), 10 deletions(-) diff --git a/README.en.md b/README.en.md index 9fd4617..6d5449a 100644 --- a/README.en.md +++ b/README.en.md @@ -66,6 +66,20 @@ Now just run `docker compose up -d` - S3 Cache: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3 - Maintenance: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance + +### 🛡️ DMCA + +To block domains in response to DMCA requests, create the file `app/cache/dmca_domains.json`: + +```json +[ + { + "host": "exemplo.com.br", + "message": "This content has been blocked on request" + } +] +``` + ## 🚀 Integrations - 🤖 **Telegram**: [Official Bot](https://t.me/leissoai_bot) diff --git a/README.md b/README.md index 6c118ff..1f2beb3 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ Instancia publica em [marreta.pcdomanual.com](https://marreta.pcdomanual.com)! - Remove elementos indesejados - Cache, cache! 
- Bloqueia domínios que você não quer +- Proteção DMCA com mensagens personalizadas - Permite configurar headers e cookies do seu jeito - PHP-FPM e OPcache - Suporte a Proxy @@ -66,6 +67,19 @@ Agora só rodar `docker compose up -d` - Cache S3: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3 - Manutenção: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance +### 🛡️ DMCA + +Para bloquear domínios por pedidos de DMCA, crie o arquivo `app/cache/dmca_domains.json`: + +```json +[ + { + "host": "exemplo.com.br", + "message": "Este conteúdo foi bloqueado a pedido" + } +] +``` + ## 🚀 Integrações - 🤖 **Telegram**: [Bot oficial](https://t.me/leissoai_bot) diff --git a/app/config.php b/app/config.php index 51710f6..af0dacc 100644 --- a/app/config.php +++ b/app/config.php @@ -66,6 +66,16 @@ try { define('BLOCKED_DOMAINS', require __DIR__ . '/data/blocked_domains.php'); define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php'); define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php'); + + // Load DMCA domains from JSON file + $dmcaDomainsFile = __DIR__ . '/cache/dmca_domains.json'; + if (file_exists($dmcaDomainsFile)) { + $dmcaDomainsJson = file_get_contents($dmcaDomainsFile); + $dmcaDomains = json_decode($dmcaDomainsJson, true); + define('DMCA_DOMAINS', is_array($dmcaDomains) ? $dmcaDomains : []); + } else { + define('DMCA_DOMAINS', []); + } } catch (Dotenv\Exception\ValidationException $e) { die('Environment Error: ' . 
$e->getMessage()); diff --git a/app/inc/URLAnalyzer.php b/app/inc/URLAnalyzer.php index 4ca8a69..1f73929 100644 --- a/app/inc/URLAnalyzer.php +++ b/app/inc/URLAnalyzer.php @@ -50,6 +50,25 @@ class URLAnalyzer extends URLAnalyzerBase */ public function analyze($url) { + // Extract and validate hostname + $host = parse_url($url, PHP_URL_HOST); + if (!$host) { + $this->error->throwError(self::ERROR_INVALID_URL, ''); + } + $originalHost = parse_url($url, PHP_URL_HOST); + $host = preg_replace('/^www\./', '', $host); + + // Check if domain is in DMCA list FIRST (before any HTTP requests) + foreach (DMCA_DOMAINS as $dmcaEntry) { + if (is_array($dmcaEntry) && isset($dmcaEntry['host'])) { + if ($dmcaEntry['host'] === $host || $dmcaEntry['host'] === $originalHost) { + Logger::getInstance()->logUrl($url, 'DMCA_DOMAIN'); + $customMessage = isset($dmcaEntry['message']) ? $dmcaEntry['message'] : ''; + $this->error->throwError(self::ERROR_DMCA_DOMAIN, $customMessage); + } + } + } + // Reset activated rules for new analysis $this->activatedRules = []; @@ -60,13 +79,6 @@ class URLAnalyzer extends URLAnalyzerBase return $this->process->processContent($rawContent, parse_url($url, PHP_URL_HOST), $url); } - // Extract and validate hostname - $host = parse_url($url, PHP_URL_HOST); - if (!$host) { - $this->error->throwError(self::ERROR_INVALID_URL, ''); - } - $host = preg_replace('/^www\./', '', $host); - // Check if domain is in blocked list if (in_array($host, BLOCKED_DOMAINS)) { Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN'); diff --git a/app/inc/URLAnalyzer/URLAnalyzerBase.php b/app/inc/URLAnalyzer/URLAnalyzerBase.php index c3f2c47..d9a27fc 100644 --- a/app/inc/URLAnalyzer/URLAnalyzerBase.php +++ b/app/inc/URLAnalyzer/URLAnalyzerBase.php @@ -22,6 +22,7 @@ class URLAnalyzerBase /** @var string Error constants for different failure scenarios */ const ERROR_INVALID_URL = 'INVALID_URL'; const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN'; + const ERROR_DMCA_DOMAIN = 'DMCA_DOMAIN'; const 
ERROR_NOT_FOUND = 'NOT_FOUND'; const ERROR_HTTP_ERROR = 'HTTP_ERROR'; const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR'; @@ -33,6 +34,7 @@ class URLAnalyzerBase protected $errorMap = [ self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'], self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'], + self::ERROR_DMCA_DOMAIN => ['code' => 403, 'message_key' => 'DMCA_DOMAIN'], self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'], self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'], self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'], diff --git a/app/inc/URLAnalyzer/URLAnalyzerError.php b/app/inc/URLAnalyzer/URLAnalyzerError.php index 49829c5..b6197ce 100644 --- a/app/inc/URLAnalyzer/URLAnalyzerError.php +++ b/app/inc/URLAnalyzer/URLAnalyzerError.php @@ -14,10 +14,17 @@ class URLAnalyzerError extends URLAnalyzerBase public function throwError($errorType, $additionalInfo = '') { $errorConfig = $this->errorMap[$errorType]; - $message = Language::getMessage($errorConfig['message_key'])['message']; - if ($additionalInfo) { - $message .= ': ' . $additionalInfo; + + // For DMCA domains, use custom message if provided, otherwise use default + if ($errorType === self::ERROR_DMCA_DOMAIN && !empty($additionalInfo)) { + $message = $additionalInfo; + } else { + $message = Language::getMessage($errorConfig['message_key'])['message']; + if ($additionalInfo && $errorType !== self::ERROR_DMCA_DOMAIN) { + $message .= ': ' . 
$additionalInfo; + } } + throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo); } } diff --git a/app/languages/de-de.php b/app/languages/de-de.php index 23fc4ec..8c02cc3 100644 --- a/app/languages/de-de.php +++ b/app/languages/de-de.php @@ -21,6 +21,10 @@ return [ 'message' => 'Diese Seite ist nicht erlaubt.', 'type' => 'error' ], + 'DMCA_DOMAIN' => [ + 'message' => 'Die angeforderte Website kann aufgrund von Anfragen ihrer Eigentümer nicht angezeigt werden.', + 'type' => 'error' + ], 'DNS_FAILURE' => [ 'message' => 'DNS für die Domain konnte nicht aufgelöst werden. Bitte überprüfe, ob die URL korrekt ist.', 'type' => 'warning' diff --git a/app/languages/en.php b/app/languages/en.php index 74eae3f..4165849 100644 --- a/app/languages/en.php +++ b/app/languages/en.php @@ -21,6 +21,10 @@ return [ 'message' => 'This domain is blocked for extraction.', 'type' => 'error' ], + 'DMCA_DOMAIN' => [ + 'message' => 'The requested website cannot be displayed due to requests from its owners.', + 'type' => 'error' + ], 'DNS_FAILURE' => [ 'message' => 'Failed to resolve DNS for the domain. Please verify if the URL is correct.', 'type' => 'warning' diff --git a/app/languages/es.php b/app/languages/es.php index 787513e..e1e6e58 100644 --- a/app/languages/es.php +++ b/app/languages/es.php @@ -21,6 +21,10 @@ return [ 'message' => 'Este dominio está bloqueado para extracción.', 'type' => 'error' ], + 'DMCA_DOMAIN' => [ + 'message' => 'El sitio web solicitado no se puede mostrar debido a las solicitudes de sus propietarios.', + 'type' => 'error' + ], 'DNS_FAILURE' => [ 'message' => 'Error al resolver DNS para el dominio. 
Verifique si la URL es correcta.', 'type' => 'warning' diff --git a/app/languages/pt-br.php b/app/languages/pt-br.php index a67a62f..2a3b5ef 100644 --- a/app/languages/pt-br.php +++ b/app/languages/pt-br.php @@ -21,6 +21,10 @@ return [ 'message' => 'Este domínio está bloqueado para extração.', 'type' => 'error' ], + 'DMCA_DOMAIN' => [ + 'message' => 'O site solicitado não pode ser exibido por exigência dos seus proprietários.', + 'type' => 'error' + ], 'DNS_FAILURE' => [ 'message' => 'Falha ao resolver DNS para o domínio. Verifique se a URL está correta.', 'type' => 'warning' diff --git a/app/languages/ru-ru.php b/app/languages/ru-ru.php index 6369ae7..e8a6326 100644 --- a/app/languages/ru-ru.php +++ b/app/languages/ru-ru.php @@ -21,6 +21,10 @@ return [ 'message' => 'Этот домен заблокирован для извлечения.', 'type' => 'error' ], + 'DMCA_DOMAIN' => [ + 'message' => 'Запрошенный веб-сайт не может быть отображен по запросу его владельцев.', + 'type' => 'error' + ], 'DNS_FAILURE' => [ 'message' => 'Не удалось разрешить DNS для домена. Проверьте правильность URL.', 'type' => 'warning' diff --git a/app/src/URLProcessor.php b/app/src/URLProcessor.php index ca2e101..b87b2c7 100644 --- a/app/src/URLProcessor.php +++ b/app/src/URLProcessor.php @@ -5,6 +5,7 @@ namespace App; use Inc\Language; use Inc\URLAnalyzer; use Inc\URLAnalyzer\URLAnalyzerException; +use Inc\Cache; /** * URL Processor @@ -109,6 +110,19 @@ class URLProcessor } else { if ($errorType === URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) { $this->redirect(trim($additionalInfo), $errorType); + } elseif ($errorType === URLAnalyzer::ERROR_DMCA_DOMAIN) { + // For DMCA domains, show the custom message directly instead of redirecting + Language::init(LANGUAGE); + $message = $e->getMessage(); + $message_type = 'error'; + $url = ''; // Initialize url variable for the view + + // Initialize cache for counting + $cache = new \Inc\Cache(); + $cache_folder = $cache->getCacheFileCount(); + + require __DIR__ . 
'/views/home.php'; + exit; } $this->redirect(SITE_URL, $errorType); }