add dmca domains block

This commit is contained in:
Renan Bernordi 2025-06-26 17:38:05 -03:00
parent 01237362c5
commit 22e836b707
12 changed files with 103 additions and 10 deletions

View file

@ -66,6 +66,20 @@ Now just run `docker compose up -d`
- S3 Cache: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Maintenance: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
### 🛡️ DMCA
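To block domains in response to DMCA takedown requests, create the file `app/cache/dmca_domains.json`. Each entry sets the `host` to block and, optionally, a custom `message` shown instead of the default one: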
```json
[
{
"host": "exemplo.com.br",
"message": "This content has been blocked on request"
}
]
```
## 🚀 Integrations
- 🤖 **Telegram**: [Official Bot](https://t.me/leissoai_bot)

View file

@ -25,6 +25,7 @@ Instancia publica em [marreta.pcdomanual.com](https://marreta.pcdomanual.com)!
- Remove elementos indesejados
- Cache, cache!
- Bloqueia domínios que você não quer
- Proteção DMCA com mensagens personalizadas
- Permite configurar headers e cookies do seu jeito
- PHP-FPM e OPcache
- Suporte a Proxy
@ -66,6 +67,19 @@ Agora só rodar `docker compose up -d`
- Cache S3: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Manutenção: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
### 🛡️ DMCA
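Para bloquear domínios em resposta a pedidos de DMCA, crie o arquivo `app/cache/dmca_domains.json`. Cada entrada define o `host` a ser bloqueado e, opcionalmente, uma `message` personalizada exibida no lugar da mensagem padrão: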
```json
[
{
"host": "exemplo.com.br",
"message": "Este conteúdo foi bloqueado a pedido"
}
]
```
## 🚀 Integrações
- 🤖 **Telegram**: [Bot oficial](https://t.me/leissoai_bot)

View file

@ -67,6 +67,16 @@ try {
define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php');
define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php');
// Load DMCA domains from JSON file.
// DMCA_DOMAINS is always defined as an array: it stays empty when the file is
// missing, unreadable, or does not decode to a JSON array.
$dmcaDomainsFile = __DIR__ . '/cache/dmca_domains.json';
$dmcaDomains = [];
if (is_file($dmcaDomainsFile) && is_readable($dmcaDomainsFile)) {
    $dmcaDomainsJson = file_get_contents($dmcaDomainsFile);
    // file_get_contents() returns false on failure; never hand that to json_decode()
    if ($dmcaDomainsJson !== false) {
        $dmcaDomainsDecoded = json_decode($dmcaDomainsJson, true);
        if (is_array($dmcaDomainsDecoded)) {
            $dmcaDomains = $dmcaDomainsDecoded;
        }
    }
}
define('DMCA_DOMAINS', $dmcaDomains);
} catch (Dotenv\Exception\ValidationException $e) {
die('Environment Error: ' . $e->getMessage());
} catch (Exception $e) {

View file

@ -50,6 +50,25 @@ class URLAnalyzer extends URLAnalyzerBase
*/
public function analyze($url)
{
// Extract and validate hostname
$originalHost = parse_url($url, PHP_URL_HOST);
if (!$originalHost) {
    $this->error->throwError(self::ERROR_INVALID_URL, '');
}
// $host is the hostname without a leading "www."; it keeps the same value as before
// because the checks later in this method still read it
$host = preg_replace('/^www\./', '', $originalHost);

// Check if domain is in DMCA list FIRST (before any HTTP requests).
// Host names are case-insensitive and may end with a root dot, so the comparison is
// done on normalized copies; otherwise "EXEMPLO.com.br" or "exemplo.com.br." would
// slip past an entry for "exemplo.com.br".
$requestedHost = rtrim(strtolower($originalHost), '.');
$dmcaCandidates = [$requestedHost, preg_replace('/^www\./', '', $requestedHost)];
foreach (DMCA_DOMAINS as $dmcaEntry) {
    // Skip malformed entries instead of failing the whole request
    if (!is_array($dmcaEntry) || !isset($dmcaEntry['host']) || !is_string($dmcaEntry['host'])) {
        continue;
    }
    $entryHost = rtrim(strtolower(trim($dmcaEntry['host'])), '.');
    if (in_array($entryHost, $dmcaCandidates, true)) {
        Logger::getInstance()->logUrl($url, 'DMCA_DOMAIN');
        // An empty message makes throwError() fall back to the translated default text
        $customMessage = isset($dmcaEntry['message']) ? $dmcaEntry['message'] : '';
        $this->error->throwError(self::ERROR_DMCA_DOMAIN, $customMessage);
    }
}
// Reset activated rules for new analysis
$this->activatedRules = [];
@ -60,13 +79,6 @@ class URLAnalyzer extends URLAnalyzerBase
return $this->process->processContent($rawContent, parse_url($url, PHP_URL_HOST), $url);
}
// Extract and validate hostname
$host = parse_url($url, PHP_URL_HOST);
if (!$host) {
$this->error->throwError(self::ERROR_INVALID_URL, '');
}
$host = preg_replace('/^www\./', '', $host);
// Check if domain is in blocked list
if (in_array($host, BLOCKED_DOMAINS)) {
Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN');

View file

@ -22,6 +22,7 @@ class URLAnalyzerBase
/** @var string Error constants for different failure scenarios */
const ERROR_INVALID_URL = 'INVALID_URL';
const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN';
const ERROR_DMCA_DOMAIN = 'DMCA_DOMAIN';
const ERROR_NOT_FOUND = 'NOT_FOUND';
const ERROR_HTTP_ERROR = 'HTTP_ERROR';
const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR';
@ -33,6 +34,7 @@ class URLAnalyzerBase
protected $errorMap = [
self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'],
self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'],
self::ERROR_DMCA_DOMAIN => ['code' => 403, 'message_key' => 'DMCA_DOMAIN'],
self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'],
self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'],
self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'],

View file

@ -14,10 +14,17 @@ class URLAnalyzerError extends URLAnalyzerBase
public function throwError($errorType, $additionalInfo = '')
{
    // Look up the HTTP status code and translation key configured for this error type
    $errorConfig = $this->errorMap[$errorType];

    if ($errorType === self::ERROR_DMCA_DOMAIN && !empty($additionalInfo)) {
        // For DMCA domains, a custom message replaces the translated default entirely
        $message = $additionalInfo;
    } else {
        $message = Language::getMessage($errorConfig['message_key'])['message'];
        // A DMCA error only reaches this branch with empty additional info, so this
        // suffix is never appended to DMCA messages
        if ($additionalInfo) {
            $message .= ': ' . $additionalInfo;
        }
    }

    // Always throw, whichever branch built the message
    throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo);
}
}

View file

@ -21,6 +21,10 @@ return [
'message' => 'Diese Seite ist nicht erlaubt.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Die angeforderte Website kann aufgrund von Anfragen ihrer Eigentümer nicht angezeigt werden.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'DNS für die Domain konnte nicht aufgelöst werden. Bitte überprüfe, ob die URL korrekt ist.',
'type' => 'warning'

View file

@ -21,6 +21,10 @@ return [
'message' => 'This domain is blocked for extraction.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'The requested website cannot be displayed due to requests from its owners.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Failed to resolve DNS for the domain. Please verify if the URL is correct.',
'type' => 'warning'

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este dominio está bloqueado para extracción.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'El sitio web solicitado no se puede mostrar debido a las solicitudes de sus propietarios.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Error al resolver DNS para el dominio. Verifique si la URL es correcta.',
'type' => 'warning'

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este domínio está bloqueado para extração.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'O site solicitado não pode ser exibido por exigência dos seus proprietários.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Falha ao resolver DNS para o domínio. Verifique se a URL está correta.',
'type' => 'warning'

View file

@ -21,6 +21,10 @@ return [
'message' => 'Этот домен заблокирован для извлечения.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Запрошенный веб-сайт не может быть отображен по запросу его владельцев.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Не удалось разрешить DNS для домена. Проверьте правильность URL.',
'type' => 'warning'

View file

@ -5,6 +5,7 @@ namespace App;
use Inc\Language;
use Inc\URLAnalyzer;
use Inc\URLAnalyzer\URLAnalyzerException;
use Inc\Cache;
/**
* URL Processor
@ -109,6 +110,19 @@ class URLProcessor
} else {
if ($errorType === URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) {
$this->redirect(trim($additionalInfo), $errorType);
} elseif ($errorType === URLAnalyzer::ERROR_DMCA_DOMAIN) {
// For DMCA domains, show the custom message directly instead of redirecting
Language::init(LANGUAGE);
$message = $e->getMessage();
$message_type = 'error';
$url = ''; // Initialize url variable for the view
// Initialize cache for counting
$cache = new \Inc\Cache();
$cache_folder = $cache->getCacheFileCount();
require __DIR__ . '/views/home.php';
exit;
}
$this->redirect(SITE_URL, $errorType);
}