Compare commits

...

25 commits
2.1.0 ... main

Author SHA1 Message Date
Renan Bernordi
ef2827a6d2 add cnn selenium 2025-08-16 21:56:20 -03:00
Renan Bernordi
734acedecb fix validate url 2025-08-16 21:53:57 -03:00
Renan Bernordi
7c01bce35f darkmode 2025-07-17 00:54:38 -03:00
Renan Bernordi
bbcbdff8bc add 2025-07-17 00:33:54 -03:00
Renan Bernordi
33b437d8fe fix fetcj 2025-07-17 00:26:05 -03:00
Renan Bernordi
2071d5c2bc add restrict urls 2025-07-06 19:32:52 -03:00
Renan Bernordi
0a57629cff fix bin tasks 2025-07-06 19:25:01 -03:00
Renan Bernordi
4d458fb75f css fixes 2025-06-26 18:32:07 -03:00
Renan Bernordi
deea4d6a2a fixing cli commands 2025-06-26 18:12:51 -03:00
Renan Bernordi
22e836b707 add dmca domains block 2025-06-26 17:38:05 -03:00
Renan Bernordi
01237362c5 zh, teste 2025-05-30 01:00:38 -03:00
Renan Bernordi
08ba5eb1a6 stcatharinesstandard, primeiro teste proxy 2025-05-30 00:58:15 -03:00
Renan Bernordi
80a0bec993 ajuste wp 2025-05-30 00:53:39 -03:00
Renan Bernordi
86be4a69a5 rodar proxy list 2025-05-30 00:52:57 -03:00
Renan Bernordi
33a7569d17 ajuste no comando inicial 2025-05-30 00:46:29 -03:00
Renan Bernordi
3e99e34fa7 validação de regras e proxy 2025-05-27 23:20:22 -03:00
Renan Bernordi
b283965299 adicionado suporte a lista de proxy 2025-05-26 16:39:54 -03:00
Renan Bernordi
86e6c9b838 integração com regras do periscope 2025-05-26 13:15:08 -03:00
Renan Bernordi
99258b0376 nova regra de modificador de url 2025-05-26 13:14:55 -03:00
Renan Bernordi
ee6f57aa43 marreta recursiva #36 2025-05-02 10:36:30 -03:00
Renan Bernordi
5409407833 autofocus #34 2025-05-02 10:33:43 -03:00
Renan Bernordi
f09a861cd1 novas regras de dominios, issue #33 2025-03-04 17:51:15 -03:00
Renan Bernordi
7d449b5229 delete sqlite 2025-03-04 17:50:07 -03:00
Renan Bernordi
5ca8403afc função de limpeza de cache 2025-02-28 17:15:10 -03:00
Renan Bernordi
91176050c0 adicionada ferramenta para limpar cache 2025-02-28 11:29:46 -03:00
45 changed files with 1487 additions and 126 deletions

3
.gitignore vendored
View file

@ -3,7 +3,8 @@ composer.lock
.env
app/logs/*.log
app/cache/*.gz
app/cache/database/*.sql
app/cache/database/.sqlite
app/cache/*.json
TODO.md
node_modules

View file

@ -10,6 +10,7 @@ RUN apt-get update && apt-get install -y \
zip \
git \
htop \
cron \
libzip-dev \
libsqlite3-dev \
&& docker-php-ext-install zip opcache pdo_sqlite \
@ -46,6 +47,8 @@ COPY default.conf /etc/nginx/sites-available/default
# Copy and configure initialization script permissions
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /app/bin/cleanup
RUN chmod +x /app/bin/proxy
# Create cache, database, and logs folders
RUN mkdir -p /app/cache /app/cache/database /app/logs
@ -54,6 +57,11 @@ RUN mkdir -p /app/cache /app/cache/database /app/logs
RUN chown -R www-data:www-data /app \
&& chmod -R 755 /app
# Configure Cron
RUN touch /app/logs/cron.log
RUN echo '0 * * * * root php "/app/bin/cleanup" >> /app/logs/cleanup.log 2>&1' >> /etc/crontab
RUN echo '0 * * * * root php "/app/bin/proxy" >> /app/logs/proxy.log 2>&1' >> /etc/crontab
EXPOSE 80
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

View file

@ -27,6 +27,7 @@ Public instance at [marreta.pcdomanual.com](https://marreta.pcdomanual.com)!
- Blocks domains you don't want
- Allows configuring headers and cookies your way
- PHP-FPM and OPcache
- Proxy Support
## 🐳 Installing with Docker
@ -65,6 +66,20 @@ Now just run `docker compose up -d`
- S3 Cache: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Maintenance: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
### 🛡️ DMCA
To block domains from DMCA requests, create the file `app/cache/dmca_domains.json`:
```json
[
{
"host": "exemplo.com.br",
"message": "This content has been blocked on request"
}
]
```
## 🚀 Integrations
- 🤖 **Telegram**: [Official Bot](https://t.me/leissoai_bot)

View file

@ -25,8 +25,10 @@ Instancia publica em [marreta.pcdomanual.com](https://marreta.pcdomanual.com)!
- Remove elementos indesejados
- Cache, cache!
- Bloqueia domínios que você não quer
- Proteção DMCA com mensagens personalizadas
- Permite configurar headers e cookies do seu jeito
- PHP-FPM e OPcache
- Suporte a Proxy
## 🐳 Instalando em Docker
@ -65,6 +67,19 @@ Agora só rodar `docker compose up -d`
- Cache S3: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Manutenção: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
### 🛡️ DMCA
Para bloquear domínios por pedidos de DMCA, crie o arquivo `app/cache/dmca_domains.json`:
```json
[
{
"host": "exemplo.com.br",
"message": "Este conteúdo foi bloqueado a pedido"
}
]
```
## 🚀 Integrações
- 🤖 **Telegram**: [Bot oficial](https://t.me/leissoai_bot)

View file

@ -42,3 +42,16 @@ SELENIUM_HOST=localhost:4444
# Debug Settings
DEBUG=false
# Cache Cleanup Settings
# Number of days to keep cache files (*.gz)
# If not set, no files will be cleaned
CLEANUP_DAYS=7
# Proxy List Configuration
# URL to download proxy list from (used by bin/proxy script)
# The proxy list should contain proxies in one of these formats:
# 1. http://USER:PASSWORD@HOST:PORT
# 2. IP:PORT:USER:PASSWORD
# Example: PROXY_LIST=https://example.com/proxy-list.txt
PROXY_LIST=

View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-moon-fill" viewBox="0 0 16 16">
<path d="M6 .278a.77.77 0 0 1 .08.858 7.2 7.2 0 0 0-.878 3.46c0 4.021 3.278 7.277 7.318 7.277q.792-.001 1.533-.16a.79.79 0 0 1 .81.316.73.73 0 0 1-.031.893A8.35 8.35 0 0 1 8.344 16C3.734 16 0 12.286 0 7.71 0 4.266 2.114 1.312 5.124.06A.75.75 0 0 1 6 .278"/>
</svg>

After

Width:  |  Height:  |  Size: 394 B

3
app/assets/icons/sun.svg Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-brightness-high-fill" viewBox="0 0 16 16">
<path d="M12 8a4 4 0 1 1-8 0 4 4 0 0 1 8 0M8 0a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 0m0 13a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 13m8-5a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2a.5.5 0 0 1 .5.5M3 8a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2A.5.5 0 0 1 3 8m10.657-5.657a.5.5 0 0 1 0 .707l-1.414 1.415a.5.5 0 1 1-.707-.708l1.414-1.414a.5.5 0 0 1 .707 0m-9.193 9.193a.5.5 0 0 1 0 .707L3.05 13.657a.5.5 0 0 1-.707-.707l1.414-1.414a.5.5 0 0 1 .707 0m9.193 2.121a.5.5 0 0 1-.707 0l-1.414-1.414a.5.5 0 0 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .707M4.464 4.465a.5.5 0 0 1-.707 0L2.343 3.05a.5.5 0 1 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .708"/>
</svg>

After

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

After

Width:  |  Height:  |  Size: 60 KiB

View file

@ -101,4 +101,20 @@ document.addEventListener('DOMContentLoaded', function () {
}
});
}
// Dark mode
const themeToggle = document.getElementById('themeToggle');
const html = document.documentElement;
const savedTheme = localStorage.getItem('theme') || 'light';
html.setAttribute('data-theme', savedTheme);
if (themeToggle) {
themeToggle.addEventListener('click', () => {
const currentTheme = html.getAttribute('data-theme');
const newTheme = currentTheme === 'dark' ? 'light' : 'dark';
html.setAttribute('data-theme', newTheme);
localStorage.setItem('theme', newTheme);
});
}
});

View file

@ -1,10 +1,10 @@
@font-face {
font-family: 'inter';
src: url('../dist/fonts/inter-500.eot');
src: url('/dist/fonts/inter-500.eot');
src: local('Inter Medium'), local('Inter-Medium'),
url('../dist/fonts/inter-500.woff2') format('woff2'),
url('../dist/fonts/inter-500.woff') format('woff'),
url('../dist/fonts/inter-500.ttf') format('truetype');
url('/dist/fonts/inter-500.woff2') format('woff2'),
url('/dist/fonts/inter-500.woff') format('woff'),
url('/dist/fonts/inter-500.ttf') format('truetype');
font-weight: 500;
font-style: normal;
font-display: swap;
@ -12,11 +12,11 @@
@font-face {
font-family: 'inter';
src: url('../dist/fonts/inter-600.eot');
src: url('/dist/fonts/inter-600.eot');
src: local('Inter SemiBold'), local('Inter-SemiBold'),
url('../dist/fonts/inter-600.woff2') format('woff2'),
url('../dist/fonts/inter-600.woff') format('woff'),
url('../dist/fonts/inter-600.ttf') format('truetype');
url('/dist/fonts/inter-600.woff2') format('woff2'),
url('/dist/fonts/inter-600.woff') format('woff'),
url('/dist/fonts/inter-600.ttf') format('truetype');
font-weight: 600;
font-style: normal;
font-display: swap;
@ -24,11 +24,11 @@
@font-face {
font-family: 'unna';
src: url('../dist/fonts/unna-400.eot');
src: url('/dist/fonts/unna-400.eot');
src: local('Unna Regular'), local('Unna-Regular'),
url('../dist/fonts/unna-400.woff2') format('woff2'),
url('../dist/fonts/unna-400.woff') format('woff'),
url('../dist/fonts/unna-400.ttf') format('truetype');
url('/dist/fonts/unna-400.woff2') format('woff2'),
url('/dist/fonts/unna-400.woff') format('woff'),
url('/dist/fonts/unna-400.ttf') format('truetype');
font-weight: 400;
font-style: normal;
font-display: swap;

View file

@ -41,3 +41,6 @@
@include mixin.icon('close', 'invert(100%) sepia(32%) saturate(8%) hue-rotate(23deg) brightness(102%) contrast(100%)');
@include mixin.icon('paste', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('sun', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('moon', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');

View file

@ -17,7 +17,7 @@
@mixin icon($name, $filter) {
.icon--#{$name} {
background-image: url("../dist/icons/#{$name}.svg");
background-image: url("/dist/icons/#{$name}.svg");
filter: #{$filter};
}
}

View file

@ -27,17 +27,53 @@
--font-weight: 500;
--line-height: 160%;
/* Light theme colors */
@include mixin.create-color('marreta', #3B82F6);
@include mixin.create-color('text', #484848);
@include mixin.create-color('textmuted', #818181);
@include mixin.create-color('link', #3B82F6);
/* Theme-aware colors */
--background: #ffffff;
--surface: #F4F4F5;
--surface-hover: #e4e4e7;
--border: #e4e4e7;
--header-text: #000000;
--nav-mobile-bg: var(--marreta);
--nav-mobile-text: #ffffff;
--nav-desktop-text: #333333;
--nav-desktop-hover: #007bff;
--input-bg: #F4F4F5;
--toast-error: rgb(247, 102, 97);
--toast-warning: rgb(247, 152, 97);
--container_spacing: 24px;
@include mixin.devices(desktop) {
--container_spacing: 64px;
}
}
/* Dark theme */
[data-theme="dark"] {
@include mixin.create-color('marreta', #60A5FA);
@include mixin.create-color('text', #e5e5e5);
@include mixin.create-color('textmuted', #a1a1aa);
@include mixin.create-color('link', #60A5FA);
--background: #000;
--surface: #1f1f1f;
--surface-hover: #2a2a2a;
--border: #2a2a2a;
--header-text: #ffffff;
--nav-mobile-bg: var(--marreta);
--nav-mobile-text: #ffffff;
--nav-desktop-text: #e5e5e5;
--nav-desktop-hover: #60A5FA;
--input-bg: #1f1f1f;
--toast-error: rgb(220, 38, 127);
--toast-warning: rgb(245, 158, 11);
}
html {
scroll-behavior: smooth;
}

View file

@ -46,22 +46,22 @@ body {
}
&--error {
background-color: rgb(247, 102, 97);
background-color: var(--toast-error);
}
&--warning {
background-color: rgb(247, 152, 97);
background-color: var(--toast-warning);
}
}
header {
display: grid;
grid-template-columns: auto 1fr 1fr;
grid-template-columns: auto 1fr auto 1fr;
align-items: center;
padding: 0 0 42px 0;
@include mixin.devices(desktop) {
grid-template-columns: 1fr 2fr 1fr;
grid-template-columns: 1fr 2fr auto 1fr;
}
&.open {
@ -126,7 +126,77 @@ header {
h1 {
font-family: var(--font-family-unna);
color: #000;
color: var(--header-text);
}
}
.fast_buttons {
display: flex;
gap: 8px;
}
.theme-controls {
display: flex;
justify-content: center;
align-items: center;
padding: 0 16px;
@include mixin.devices(desktop) {
padding: 0;
}
.theme-toggle {
background: none;
border: 2px solid var(--border);
border-radius: 50%;
width: 40px;
height: 40px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
position: relative;
transition: all 0.3s ease;
&:hover {
border-color: var(--marreta);
background-color: var(--surface-hover);
}
.icon {
width: 18px;
height: 18px;
position: absolute;
transition: all 0.3s ease;
&--sun {
opacity: 1;
transform: rotate(0deg) scale(1);
}
&--moon {
opacity: 0;
transform: rotate(180deg) scale(0.8);
}
[data-theme="dark"] & {
filter: invert(1);
}
}
[data-theme="dark"] & {
.icon {
&--sun {
opacity: 0;
transform: rotate(-180deg) scale(0.8);
}
&--moon {
opacity: 1;
transform: rotate(0deg) scale(1);
}
}
}
}
}
@ -137,7 +207,7 @@ header {
left: 0;
right: 0;
bottom: 0;
background-color: var(--marreta);
background-color: var(--nav-mobile-bg);
padding: var(--container_spacing) var(--container_spacing) calc(4*var(--container_spacing)) var(--container_spacing);
z-index: 500;
align-items: flex-end;
@ -172,20 +242,20 @@ header {
font-size: 24px;
padding: 16px 0;
border-bottom: 1px solid rgba(255, 255, 255, 0.24);
color: #fff;
color: var(--nav-mobile-text);
text-decoration: none;
@include mixin.devices(desktop) {
color: #333;
color: var(--nav-desktop-text);
font-size: initial;
padding: 0;
border-bottom: 0;
}
&:hover {
color: #fff;
color: var(--nav-mobile-text);
@include mixin.devices(desktop) {
color: #007bff;
color: var(--nav-desktop-hover);
}
}
}
@ -204,14 +274,14 @@ header {
color: rgba(255,255,255,0.5);
padding: 0;
@include mixin.devices(desktop) {
color: #333;
color: var(--nav-desktop-text);
cursor: pointer;
}
&:hover {
color: rgba(255,255,255,0.5);
@include mixin.devices(desktop) {
color: #007bff;
color: var(--nav-desktop-hover);
}
}
}
@ -222,8 +292,8 @@ header {
top: 110%;
left: 0;
border-radius: 16px;
background-color: #F4F4F5;
border: 4px solid #F4F4F5;
background-color: var(--surface);
border: 4px solid var(--surface);
z-index: 10;
box-shadow: 0px 4px 6px 0px rgba(0, 0, 0, 0.05);
box-shadow: 0px 10px 15px 0px rgba(0, 0, 0, 0.1);
@ -246,7 +316,7 @@ header {
font-weight: 600;
@include mixin.devices(desktop) {
background-color: #fff;
background-color: var(--background);
margin-top: 0;
margin-bottom: 4px;
padding: 8px 16px;
@ -358,8 +428,8 @@ header {
top: 110%;
right: 0;
border-radius: 16px;
background-color: #F4F4F5;
border: 4px solid #F4F4F5;
background-color: var(--surface);
border: 4px solid var(--surface);
z-index: 10;
box-shadow: 0px 4px 6px 0px rgba(0, 0, 0, 0.05);
box-shadow: 0px 10px 15px 0px rgba(0, 0, 0, 0.1);
@ -388,7 +458,7 @@ header {
font-weight: 600;
display: block;
padding: 8px 16px;
background-color: #fff;
background-color: var(--background);
display: flex;
align-items: center;
@ -408,7 +478,7 @@ header {
&.open {
.extension__toggle {
background-color: #F4F4F5;
background-color: var(--surface);
color: var(--textmuted);
}
@ -428,7 +498,7 @@ main {
font-size: 64px;
line-height: 61.44px;
text-align: center;
color: #000;
color: var(--header-text);
max-width: 512px;
margin: 0 auto;
}
@ -452,7 +522,7 @@ main {
.fields {
&::before {
content: '';
background-image: url(../assets/images/wall.png);
background-image: url(/assets/images/wall.png);
background-repeat: no-repeat;
background-size: 100% 100%;
width: 422px;
@ -461,6 +531,11 @@ main {
top: -110px;
right: -180px;
z-index: 1;
transition: filter 0.3s ease;
[data-theme="dark"] & {
filter: invert(1);
}
}
max-width: 470px;
@ -485,7 +560,7 @@ main {
}
input {
background-color: #F4F4F5;
background-color: var(--input-bg);
padding: 16px 0 16px 44px;
border: 0;
border-radius: 8px;
@ -493,12 +568,13 @@ main {
box-sizing: border-box;
position: relative;
line-height: 1.3em;
color: var(--text);
}
}
.paste {
background: rgb(244,244,245);
background: linear-gradient(90deg, rgba(244,244,245,0) 0%, rgba(244,244,245,1) 30%, rgba(244,244,245,1) 100%);
background: var(--input-bg);
background: linear-gradient(90deg, transparent 0%, var(--input-bg) 30%, var(--input-bg) 100%);
align-items: center;
z-index: 3;
position: absolute;
@ -508,8 +584,13 @@ main {
cursor: pointer;
height: 48px;
display: flex;
.icon {
.icon {
transition: filter 0.3s ease;
[data-theme="dark"] & {
filter: invert(1);
}
}
}
@ -557,7 +638,7 @@ main {
.plus {
z-index: 3;
position: relative;
background-color: rgba(244, 244, 245, 1);
background-color: var(--surface);
margin-left: calc(-1*var(--container_spacing));
margin-right: calc(-1*var(--container_spacing));

211
app/bin/cleanup Normal file
View file

@ -0,0 +1,211 @@
#!/usr/bin/env php
<?php
/**
* Cache Cleanup Script
*
* Removes *.gz files from the cache directory that are older than the number
* of days specified in the CLEANUP_DAYS environment variable.
* If CLEANUP_DAYS is not set, no files will be cleaned.
*/
require_once __DIR__ . '/../vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
use Aws\S3\S3Client;
use Aws\Exception\AwsException;
// Entry point of the cache cleanup tool: loads the environment, validates
// CLEANUP_DAYS and dispatches to the S3 or local-disk cleaner.
$climate = new CLImate();

$climate->bold()->out('Cache Cleanup Tool');
$climate->br();

try {
    $dotenv = Dotenv::createImmutable(__DIR__ . '/..');
    $dotenv->load();
    $climate->out('Environment variables loaded');
} catch (\Exception $e) {
    // Without a loadable .env there is nothing to configure; report and stop
    // with a success code so a scheduled run is not flagged as failed.
    $climate->yellow()->out('Warning: ' . $e->getMessage());
    exit(0);
}

if (!defined('CACHE_DIR')) {
    define('CACHE_DIR', __DIR__ . '/../cache');
}

// Read the setting with a default so an unset key does not raise an
// "undefined array key" warning, and treat an empty value (CLEANUP_DAYS=)
// the same as "not set" instead of reporting it as an invalid number.
$rawCleanupDays = trim((string) ($_ENV['CLEANUP_DAYS'] ?? ''));

if ($rawCleanupDays === '' || (is_numeric($rawCleanupDays) && (float) $rawCleanupDays == 0)) {
    $climate->yellow()->out('CLEANUP_DAYS variable not set or 0. No files will be cleaned.');
    exit(0);
}

$cleanupDays = (int) $rawCleanupDays;

if ($cleanupDays <= 0) {
    $climate->red()->out('CLEANUP_DAYS must be a positive integer. No files will be cleaned.');
    exit(1);
}

// Calculate the cutoff timestamp: anything last modified before it is removed.
$cutoffTime = time() - ($cleanupDays * 86400);

// Check if S3 cache is enabled
$s3CacheEnabled = isset($_ENV['S3_CACHE_ENABLED']) && filter_var($_ENV['S3_CACHE_ENABLED'], FILTER_VALIDATE_BOOLEAN);

if ($s3CacheEnabled) {
    // Clean S3 cache
    cleanS3Cache($climate, $cutoffTime, $cleanupDays);
} else {
    // Clean local disk cache
    cleanDiskCache($climate, $cutoffTime, $cleanupDays);
}
/**
 * Clean cache files from S3 bucket
 *
 * Lists every object under the configured prefix, keeps the ones whose key
 * ends in ".gz", and deletes those last modified before the cutoff.
 * Terminates the script with exit code 1 when a required S3 variable is
 * missing or when the AWS client raises an error outside a single delete.
 *
 * @param CLImate $climate CLImate instance for output
 * @param int $cutoffTime Timestamp to use as cutoff for file age
 * @param int $cleanupDays Number of days to keep files
 * @return void
 */
function cleanS3Cache($climate, $cutoffTime, $cleanupDays) {
    $requiredVars = ['S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_BUCKET'];
    foreach ($requiredVars as $var) {
        if (empty($_ENV[$var])) {
            $climate->red()->out("$var environment variable is required for S3 cache cleaning.");
            exit(1);
        }
    }

    $climate->out("S3 cache enabled. Cleaning S3 cache files older than {$cleanupDays} days...");

    $clientConfig = [
        'version' => 'latest',
        'region' => $_ENV['S3_REGION'] ?? 'us-east-1',
        'credentials' => [
            'key' => $_ENV['S3_ACCESS_KEY'],
            'secret' => $_ENV['S3_SECRET_KEY'],
        ]
    ];

    // A custom endpoint is addressed path-style.
    if (!empty($_ENV['S3_ENDPOINT'])) {
        $clientConfig['endpoint'] = $_ENV['S3_ENDPOINT'];
        $clientConfig['use_path_style_endpoint'] = true;
    }

    try {
        $s3Client = new S3Client($clientConfig);
        $bucket = $_ENV['S3_BUCKET'];
        $prefix = $_ENV['S3_FOLDER'] ?? 'cache/';

        $climate->out("Listing objects in bucket: {$bucket} with prefix: {$prefix}");

        $objects = [];
        $marker = null;

        do {
            $params = [
                'Bucket' => $bucket,
                'Prefix' => $prefix,
                'MaxKeys' => 1000
            ];
            if ($marker) {
                $params['Marker'] = $marker;
            }

            $result = $s3Client->listObjects($params);
            $contents = $result['Contents'] ?? [];

            foreach ($contents as $object) {
                if (substr($object['Key'], -3) === '.gz') {
                    $objects[] = $object;
                }
            }

            // Prefer the marker supplied by the service; otherwise continue
            // from the last key of a truncated page. An empty page ends the
            // loop instead of indexing into the false returned by end().
            $marker = $result['NextMarker'] ?? null;
            if ($marker === null && !empty($result['IsTruncated']) && !empty($contents)) {
                $lastObject = end($contents);
                $marker = $lastObject['Key'];
            }
        } while ($marker);

        $totalObjects = count($objects);
        $climate->out("Found {$totalObjects} .gz objects in S3 bucket.");

        if ($totalObjects === 0) {
            $climate->out('No .gz objects found in S3 bucket.');
            return;
        }

        $progress = $climate->progress()->total($totalObjects);
        $deletedObjects = 0;

        foreach ($objects as $index => $object) {
            $progress->current($index + 1);

            // LastModified may be a date object or a string; resolve both to
            // a Unix timestamp explicitly.
            $lastModified = $object['LastModified'] ?? null;
            if ($lastModified instanceof \DateTimeInterface) {
                $modifiedAt = $lastModified->getTimestamp();
            } else {
                $modifiedAt = strtotime((string) $lastModified);
            }

            // An unparseable date yields false, which would compare as older
            // than any cutoff and delete the object; keep it instead.
            if ($modifiedAt === false) {
                $climate->yellow()->out("Skipping (unknown modification time): " . $object['Key']);
                continue;
            }

            if ($modifiedAt < $cutoffTime) {
                try {
                    $s3Client->deleteObject([
                        'Bucket' => $bucket,
                        'Key' => $object['Key']
                    ]);
                    $deletedObjects++;
                } catch (AwsException $e) {
                    // One failed delete must not abort the remaining ones.
                    $climate->red()->out("Failed to delete: " . $object['Key'] . " - " . $e->getMessage());
                }
            }
        }

        $climate->br();
        $climate->green()->out("S3 cleanup complete: {$deletedObjects} objects deleted.");
    } catch (AwsException $e) {
        $climate->red()->out("AWS Error: " . $e->getMessage());
        exit(1);
    }
}
/**
 * Clean cache files from local disk
 *
 * Deletes every "*.gz" file directly inside CACHE_DIR whose modification time
 * is older than the cutoff. Terminates the script with exit code 1 when the
 * cache directory is missing or cannot be listed.
 *
 * @param CLImate $climate CLImate instance for output
 * @param int $cutoffTime Timestamp to use as cutoff for file age
 * @param int $cleanupDays Number of days to keep files
 * @return void
 */
function cleanDiskCache($climate, $cutoffTime, $cleanupDays) {
    $cacheDir = CACHE_DIR;

    $climate->out("Cleaning cache files older than {$cleanupDays} days from: {$cacheDir}");

    if (!is_dir($cacheDir)) {
        $climate->red()->out("Cache directory not found: {$cacheDir}");
        exit(1);
    }

    $gzFiles = glob($cacheDir . '/*.gz');

    // glob() returns false on error; count(false) is a TypeError on PHP 8.
    if ($gzFiles === false) {
        $climate->red()->out("Failed to list cache files in: {$cacheDir}");
        exit(1);
    }

    $totalFiles = count($gzFiles);
    $deletedFiles = 0;

    if ($totalFiles === 0) {
        $climate->out('No .gz files found in cache directory.');
        return;
    }

    $climate->out("Found {$totalFiles} .gz files in cache directory.");

    $progress = $climate->progress()->total($totalFiles);

    foreach ($gzFiles as $index => $file) {
        $progress->current($index + 1);

        $fileTime = filemtime($file);

        // filemtime() returns false when the time cannot be read; false would
        // compare as older than any cutoff and delete the file, so skip it.
        if ($fileTime === false) {
            $climate->red()->out("Could not read modification time: " . basename($file));
            continue;
        }

        if ($fileTime < $cutoffTime) {
            if (unlink($file)) {
                $deletedFiles++;
            } else {
                $climate->red()->out("Failed to delete: " . basename($file));
            }
        }
    }

    $climate->br();
    $climate->green()->out("Disk cleanup complete: {$deletedFiles} files deleted.");
}

196
app/bin/proxy Normal file
View file

@ -0,0 +1,196 @@
#!/usr/bin/env php
<?php
/**
* Proxy List Cache Updater
*
* Downloads proxy list from the URL specified in the PROXY_LIST environment variable
* and stores it in the cache directory for reuse.
* This script should be run periodically via cron (the Docker image schedules it hourly) to keep the proxy list updated.
*
* Supported proxy list formats:
* 1. http://USER:PASSWORD@HOST:PORT
* 2. IP:PORT:USER:PASSWORD
*/
require_once __DIR__ . '/../vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
use Curl\Curl;
// Entry point of the proxy list updater: loads the environment, downloads the
// list from the PROXY_LIST URL, normalizes it and caches it as JSON.
$climate = new CLImate();

$climate->bold()->out('Proxy List Cache Updater');
$climate->br();

try {
    $dotenv = Dotenv::createImmutable(__DIR__ . '/..');
    $dotenv->load();
    $climate->out('Environment variables loaded');
} catch (\Exception $e) {
    // Without a loadable .env there is nothing to configure; report and stop
    // with a success code so a scheduled run is not flagged as failed.
    $climate->yellow()->out('Warning: ' . $e->getMessage());
    exit(0);
}

if (!defined('CACHE_DIR')) {
    define('CACHE_DIR', __DIR__ . '/../cache');
}

if (!isset($_ENV['PROXY_LIST']) || empty($_ENV['PROXY_LIST'])) {
    $climate->yellow()->out('PROXY_LIST environment variable not set. No proxies to cache.');
    exit(0);
}

$proxyListUrl = $_ENV['PROXY_LIST'];
$proxyCachePath = CACHE_DIR . '/proxy_list.json';

// Download proxy list from URL
$climate->out('Downloading proxy list from: ' . $proxyListUrl);
$proxyList = downloadProxyList($proxyListUrl, $climate);

if ($proxyList === false) {
    $climate->red()->out('Failed to download proxy list from URL: ' . $proxyListUrl);
    exit(1);
}

$climate->green()->out('Proxy list downloaded successfully (' . strlen($proxyList) . ' bytes)');

if (!is_dir(CACHE_DIR)) {
    if (!mkdir(CACHE_DIR, 0755, true)) {
        $climate->red()->out('Failed to create cache directory: ' . CACHE_DIR);
        exit(1);
    }
}

// The text being parsed is the downloaded body, not the environment variable
// (which only holds the URL), so the messages say so.
$climate->out('Parsing downloaded proxy list...');
$proxies = parseProxyList($proxyList);

if (empty($proxies)) {
    $climate->red()->out('No valid proxies found in the downloaded proxy list. Supported formats are:');
    $climate->red()->out('1. http://USER:PASSWORD@HOST:PORT');
    $climate->red()->out('2. IP:PORT:USER:PASSWORD');
    exit(1);
}

$climate->out('Found ' . count($proxies) . ' valid proxies.');

// json_encode() returns false on failure (e.g. invalid UTF-8 in an entry);
// report that explicitly rather than writing an empty cache file.
$encodedProxies = json_encode($proxies);
if ($encodedProxies === false) {
    $climate->red()->out('Failed to encode proxy list as JSON: ' . json_last_error_msg());
    exit(1);
}

// file_put_contents() returns the byte count or false, so compare strictly.
if (file_put_contents($proxyCachePath, $encodedProxies) !== false) {
    $climate->green()->out('Proxy list successfully cached to: ' . $proxyCachePath);
} else {
    $climate->red()->out('Failed to write proxy list to cache file: ' . $proxyCachePath);
    exit(1);
}
/**
 * Parse a proxy list given as text
 *
 * Entries are separated by newlines or commas. Two entry formats are
 * recognized; anything else is silently ignored:
 *   1. http://USER:PASSWORD@HOST:PORT (http or https), kept unchanged
 *   2. IP:PORT:USER:PASSWORD, converted to format 1
 *
 * Because commas separate entries, a credential containing a comma cannot be
 * expressed in either format.
 *
 * @param string $proxyListString Proxy list text, one entry per line or comma-separated
 * @return array Array of proxy URLs in the form http://USER:PASSWORD@HOST:PORT
 */
function parseProxyList($proxyListString) {
    $proxies = [];

    $lines = preg_split('/[\r\n,]+/', $proxyListString);

    foreach ($lines as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }

        // Format 1: http://USER:PASSWORD@HOST:PORT
        if (preg_match('/^https?:\/\/[^:]+:[^@]+@[^:]+:\d+$/i', $line)) {
            $proxies[] = $line;
            continue;
        }

        // Format 2: IP:PORT:USER:PASSWORD
        if (preg_match('/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+):([^:]+):(.+)$/', $line, $matches)) {
            $ip = $matches[1];
            $port = $matches[2];
            // Percent-encode the credentials: a raw "@", ":" or "/" in the
            // user or password would otherwise produce a malformed URL.
            $user = rawurlencode($matches[3]);
            $password = rawurlencode($matches[4]);

            // Convert to standard format
            $proxies[] = "http://{$user}:{$password}@{$ip}:{$port}";
        }
    }

    return $proxies;
}
/**
 * Download proxy list from URL using php-curl-class
 *
 * Performs a single GET request (30 s total timeout, 10 s connect timeout,
 * at most 3 redirects) and returns the response body only for HTTP 200.
 * Progress and errors are written to $climate when given; without it,
 * errors go to error_log().
 *
 * @param string $url URL to download proxy list from
 * @param CLImate $climate CLImate instance for output
 * @return string|false Downloaded content or false on failure
 */
function downloadProxyList($url, $climate = null) {
    $curl = new Curl();

    // Configure cURL options
    $curl->setTimeout(30);
    $curl->setConnectTimeout(10);
    $curl->setUserAgent('Marreta Proxy Updater/1.0');
    $curl->setHeader('Accept', 'text/plain, text/html, */*');
    $curl->setHeader('Accept-Encoding', 'gzip, deflate');
    $curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
    $curl->setOpt(CURLOPT_MAXREDIRS, 3);
    // Keep TLS verification on: the list carries proxy credentials, and an
    // unverified connection would let an intermediary read or replace it.
    $curl->setOpt(CURLOPT_SSL_VERIFYPEER, true);
    $curl->setOpt(CURLOPT_SSL_VERIFYHOST, 2);

    try {
        if ($climate) {
            $climate->out('Making HTTP request with php-curl-class...');
        }

        $curl->get($url);

        if ($curl->error) {
            $errorMsg = 'cURL request failed: ' . $curl->errorMessage . ' (Code: ' . $curl->errorCode . ')';
            if ($climate) {
                $climate->red()->out($errorMsg);
            } else {
                error_log($errorMsg);
            }
            return false;
        }

        $statusCode = $curl->httpStatusCode;

        if ($climate) {
            $climate->out('HTTP Status Code: ' . $statusCode);
        }

        if ($statusCode === 200) {
            // Use the undecoded body: the library may turn JSON/XML responses
            // into objects, but callers need the text itself.
            $content = $curl->rawResponse;
            if (!is_string($content)) {
                if ($climate) {
                    $climate->red()->out('Response body is not readable as text.');
                }
                return false;
            }

            if ($climate) {
                $contentType = $curl->responseHeaders['Content-Type'] ?? 'unknown';
                $climate->out('Content-Type: ' . $contentType);
                $climate->out('Content-Length: ' . strlen($content) . ' bytes');
            }

            return $content;
        }

        if ($climate) {
            $climate->yellow()->out('Unexpected HTTP status code: ' . $statusCode);
        }
        return false;
    } catch (\Exception $e) {
        $errorMsg = 'Unexpected error during download: ' . $e->getMessage();
        if ($climate) {
            $climate->red()->out($errorMsg);
        } else {
            error_log($errorMsg);
        }
        return false;
    } finally {
        // Always release the cURL handle, including on early returns.
        $curl->close();
    }
}

Binary file not shown.

View file

@ -5,7 +5,8 @@
"php-curl-class/php-curl-class": "^11.0",
"php-webdriver/webdriver": "^1.15",
"monolog/monolog": "^3.8.1",
"nikic/fast-route": "^1.3"
"nikic/fast-route": "^1.3",
"league/climate": "^3.8"
},
"autoload": {
"psr-4": {

View file

@ -21,15 +21,11 @@ try {
'SITE_URL'
])->notEmpty();
// Validate URL format
if (!filter_var($_ENV['SITE_URL'], FILTER_VALIDATE_URL)) {
throw new Exception('SITE_URL must be a valid URL');
}
// Core system settings
define('SITE_NAME', $_ENV['SITE_NAME']);
define('SITE_DESCRIPTION', $_ENV['SITE_DESCRIPTION']);
define('SITE_URL', $_ENV['SITE_URL']);
define('CLEANUP_DAYS', $_ENV['CLEANUP_DAYS'] ?? 0);
// Optional settings with defaults
define('DNS_SERVERS', $_ENV['DNS_SERVERS'] ?? '1.1.1.1, 8.8.8.8');
@ -66,6 +62,16 @@ try {
define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php');
define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php');
// Load DMCA domains from JSON file
$dmcaDomainsFile = __DIR__ . '/cache/dmca_domains.json';
if (file_exists($dmcaDomainsFile)) {
$dmcaDomainsJson = file_get_contents($dmcaDomainsFile);
$dmcaDomains = json_decode($dmcaDomainsJson, true);
define('DMCA_DOMAINS', is_array($dmcaDomains) ? $dmcaDomains : []);
} else {
define('DMCA_DOMAINS', []);
}
} catch (Dotenv\Exception\ValidationException $e) {
die('Environment Error: ' . $e->getMessage());
} catch (Exception $e) {

View file

@ -6,7 +6,12 @@
* Defines domains that cannot be accessed by the system
* due to usage policies or technical restrictions
*/
$host = parse_url(defined('SITE_URL') ? SITE_URL : '', PHP_URL_HOST);
return [
$host,
'localhost',
'127.0.0.1',
// News sites
//-- Content behind login access/hard paywall
'wsj.com',
@ -121,7 +126,6 @@ return [
'jusbrasil.com.br',
'glassdoor.com.br',
'gov.br',
'medium.com',
'stackoverflow.com',
'hoteis.com',
'amazon.com',

View file

@ -13,6 +13,7 @@
* - classAttrRemove: Array of classes to be removed from elements
* - customCode: String containing custom JavaScript code
* - customStyle: String containing custom CSS code
* - proxy: Enable proxy in Guzzle or Selenium requests
* - excludeGlobalRules: Associative array of global rules to exclude for this domain
* Example:
* 'excludeGlobalRules' => [
@ -27,6 +28,16 @@
* - fromGoogleBot: Adds simulation of request coming from Google Bot
* - removeElementsByTag: Remove specific elements via DOM
* - removeCustomAttr: Remove custom attributes from elements
* - urlMods: Modify the URL before fetching content.
* Example:
* 'urlMods' => [
* 'query' => [
* [
* 'key' => 'amp',
* 'value' => '1'
* ]
* ]
* ]
*/
return [
'nsctotal.com.br' => [
@ -40,6 +51,12 @@ return [
'removeElementsByTag' => ['style'],
'removeCustomAttr' => ['hidden', 'data-*']
],
'wired.com' => [
'scriptTagRemove' => ['.js'],
],
'newyorker.com' => [
'scriptTagRemove' => ['.js'],
],
'globo.com' => [
'idElementRemove' => ['cookie-banner-lgpd', 'paywall-cpt', 'mc-read-more-wrapper', 'paywall-cookie-content', 'paywall-cpt'],
'classElementRemove' => ['banner-lgpd', 'article-related-link__title', 'article-related-link__picture', 'paywall-denied', 'banner-subscription'],
@ -54,16 +71,19 @@ return [
'gauchazh.clicrbs.com.br' => [
'idElementRemove' => ['paywallTemplate'],
'classAttrRemove' => ['m-paid-content', 'paid-content-apply'],
'scriptTagRemove' => ['vendors-8'],
'scriptTagRemove' => ['vendors-9','vendors-10','vendors-11'],
'excludeGlobalRules' => [
'classElementRemove' => ['paid-content']
],
'fetchStrategies' => 'fetchFromSelenium',
'proxy' => true,
],
'reuters.com' => [
'classElementRemove' => ['leaderboard__container'],
'fetchStrategies' => 'fetchFromSelenium',
],
'cnn.com' => [
'fetchStrategies' => 'fetchFromSelenium',
],
'lepoint.fr' => [
'classElementRemove' => ['paywall'],
],
@ -86,10 +106,6 @@ return [
'idElementRemove' => ['cboxOverlay'],
'fetchStrategies' => 'fetchFromSelenium',
],
'washingtonpost.com' => [
'classElementRemove' => ['paywall-overlay'],
'fetchStrategies' => 'fetchFromSelenium',
],
'oantagonista.com.br' => [
'fetchStrategies' => 'fetchFromSelenium',
],
@ -170,6 +186,12 @@ return [
'nzherald.co.nz' => [
'fetchStrategies' => 'fetchFromSelenium',
],
'onetz.de' => [
'idElementRemove' => ['checkout-container'],
'classElementRemove' => ['tp-backdrop','dm-nobg'],
'classAttrRemove' => ['field-dnt-body-pp'],
'scriptTagRemove' => ['.js'],
],
'opovo.com.br' => [
'classElementRemove' => ['screen-loading', 'overlay-advise']
],
@ -199,13 +221,6 @@ return [
}
'
],
'ft.com' => [
'cookies' => [
'next-flags' => null,
'next:ads' => null
],
'fromGoogleBot' => true
],
'nytimes.com' => [
'idElementRemove' => ['gateway-content', 'site-index', 'complianceOverlay'],
'customCode' => '
@ -256,7 +271,7 @@ return [
position: relative !important;
}
',
'fetchStrategies' => 'fetchFromSelenium',
'fetchStrategies' => 'fetchFromWaybackMachine',
'excludeGlobalRules' => [
'scriptTagRemove' => [
'gtm.js',
@ -357,8 +372,358 @@ return [
'_pctx' => null
]
],
// Domain test
'thestar.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'niagarafallsreview.ca' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'thepeterboroughexaminer.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'therecord.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'thespec.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'wellandtribune.ca' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'time.com' => [
'headers' => [
'User-Agent' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Cookie' => 'nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1',
'Referer' => 'https://www.google.com/'
],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'div[data-testid="inline-message"], div[id^="ad-"], div[id^="leaderboard-"], div.expanded-dock, div.pz-ad-box, div[id="top-wrapper"], div[id="bottom-wrapper"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// The next seven domains use one identical script: once the DOM is ready,
// remove every ".paywall-bar" element and every <div> whose class attribute
// starts with "MessageBannerWrapper-". The attribute selector is closed with
// "]" so it is valid in every selector engine.
'architecturaldigest.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'bonappetit.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'cntraveler.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'epicurious.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'gq.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'vanityfair.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'vogue.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'americanbanker.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const inlineGate = document.querySelector(\'.inline-gate\');
if (inlineGate) {
inlineGate.classList.remove(\'inline-gate\');
const inlineGated = document.querySelectorAll(\'.inline-gated\');
for (const elem of inlineGated) { elem.classList.remove(\'inline-gated\'); }
}
});
'
],
'washingtonpost.com' => [
'classElementRemove' => ['paywall-overlay'],
'fetchStrategies' => 'fetchFromSelenium',
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
let paywall = document.querySelectorAll(\'div[data-qa$="-ad"], div[id="leaderboard-wrapper"], div[data-qa="subscribe-promo"]\');
paywall.forEach(el => { el.remove(); });
const images = document.querySelectorAll(\'img\');
images.forEach(image => { image.parentElement.style.filter = \'\'; });
const headimage = document.querySelectorAll(\'div .aspect-custom\');
headimage.forEach(image => { image.style.filter = \'\'; });
});
',
'idElementRemove' => ['wall-bottom-drawer-container']
],
'usatoday.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'div.roadblock-container, .gnt_nb, [aria-label="advertisement"], div[id="main-frame-error"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'stcatharinesstandard.ca' => [
'proxy' => true,
'idElementRemove' => 'access-offers-modal',
'classElementRemove' => 'modal-backdrop',
'classAttrRemove' => ' modal-open'
],
'medium.com' => [
'headers' => [
'Referer' => 'https://t.co/x?amp=1',
'X-Forwarded-For' => 'none',
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
'Content-Security-Policy' => 'script-src \'self\';'
]
],
'tagesspiegel.de' => [
'headers' => [
'Content-Security-Policy' => 'script-src \'self\';',
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
],
'urlMods' => [
'query' => [
[
'key' => 'amp',
'value' => '1'
]
]
]
],
'nzz.ch' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelector(\'.dynamic-regwall\');
if (paywall) {
paywall.remove();
}
});
'
],
'demorgen.be' => [
'headers' => [
'Cookie' => 'isBot=true; authId=1',
'User-Agent' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; Googlebot-News; +http://www.google.com/bot.html) Chrome/121.0.6140.0 Safari/537.36',
'X-Forwarded-For' => 'none',
'Referer' => 'https://news.google.com'
],
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
// remove paywall items
let paywall = document.querySelectorAll(\'script[src*="advertising-cdn.dpgmedia.cloud"], div[data-temptation-position="ARTICLE_BOTTOM"]\');
paywall.forEach(el => { el.remove(); });
// remove empty advert
const advert = document.querySelector(\'div[data-advert-placeholder-collapses]\');
if (advert) {
advert.remove();
}
});
'
],
'ft.com' => [
'cookies' => [
'next-flags' => null,
'next:ads' => null
],
'fromGoogleBot' => true,
'headers' => [
'Referer' => 'https://t.co/x?amp=1'
],
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const styleTags = document.querySelectorAll(\'link[rel="stylesheet"]\');
styleTags.forEach(el => {
const href = el.getAttribute(\'href\');
if (href && href.substring(0, 1) === \'/\') {
const updatedHref = href.substring(1).replace(/(https?:\\/\\/.+?)\\/{2,}/, \'$1/\');
el.setAttribute(\'href\', updatedHref);
}
});
setTimeout(() => {
const cookie = document.querySelectorAll(\'.o-cookie-message, .js-article-ribbon, .o-ads, .o-banner, .o-message, .article__content-sign-up\');
cookie.forEach(el => { el.remove(); });
}, 1000);
})
'
],
// Test domain
'altendorfme.github.io' => [
'userAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'headers' => [
@ -366,6 +731,7 @@ return [
'Cache-Control' => 'no-cache',
'Pragma' => 'no-cache'
],
'proxy' => true,
'idElementRemove' => ['test-id-1', 'paywall'],
'classElementRemove' => ['test-class-1'],
'scriptTagRemove' => ['analytics.js', 'test-script.js', 'paywall.js'],

View file

@ -7,6 +7,7 @@
* using the 'excludeGlobalRules' configuration in domain_rules.php
*/
return [
'proxy' => false,
// Classes to be removed from all pages:
'classElementRemove' => [
'subscription',

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

3
app/dist/icons/moon.svg vendored Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-moon-fill" viewBox="0 0 16 16">
<path d="M6 .278a.77.77 0 0 1 .08.858 7.2 7.2 0 0 0-.878 3.46c0 4.021 3.278 7.277 7.318 7.277q.792-.001 1.533-.16a.79.79 0 0 1 .81.316.73.73 0 0 1-.031.893A8.35 8.35 0 0 1 8.344 16C3.734 16 0 12.286 0 7.71 0 4.266 2.114 1.312 5.124.06A.75.75 0 0 1 6 .278"/>
</svg>

After

Width:  |  Height:  |  Size: 394 B

3
app/dist/icons/sun.svg vendored Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-brightness-high-fill" viewBox="0 0 16 16">
<path d="M12 8a4 4 0 1 1-8 0 4 4 0 0 1 8 0M8 0a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 0m0 13a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 13m8-5a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2a.5.5 0 0 1 .5.5M3 8a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2A.5.5 0 0 1 3 8m10.657-5.657a.5.5 0 0 1 0 .707l-1.414 1.415a.5.5 0 1 1-.707-.708l1.414-1.414a.5.5 0 0 1 .707 0m-9.193 9.193a.5.5 0 0 1 0 .707L3.05 13.657a.5.5 0 0 1-.707-.707l1.414-1.414a.5.5 0 0 1 .707 0m9.193 2.121a.5.5 0 0 1-.707 0l-1.414-1.414a.5.5 0 0 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .707M4.464 4.465a.5.5 0 0 1-.707 0L2.343 3.05a.5.5 0 1 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .708"/>
</svg>

After

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -1,2 +1,2 @@
"serviceWorker"in navigator&&window.addEventListener("load",()=>{navigator.serviceWorker.register("/service-worker.js").then(()=>{}).catch(()=>{})}),document.addEventListener("DOMContentLoaded",function(){let t=document.querySelector(".integration");var e=document.querySelector(".integration__toggle");let o=document.querySelector(".extension");var n=document.querySelector(".extension__toggle");let r=e=>{e!==t&&t.classList.remove("open"),e!==o&&o.classList.remove("open")};e.addEventListener("click",e=>{e.stopPropagation(),r(t),t.classList.toggle("open")}),n.addEventListener("click",e=>{e.stopPropagation(),r(o),o.classList.toggle("open")}),t.addEventListener("click",e=>{e.stopPropagation()}),o.addEventListener("click",e=>{e.stopPropagation()}),document.addEventListener("click",()=>{t.classList.remove("open"),o.classList.remove("open")}),document.addEventListener("click",e=>{e=e.target.closest(".toasty");e&&e.remove()}),document.addEventListener("click",e=>{e.target.closest(".open-nav")&&((e=document.querySelector("header")).classList.contains("open")?e.classList.remove("open"):e.classList.add("open"))});e=document.getElementById("paste");let a=document.getElementById("url");e&&a&&e.addEventListener("click",async e=>{e.preventDefault();try{var t=await navigator.clipboard.readText();a.value=t.trim()}catch(e){console.error("Failed to read clipboard contents",e)}})});
"serviceWorker"in navigator&&window.addEventListener("load",()=>{navigator.serviceWorker.register("/service-worker.js").then(()=>{}).catch(()=>{})}),document.addEventListener("DOMContentLoaded",function(){let t=document.querySelector(".integration");var e=document.querySelector(".integration__toggle");let o=document.querySelector(".extension");var n=document.querySelector(".extension__toggle");let a=e=>{e!==t&&t.classList.remove("open"),e!==o&&o.classList.remove("open")};e.addEventListener("click",e=>{e.stopPropagation(),a(t),t.classList.toggle("open")}),n.addEventListener("click",e=>{e.stopPropagation(),a(o),o.classList.toggle("open")}),t.addEventListener("click",e=>{e.stopPropagation()}),o.addEventListener("click",e=>{e.stopPropagation()}),document.addEventListener("click",()=>{t.classList.remove("open"),o.classList.remove("open")}),document.addEventListener("click",e=>{e=e.target.closest(".toasty");e&&e.remove()}),document.addEventListener("click",e=>{e.target.closest(".open-nav")&&((e=document.querySelector("header")).classList.contains("open")?e.classList.remove("open"):e.classList.add("open"))});e=document.getElementById("paste");let r=document.getElementById("url");e&&r&&e.addEventListener("click",async e=>{e.preventDefault();try{var t=await navigator.clipboard.readText();r.value=t.trim()}catch(e){console.error("Failed to read clipboard contents",e)}});n=document.getElementById("themeToggle");let c=document.documentElement;e=localStorage.getItem("theme")||"light";c.setAttribute("data-theme",e),n&&n.addEventListener("click",()=>{var e="dark"===c.getAttribute("data-theme")?"light":"dark";c.setAttribute("data-theme",e),localStorage.setItem("theme",e)})});
//# sourceMappingURL=scripts.js.map

File diff suppressed because one or more lines are too long

View file

@ -31,7 +31,9 @@ class Rules
'fetchStrategies',
'fromGoogleBot',
'removeElementsByTag',
'removeCustomAttr'
'removeCustomAttr',
'urlMods',
'proxy'
];
/**
@ -91,6 +93,35 @@ class Rules
return $this->getGlobalRules();
}
/**
 * Checks whether a domain-specific rule set applies to a domain
 *
 * A rule set applies when the base domain of $domain equals the base domain
 * of a configured pattern, or when one of the parts returned by
 * getDomainParts() for $domain equals a configured pattern's base domain.
 *
 * @param string $domain Target domain
 * @return bool True if a matching domain rule exists, false otherwise
 */
public function hasDomainRules($domain)
{
    // Normalize each configured pattern once instead of once per comparison
    $patternBases = [];
    foreach (array_keys($this->domainRules) as $pattern) {
        $patternBases[] = $this->getBaseDomain($pattern);
    }

    // Check for exact (base) domain match first
    if (in_array($this->getBaseDomain($domain), $patternBases, true)) {
        return true;
    }

    // Check for partial domain matches
    foreach ($this->getDomainParts($domain) as $part) {
        if (in_array($part, $patternBases, true)) {
            return true;
        }
    }

    return false;
}
/**
* Combines domain rules with global configuration
* @param array $rules Domain-specific rules
@ -108,12 +139,14 @@ class Rules
if (isset($excludeGlobalRules[$ruleType])) {
if (is_assoc_array($globalTypeRules)) {
$mergedRules[$ruleType] = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$result = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$mergedRules[$ruleType] = is_array($result) ? $result : [];
} else {
$mergedRules[$ruleType] = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$result = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$mergedRules[$ruleType] = is_array($result) ? $result : [];
}
} else {
$mergedRules[$ruleType] = $globalTypeRules;
$mergedRules[$ruleType] = is_array($globalTypeRules) ? $globalTypeRules : [];
}
}
@ -126,10 +159,13 @@ class Rules
}
if (in_array($ruleType, ['cookies', 'headers'])) {
$mergedRules[$ruleType] = array_merge($mergedRules[$ruleType], $domainTypeRules);
$mergedRules[$ruleType] = array_merge(
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
is_array($domainTypeRules) ? $domainTypeRules : []
);
} else {
$mergedRules[$ruleType] = array_values(array_unique(array_merge(
$mergedRules[$ruleType],
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
(array)$domainTypeRules
)));
}

View file

@ -50,6 +50,31 @@ class URLAnalyzer extends URLAnalyzerBase
*/
public function analyze($url)
{
// Extract and validate hostname
$host = parse_url($url, PHP_URL_HOST);
if (!$host) {
$this->error->throwError(self::ERROR_INVALID_URL, '');
}
// Check if URL contains restricted keywords
if ($this->isRestrictedUrl($url)) {
Logger::getInstance()->logUrl($url, 'RESTRICTED_URL');
$this->error->throwError(self::ERROR_RESTRICTED_URL, '');
}
$originalHost = parse_url($url, PHP_URL_HOST);
$host = preg_replace('/^www\./', '', $host);
// Check if domain is in DMCA list FIRST (before any HTTP requests)
foreach (DMCA_DOMAINS as $dmcaEntry) {
if (is_array($dmcaEntry) && isset($dmcaEntry['host'])) {
if ($dmcaEntry['host'] === $host || $dmcaEntry['host'] === $originalHost) {
Logger::getInstance()->logUrl($url, 'DMCA_DOMAIN');
$customMessage = isset($dmcaEntry['message']) ? $dmcaEntry['message'] : '';
$this->error->throwError(self::ERROR_DMCA_DOMAIN, $customMessage);
}
}
}
// Reset activated rules for new analysis
$this->activatedRules = [];
@ -60,20 +85,17 @@ class URLAnalyzer extends URLAnalyzerBase
return $this->process->processContent($rawContent, parse_url($url, PHP_URL_HOST), $url);
}
// Extract and validate hostname
$host = parse_url($url, PHP_URL_HOST);
if (!$host) {
$this->error->throwError(self::ERROR_INVALID_URL, '');
}
$host = preg_replace('/^www\./', '', $host);
// Check if domain is in blocked list
if (in_array($host, BLOCKED_DOMAINS)) {
Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN');
$this->error->throwError(self::ERROR_BLOCKED_DOMAIN, '');
}
// Check HTTP status and handle any errors
// Check if domain has specific rules by looking for domain-specific configurations
$hasCustomRules = $this->hasDomainRules($host);
// Check HTTP status and handle any errors only if domain doesn't have custom rules
if (!$hasCustomRules) {
$redirectInfo = $this->utils->checkStatus($url);
if ($redirectInfo['httpCode'] !== 200) {
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
@ -83,6 +105,7 @@ class URLAnalyzer extends URLAnalyzerBase
$this->error->throwError(self::ERROR_HTTP_ERROR, (string)$redirectInfo['httpCode']);
}
}
}
try {
// Get specific rules for this domain

View file

@ -22,23 +22,27 @@ class URLAnalyzerBase
/** @var string Error constants for different failure scenarios */
const ERROR_INVALID_URL = 'INVALID_URL';
const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN';
const ERROR_DMCA_DOMAIN = 'DMCA_DOMAIN';
const ERROR_NOT_FOUND = 'NOT_FOUND';
const ERROR_HTTP_ERROR = 'HTTP_ERROR';
const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR';
const ERROR_DNS_FAILURE = 'DNS_FAILURE';
const ERROR_CONTENT_ERROR = 'CONTENT_ERROR';
const ERROR_GENERIC_ERROR = 'GENERIC_ERROR';
const ERROR_RESTRICTED_URL = 'RESTRICTED_URL';
/** @var array Maps error types to HTTP codes and message keys */
protected $errorMap = [
self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'],
self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'],
self::ERROR_DMCA_DOMAIN => ['code' => 403, 'message_key' => 'DMCA_DOMAIN'],
self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'],
self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'],
self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'],
self::ERROR_DNS_FAILURE => ['code' => 504, 'message_key' => 'DNS_FAILURE'],
self::ERROR_CONTENT_ERROR => ['code' => 502, 'message_key' => 'CONTENT_ERROR'],
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR']
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR'],
self::ERROR_RESTRICTED_URL => ['code' => 403, 'message_key' => 'RESTRICTED_URL']
];
/** @var array List of user agents to rotate through, including Googlebot */
@ -113,4 +117,59 @@ class URLAnalyzerBase
{
return $this->rules->getDomainRules($domain);
}
/**
* Check if domain has specific rules
* Delegates to hasDomainRules() on the rules object held in $this->rules
* @param string $domain The domain host to check
* @return bool True if domain has custom rules, false otherwise
*/
protected function hasDomainRules($domain)
{
return $this->rules->hasDomainRules($domain);
}
/**
 * Tells whether a URL mentions any restricted keyword
 *
 * The comparison is case-insensitive and is a plain substring test against
 * the complete URL string (scheme, host, path and query alike), so a short
 * keyword such as 'auth' also matches longer words like 'author'.
 *
 * @param string $url The URL to check
 * @return bool True if at least one restricted keyword occurs in the URL, false otherwise
 */
protected function isRestrictedUrl($url)
{
    $haystack = strtolower($url);

    $blockedTerms = [
        // sign-in / sign-up pages
        'login', 'signin', 'sign-in', 'signup', 'sign-up',
        'register', 'registration',
        // credential recovery
        'lost-password', 'forgot-password', 'reset-password', 'password',
        // authenticated areas
        'auth', 'authentication', 'account', 'profile',
        'dashboard', 'admin', 'member',
        // paid / commerce flows
        'subscription', 'subscribe', 'premium',
        'checkout', 'payment', 'billing'
    ];

    foreach ($blockedTerms as $term) {
        if (strpos($haystack, $term) === false) {
            continue;
        }
        return true;
    }

    return false;
}
}

View file

@ -14,10 +14,17 @@ class URLAnalyzerError extends URLAnalyzerBase
public function throwError($errorType, $additionalInfo = '')
{
$errorConfig = $this->errorMap[$errorType];
// For DMCA domains, use custom message if provided, otherwise use default
if ($errorType === self::ERROR_DMCA_DOMAIN && !empty($additionalInfo)) {
$message = $additionalInfo;
} else {
$message = Language::getMessage($errorConfig['message_key'])['message'];
if ($additionalInfo) {
if ($additionalInfo && $errorType !== self::ERROR_DMCA_DOMAIN) {
$message .= ': ' . $additionalInfo;
}
}
throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo);
}
}

View file

@ -21,16 +21,117 @@ class URLAnalyzerFetch extends URLAnalyzerBase
/** @var array List of available proxies (filled by loadProxyList()) */
private $proxyList = [];
/** @var string Path to proxy cache file */
private $proxyCachePath = '';
/**
* Sets up the fetch handler with error handling capability
* Also sets the proxy cache file path and loads the cached proxy list
*/
public function __construct()
{
parent::__construct();
$this->error = new URLAnalyzerError();
$this->proxyCachePath = __DIR__ . '/../../cache/proxy_list.json';
$this->loadProxyList();
}
/**
 * Loads proxy list from cache if available
 *
 * The cache file is only read when $_ENV['PROXY_LIST'] is set and the file
 * exists. If the file cannot be read, is empty, or does not decode to a
 * JSON array, the current proxy list is left untouched so that
 * getRandomProxy() never has to deal with a non-array value.
 */
private function loadProxyList()
{
    if (!isset($_ENV['PROXY_LIST']) || !file_exists($this->proxyCachePath)) {
        return;
    }

    // file_get_contents() returns false on failure; empty() covers that too
    $cachedList = file_get_contents($this->proxyCachePath);
    if (empty($cachedList)) {
        return;
    }

    // json_decode() yields null for malformed JSON and scalars for scalar
    // documents; only an array is a usable proxy list
    $decoded = json_decode($cachedList, true);
    if (is_array($decoded)) {
        $this->proxyList = $decoded;
    }
}
/**
 * Picks one entry of the proxy list at random
 * @return string|null Randomly chosen proxy, or null when the list is empty
 */
private function getRandomProxy()
{
    $candidates = $this->proxyList;

    return empty($candidates)
        ? null
        : $candidates[array_rand($candidates)];
}
/**
 * Modifies URL based on urlMods rules
 *
 * Currently only 'urlMods' => ['query' => [['key' => ..., 'value' => ...], ...]]
 * is handled: each listed key is added to the query string, replacing an
 * existing parameter with the same key. The URL is then rebuilt from its
 * parsed components.
 *
 * @param string $url Original URL
 * @param array $domainRules Domain rules containing urlMods
 * @return string Modified URL; the original URL when no urlMods are set or the URL cannot be parsed
 */
private function applyUrlModifications($url, $domainRules)
{
    if (!isset($domainRules['urlMods'])) {
        return $url;
    }

    // parse_url() returns false for seriously malformed URLs; rebuilding from
    // that would produce a bare "?key=value", so hand the input back untouched
    $urlParts = parse_url($url);
    if (!is_array($urlParts)) {
        return $url;
    }

    if (isset($domainRules['urlMods']['query']) && is_array($domainRules['urlMods']['query'])) {
        $queryParams = [];
        if (isset($urlParts['query'])) {
            parse_str($urlParts['query'], $queryParams);
        }

        foreach ($domainRules['urlMods']['query'] as $queryMod) {
            if (isset($queryMod['key']) && isset($queryMod['value'])) {
                $queryParams[$queryMod['key']] = $queryMod['value'];
            }
        }

        $urlParts['query'] = http_build_query($queryParams);
    }

    $modifiedUrl = '';

    if (isset($urlParts['scheme'])) {
        $modifiedUrl .= $urlParts['scheme'] . '://';
    }

    if (isset($urlParts['user'])) {
        $modifiedUrl .= $urlParts['user'];
        if (isset($urlParts['pass'])) {
            $modifiedUrl .= ':' . $urlParts['pass'];
        }
        $modifiedUrl .= '@';
    }

    if (isset($urlParts['host'])) {
        $modifiedUrl .= $urlParts['host'];
    }

    if (isset($urlParts['port'])) {
        $modifiedUrl .= ':' . $urlParts['port'];
    }

    if (isset($urlParts['path'])) {
        $modifiedUrl .= $urlParts['path'];
    }

    // Skip an empty query so no dangling "?" is appended
    if (isset($urlParts['query']) && $urlParts['query'] !== '') {
        $modifiedUrl .= '?' . $urlParts['query'];
    }

    if (isset($urlParts['fragment'])) {
        $modifiedUrl .= '#' . $urlParts['fragment'];
    }

    return $modifiedUrl;
}

/**
 * Fetches content using cURL
 * Handles redirects and custom headers
 */
public function fetchContent($url)
{
$curl = new Curl();
@ -42,6 +143,8 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$host = preg_replace('/^www\./', '', $host);
$domainRules = $this->getDomainRules($host);
$url = $this->applyUrlModifications($url, $domainRules);
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
$curl->setOpt(CURLOPT_MAXREDIRS, 2);
$curl->setOpt(CURLOPT_TIMEOUT, 10);
@ -49,6 +152,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_DNS_SERVERS, implode(',', $this->dnsServers));
$curl->setOpt(CURLOPT_ENCODING, '');
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->setHeaders([
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
@ -97,6 +207,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
*/
public function fetchFromWaybackMachine($url)
{
$domainHost = parse_url($url, PHP_URL_HOST);
if ($domainHost) {
$domainHost = preg_replace('/^www\./', '', $domainHost);
$domainRules = $this->getDomainRules($domainHost);
$url = $this->applyUrlModifications($url, $domainRules);
}
$url = preg_replace('#^https?://#', '', $url);
$availabilityUrl = "https://archive.org/wayback/available?url=" . urlencode($url);
@ -106,6 +223,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setUserAgent($this->getRandomUserAgent());
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->get($availabilityUrl);
if ($curl->error) {
@ -130,6 +254,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setUserAgent($this->getRandomUserAgent());
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->get($archiveUrl);
if ($curl->error || $curl->httpStatusCode !== 200 || empty($curl->response)) {
@ -152,16 +283,32 @@ class URLAnalyzerFetch extends URLAnalyzerBase
{
$host = 'http://'.SELENIUM_HOST.'/wd/hub';
$domainHost = parse_url($url, PHP_URL_HOST);
if ($domainHost) {
$domainHost = preg_replace('/^www\./', '', $domainHost);
$domainRules = $this->getDomainRules($domainHost);
$url = $this->applyUrlModifications($url, $domainRules);
}
$useProxy = isset($domainRules['proxy']) && $domainRules['proxy'] === true;
$proxy = $useProxy ? $this->getRandomProxy() : null;
if ($browser === 'chrome') {
$options = new ChromeOptions();
$options->addArguments([
$arguments = [
'--headless',
'--disable-gpu',
'--no-sandbox',
'--disable-dev-shm-usage',
'--disable-images',
'--blink-settings=imagesEnabled=false'
]);
];
if ($useProxy && $proxy) {
$arguments[] = '--proxy-server=' . $proxy;
}
$options->addArguments($arguments);
$capabilities = DesiredCapabilities::chrome();
$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
@ -174,6 +321,22 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$profile->setPreference("network.http.referer.spoofSource", true);
$profile->setPreference("network.http.referer.trimmingPolicy", 0);
// Route Firefox through the proxy picked for this domain, when one was selected.
if ($useProxy && $proxy) {
// Break the proxy URL into its components; both host and port are required below.
$proxyParts = parse_url($proxy);
if (isset($proxyParts['host']) && isset($proxyParts['port'])) {
// NOTE(review): 1 is presumably Firefox's "manual proxy configuration" mode - confirm against Mozilla's pref docs.
$profile->setPreference("network.proxy.type", 1);
// The same host/port pair is applied to both plain HTTP and SSL (HTTPS) traffic.
$profile->setPreference("network.proxy.http", $proxyParts['host']);
$profile->setPreference("network.proxy.http_port", $proxyParts['port']);
$profile->setPreference("network.proxy.ssl", $proxyParts['host']);
$profile->setPreference("network.proxy.ssl_port", $proxyParts['port']);
// Credentials are only written when the proxy URL carries both a user and a password.
// NOTE(review): network.proxy.username / network.proxy.password do not look like prefs
// Firefox acts on - confirm that authenticated proxies actually work through this path.
if (isset($proxyParts['user']) && isset($proxyParts['pass'])) {
$profile->setPreference("network.proxy.username", $proxyParts['user']);
$profile->setPreference("network.proxy.password", $proxyParts['pass']);
}
}
}
$options = new FirefoxOptions();
$options->setProfile($profile);

View file

@ -18,7 +18,17 @@ class URLAnalyzerUtils extends URLAnalyzerBase
$curl->setOpt(CURLOPT_TIMEOUT, 5);
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setOpt(CURLOPT_NOBODY, true);
$curl->setUserAgent($this->getRandomUserAgent());
// Resolve through Google Public DNS: 8.8.8.8 (primary) and 8.8.4.4 (secondary).
// The secondary was previously written as 8.4.4.8, a transposition that does not
// point at a Google resolver.
$curl->setOpt(CURLOPT_DNS_SERVERS, '8.8.8.8,8.8.4.4');
// Send Googlebot-style request headers for this check.
// NOTE(review): 'Chrome/W.X.Y.Z' is sent verbatim; it looks like an unfilled version
// placeholder - confirm this is intended.
$curl->setHeaders([
    'User-Agent' => 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language' => 'en-US,en;q=0.5',
    'Cache-Control' => 'no-cache',
    'Pragma' => 'no-cache',
    'DNT' => '1',
    // Random address inside 66.249.64.x - 66.249.95.x (presumably mimicking Googlebot's crawl range).
    'X-Forwarded-For' => '66.249.' . rand(64, 95) . '.' . rand(1, 254),
    'From' => 'googlebot(at)googlebot.com'
]);
$curl->get($url);
if ($curl->error) {

View file

@ -21,6 +21,10 @@ return [
'message' => 'Diese Seite ist nicht erlaubt.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Die angeforderte Website kann aufgrund von Anfragen ihrer Eigentümer nicht angezeigt werden.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'DNS für die Domain konnte nicht aufgelöst werden. Bitte überprüfe, ob die URL korrekt ist.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Bei der Bearbeitung Ihrer Anfrage ist ein Fehler aufgetreten.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Diese URL enthält eingeschränkten Inhalt und kann aus Sicherheitsgründen nicht verarbeitet werden.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'This domain is blocked for extraction.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'The requested website cannot be displayed due to requests from its owners.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Failed to resolve DNS for the domain. Please verify if the URL is correct.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'An error occurred while processing your request.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'This URL contains restricted content and cannot be processed for security reasons.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este dominio está bloqueado para extracción.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'El sitio web solicitado no se puede mostrar debido a las solicitudes de sus propietarios.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Error al resolver DNS para el dominio. Verifique si la URL es correcta.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Ocurrió un error al procesar su solicitud.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Esta URL contiene contenido restringido y no se puede procesar por razones de seguridad.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este domínio está bloqueado para extração.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'O site solicitado não pode ser exibido por exigência dos seus proprietários.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Falha ao resolver DNS para o domínio. Verifique se a URL está correta.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Ocorreu um erro ao processar sua solicitação.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Esta URL contém conteúdo restrito e não pode ser processada por motivos de segurança.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Этот домен заблокирован для извлечения.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Запрошенный веб-сайт не может быть отображен по запросу его владельцев.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Не удалось разрешить DNS для домена. Проверьте правильность URL.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'При обработке вашего запроса произошла ошибка.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Этот URL-адрес содержит запрещенный контент и не может быть обработан по соображениям безопасности.',
'type' => 'error'
]
]
];

View file

@ -5,6 +5,7 @@ namespace App;
use Inc\Language;
use Inc\URLAnalyzer;
use Inc\URLAnalyzer\URLAnalyzerException;
use Inc\Cache;
/**
* URL Processor
@ -109,6 +110,19 @@ class URLProcessor
} else {
if ($errorType === URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) {
$this->redirect(trim($additionalInfo), $errorType);
} elseif ($errorType === URLAnalyzer::ERROR_DMCA_DOMAIN) {
// For DMCA domains, show the custom message directly instead of redirecting
Language::init(LANGUAGE);
$message = $e->getMessage();
$message_type = 'error';
$url = ''; // Initialize url variable for the view
// Initialize cache for counting
$cache = new \Inc\Cache();
$cache_folder = $cache->getCacheFileCount();
require __DIR__ . '/views/home.php';
exit;
}
$this->redirect(SITE_URL, $errorType);
}

View file

@ -60,6 +60,7 @@
</div>
</div>
</nav>
<div class="fast_buttons">
<div class="extension">
<button class="extension__toggle"><?php echo \Inc\Language::get('nav_extension'); ?></button>
<div class="extension__menu">
@ -67,6 +68,13 @@
<a target="_blank" href="https://chromewebstore.google.com/detail/marreta/ipelapagohjgjcgpncpbmaaacemafppe"><span class="name">Chrome</span><span class="icon icon--chrome"></span></a>
</div>
</div>
<div class="theme-controls">
<button class="theme-toggle" id="themeToggle">
<span class="icon icon--sun"></span>
<span class="icon icon--moon"></span>
</button>
</div>
</div>
</header>
<main>
@ -85,7 +93,8 @@
value="<?php echo htmlspecialchars($url); ?>"
required
pattern="https?://.+"
title="<?php echo \Inc\Language::getMessage('INVALID_URL')['message']; ?>">
title="<?php echo \Inc\Language::getMessage('INVALID_URL')['message']; ?>"
autofocus>
<span class="paste" id="paste"><span class="icon icon--paste"></span></span>
</div>
<button type="submit" alt="<?php echo \Inc\Language::get('analyze_button'); ?>">

View file

@ -16,6 +16,8 @@ services:
- LANGUAGE=${LANGUAGE:-pt-br}
- LOG_LEVEL=${LOG_LEVEL:-WARNING}
- SELENIUM_HOST=${SELENIUM_HOST:-selenium-hub:4444}
- CLEANUP_DAYS=7 # Optional
- PROXY_LIST=url # Optional
restart: unless-stopped
# Selenium
selenium-hub:

View file

@ -109,8 +109,21 @@ nginx -g "daemon off;" &
sleep 3
check_nginx
# Start cron.
# Note: the exit status of `service cron restart` is not checked, so the success
# message below is logged unconditionally.
log_info "Starting Cron..."
service cron restart
log_success "Cron started"
echo -e "\n${GREEN}=== Marreta initialized ===${NC}\n"
# Run the proxy list updater once at start-up.
# A non-zero exit is treated as non-fatal: it is logged at info level and start-up continues.
log_info "Running proxy list updater..."
if php /app/bin/proxy; then
log_success "Proxy list updater completed successfully"
else
log_info "Proxy list updater finished (may not have been configured)"
fi
# Block until the first background job (e.g. the nginx process started with `&` above) exits.
wait -n