Compare commits

...

17 commits
2.1.3 ... main

Author SHA1 Message Date
Renan Bernordi
ef2827a6d2 add cnn selenium 2025-08-16 21:56:20 -03:00
Renan Bernordi
734acedecb fix validate url 2025-08-16 21:53:57 -03:00
Renan Bernordi
7c01bce35f darkmode 2025-07-17 00:54:38 -03:00
Renan Bernordi
bbcbdff8bc add 2025-07-17 00:33:54 -03:00
Renan Bernordi
33b437d8fe fix fetcj 2025-07-17 00:26:05 -03:00
Renan Bernordi
2071d5c2bc add restrict urls 2025-07-06 19:32:52 -03:00
Renan Bernordi
0a57629cff fix bin tasks 2025-07-06 19:25:01 -03:00
Renan Bernordi
4d458fb75f css fixes 2025-06-26 18:32:07 -03:00
Renan Bernordi
deea4d6a2a fixing cli commands 2025-06-26 18:12:51 -03:00
Renan Bernordi
22e836b707 add dmca domains block 2025-06-26 17:38:05 -03:00
Renan Bernordi
01237362c5 zh, teste 2025-05-30 01:00:38 -03:00
Renan Bernordi
08ba5eb1a6 stcatharinesstandard, primeiro teste proxy 2025-05-30 00:58:15 -03:00
Renan Bernordi
80a0bec993 ajuste wp 2025-05-30 00:53:39 -03:00
Renan Bernordi
86be4a69a5 rodar proxy list 2025-05-30 00:52:57 -03:00
Renan Bernordi
33a7569d17 ajuste no comando inicial 2025-05-30 00:46:29 -03:00
Renan Bernordi
3e99e34fa7 validação de regras e proxy 2025-05-27 23:20:22 -03:00
Renan Bernordi
b283965299 adicionado suporte a lista de proxy 2025-05-26 16:39:54 -03:00
42 changed files with 787 additions and 102 deletions

1
.gitignore vendored
View file

@ -4,6 +4,7 @@ composer.lock
app/logs/*.log
app/cache/*.gz
app/cache/database/.sqlite
app/cache/*.json
TODO.md
node_modules

View file

@ -46,9 +46,9 @@ COPY default.conf /etc/nginx/sites-available/default
# Copy and configure initialization script permissions
COPY docker-entrypoint.sh /usr/local/bin/
COPY bin/cleanup /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh \
&& chmod +x /usr/local/bin/cleanup
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /app/bin/cleanup
RUN chmod +x /app/bin/proxy
# Create cache, database, and logs folders
RUN mkdir -p /app/cache /app/cache/database /app/logs
@ -59,7 +59,8 @@ RUN chown -R www-data:www-data /app \
# Configure Cron
RUN touch /app/logs/cron.log
RUN echo '0 * * * * root php "/app/bin/cleanup" >> /app/logs/cron.log 2>&1' >> /etc/crontab
RUN echo '0 * * * * root php "/app/bin/cleanup" >> /app/logs/cleanup.log 2>&1' >> /etc/crontab
RUN echo '0 * * * * root php "/app/bin/proxy" >> /app/logs/proxy.log 2>&1' >> /etc/crontab
EXPOSE 80

View file

@ -27,6 +27,7 @@ Public instance at [marreta.pcdomanual.com](https://marreta.pcdomanual.com)!
- Blocks domains you don't want
- Allows configuring headers and cookies your way
- PHP-FPM and OPcache
- Proxy Support
## 🐳 Installing with Docker
@ -65,6 +66,20 @@ Now just run `docker compose up -d`
- S3 Cache: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Maintenance: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
### 🛡️ DMCA
To block domains from DMCA requests, create the file `app/cache/dmca_domains.json`:
```json
[
{
"host": "exemplo.com.br",
"message": "This content has been blocked on request"
}
]
```
## 🚀 Integrations
- 🤖 **Telegram**: [Official Bot](https://t.me/leissoai_bot)

View file

@ -25,8 +25,10 @@ Instancia publica em [marreta.pcdomanual.com](https://marreta.pcdomanual.com)!
- Remove elementos indesejados
- Cache, cache!
- Bloqueia domínios que você não quer
- Proteção DMCA com mensagens personalizadas
- Permite configurar headers e cookies do seu jeito
- PHP-FPM e OPcache
- Suporte a Proxy
## 🐳 Instalando em Docker
@ -65,6 +67,19 @@ Agora só rodar `docker compose up -d`
- Cache S3: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Manutenção: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
### 🛡️ DMCA
Para bloquear domínios por pedidos de DMCA, crie o arquivo `app/cache/dmca_domains.json`:
```json
[
{
"host": "exemplo.com.br",
"message": "Este conteúdo foi bloqueado a pedido"
}
]
```
## 🚀 Integrações
- 🤖 **Telegram**: [Bot oficial](https://t.me/leissoai_bot)

View file

@ -47,3 +47,11 @@ DEBUG=false
# Number of days to keep cache files (*.gz)
# If not set, no files will be cleaned
CLEANUP_DAYS=7
# Proxy List Configuration
# URL to download proxy list from (used by bin/proxy script)
# The proxy list should contain proxies in one of these formats:
# 1. http://USER:PASSWORD@HOST:PORT
# 2. IP:PORT:USER:PASSWORD
# Example: PROXY_LIST=https://example.com/proxy-list.txt
PROXY_LIST=

View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-moon-fill" viewBox="0 0 16 16">
<path d="M6 .278a.77.77 0 0 1 .08.858 7.2 7.2 0 0 0-.878 3.46c0 4.021 3.278 7.277 7.318 7.277q.792-.001 1.533-.16a.79.79 0 0 1 .81.316.73.73 0 0 1-.031.893A8.35 8.35 0 0 1 8.344 16C3.734 16 0 12.286 0 7.71 0 4.266 2.114 1.312 5.124.06A.75.75 0 0 1 6 .278"/>
</svg>

After

Width:  |  Height:  |  Size: 394 B

3
app/assets/icons/sun.svg Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-brightness-high-fill" viewBox="0 0 16 16">
<path d="M12 8a4 4 0 1 1-8 0 4 4 0 0 1 8 0M8 0a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 0m0 13a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 13m8-5a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2a.5.5 0 0 1 .5.5M3 8a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2A.5.5 0 0 1 3 8m10.657-5.657a.5.5 0 0 1 0 .707l-1.414 1.415a.5.5 0 1 1-.707-.708l1.414-1.414a.5.5 0 0 1 .707 0m-9.193 9.193a.5.5 0 0 1 0 .707L3.05 13.657a.5.5 0 0 1-.707-.707l1.414-1.414a.5.5 0 0 1 .707 0m9.193 2.121a.5.5 0 0 1-.707 0l-1.414-1.414a.5.5 0 0 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .707M4.464 4.465a.5.5 0 0 1-.707 0L2.343 3.05a.5.5 0 1 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .708"/>
</svg>

After

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

After

Width:  |  Height:  |  Size: 60 KiB

View file

@ -101,4 +101,20 @@ document.addEventListener('DOMContentLoaded', function () {
}
});
}
// Dark mode
const themeToggle = document.getElementById('themeToggle');
const html = document.documentElement;
const savedTheme = localStorage.getItem('theme') || 'light';
html.setAttribute('data-theme', savedTheme);
if (themeToggle) {
themeToggle.addEventListener('click', () => {
const currentTheme = html.getAttribute('data-theme');
const newTheme = currentTheme === 'dark' ? 'light' : 'dark';
html.setAttribute('data-theme', newTheme);
localStorage.setItem('theme', newTheme);
});
}
});

View file

@ -1,10 +1,10 @@
@font-face {
font-family: 'inter';
src: url('../dist/fonts/inter-500.eot');
src: url('/dist/fonts/inter-500.eot');
src: local('Inter Medium'), local('Inter-Medium'),
url('../dist/fonts/inter-500.woff2') format('woff2'),
url('../dist/fonts/inter-500.woff') format('woff'),
url('../dist/fonts/inter-500.ttf') format('truetype');
url('/dist/fonts/inter-500.woff2') format('woff2'),
url('/dist/fonts/inter-500.woff') format('woff'),
url('/dist/fonts/inter-500.ttf') format('truetype');
font-weight: 500;
font-style: normal;
font-display: swap;
@ -12,11 +12,11 @@
@font-face {
font-family: 'inter';
src: url('../dist/fonts/inter-600.eot');
src: url('/dist/fonts/inter-600.eot');
src: local('Inter SemiBold'), local('Inter-SemiBold'),
url('../dist/fonts/inter-600.woff2') format('woff2'),
url('../dist/fonts/inter-600.woff') format('woff'),
url('../dist/fonts/inter-600.ttf') format('truetype');
url('/dist/fonts/inter-600.woff2') format('woff2'),
url('/dist/fonts/inter-600.woff') format('woff'),
url('/dist/fonts/inter-600.ttf') format('truetype');
font-weight: 600;
font-style: normal;
font-display: swap;
@ -24,11 +24,11 @@
@font-face {
font-family: 'unna';
src: url('../dist/fonts/unna-400.eot');
src: url('/dist/fonts/unna-400.eot');
src: local('Unna Regular'), local('Unna-Regular'),
url('../dist/fonts/unna-400.woff2') format('woff2'),
url('../dist/fonts/unna-400.woff') format('woff'),
url('../dist/fonts/unna-400.ttf') format('truetype');
url('/dist/fonts/unna-400.woff2') format('woff2'),
url('/dist/fonts/unna-400.woff') format('woff'),
url('/dist/fonts/unna-400.ttf') format('truetype');
font-weight: 400;
font-style: normal;
font-display: swap;

View file

@ -40,4 +40,7 @@
@include mixin.icon('hamburguer', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('close', 'invert(100%) sepia(32%) saturate(8%) hue-rotate(23deg) brightness(102%) contrast(100%)');
@include mixin.icon('paste', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('paste', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('sun', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('moon', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');

View file

@ -17,7 +17,7 @@
@mixin icon($name, $filter) {
.icon--#{$name} {
background-image: url("../dist/icons/#{$name}.svg");
background-image: url("/dist/icons/#{$name}.svg");
filter: #{$filter};
}
}

View file

@ -27,10 +27,25 @@
--font-weight: 500;
--line-height: 160%;
/* Light theme colors */
@include mixin.create-color('marreta', #3B82F6);
@include mixin.create-color('text', #484848);
@include mixin.create-color('textmuted', #818181);
@include mixin.create-color('link', #3B82F6);
/* Theme-aware colors */
--background: #ffffff;
--surface: #F4F4F5;
--surface-hover: #e4e4e7;
--border: #e4e4e7;
--header-text: #000000;
--nav-mobile-bg: var(--marreta);
--nav-mobile-text: #ffffff;
--nav-desktop-text: #333333;
--nav-desktop-hover: #007bff;
--input-bg: #F4F4F5;
--toast-error: rgb(247, 102, 97);
--toast-warning: rgb(247, 152, 97);
--container_spacing: 24px;
@include mixin.devices(desktop) {
@ -38,6 +53,27 @@
}
}
/* Dark theme */
[data-theme="dark"] {
@include mixin.create-color('marreta', #60A5FA);
@include mixin.create-color('text', #e5e5e5);
@include mixin.create-color('textmuted', #a1a1aa);
@include mixin.create-color('link', #60A5FA);
--background: #000;
--surface: #1f1f1f;
--surface-hover: #2a2a2a;
--border: #2a2a2a;
--header-text: #ffffff;
--nav-mobile-bg: var(--marreta);
--nav-mobile-text: #ffffff;
--nav-desktop-text: #e5e5e5;
--nav-desktop-hover: #60A5FA;
--input-bg: #1f1f1f;
--toast-error: rgb(220, 38, 127);
--toast-warning: rgb(245, 158, 11);
}
html {
scroll-behavior: smooth;
}

View file

@ -46,22 +46,22 @@ body {
}
&--error {
background-color: rgb(247, 102, 97);
background-color: var(--toast-error);
}
&--warning {
background-color: rgb(247, 152, 97);
background-color: var(--toast-warning);
}
}
header {
display: grid;
grid-template-columns: auto 1fr 1fr;
grid-template-columns: auto 1fr auto 1fr;
align-items: center;
padding: 0 0 42px 0;
@include mixin.devices(desktop) {
grid-template-columns: 1fr 2fr 1fr;
grid-template-columns: 1fr 2fr auto 1fr;
}
&.open {
@ -126,7 +126,77 @@ header {
h1 {
font-family: var(--font-family-unna);
color: #000;
color: var(--header-text);
}
}
.fast_buttons {
display: flex;
gap: 8px;
}
.theme-controls {
display: flex;
justify-content: center;
align-items: center;
padding: 0 16px;
@include mixin.devices(desktop) {
padding: 0;
}
.theme-toggle {
background: none;
border: 2px solid var(--border);
border-radius: 50%;
width: 40px;
height: 40px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
position: relative;
transition: all 0.3s ease;
&:hover {
border-color: var(--marreta);
background-color: var(--surface-hover);
}
.icon {
width: 18px;
height: 18px;
position: absolute;
transition: all 0.3s ease;
&--sun {
opacity: 1;
transform: rotate(0deg) scale(1);
}
&--moon {
opacity: 0;
transform: rotate(180deg) scale(0.8);
}
[data-theme="dark"] & {
filter: invert(1);
}
}
[data-theme="dark"] & {
.icon {
&--sun {
opacity: 0;
transform: rotate(-180deg) scale(0.8);
}
&--moon {
opacity: 1;
transform: rotate(0deg) scale(1);
}
}
}
}
}
@ -137,7 +207,7 @@ header {
left: 0;
right: 0;
bottom: 0;
background-color: var(--marreta);
background-color: var(--nav-mobile-bg);
padding: var(--container_spacing) var(--container_spacing) calc(4*var(--container_spacing)) var(--container_spacing);
z-index: 500;
align-items: flex-end;
@ -172,20 +242,20 @@ header {
font-size: 24px;
padding: 16px 0;
border-bottom: 1px solid rgba(255, 255, 255, 0.24);
color: #fff;
color: var(--nav-mobile-text);
text-decoration: none;
@include mixin.devices(desktop) {
color: #333;
color: var(--nav-desktop-text);
font-size: initial;
padding: 0;
border-bottom: 0;
}
&:hover {
color: #fff;
color: var(--nav-mobile-text);
@include mixin.devices(desktop) {
color: #007bff;
color: var(--nav-desktop-hover);
}
}
}
@ -204,14 +274,14 @@ header {
color: rgba(255,255,255,0.5);
padding: 0;
@include mixin.devices(desktop) {
color: #333;
color: var(--nav-desktop-text);
cursor: pointer;
}
&:hover {
color: rgba(255,255,255,0.5);
@include mixin.devices(desktop) {
color: #007bff;
color: var(--nav-desktop-hover);
}
}
}
@ -222,8 +292,8 @@ header {
top: 110%;
left: 0;
border-radius: 16px;
background-color: #F4F4F5;
border: 4px solid #F4F4F5;
background-color: var(--surface);
border: 4px solid var(--surface);
z-index: 10;
box-shadow: 0px 4px 6px 0px rgba(0, 0, 0, 0.05);
box-shadow: 0px 10px 15px 0px rgba(0, 0, 0, 0.1);
@ -246,7 +316,7 @@ header {
font-weight: 600;
@include mixin.devices(desktop) {
background-color: #fff;
background-color: var(--background);
margin-top: 0;
margin-bottom: 4px;
padding: 8px 16px;
@ -358,8 +428,8 @@ header {
top: 110%;
right: 0;
border-radius: 16px;
background-color: #F4F4F5;
border: 4px solid #F4F4F5;
background-color: var(--surface);
border: 4px solid var(--surface);
z-index: 10;
box-shadow: 0px 4px 6px 0px rgba(0, 0, 0, 0.05);
box-shadow: 0px 10px 15px 0px rgba(0, 0, 0, 0.1);
@ -388,7 +458,7 @@ header {
font-weight: 600;
display: block;
padding: 8px 16px;
background-color: #fff;
background-color: var(--background);
display: flex;
align-items: center;
@ -408,7 +478,7 @@ header {
&.open {
.extension__toggle {
background-color: #F4F4F5;
background-color: var(--surface);
color: var(--textmuted);
}
@ -428,7 +498,7 @@ main {
font-size: 64px;
line-height: 61.44px;
text-align: center;
color: #000;
color: var(--header-text);
max-width: 512px;
margin: 0 auto;
}
@ -452,7 +522,7 @@ main {
.fields {
&::before {
content: '';
background-image: url(../assets/images/wall.png);
background-image: url(/assets/images/wall.png);
background-repeat: no-repeat;
background-size: 100% 100%;
width: 422px;
@ -461,6 +531,11 @@ main {
top: -110px;
right: -180px;
z-index: 1;
transition: filter 0.3s ease;
[data-theme="dark"] & {
filter: invert(1);
}
}
max-width: 470px;
@ -485,7 +560,7 @@ main {
}
input {
background-color: #F4F4F5;
background-color: var(--input-bg);
padding: 16px 0 16px 44px;
border: 0;
border-radius: 8px;
@ -493,12 +568,13 @@ main {
box-sizing: border-box;
position: relative;
line-height: 1.3em;
color: var(--text);
}
}
.paste {
background: rgb(244,244,245);
background: linear-gradient(90deg, rgba(244,244,245,0) 0%, rgba(244,244,245,1) 30%, rgba(244,244,245,1) 100%);
background: var(--input-bg);
background: linear-gradient(90deg, transparent 0%, var(--input-bg) 30%, var(--input-bg) 100%);
align-items: center;
z-index: 3;
position: absolute;
@ -508,8 +584,13 @@ main {
cursor: pointer;
height: 48px;
display: flex;
.icon {
transition: filter 0.3s ease;
[data-theme="dark"] & {
filter: invert(1);
}
}
}
@ -557,7 +638,7 @@ main {
.plus {
z-index: 3;
position: relative;
background-color: rgba(244, 244, 245, 1);
background-color: var(--surface);
margin-left: calc(-1*var(--container_spacing));
margin-right: calc(-1*var(--container_spacing));

View file

@ -9,7 +9,7 @@
* If CLEANUP_DAYS is not set, no files will be cleaned.
*/
require_once __DIR__ . '/../app/vendor/autoload.php';
require_once __DIR__ . '/../vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
@ -23,7 +23,7 @@ $climate->br();
$cleanupDays = 0;
try {
$dotenv = Dotenv::createImmutable(__DIR__ . '/../app');
$dotenv = Dotenv::createImmutable(__DIR__ . '/..');
$dotenv->load();
$climate->out('Environment variables loaded');
$cleanupDays = $_ENV['CLEANUP_DAYS'];
@ -33,7 +33,7 @@ try {
}
if (!defined('CACHE_DIR')) {
define('CACHE_DIR', __DIR__ . '/../app/cache');
define('CACHE_DIR', __DIR__ . '/../cache');
}
if ($cleanupDays == 0) {

196
app/bin/proxy Normal file
View file

@ -0,0 +1,196 @@
#!/usr/bin/env php
<?php
/**
* Proxy List Cache Updater
*
* Downloads proxy list from the URL specified in the PROXY_LIST environment variable
* and stores it in the cache directory for reuse.
* This script should be run daily via cron to keep the proxy list updated.
*
* Supported proxy list formats:
* 1. http://USER:PASSWORD@HOST:PORT
* 2. IP:PORT:USER:PASSWORD
*/
require_once __DIR__ . '/../vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
use Curl\Curl;
// Entry point of the proxy cache updater: load the environment, download the
// list from PROXY_LIST, normalise it and store it as JSON in the cache folder.
$climate = new CLImate();
$climate->bold()->out('Proxy List Cache Updater');
$climate->br();

// Load the .env file located one directory above this script. A failure here
// is reported as a warning and ends the run with a success exit code.
try {
    $dotenv = Dotenv::createImmutable(__DIR__ . '/..');
    $dotenv->load();
    $climate->out('Environment variables loaded');
} catch (\Exception $e) {
    $climate->yellow()->out('Warning: ' . $e->getMessage());
    exit(0);
}

if (!defined('CACHE_DIR')) {
    define('CACHE_DIR', __DIR__ . '/../cache');
}

// empty() already covers the "not set" case, so no separate isset() is needed.
if (empty($_ENV['PROXY_LIST'])) {
    $climate->yellow()->out('PROXY_LIST environment variable not set. No proxies to cache.');
    exit(0);
}

$proxyListUrl = $_ENV['PROXY_LIST'];
$proxyCachePath = CACHE_DIR . '/proxy_list.json';

// Download proxy list from URL
$climate->out('Downloading proxy list from: ' . $proxyListUrl);
$proxyList = downloadProxyList($proxyListUrl, $climate);
if ($proxyList === false) {
    $climate->red()->out('Failed to download proxy list from URL: ' . $proxyListUrl);
    exit(1);
}
$climate->green()->out('Proxy list downloaded successfully (' . strlen($proxyList) . ' bytes)');

if (!is_dir(CACHE_DIR)) {
    if (!mkdir(CACHE_DIR, 0755, true)) {
        $climate->red()->out('Failed to create cache directory: ' . CACHE_DIR);
        exit(1);
    }
}

// The content being parsed is the downloaded text, not the variable itself.
$climate->out('Parsing downloaded proxy list...');
$proxies = parseProxyList($proxyList);
if (empty($proxies)) {
    $climate->red()->out('No valid proxies found in PROXY_LIST. Supported formats are:');
    $climate->red()->out('1. http://USER:PASSWORD@HOST:PORT');
    $climate->red()->out('2. IP:PORT:USER:PASSWORD');
    exit(1);
}
$climate->out('Found ' . count($proxies) . ' valid proxies.');

// Encode first so that an encoding failure is reported as such instead of
// surfacing as an empty write.
$proxyJson = json_encode($proxies);
if ($proxyJson === false) {
    $climate->red()->out('Failed to encode proxy list as JSON: ' . json_last_error_msg());
    exit(1);
}

// file_put_contents() returns false on failure and a byte count otherwise.
if (file_put_contents($proxyCachePath, $proxyJson) === false) {
    $climate->red()->out('Failed to write proxy list to cache file: ' . $proxyCachePath);
    exit(1);
}
$climate->green()->out('Proxy list successfully cached to: ' . $proxyCachePath);
/**
 * Convert raw proxy-list text into a list of proxy URLs.
 *
 * Entries are separated by line breaks and/or commas. Two entry formats are
 * recognised; anything else is silently dropped:
 *   1. http(s)://USER:PASSWORD@HOST:PORT  - kept exactly as written
 *   2. IP:PORT:USER:PASSWORD              - rewritten to http://USER:PASSWORD@IP:PORT
 *
 * @param string $proxyListString Raw proxy list text
 * @return array Proxy URLs in order of appearance
 */
function parseProxyList($proxyListString) {
    $urlPattern = '/^https?:\/\/[^:]+:[^@]+@[^:]+:\d+$/i';
    $tuplePattern = '/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+):([^:]+):(.+)$/';

    $result = [];
    foreach (preg_split('/[\r\n,]+/', $proxyListString) as $rawEntry) {
        $entry = trim($rawEntry);
        if (empty($entry)) {
            continue;
        }

        if (preg_match($urlPattern, $entry)) {
            // Already a full proxy URL.
            $result[] = $entry;
        } elseif (preg_match($tuplePattern, $entry, $fields)) {
            // Colon-separated tuple: the password is everything after the
            // third colon, so it may itself contain colons.
            list(, $address, $portNumber, $login, $secret) = $fields;
            $result[] = sprintf('http://%s:%s@%s:%s', $login, $secret, $address, $portNumber);
        }
    }

    return $result;
}
/**
 * Fetch the proxy list from a URL with php-curl-class.
 *
 * When a CLImate instance is supplied, progress and problems are printed to
 * it. Without one, cURL errors and exceptions go to error_log() and nothing
 * else is reported.
 *
 * @param string $url URL to download proxy list from
 * @param CLImate $climate CLImate instance for output (optional)
 * @return string|false The response as exposed by the client on HTTP 200, false otherwise
 */
function downloadProxyList($url, $climate = null) {
    // Shared failure path: print in red when a console is available,
    // otherwise write to the PHP error log. Always yields false.
    $fail = function ($message) use ($climate) {
        if ($climate) {
            $climate->red()->out($message);
        } else {
            error_log($message);
        }
        return false;
    };

    $client = new Curl();

    // Request configuration
    $client->setTimeout(30);
    $client->setConnectTimeout(10);
    $client->setUserAgent('Marreta Proxy Updater/1.0');
    $client->setHeader('Accept', 'text/plain, text/html, */*');
    $client->setHeader('Accept-Encoding', 'gzip, deflate');

    // NOTE(review): peer and host certificate verification are switched off
    // below - confirm this is intended for the proxy list source.
    $rawOptions = [
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 3,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    ];
    foreach ($rawOptions as $option => $value) {
        $client->setOpt($option, $value);
    }

    try {
        if ($climate) {
            $climate->out('Making HTTP request with php-curl-class...');
        }

        $client->get($url);

        if ($client->error) {
            return $fail('cURL request failed: ' . $client->errorMessage . ' (Code: ' . $client->errorCode . ')');
        }

        $httpStatus = $client->httpStatusCode;
        if ($climate) {
            $climate->out('HTTP Status Code: ' . $httpStatus);
        }

        // Anything other than a plain 200 is treated as a failed download.
        if ($httpStatus !== 200) {
            if ($climate) {
                $climate->yellow()->out('Unexpected HTTP status code: ' . $httpStatus);
            }
            return false;
        }

        $body = $client->response;
        if ($climate) {
            $mimeType = $client->responseHeaders['Content-Type'] ?? 'unknown';
            $climate->out('Content-Type: ' . $mimeType);
            $climate->out('Content-Length: ' . strlen($body) . ' bytes');
        }

        return $body;
    } catch (\Exception $e) {
        return $fail('Unexpected error during download: ' . $e->getMessage());
    } finally {
        // Runs on every exit path, including the early returns above.
        $client->close();
    }
}

View file

@ -21,11 +21,6 @@ try {
'SITE_URL'
])->notEmpty();
// Validate URL format
if (!filter_var($_ENV['SITE_URL'], FILTER_VALIDATE_URL)) {
throw new Exception('SITE_URL must be a valid URL');
}
// Core system settings
define('SITE_NAME', $_ENV['SITE_NAME']);
define('SITE_DESCRIPTION', $_ENV['SITE_DESCRIPTION']);
@ -66,6 +61,16 @@ try {
define('BLOCKED_DOMAINS', require __DIR__ . '/data/blocked_domains.php');
define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php');
define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php');
// Load DMCA domains from JSON file
$dmcaDomainsFile = __DIR__ . '/cache/dmca_domains.json';
if (file_exists($dmcaDomainsFile)) {
$dmcaDomainsJson = file_get_contents($dmcaDomainsFile);
$dmcaDomains = json_decode($dmcaDomainsJson, true);
define('DMCA_DOMAINS', is_array($dmcaDomains) ? $dmcaDomains : []);
} else {
define('DMCA_DOMAINS', []);
}
} catch (Dotenv\Exception\ValidationException $e) {
die('Environment Error: ' . $e->getMessage());

View file

@ -13,6 +13,7 @@
* - classAttrRemove: Array of classes to be removed from elements
* - customCode: String containing custom JavaScript code
* - customStyle: String containing custom CSS code
* - proxy: Enable proxy in Guzzle or Selenium requests
* - excludeGlobalRules: Associative array of global rules to exclude for this domain
* Example:
* 'excludeGlobalRules' => [
@ -70,16 +71,19 @@ return [
'gauchazh.clicrbs.com.br' => [
'idElementRemove' => ['paywallTemplate'],
'classAttrRemove' => ['m-paid-content', 'paid-content-apply'],
'scriptTagRemove' => ['vendors-8'],
'scriptTagRemove' => ['vendors-9','vendors-10','vendors-11'],
'excludeGlobalRules' => [
'classElementRemove' => ['paid-content']
],
'fetchStrategies' => 'fetchFromSelenium',
'proxy' => true,
],
'reuters.com' => [
'classElementRemove' => ['leaderboard__container'],
'fetchStrategies' => 'fetchFromSelenium',
],
'cnn.com' => [
'fetchStrategies' => 'fetchFromSelenium',
],
'lepoint.fr' => [
'classElementRemove' => ['paywall'],
],
@ -182,6 +186,12 @@ return [
'nzherald.co.nz' => [
'fetchStrategies' => 'fetchFromSelenium',
],
'onetz.de' => [
'idElementRemove' => ['checkout-container'],
'classElementRemove' => ['tp-backdrop','dm-nobg'],
'classAttrRemove' => ['field-dnt-body-pp'],
'scriptTagRemove' => ['.js'],
],
'opovo.com.br' => [
'classElementRemove' => ['screen-loading', 'overlay-advise']
],
@ -261,7 +271,7 @@ return [
position: relative !important;
}
',
'fetchStrategies' => 'fetchFromSelenium',
'fetchStrategies' => 'fetchFromWaybackMachine',
'excludeGlobalRules' => [
'scriptTagRemove' => [
'gtm.js',
@ -618,7 +628,8 @@ return [
const headimage = document.querySelectorAll(\'div .aspect-custom\');
headimage.forEach(image => { image.style.filter = \'\'; });
});
'
',
'idElementRemove' => ['wall-bottom-drawer-container']
],
'usatoday.com' => [
'customCode' => '
@ -628,6 +639,12 @@ return [
});
'
],
'stcatharinesstandard.ca' => [
'proxy' => true,
'idElementRemove' => 'access-offers-modal',
'classElementRemove' => 'modal-backdrop',
'classAttrRemove' => ' modal-open'
],
'medium.com' => [
'headers' => [
'Referer' => 'https://t.co/x?amp=1',
@ -714,6 +731,7 @@ return [
'Cache-Control' => 'no-cache',
'Pragma' => 'no-cache'
],
'proxy' => true,
'idElementRemove' => ['test-id-1', 'paywall'],
'classElementRemove' => ['test-class-1'],
'scriptTagRemove' => ['analytics.js', 'test-script.js', 'paywall.js'],

View file

@ -7,6 +7,7 @@
* using the 'excludeGlobalRules' configuration in domain_rules.php
*/
return [
'proxy' => false,
// Classes to be removed from all pages:
'classElementRemove' => [
'subscription',

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

3
app/dist/icons/moon.svg vendored Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-moon-fill" viewBox="0 0 16 16">
<path d="M6 .278a.77.77 0 0 1 .08.858 7.2 7.2 0 0 0-.878 3.46c0 4.021 3.278 7.277 7.318 7.277q.792-.001 1.533-.16a.79.79 0 0 1 .81.316.73.73 0 0 1-.031.893A8.35 8.35 0 0 1 8.344 16C3.734 16 0 12.286 0 7.71 0 4.266 2.114 1.312 5.124.06A.75.75 0 0 1 6 .278"/>
</svg>

After

Width:  |  Height:  |  Size: 394 B

3
app/dist/icons/sun.svg vendored Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-brightness-high-fill" viewBox="0 0 16 16">
<path d="M12 8a4 4 0 1 1-8 0 4 4 0 0 1 8 0M8 0a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 0m0 13a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 13m8-5a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2a.5.5 0 0 1 .5.5M3 8a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2A.5.5 0 0 1 3 8m10.657-5.657a.5.5 0 0 1 0 .707l-1.414 1.415a.5.5 0 1 1-.707-.708l1.414-1.414a.5.5 0 0 1 .707 0m-9.193 9.193a.5.5 0 0 1 0 .707L3.05 13.657a.5.5 0 0 1-.707-.707l1.414-1.414a.5.5 0 0 1 .707 0m9.193 2.121a.5.5 0 0 1-.707 0l-1.414-1.414a.5.5 0 0 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .707M4.464 4.465a.5.5 0 0 1-.707 0L2.343 3.05a.5.5 0 1 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .708"/>
</svg>

After

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -1,2 +1,2 @@
"serviceWorker"in navigator&&window.addEventListener("load",()=>{navigator.serviceWorker.register("/service-worker.js").then(()=>{}).catch(()=>{})}),document.addEventListener("DOMContentLoaded",function(){let t=document.querySelector(".integration");var e=document.querySelector(".integration__toggle");let o=document.querySelector(".extension");var n=document.querySelector(".extension__toggle");let r=e=>{e!==t&&t.classList.remove("open"),e!==o&&o.classList.remove("open")};e.addEventListener("click",e=>{e.stopPropagation(),r(t),t.classList.toggle("open")}),n.addEventListener("click",e=>{e.stopPropagation(),r(o),o.classList.toggle("open")}),t.addEventListener("click",e=>{e.stopPropagation()}),o.addEventListener("click",e=>{e.stopPropagation()}),document.addEventListener("click",()=>{t.classList.remove("open"),o.classList.remove("open")}),document.addEventListener("click",e=>{e=e.target.closest(".toasty");e&&e.remove()}),document.addEventListener("click",e=>{e.target.closest(".open-nav")&&((e=document.querySelector("header")).classList.contains("open")?e.classList.remove("open"):e.classList.add("open"))});e=document.getElementById("paste");let a=document.getElementById("url");e&&a&&e.addEventListener("click",async e=>{e.preventDefault();try{var t=await navigator.clipboard.readText();a.value=t.trim()}catch(e){console.error("Failed to read clipboard contents",e)}})});
"serviceWorker"in navigator&&window.addEventListener("load",()=>{navigator.serviceWorker.register("/service-worker.js").then(()=>{}).catch(()=>{})}),document.addEventListener("DOMContentLoaded",function(){let t=document.querySelector(".integration");var e=document.querySelector(".integration__toggle");let o=document.querySelector(".extension");var n=document.querySelector(".extension__toggle");let a=e=>{e!==t&&t.classList.remove("open"),e!==o&&o.classList.remove("open")};e.addEventListener("click",e=>{e.stopPropagation(),a(t),t.classList.toggle("open")}),n.addEventListener("click",e=>{e.stopPropagation(),a(o),o.classList.toggle("open")}),t.addEventListener("click",e=>{e.stopPropagation()}),o.addEventListener("click",e=>{e.stopPropagation()}),document.addEventListener("click",()=>{t.classList.remove("open"),o.classList.remove("open")}),document.addEventListener("click",e=>{e=e.target.closest(".toasty");e&&e.remove()}),document.addEventListener("click",e=>{e.target.closest(".open-nav")&&((e=document.querySelector("header")).classList.contains("open")?e.classList.remove("open"):e.classList.add("open"))});e=document.getElementById("paste");let r=document.getElementById("url");e&&r&&e.addEventListener("click",async e=>{e.preventDefault();try{var t=await navigator.clipboard.readText();r.value=t.trim()}catch(e){console.error("Failed to read clipboard contents",e)}});n=document.getElementById("themeToggle");let c=document.documentElement;e=localStorage.getItem("theme")||"light";c.setAttribute("data-theme",e),n&&n.addEventListener("click",()=>{var e="dark"===c.getAttribute("data-theme")?"light":"dark";c.setAttribute("data-theme",e),localStorage.setItem("theme",e)})});
//# sourceMappingURL=scripts.js.map

File diff suppressed because one or more lines are too long

View file

@ -32,7 +32,8 @@ class Rules
'fromGoogleBot',
'removeElementsByTag',
'removeCustomAttr',
'urlMods'
'urlMods',
'proxy'
];
/**
@ -92,6 +93,35 @@ class Rules
return $this->getGlobalRules();
}
/**
 * Tells whether the configured domain rules contain an entry for a domain.
 *
 * A domain matches when its base domain equals the base domain of a
 * configured pattern, or when one of its parts (as returned by
 * getDomainParts()) equals the base domain of a configured pattern.
 *
 * @param string $domain Target domain
 * @return bool True when a matching rule entry exists, false otherwise
 */
public function hasDomainRules($domain)
{
    $domainParts = $this->getDomainParts($domain);

    // Resolve every pattern's base domain once instead of on each comparison.
    // NOTE(review): assumes getBaseDomain() has no side effects - confirm.
    $patternBases = [];
    foreach ($this->domainRules as $pattern => $unused) {
        $patternBases[] = $this->getBaseDomain($pattern);
    }

    // Check for exact domain match first
    if (in_array($this->getBaseDomain($domain), $patternBases, true)) {
        return true;
    }

    // Check for partial domain matches
    foreach ($domainParts as $part) {
        if (in_array($part, $patternBases, true)) {
            return true;
        }
    }

    return false;
}
/**
* Combines domain rules with global configuration
* @param array $rules Domain-specific rules
@ -109,12 +139,14 @@ class Rules
if (isset($excludeGlobalRules[$ruleType])) {
if (is_assoc_array($globalTypeRules)) {
$mergedRules[$ruleType] = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$result = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$mergedRules[$ruleType] = is_array($result) ? $result : [];
} else {
$mergedRules[$ruleType] = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$result = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$mergedRules[$ruleType] = is_array($result) ? $result : [];
}
} else {
$mergedRules[$ruleType] = $globalTypeRules;
$mergedRules[$ruleType] = is_array($globalTypeRules) ? $globalTypeRules : [];
}
}
@ -127,10 +159,13 @@ class Rules
}
if (in_array($ruleType, ['cookies', 'headers'])) {
$mergedRules[$ruleType] = array_merge($mergedRules[$ruleType], $domainTypeRules);
$mergedRules[$ruleType] = array_merge(
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
is_array($domainTypeRules) ? $domainTypeRules : []
);
} else {
$mergedRules[$ruleType] = array_values(array_unique(array_merge(
$mergedRules[$ruleType],
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
(array)$domainTypeRules
)));
}

View file

@ -50,6 +50,31 @@ class URLAnalyzer extends URLAnalyzerBase
*/
public function analyze($url)
{
// Extract and validate hostname
$host = parse_url($url, PHP_URL_HOST);
if (!$host) {
$this->error->throwError(self::ERROR_INVALID_URL, '');
}
// Check if URL contains restricted keywords
if ($this->isRestrictedUrl($url)) {
Logger::getInstance()->logUrl($url, 'RESTRICTED_URL');
$this->error->throwError(self::ERROR_RESTRICTED_URL, '');
}
$originalHost = parse_url($url, PHP_URL_HOST);
$host = preg_replace('/^www\./', '', $host);
// Check if domain is in DMCA list FIRST (before any HTTP requests)
foreach (DMCA_DOMAINS as $dmcaEntry) {
if (is_array($dmcaEntry) && isset($dmcaEntry['host'])) {
if ($dmcaEntry['host'] === $host || $dmcaEntry['host'] === $originalHost) {
Logger::getInstance()->logUrl($url, 'DMCA_DOMAIN');
$customMessage = isset($dmcaEntry['message']) ? $dmcaEntry['message'] : '';
$this->error->throwError(self::ERROR_DMCA_DOMAIN, $customMessage);
}
}
}
// Reset activated rules for new analysis
$this->activatedRules = [];
@ -60,27 +85,25 @@ class URLAnalyzer extends URLAnalyzerBase
return $this->process->processContent($rawContent, parse_url($url, PHP_URL_HOST), $url);
}
// Extract and validate hostname
$host = parse_url($url, PHP_URL_HOST);
if (!$host) {
$this->error->throwError(self::ERROR_INVALID_URL, '');
}
$host = preg_replace('/^www\./', '', $host);
// Check if domain is in blocked list
if (in_array($host, BLOCKED_DOMAINS)) {
Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN');
$this->error->throwError(self::ERROR_BLOCKED_DOMAIN, '');
}
// Check HTTP status and handle any errors
$redirectInfo = $this->utils->checkStatus($url);
if ($redirectInfo['httpCode'] !== 200) {
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
if ($redirectInfo['httpCode'] === 404) {
$this->error->throwError(self::ERROR_NOT_FOUND, '');
} else {
$this->error->throwError(self::ERROR_HTTP_ERROR, (string)$redirectInfo['httpCode']);
// Check if domain has specific rules by looking for domain-specific configurations
$hasCustomRules = $this->hasDomainRules($host);
// Check HTTP status and handle any errors only if domain doesn't have custom rules
if (!$hasCustomRules) {
$redirectInfo = $this->utils->checkStatus($url);
if ($redirectInfo['httpCode'] !== 200) {
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
if ($redirectInfo['httpCode'] === 404) {
$this->error->throwError(self::ERROR_NOT_FOUND, '');
} else {
$this->error->throwError(self::ERROR_HTTP_ERROR, (string)$redirectInfo['httpCode']);
}
}
}

View file

@ -22,23 +22,27 @@ class URLAnalyzerBase
/** @var string Error constants for different failure scenarios */
const ERROR_INVALID_URL = 'INVALID_URL';
const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN';
const ERROR_DMCA_DOMAIN = 'DMCA_DOMAIN';
const ERROR_NOT_FOUND = 'NOT_FOUND';
const ERROR_HTTP_ERROR = 'HTTP_ERROR';
const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR';
const ERROR_DNS_FAILURE = 'DNS_FAILURE';
const ERROR_CONTENT_ERROR = 'CONTENT_ERROR';
const ERROR_GENERIC_ERROR = 'GENERIC_ERROR';
const ERROR_RESTRICTED_URL = 'RESTRICTED_URL';
/** @var array Maps error types to HTTP codes and message keys */
protected $errorMap = [
self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'],
self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'],
self::ERROR_DMCA_DOMAIN => ['code' => 403, 'message_key' => 'DMCA_DOMAIN'],
self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'],
self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'],
self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'],
self::ERROR_DNS_FAILURE => ['code' => 504, 'message_key' => 'DNS_FAILURE'],
self::ERROR_CONTENT_ERROR => ['code' => 502, 'message_key' => 'CONTENT_ERROR'],
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR']
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR'],
self::ERROR_RESTRICTED_URL => ['code' => 403, 'message_key' => 'RESTRICTED_URL']
];
/** @var array List of user agents to rotate through, including Googlebot */
@ -113,4 +117,59 @@ class URLAnalyzerBase
{
return $this->rules->getDomainRules($domain);
}
/**
 * Tells whether custom rules are configured for a domain
 * @param string $domain Domain (host) to look up
 * @return bool True if the domain has custom rules, false otherwise
 */
protected function hasDomainRules($domain)
{
    // Delegate the lookup to the rules component
    $hasCustomRules = $this->rules->hasDomainRules($domain);

    return $hasCustomRules;
}
/**
 * Check if URL contains restricted keywords
 *
 * The comparison is a case-insensitive substring search over the whole URL.
 * Both the URL as given and its percent-decoded form are inspected, so a
 * keyword cannot be hidden behind URL encoding (e.g. "/%6Cogin").
 *
 * @param string $url The URL to check
 * @return bool True if URL contains restricted keywords, false otherwise
 */
protected function isRestrictedUrl($url)
{
    $restrictedKeywords = [
        'login',
        'signin',
        'sign-in',
        'signup',
        'sign-up',
        'register',
        'registration',
        'lost-password',
        'forgot-password',
        'reset-password',
        'password',
        'auth',
        'authentication',
        'account',
        'profile',
        'dashboard',
        'admin',
        'member',
        'subscription',
        'subscribe',
        'premium',
        'checkout',
        'payment',
        'billing'
    ];

    $rawUrl = strtolower((string)$url);
    $decodedUrl = strtolower(rawurldecode((string)$url));

    // Keep the raw form as well: decoding can only add matches, never drop one
    $candidates = ($decodedUrl === $rawUrl) ? [$rawUrl] : [$rawUrl, $decodedUrl];

    foreach ($candidates as $candidate) {
        foreach ($restrictedKeywords as $keyword) {
            if (strpos($candidate, $keyword) !== false) {
                return true;
            }
        }
    }

    return false;
}
}

View file

@ -14,10 +14,17 @@ class URLAnalyzerError extends URLAnalyzerBase
public function throwError($errorType, $additionalInfo = '')
{
$errorConfig = $this->errorMap[$errorType];
$message = Language::getMessage($errorConfig['message_key'])['message'];
if ($additionalInfo) {
$message .= ': ' . $additionalInfo;
// For DMCA domains, use custom message if provided, otherwise use default
if ($errorType === self::ERROR_DMCA_DOMAIN && !empty($additionalInfo)) {
$message = $additionalInfo;
} else {
$message = Language::getMessage($errorConfig['message_key'])['message'];
if ($additionalInfo && $errorType !== self::ERROR_DMCA_DOMAIN) {
$message .= ': ' . $additionalInfo;
}
}
throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo);
}
}

View file

@ -21,10 +21,44 @@ class URLAnalyzerFetch extends URLAnalyzerBase
/** @var array List of available proxies */
private $proxyList = [];

/** @var string Path to proxy cache file */
private $proxyCachePath = '';

/**
 * Sets up the fetch handler with error handling capability
 * and loads the cached proxy list
 */
public function __construct()
{
    parent::__construct();
    $this->error = new URLAnalyzerError();
    // Cache file location, resolved relative to this source file
    $this->proxyCachePath = __DIR__ . '/../../cache/proxy_list.json';
    $this->loadProxyList();
}
/**
 * Loads proxy list from cache if available
 *
 * The cache is only read when the PROXY_LIST environment variable is set and
 * the cache file exists. Empty, unreadable or malformed cache content leaves
 * the current proxy list untouched, so $proxyList is always an array.
 *
 * @return void
 */
private function loadProxyList()
{
    if (!isset($_ENV['PROXY_LIST']) || !file_exists($this->proxyCachePath)) {
        return;
    }

    $cachedList = file_get_contents($this->proxyCachePath);
    if (empty($cachedList)) {
        return;
    }

    // json_decode() returns null for malformed JSON and a scalar for scalar
    // documents; only an array is a usable proxy list
    $decodedList = json_decode($cachedList, true);
    if (is_array($decodedList)) {
        $this->proxyList = $decodedList;
    }
}
/**
 * Gets a random proxy from the list
 * @return string|null Random proxy URL or null if none available
 */
private function getRandomProxy()
{
    // array_rand() requires a non-empty array; the list can be something else
    // if the cache file held valid JSON that was not a list
    if (!is_array($this->proxyList) || empty($this->proxyList)) {
        return null;
    }

    return $this->proxyList[array_rand($this->proxyList)];
}
/**
@ -45,27 +79,22 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$urlParts = parse_url($url);
// Handle query modifications
if (isset($domainRules['urlMods']['query']) && is_array($domainRules['urlMods']['query'])) {
$queryParams = [];
// Parse existing query parameters if any
if (isset($urlParts['query'])) {
parse_str($urlParts['query'], $queryParams);
}
// Apply query modifications
foreach ($domainRules['urlMods']['query'] as $queryMod) {
if (isset($queryMod['key']) && isset($queryMod['value'])) {
$queryParams[$queryMod['key']] = $queryMod['value'];
}
}
// Rebuild query string
$urlParts['query'] = http_build_query($queryParams);
}
// Rebuild URL
$modifiedUrl = '';
if (isset($urlParts['scheme'])) {
@ -114,7 +143,6 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$host = preg_replace('/^www\./', '', $host);
$domainRules = $this->getDomainRules($host);
// Apply URL modifications if any
$url = $this->applyUrlModifications($url, $domainRules);
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
@ -123,6 +151,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setOpt(CURLOPT_DNS_SERVERS, implode(',', $this->dnsServers));
$curl->setOpt(CURLOPT_ENCODING, '');
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->setHeaders([
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@ -172,7 +207,6 @@ class URLAnalyzerFetch extends URLAnalyzerBase
*/
public function fetchFromWaybackMachine($url)
{
// Apply URL modifications if any
$domainHost = parse_url($url, PHP_URL_HOST);
if ($domainHost) {
$domainHost = preg_replace('/^www\./', '', $domainHost);
@ -188,6 +222,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_TIMEOUT, 10);
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setUserAgent($this->getRandomUserAgent());
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->get($availabilityUrl);
@ -212,6 +253,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_TIMEOUT, 10);
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setUserAgent($this->getRandomUserAgent());
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->get($archiveUrl);
@ -235,7 +283,6 @@ class URLAnalyzerFetch extends URLAnalyzerBase
{
$host = 'http://'.SELENIUM_HOST.'/wd/hub';
// Apply URL modifications if any
$domainHost = parse_url($url, PHP_URL_HOST);
if ($domainHost) {
$domainHost = preg_replace('/^www\./', '', $domainHost);
@ -243,16 +290,25 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$url = $this->applyUrlModifications($url, $domainRules);
}
$useProxy = isset($domainRules['proxy']) && $domainRules['proxy'] === true;
$proxy = $useProxy ? $this->getRandomProxy() : null;
if ($browser === 'chrome') {
$options = new ChromeOptions();
$options->addArguments([
$arguments = [
'--headless',
'--disable-gpu',
'--no-sandbox',
'--disable-dev-shm-usage',
'--disable-images',
'--blink-settings=imagesEnabled=false'
]);
];
if ($useProxy && $proxy) {
$arguments[] = '--proxy-server=' . $proxy;
}
$options->addArguments($arguments);
$capabilities = DesiredCapabilities::chrome();
$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
@ -264,6 +320,22 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$profile->setPreference("network.http.referer.defaultReferer", "https://www.google.com");
$profile->setPreference("network.http.referer.spoofSource", true);
$profile->setPreference("network.http.referer.trimmingPolicy", 0);
if ($useProxy && $proxy) {
$proxyParts = parse_url($proxy);
if (isset($proxyParts['host']) && isset($proxyParts['port'])) {
$profile->setPreference("network.proxy.type", 1);
$profile->setPreference("network.proxy.http", $proxyParts['host']);
$profile->setPreference("network.proxy.http_port", $proxyParts['port']);
$profile->setPreference("network.proxy.ssl", $proxyParts['host']);
$profile->setPreference("network.proxy.ssl_port", $proxyParts['port']);
if (isset($proxyParts['user']) && isset($proxyParts['pass'])) {
$profile->setPreference("network.proxy.username", $proxyParts['user']);
$profile->setPreference("network.proxy.password", $proxyParts['pass']);
}
}
}
$options = new FirefoxOptions();
$options->setProfile($profile);

View file

@ -18,7 +18,17 @@ class URLAnalyzerUtils extends URLAnalyzerBase
$curl->setOpt(CURLOPT_TIMEOUT, 5);
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setOpt(CURLOPT_NOBODY, true);
$curl->setUserAgent($this->getRandomUserAgent());
// Google Public DNS resolvers: 8.8.8.8 (primary) and 8.8.4.4 (secondary)
$curl->setOpt(CURLOPT_DNS_SERVERS, '8.8.8.8,8.8.4.4');
$curl->setHeaders([
'User-Agent' => 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
'Cache-Control' => 'no-cache',
'Pragma' => 'no-cache',
'DNT' => '1',
'X-Forwarded-For' => '66.249.' . rand(64, 95) . '.' . rand(1, 254),
'From' => 'googlebot(at)googlebot.com'
]);
$curl->get($url);
if ($curl->error) {

View file

@ -21,6 +21,10 @@ return [
'message' => 'Diese Seite ist nicht erlaubt.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Die angeforderte Website kann aufgrund von Anfragen ihrer Eigentümer nicht angezeigt werden.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'DNS für die Domain konnte nicht aufgelöst werden. Bitte überprüfe, ob die URL korrekt ist.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Bei der Bearbeitung Ihrer Anfrage ist ein Fehler aufgetreten.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Diese URL enthält eingeschränkten Inhalt und kann aus Sicherheitsgründen nicht verarbeitet werden.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'This domain is blocked for extraction.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'The requested website cannot be displayed due to requests from its owners.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Failed to resolve DNS for the domain. Please verify if the URL is correct.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'An error occurred while processing your request.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'This URL contains restricted content and cannot be processed for security reasons.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este dominio está bloqueado para extracción.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'El sitio web solicitado no se puede mostrar debido a las solicitudes de sus propietarios.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Error al resolver DNS para el dominio. Verifique si la URL es correcta.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Ocurrió un error al procesar su solicitud.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Esta URL contiene contenido restringido y no se puede procesar por razones de seguridad.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este domínio está bloqueado para extração.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'O site solicitado não pode ser exibido por exigência dos seus proprietários.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Falha ao resolver DNS para o domínio. Verifique se a URL está correta.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Ocorreu um erro ao processar sua solicitação.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Esta URL contém conteúdo restrito e não pode ser processada por motivos de segurança.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Этот домен заблокирован для извлечения.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Запрошенный веб-сайт не может быть отображен по запросу его владельцев.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Не удалось разрешить DNS для домена. Проверьте правильность URL.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'При обработке вашего запроса произошла ошибка.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Этот URL-адрес содержит запрещенный контент и не может быть обработан по соображениям безопасности.',
'type' => 'error'
]
]
];

View file

@ -5,6 +5,7 @@ namespace App;
use Inc\Language;
use Inc\URLAnalyzer;
use Inc\URLAnalyzer\URLAnalyzerException;
use Inc\Cache;
/**
* URL Processor
@ -109,6 +110,19 @@ class URLProcessor
} else {
if ($errorType === URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) {
$this->redirect(trim($additionalInfo), $errorType);
} elseif ($errorType === URLAnalyzer::ERROR_DMCA_DOMAIN) {
// For DMCA domains, show the custom message directly instead of redirecting
Language::init(LANGUAGE);
$message = $e->getMessage();
$message_type = 'error';
$url = ''; // Initialize url variable for the view
// Initialize cache for counting
$cache = new \Inc\Cache();
$cache_folder = $cache->getCacheFileCount();
require __DIR__ . '/views/home.php';
exit;
}
$this->redirect(SITE_URL, $errorType);
}

View file

@ -60,11 +60,19 @@
</div>
</div>
</nav>
<div class="extension">
<button class="extension__toggle"><?php echo \Inc\Language::get('nav_extension'); ?></button>
<div class="extension__menu">
<a target="_blank" href="https://addons.mozilla.org/pt-BR/firefox/addon/marreta/"><span class="name">Firefox</span><span class="icon icon--firefox"></span></a>
<a target="_blank" href="https://chromewebstore.google.com/detail/marreta/ipelapagohjgjcgpncpbmaaacemafppe"><span class="name">Chrome</span><span class="icon icon--chrome"></span></a>
<div class="fast_buttons">
<div class="extension">
<button class="extension__toggle"><?php echo \Inc\Language::get('nav_extension'); ?></button>
<div class="extension__menu">
<a target="_blank" href="https://addons.mozilla.org/pt-BR/firefox/addon/marreta/"><span class="name">Firefox</span><span class="icon icon--firefox"></span></a>
<a target="_blank" href="https://chromewebstore.google.com/detail/marreta/ipelapagohjgjcgpncpbmaaacemafppe"><span class="name">Chrome</span><span class="icon icon--chrome"></span></a>
</div>
</div>
<div class="theme-controls">
<button class="theme-toggle" id="themeToggle">
<span class="icon icon--sun"></span>
<span class="icon icon--moon"></span>
</button>
</div>
</div>
</header>

View file

@ -17,6 +17,7 @@ services:
- LOG_LEVEL=${LOG_LEVEL:-WARNING}
- SELENIUM_HOST=${SELENIUM_HOST:-selenium-hub:4444}
- CLEANUP_DAYS=7 # Optional
- PROXY_LIST=url # Optional
restart: unless-stopped
# Selenium
selenium-hub:

View file

@ -116,6 +116,14 @@ log_success "Cron started"
echo -e "\n${GREEN}=== Marreta initialized ===${NC}\n"
# Run proxy list updater
log_info "Running proxy list updater..."
if php /app/bin/proxy; then
log_success "Proxy list updater completed successfully"
else
log_info "Proxy list updater finished (may not have been configured)"
fi
# Wait for any process to exit
wait -n