Compare commits

...

34 commits
2.0.1 ... main

Author SHA1 Message Date
Renan Bernordi
ef2827a6d2 add cnn selenium 2025-08-16 21:56:20 -03:00
Renan Bernordi
734acedecb fix validate url 2025-08-16 21:53:57 -03:00
Renan Bernordi
7c01bce35f darkmode 2025-07-17 00:54:38 -03:00
Renan Bernordi
bbcbdff8bc add 2025-07-17 00:33:54 -03:00
Renan Bernordi
33b437d8fe fix fetcj 2025-07-17 00:26:05 -03:00
Renan Bernordi
2071d5c2bc add restrict urls 2025-07-06 19:32:52 -03:00
Renan Bernordi
0a57629cff fix bin tasks 2025-07-06 19:25:01 -03:00
Renan Bernordi
4d458fb75f css fixes 2025-06-26 18:32:07 -03:00
Renan Bernordi
deea4d6a2a fixing cli commands 2025-06-26 18:12:51 -03:00
Renan Bernordi
22e836b707 add dmca domains block 2025-06-26 17:38:05 -03:00
Renan Bernordi
01237362c5 zh, teste 2025-05-30 01:00:38 -03:00
Renan Bernordi
08ba5eb1a6 stcatharinesstandard, primeiro teste proxy 2025-05-30 00:58:15 -03:00
Renan Bernordi
80a0bec993 ajuste wp 2025-05-30 00:53:39 -03:00
Renan Bernordi
86be4a69a5 rodar proxy list 2025-05-30 00:52:57 -03:00
Renan Bernordi
33a7569d17 ajuste no comando inicial 2025-05-30 00:46:29 -03:00
Renan Bernordi
3e99e34fa7 validação de regras e proxy 2025-05-27 23:20:22 -03:00
Renan Bernordi
b283965299 adicionado suporte a lista de proxy 2025-05-26 16:39:54 -03:00
Renan Bernordi
86e6c9b838 integração com regras do periscope 2025-05-26 13:15:08 -03:00
Renan Bernordi
99258b0376 nova regra de modificador de url 2025-05-26 13:14:55 -03:00
Renan Bernordi
ee6f57aa43 marreta recursiva #36 2025-05-02 10:36:30 -03:00
Renan Bernordi
5409407833 autofocus #34 2025-05-02 10:33:43 -03:00
Renan Bernordi
f09a861cd1 novas regras de dominios, issue #33 2025-03-04 17:51:15 -03:00
Renan Bernordi
7d449b5229 delete sqlite 2025-03-04 17:50:07 -03:00
Renan Bernordi
5ca8403afc função de limpeza de cache 2025-02-28 17:15:10 -03:00
Renan Bernordi
91176050c0 adicionada ferramenta para limpar cache 2025-02-28 11:29:46 -03:00
Renan Bernordi
abb1966b33 ajuste (Package 'sqlite3', required by 'virtual:world', not found) 2025-02-28 11:01:09 -03:00
Renan Bernordi
badd23ba7c migrado do redis para sqlite, no futuro tera rotinas para limpar caches 2025-02-28 10:55:38 -03:00
Renan Bernordi
602fc277dd adicionado colar 2025-02-21 00:03:17 -03:00
Renan Bernordi
8f277a648e novas regras 2025-02-16 00:11:05 -03:00
Renan Bernordi
4079f568ba ajuste de regra 2025-02-13 18:59:34 -03:00
Renan Bernordi
30ad1d9113 issue 29 2025-02-09 15:07:06 -03:00
Renan Bernordi
72a5c6781f melhorias no docker compose 2025-02-08 02:07:58 -03:00
Renan Bernordi
d921dcd115 readme simplificado 2025-02-08 02:07:48 -03:00
Renan Bernordi
9a257efd46 suporte a dockerhub deploy 2025-02-08 01:46:09 -03:00
50 changed files with 1779 additions and 594 deletions

View file

@ -9,6 +9,7 @@ on:
env:
DOCKER_REGISTRY: ghcr.io
DOCKER_IMAGE_NAME: ${{ github.repository }}
DOCKERHUB_REPOSITORY: ${{ secrets.DOCKERHUB_USERNAME }}/marreta
jobs:
docker-build:
@ -38,19 +39,27 @@ jobs:
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_IMAGE_NAME }}
images: |
${{ env.DOCKER_REGISTRY }}/${{ env.DOCKER_IMAGE_NAME }}
${{ env.DOCKERHUB_REPOSITORY }}
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=sha
- name: 🔐 Log in to Registry
- name: 🔐 Log in to GitHub Registry
uses: docker/login-action@v3
with:
registry: ${{ env.DOCKER_REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: 🔐 Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: 🏗️ Build and Push
uses: docker/build-push-action@v5
with:

2
.gitignore vendored
View file

@ -3,6 +3,8 @@ composer.lock
.env
app/logs/*.log
app/cache/*.gz
app/cache/database/.sqlite
app/cache/*.json
TODO.md
node_modules

View file

@ -10,11 +10,11 @@ RUN apt-get update && apt-get install -y \
zip \
git \
htop \
cron \
libzip-dev \
libhiredis-dev \
&& docker-php-ext-install zip opcache \
&& pecl install redis \
&& docker-php-ext-enable redis opcache \
libsqlite3-dev \
&& docker-php-ext-install zip opcache pdo_sqlite \
&& docker-php-ext-enable opcache \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
# Stage 1: Build stage
@ -47,14 +47,21 @@ COPY default.conf /etc/nginx/sites-available/default
# Copy and configure initialization script permissions
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /app/bin/cleanup
RUN chmod +x /app/bin/proxy
# Create cache and logs folders
RUN mkdir -p /app/cache /app/logs
# Create cache, database, and logs folders
RUN mkdir -p /app/cache /app/cache/database /app/logs
# Configure base permissions for /app directory
RUN chown -R www-data:www-data /app \
&& chmod -R 755 /app
# Configure Cron
RUN touch /app/logs/cron.log
RUN echo '0 * * * * root php "/app/bin/cleanup" >> /app/logs/cleanup.log 2>&1' >> /etc/crontab
RUN echo '0 * * * * root php "/app/bin/proxy" >> /app/logs/proxy.log 2>&1' >> /etc/crontab
EXPOSE 80
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

View file

@ -1,7 +1,7 @@
# 🛠️ Marreta
[![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/manualdousuario/marreta/blob/master/README.en.md)
[![pt-br](https://img.shields.io/badge/lang-pt--br-green.svg)](https://github.com/manualdousuario/marreta/blob/master/README.md)
[![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/manualdousuario/marreta/blob/master/README.en.md)
[![Forks](https://img.shields.io/github/forks/manualdousuario/marreta)](https://github.com/manualdousuario/marreta/network/members)
[![Stars](https://img.shields.io/github/stars/manualdousuario/marreta)](https://github.com/manualdousuario/marreta/stargazers)
@ -9,32 +9,33 @@
Marreta is a tool that breaks access barriers and elements that hinder reading!
![Before and after Marreta](https://github.com/manualdousuario/marreta/blob/main/screen.en.png?raw=true)
![Before and after Marreta](https://github.com/manualdousuario/marreta/blob/main/screen.png?raw=true)
Public instance at [marreta.pcdomanual.com](https://marreta.pcdomanual.com)!
## ✨ What's cool about it?
## ✨ What's Cool?
- Cleans and corrects URLs automatically
- Automatically cleans and corrects URLs
- Removes annoying tracking parameters
- Forces HTTPS to keep everything secure
- Changes user agent to avoid blockages
- Leaves the HTML clean and optimized
- Changes user agent to avoid blocking
- Leaves HTML clean and optimized
- Fixes relative URLs on its own
- Allows you to put your own styles and scripts
- Allows you to add your own styles and scripts
- Removes unwanted elements
- Cache, cache!
- Caching, caching!
- Blocks domains you don't want
- Allows you to configure headers and cookies your way
- Allows configuring headers and cookies your way
- PHP-FPM and OPcache
- Proxy Support
## 🐳 Installing with Docker
Install Docker and Docker Compose
Install [Docker and Docker Compose](https://docs.docker.com/engine/install/)
`curl -o ./docker-compose.yml https://raw.githubusercontent.com/manualdousuario/marreta/main/docker-compose.yml`
Now modify it with your settings:
Now modify with your preferences:
`nano docker-compose.yml`
@ -49,180 +50,49 @@ services:
- SITE_NAME=
- SITE_DESCRIPTION=
- SITE_URL=
- LANGUAGE=
```
- `SITE_NAME`: Your Marreta's name
- `SITE_DESCRIPTION`: What it's for
- `SITE_URL`: Where it will run, complete address with `https://`. If you change the port in docker-compose (e.g. 8080:80), you must also include the port in SITE_URL (e.g. https://yoursite:8080)
- `DNS_SERVERS`: Which DNS servers to use `1.1.1.1, 8.8.8.8`
- `SELENIUM_HOST`: Selenium host server:PORT (e.g. selenium-hub:4444)
- `SITE_NAME`: Name of your Marreta
- `SITE_DESCRIPTION`: Explain what it's for
- `SITE_URL`: Where it will run, full address with `https://`. If you change the port in docker-compose (e.g., 8080:80), you must also include the port in SITE_URL (e.g., https://yoursite:8080)
- `SELENIUM_HOST`: Server:PORT of Selenium host (e.g., selenium-hub:4444)
- `LANGUAGE`: pt-br (Brazilian Portuguese), en (English), es (Spanish), de-de (German), ru-ru (Russian)
Now you can run `docker compose up -d`
Now just run `docker compose up -d`
### S3 Cache
### More configurations:
- Selenium: https://github.com/manualdousuario/marreta/wiki/%F0%9F%92%BB-Selenium-Hub-(Chrome-and-Firefox)
- S3 Cache: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Maintenance: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
Support for cache storage in S3. Configure the following variables in your `.env`:
```env
S3_CACHE_ENABLED=true
### 🛡️ DMCA
S3_ACCESS_KEY=access_key
S3_SECRET_KEY=secret_key
S3_BUCKET=bucket_name
S3_REGION=us-east-1
S3_FOLDER_=cache/
S3_ACL=private
S3_ENDPOINT=
```
To block domains from DMCA requests, create the file `app/cache/dmca_domains.json`:
Possible configurations:
```
## R2
S3_ACCESS_KEY=access_key
S3_SECRET_KEY=secret_key
S3_BUCKET=bucket_name
S3_ENDPOINT=https://{TOKEN}.r2.cloudflarestorage.com
S3_REGION=auto
S3_FOLDER_=cache/
S3_ACL=private
## DigitalOcean
S3_ACCESS_KEY=access_key
S3_SECRET_KEY=secret_key
S3_BUCKET=bucket_name
S3_ENDPOINT=https://{REGION}.digitaloceanspaces.com
S3_REGION=auto
S3_FOLDER_=cache/
S3_ACL=private
```
### Selenium Integration
Integration with Selenium allows processing sites that require JavaScript or have some more advanced protection barriers. To use this feature, you need to set up a Selenium environment with Firefox. Add the following configuration to your `docker-compose.yml`:
```yaml
services:
selenium-firefox:
container_name: selenium-firefox
image: selenium/node-firefox:4.27.0-20241204
shm_size: 2gb
environment:
- SE_EVENT_BUS_HOST=selenium-hub
- SE_EVENT_BUS_PUBLISH_PORT=4442
- SE_EVENT_BUS_SUBSCRIBE_PORT=4443
- SE_ENABLE_TRACING=false
- SE_NODE_MAX_SESSIONS=10
- SE_NODE_OVERRIDE_MAX_SESSIONS=true
entrypoint: bash -c 'SE_OPTS="--host $$HOSTNAME" /opt/bin/entry_point.sh'
depends_on:
- selenium-hub
selenium-hub:
image: selenium/hub:4.27.0-20241204
container_name: selenium-hub
environment:
- SE_ENABLE_TRACING=false
- GRID_MAX_SESSION=10
- GRID_BROWSER_TIMEOUT=10
- GRID_TIMEOUT=10
ports:
- 4442:4442
- 4443:4443
- 4444:4444
```
Important settings:
- `shm_size`: Sets the shared memory size for Firefox (2GB recommended)
- `SE_NODE_MAX_SESSIONS`: Maximum number of concurrent sessions per node
- `GRID_MAX_SESSION`: Maximum number of concurrent sessions on the hub
- `GRID_BROWSER_TIMEOUT` and `GRID_TIMEOUT`: Timeouts in seconds
After configuring Selenium, make sure to set the `SELENIUM_HOST` variable in your environment to point to the Selenium hub (usually `selenium-hub:4444`).
## Development
1. First, clone the project:
```bash
git clone https://github.com/manualdousuario/marreta/
cd marreta/app
```
2. Install the project dependencies:
```bash
composer install
npm install
```
3. Create the configuration file:
```bash
cp .env.sample .env
```
4. Configure the environment variables in `.env`
5. Use the `default.conf` as a base for NGINX or point your webservice to `app/`
Gulp is used to compile Sass to CSS, minify JavaScript, use: `gulp`
### ⚙️ Customizing
The settings are organized in `data/`:
- `domain_rules.php`: Specific rules for each site
- `global_rules.php`: Rules that apply to all sites
- `blocked_domains.php`: List of blocked sites
### Translations
- `/languages/`: Each language is in its ISO id (`pt-br, en, es or de-de`) and can be defined in the environment `LANGUAGE`
## 🛠️ Maintenance
### Logging System
Logs are stored in `app/logs/*.log` with automatic rotation every 7 days.
Log settings available in `.env` or docker:
```env
LOG_LEVEL=WARNING
```
Available log levels:
- DEBUG: Detailed information for debugging
- INFO: General information about operations
- WARNING: Warnings that deserve attention (default)
- ERROR: Errors that do not interrupt operation
- CRITICAL: Critical errors that need immediate attention
View the application logs:
```bash
docker-compose logs app
# or directly from the log file
cat app/logs/*.log
```
### Clearing the cache
When you need to clear:
```bash
docker-compose exec app rm -rf /app/cache/*
```json
[
{
"host": "exemplo.com.br",
"message": "This content has been blocked on request"
}
]
```
## 🚀 Integrations
- 🤖 **Telegram**: [Official Bot](https://t.me/leissoai_bot)
- 🦊 **Firefox**: Extension by [Clarissa Mendes](https://claromes.com/pages/whoami) - [Download](https://addons.mozilla.org/pt-BR/firefox/addon/marreta/) | [Source Code](https://github.com/manualdousuario/marreta-extensao)
- 🦊 **Firefox**: Extension by [Clarissa Mendes](https://claromes.com/pages/whoami) - [Download](https://addons.mozilla.org/en-US/firefox/addon/marreta/) | [Source Code](https://github.com/manualdousuario/marreta-extensao)
- 🌀 **Chrome**: Extension by [Clarissa Mendes](https://claromes.com/pages/whoami) - [Download](https://chromewebstore.google.com/detail/marreta/ipelapagohjgjcgpncpbmaaacemafppe) | [Source Code](https://github.com/manualdousuario/marreta-extensao)
- 🦋 **Bluesky**: Bot by [Joselito](https://bsky.app/profile/joseli.to) - [Profile](https://bsky.app/profile/marreta.pcdomanual.com) | [Source Code](https://github.com/manualdousuario/marreta-bot)
- 🍎 **Apple**: Integration with [Shortcuts](https://www.icloud.com/shortcuts/3594074b69ee4707af52ed78922d624f)
---
Made with ❤️! If you have any questions or suggestions, open an issue and we'll help! 😉
Made with ❤️! If you have questions or suggestions, open an issue and we'll help! 😉
Thanks to the [https://github.com/burlesco/burlesco](Burlesco) and [https://github.com/nang-dev/hover-paywalls-browser-extension/](Hover) projects that served as the basis for several rules!
Special thanks to the projects [Burlesco](https://github.com/burlesco/burlesco) and [Hover](https://github.com/nang-dev/hover-paywalls-browser-extension/) which served as the basis for many rules!
## Star History

172
README.md
View file

@ -1,7 +1,7 @@
# 🛠️ Marreta
[![pt-br](https://img.shields.io/badge/lang-pt--br-green.svg)](https://github.com/manualdousuario/marreta/blob/master/README.md)
[![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/manualdousuario/marreta/blob/master/README.en.md)
[![pt-br](https://img.shields.io/badge/lang-pt--br-green.svg)](https://github.com/manualdousuario/marreta/blob/master/README.md)
[![Forks](https://img.shields.io/github/forks/manualdousuario/marreta)](https://github.com/manualdousuario/marreta/network/members)
[![Stars](https://img.shields.io/github/stars/manualdousuario/marreta)](https://github.com/manualdousuario/marreta/stargazers)
@ -25,16 +25,18 @@ Instancia publica em [marreta.pcdomanual.com](https://marreta.pcdomanual.com)!
- Remove elementos indesejados
- Cache, cache!
- Bloqueia domínios que você não quer
- Proteção DMCA com mensagens personalizadas
- Permite configurar headers e cookies do seu jeito
- PHP-FPM e OPcache
- Suporte a Proxy
## 🐳 Instalando em Docker
Instale Docker e Docker Compose
Instale [Docker e Docker Compose](https://docs.docker.com/engine/install/)
`curl -o ./docker-compose.yml https://raw.githubusercontent.com/manualdousuario/marreta/main/docker-compose.yml`
Agora modifique com suas configurações:
Agora modifique com suas preferências:
`nano docker-compose.yml`
@ -49,165 +51,33 @@ services:
- SITE_NAME=
- SITE_DESCRIPTION=
- SITE_URL=
- LANGUAGE=
```
- `SITE_NAME`: Nome do seu Marreta
- `SITE_DESCRIPTION`: Conta pra que serve
- `SITE_URL`: Onde vai rodar, endereço completo com `https://`. Se você alterar a porta no docker-compose (ex: 8080:80), você também deve incluir a porta no SITE_URL (ex: https://seusite:8080)
- `DNS_SERVERS`: Quais servidores DNS usar `1.1.1.1, 8.8.8.8`
- `SELENIUM_HOST`: Servidor:PORTA do host do Selenium (ex: selenium-hub:4444)
-
Agora pode rodar `docker compose up -d`
- `LANGUAGE`: pt-br (Português Brasil), en (Inglês), es (Espanhol), de-de (Alemão) ou ru-ru (Russo)
### Cache S3
Agora só rodar `docker compose up -d`
Suporte de armazenamento do cache em S3. Configure as seguintes variáveis no seu `.env`:
### Mais configurações:
- Selenium: https://github.com/manualdousuario/marreta/wiki/%F0%9F%92%BB-Selenium-Hub-(Chrome-and-Firefox)
- Cache S3: https://github.com/manualdousuario/marreta/wiki/%F0%9F%97%83%EF%B8%8F-Cache-S3
- Manutenção: https://github.com/manualdousuario/marreta/wiki/%F0%9F%9B%A0%EF%B8%8F-Maintenance
```env
S3_CACHE_ENABLED=true
### 🛡️ DMCA
S3_ACCESS_KEY=access_key
S3_SECRET_KEY=secret_key
S3_BUCKET=nome_do_bucket
S3_REGION=us-east-1
S3_FOLDER_=cache/
S3_ACL=private
S3_ENDPOINT=
```
Para bloquear domínios por pedidos de DMCA, crie o arquivo `app/cache/dmca_domains.json`:
Configurações possiveis:
```
## R2
S3_ACCESS_KEY=access_key
S3_SECRET_KEY=secret_key
S3_BUCKET=nome_do_bucket
S3_ENDPOINT=https://{TOKEN}.r2.cloudflarestorage.com
S3_REGION=auto
S3_FOLDER_=cache/
S3_ACL=private
## DigitalOcean
S3_ACCESS_KEY=access_key
S3_SECRET_KEY=secret_key
S3_BUCKET=nome_do_bucket
S3_ENDPOINT=https://{REGIAO}.digitaloceanspaces.com
S3_REGION=auto
S3_FOLDER_=cache/
S3_ACL=private
```
### Integração com Selenium
Integração com Selenium permite processar sites que requerem javascript ou têm algumas barreiras de proteção mais avançadas. Para usar esta funcionalidade, você precisa configurar um ambiente Selenium com Firefox. Adicione a seguinte configuração ao seu `docker-compose.yml`:
```yaml
services:
selenium-firefox:
container_name: selenium-firefox
image: selenium/node-firefox:4.27.0-20241204
shm_size: 2gb
environment:
- SE_EVENT_BUS_HOST=selenium-hub
- SE_EVENT_BUS_PUBLISH_PORT=4442
- SE_EVENT_BUS_SUBSCRIBE_PORT=4443
- SE_ENABLE_TRACING=false
- SE_NODE_MAX_SESSIONS=10
- SE_NODE_OVERRIDE_MAX_SESSIONS=true
entrypoint: bash -c 'SE_OPTS="--host $$HOSTNAME" /opt/bin/entry_point.sh'
depends_on:
- selenium-hub
selenium-hub:
image: selenium/hub:4.27.0-20241204
container_name: selenium-hub
environment:
- SE_ENABLE_TRACING=false
- GRID_MAX_SESSION=10
- GRID_BROWSER_TIMEOUT=10
- GRID_TIMEOUT=10
ports:
- 4442:4442
- 4443:4443
- 4444:4444
```
Configurações importantes:
- `shm_size`: Define o tamanho da memória compartilhada para o Firefox (2GB recomendado)
- `SE_NODE_MAX_SESSIONS`: Número máximo de sessões simultâneas por nó
- `GRID_MAX_SESSION`: Número máximo de sessões simultâneas no hub
- `GRID_BROWSER_TIMEOUT` e `GRID_TIMEOUT`: Timeouts em segundos
Após configurar o Selenium, certifique-se de definir a variável `SELENIUM_HOST` no seu ambiente para apontar para o hub do Selenium (geralmente `selenium-hub:4444`).
## Desenvolvimento
1. Primeiro, clone o projeto:
```bash
git clone https://github.com/manualdousuario/marreta/
cd marreta/app
```
2. Instale as dependências do projeto:
```bash
composer install
npm install
```
3. Cria o arquivo de configuração:
```bash
cp .env.sample .env
```
4. Configure as variáveis de ambiente no `.env`
5. Utilize o `default.conf` como base do NGINX ou aponte seu webservice para `app/`
O Gulp é usado para compilar Sass para CSS, minificar JavaScript, utilize: `gulp`
### ⚙️ Personalizando
As configurações estão organizadas em `data/`:
- `domain_rules.php`: Regras específicas para cada site
- `global_rules.php`: Regras que se aplicam a todos os sites
- `blocked_domains.php`: Lista de sites bloqueados
### Traduções
- `/languages/`: Cada lingua está em seu ISO id (`pt-br, en, es ou de-de`) e pode ser definida no environment `LANGUAGE`
## 🛠️ Manutenção
### Sistema de Logs
Os logs são armazenados em `app/logs/*.log` com rotação automática a cada 7 dias.
Configurações de log disponíveis no `.env` ou docker:
```env
LOG_LEVEL=WARNING
```
Níveis de log disponíveis:
- DEBUG: Informações detalhadas para debug
- INFO: Informações gerais sobre operações
- WARNING: Avisos que merecem atenção (padrão)
- ERROR: Erros que não interrompem a operação
- CRITICAL: Erros críticos que precisam de atenção imediata
Ver os logs da aplicação:
```bash
docker-compose logs app
# ou diretamente do arquivo de log
cat app/logs/*.log
```
### Limpando o cache
Quando precisar limpar:
```bash
docker-compose exec app rm -rf /app/cache/*
```json
[
{
"host": "exemplo.com.br",
"message": "Este conteúdo foi bloqueado a pedido"
}
]
```
## 🚀 Integrações

View file

@ -42,3 +42,16 @@ SELENIUM_HOST=localhost:4444
# Debug Settings
DEBUG=false
# Cache Cleanup Settings
# Number of days to keep cache files (*.gz)
# If not set, no files will be cleaned
CLEANUP_DAYS=7
# Proxy List Configuration
# URL to download proxy list from (used by bin/proxy script)
# The proxy list should contain proxies in one of these formats:
# 1. http://USER:PASSWORD@HOST:PORT
# 2. IP:PORT:USER:PASSWORD
# Example: PROXY_LIST=https://example.com/proxy-list.txt
PROXY_LIST=

View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-moon-fill" viewBox="0 0 16 16">
<path d="M6 .278a.77.77 0 0 1 .08.858 7.2 7.2 0 0 0-.878 3.46c0 4.021 3.278 7.277 7.318 7.277q.792-.001 1.533-.16a.79.79 0 0 1 .81.316.73.73 0 0 1-.031.893A8.35 8.35 0 0 1 8.344 16C3.734 16 0 12.286 0 7.71 0 4.266 2.114 1.312 5.124.06A.75.75 0 0 1 6 .278"/>
</svg>

After

Width:  |  Height:  |  Size: 394 B

3
app/assets/icons/sun.svg Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-brightness-high-fill" viewBox="0 0 16 16">
<path d="M12 8a4 4 0 1 1-8 0 4 4 0 0 1 8 0M8 0a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 0m0 13a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 13m8-5a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2a.5.5 0 0 1 .5.5M3 8a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2A.5.5 0 0 1 3 8m10.657-5.657a.5.5 0 0 1 0 .707l-1.414 1.415a.5.5 0 1 1-.707-.708l1.414-1.414a.5.5 0 0 1 .707 0m-9.193 9.193a.5.5 0 0 1 0 .707L3.05 13.657a.5.5 0 0 1-.707-.707l1.414-1.414a.5.5 0 0 1 .707 0m9.193 2.121a.5.5 0 0 1-.707 0l-1.414-1.414a.5.5 0 0 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .707M4.464 4.465a.5.5 0 0 1-.707 0L2.343 3.05a.5.5 0 1 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .708"/>
</svg>

After

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

After

Width:  |  Height:  |  Size: 60 KiB

View file

@ -85,4 +85,36 @@ document.addEventListener('DOMContentLoaded', function () {
}
}
});
// Paste button: on click, copy the clipboard text (trimmed) into the URL field.
const pasteButton = document.getElementById('paste');
const urlInput = document.getElementById('url');

// Wire the handler only when both elements exist on the page.
if (pasteButton && urlInput) {
    // Named async handler so clipboard failures (permission denied,
    // unavailable API) are caught and logged instead of propagating.
    const handlePasteClick = async function (event) {
        event.preventDefault();
        try {
            const pasted = await navigator.clipboard.readText();
            urlInput.value = pasted.trim();
        } catch (err) {
            console.error('Failed to read clipboard contents', err);
        }
    };

    pasteButton.addEventListener('click', handlePasteClick);
}
// Dark mode: restore the persisted theme, then let the toggle switch it.
const themeToggle = document.getElementById('themeToggle');
const html = document.documentElement;

// Fall back to the light theme when nothing has been stored yet.
const savedTheme = localStorage.getItem('theme') || 'light';
html.setAttribute('data-theme', savedTheme);

if (themeToggle) {
    themeToggle.addEventListener('click', function () {
        // Flip between the two themes based on the attribute currently applied.
        const isDark = html.getAttribute('data-theme') === 'dark';
        const newTheme = isDark ? 'light' : 'dark';

        // Apply the new theme and persist it for the next page load.
        html.setAttribute('data-theme', newTheme);
        localStorage.setItem('theme', newTheme);
    });
}
});

View file

@ -1,10 +1,10 @@
@font-face {
font-family: 'inter';
src: url('../dist/fonts/inter-500.eot');
src: url('/dist/fonts/inter-500.eot');
src: local('Inter Medium'), local('Inter-Medium'),
url('../dist/fonts/inter-500.woff2') format('woff2'),
url('../dist/fonts/inter-500.woff') format('woff'),
url('../dist/fonts/inter-500.ttf') format('truetype');
url('/dist/fonts/inter-500.woff2') format('woff2'),
url('/dist/fonts/inter-500.woff') format('woff'),
url('/dist/fonts/inter-500.ttf') format('truetype');
font-weight: 500;
font-style: normal;
font-display: swap;
@ -12,11 +12,11 @@
@font-face {
font-family: 'inter';
src: url('../dist/fonts/inter-600.eot');
src: url('/dist/fonts/inter-600.eot');
src: local('Inter SemiBold'), local('Inter-SemiBold'),
url('../dist/fonts/inter-600.woff2') format('woff2'),
url('../dist/fonts/inter-600.woff') format('woff'),
url('../dist/fonts/inter-600.ttf') format('truetype');
url('/dist/fonts/inter-600.woff2') format('woff2'),
url('/dist/fonts/inter-600.woff') format('woff'),
url('/dist/fonts/inter-600.ttf') format('truetype');
font-weight: 600;
font-style: normal;
font-display: swap;
@ -24,11 +24,11 @@
@font-face {
font-family: 'unna';
src: url('../dist/fonts/unna-400.eot');
src: url('/dist/fonts/unna-400.eot');
src: local('Unna Regular'), local('Unna-Regular'),
url('../dist/fonts/unna-400.woff2') format('woff2'),
url('../dist/fonts/unna-400.woff') format('woff'),
url('../dist/fonts/unna-400.ttf') format('truetype');
url('/dist/fonts/unna-400.woff2') format('woff2'),
url('/dist/fonts/unna-400.woff') format('woff'),
url('/dist/fonts/unna-400.ttf') format('truetype');
font-weight: 400;
font-style: normal;
font-display: swap;

View file

@ -39,3 +39,8 @@
@include mixin.icon('hamburguer', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('close', 'invert(100%) sepia(32%) saturate(8%) hue-rotate(23deg) brightness(102%) contrast(100%)');
@include mixin.icon('paste', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('sun', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');
@include mixin.icon('moon', 'invert(0%) sepia(21%) saturate(7425%) hue-rotate(12deg) brightness(96%) contrast(96%)');

View file

@ -17,7 +17,7 @@
@mixin icon($name, $filter) {
.icon--#{$name} {
background-image: url("../dist/icons/#{$name}.svg");
background-image: url("/dist/icons/#{$name}.svg");
filter: #{$filter};
}
}

View file

@ -27,17 +27,53 @@
--font-weight: 500;
--line-height: 160%;
/* Light theme colors */
@include mixin.create-color('marreta', #3B82F6);
@include mixin.create-color('text', #484848);
@include mixin.create-color('textmuted', #818181);
@include mixin.create-color('link', #3B82F6);
/* Theme-aware colors */
--background: #ffffff;
--surface: #F4F4F5;
--surface-hover: #e4e4e7;
--border: #e4e4e7;
--header-text: #000000;
--nav-mobile-bg: var(--marreta);
--nav-mobile-text: #ffffff;
--nav-desktop-text: #333333;
--nav-desktop-hover: #007bff;
--input-bg: #F4F4F5;
--toast-error: rgb(247, 102, 97);
--toast-warning: rgb(247, 152, 97);
--container_spacing: 24px;
@include mixin.devices(desktop) {
--container_spacing: 64px;
}
}
/* Dark theme */
[data-theme="dark"] {
@include mixin.create-color('marreta', #60A5FA);
@include mixin.create-color('text', #e5e5e5);
@include mixin.create-color('textmuted', #a1a1aa);
@include mixin.create-color('link', #60A5FA);
--background: #000;
--surface: #1f1f1f;
--surface-hover: #2a2a2a;
--border: #2a2a2a;
--header-text: #ffffff;
--nav-mobile-bg: var(--marreta);
--nav-mobile-text: #ffffff;
--nav-desktop-text: #e5e5e5;
--nav-desktop-hover: #60A5FA;
--input-bg: #1f1f1f;
--toast-error: rgb(220, 38, 127);
--toast-warning: rgb(245, 158, 11);
}
html {
scroll-behavior: smooth;
}

View file

@ -46,22 +46,22 @@ body {
}
&--error {
background-color: rgb(247, 102, 97);
background-color: var(--toast-error);
}
&--warning {
background-color: rgb(247, 152, 97);
background-color: var(--toast-warning);
}
}
header {
display: grid;
grid-template-columns: auto 1fr 1fr;
grid-template-columns: auto 1fr auto 1fr;
align-items: center;
padding: 0 0 42px 0;
@include mixin.devices(desktop) {
grid-template-columns: 1fr 2fr 1fr;
grid-template-columns: 1fr 2fr auto 1fr;
}
&.open {
@ -126,7 +126,77 @@ header {
h1 {
font-family: var(--font-family-unna);
color: #000;
color: var(--header-text);
}
}
.fast_buttons {
display: flex;
gap: 8px;
}
.theme-controls {
display: flex;
justify-content: center;
align-items: center;
padding: 0 16px;
@include mixin.devices(desktop) {
padding: 0;
}
.theme-toggle {
background: none;
border: 2px solid var(--border);
border-radius: 50%;
width: 40px;
height: 40px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
position: relative;
transition: all 0.3s ease;
&:hover {
border-color: var(--marreta);
background-color: var(--surface-hover);
}
.icon {
width: 18px;
height: 18px;
position: absolute;
transition: all 0.3s ease;
&--sun {
opacity: 1;
transform: rotate(0deg) scale(1);
}
&--moon {
opacity: 0;
transform: rotate(180deg) scale(0.8);
}
[data-theme="dark"] & {
filter: invert(1);
}
}
[data-theme="dark"] & {
.icon {
&--sun {
opacity: 0;
transform: rotate(-180deg) scale(0.8);
}
&--moon {
opacity: 1;
transform: rotate(0deg) scale(1);
}
}
}
}
}
@ -137,7 +207,7 @@ header {
left: 0;
right: 0;
bottom: 0;
background-color: var(--marreta);
background-color: var(--nav-mobile-bg);
padding: var(--container_spacing) var(--container_spacing) calc(4*var(--container_spacing)) var(--container_spacing);
z-index: 500;
align-items: flex-end;
@ -172,20 +242,20 @@ header {
font-size: 24px;
padding: 16px 0;
border-bottom: 1px solid rgba(255, 255, 255, 0.24);
color: #fff;
color: var(--nav-mobile-text);
text-decoration: none;
@include mixin.devices(desktop) {
color: #333;
color: var(--nav-desktop-text);
font-size: initial;
padding: 0;
border-bottom: 0;
}
&:hover {
color: #fff;
color: var(--nav-mobile-text);
@include mixin.devices(desktop) {
color: #007bff;
color: var(--nav-desktop-hover);
}
}
}
@ -204,14 +274,14 @@ header {
color: rgba(255,255,255,0.5);
padding: 0;
@include mixin.devices(desktop) {
color: #333;
color: var(--nav-desktop-text);
cursor: pointer;
}
&:hover {
color: rgba(255,255,255,0.5);
@include mixin.devices(desktop) {
color: #007bff;
color: var(--nav-desktop-hover);
}
}
}
@ -222,8 +292,8 @@ header {
top: 110%;
left: 0;
border-radius: 16px;
background-color: #F4F4F5;
border: 4px solid #F4F4F5;
background-color: var(--surface);
border: 4px solid var(--surface);
z-index: 10;
box-shadow: 0px 4px 6px 0px rgba(0, 0, 0, 0.05);
box-shadow: 0px 10px 15px 0px rgba(0, 0, 0, 0.1);
@ -246,7 +316,7 @@ header {
font-weight: 600;
@include mixin.devices(desktop) {
background-color: #fff;
background-color: var(--background);
margin-top: 0;
margin-bottom: 4px;
padding: 8px 16px;
@ -358,8 +428,8 @@ header {
top: 110%;
right: 0;
border-radius: 16px;
background-color: #F4F4F5;
border: 4px solid #F4F4F5;
background-color: var(--surface);
border: 4px solid var(--surface);
z-index: 10;
box-shadow: 0px 4px 6px 0px rgba(0, 0, 0, 0.05);
box-shadow: 0px 10px 15px 0px rgba(0, 0, 0, 0.1);
@ -388,7 +458,7 @@ header {
font-weight: 600;
display: block;
padding: 8px 16px;
background-color: #fff;
background-color: var(--background);
display: flex;
align-items: center;
@ -408,7 +478,7 @@ header {
&.open {
.extension__toggle {
background-color: #F4F4F5;
background-color: var(--surface);
color: var(--textmuted);
}
@ -428,7 +498,7 @@ main {
font-size: 64px;
line-height: 61.44px;
text-align: center;
color: #000;
color: var(--header-text);
max-width: 512px;
margin: 0 auto;
}
@ -452,7 +522,7 @@ main {
.fields {
&::before {
content: '';
background-image: url(../assets/images/wall.png);
background-image: url(/assets/images/wall.png);
background-repeat: no-repeat;
background-size: 100% 100%;
width: 422px;
@ -461,6 +531,11 @@ main {
top: -110px;
right: -180px;
z-index: 1;
transition: filter 0.3s ease;
[data-theme="dark"] & {
filter: invert(1);
}
}
max-width: 470px;
@ -485,7 +560,7 @@ main {
}
input {
background-color: #F4F4F5;
background-color: var(--input-bg);
padding: 16px 0 16px 44px;
border: 0;
border-radius: 8px;
@ -493,6 +568,29 @@ main {
box-sizing: border-box;
position: relative;
line-height: 1.3em;
color: var(--text);
}
}
.paste {
background: var(--input-bg);
background: linear-gradient(90deg, transparent 0%, var(--input-bg) 30%, var(--input-bg) 100%);
align-items: center;
z-index: 3;
position: absolute;
top: 4px;
padding: 0 18px 0 22px;
right: 50px;
cursor: pointer;
height: 48px;
display: flex;
.icon {
transition: filter 0.3s ease;
[data-theme="dark"] & {
filter: invert(1);
}
}
}
@ -540,7 +638,7 @@ main {
.plus {
z-index: 3;
position: relative;
background-color: rgba(244, 244, 245, 1);
background-color: var(--surface);
margin-left: calc(-1*var(--container_spacing));
margin-right: calc(-1*var(--container_spacing));

211
app/bin/cleanup Normal file
View file

@ -0,0 +1,211 @@
#!/usr/bin/env php
<?php
/**
* Cache Cleanup Script
*
* Removes *.gz files from the cache directory that are older than the number
* of days specified in the CLEANUP_DAYS environment variable.
* If CLEANUP_DAYS is not set, no files will be cleaned.
*/
require_once __DIR__ . '/../vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
use Aws\S3\S3Client;
use Aws\Exception\AwsException;
$climate = new CLImate();
$climate->bold()->out('Cache Cleanup Tool');
$climate->br();

// Load the project's .env file. If it cannot be loaded there is no
// configuration to act on, so warn and stop without signalling a failure.
try {
    $dotenv = Dotenv::createImmutable(__DIR__ . '/..');
    $dotenv->load();
    $climate->out('Environment variables loaded');
} catch (\Exception $e) {
    $climate->yellow()->out('Warning: ' . $e->getMessage());
    exit(0);
}

if (!defined('CACHE_DIR')) {
    define('CACHE_DIR', __DIR__ . '/../cache');
}

// CLEANUP_DAYS is optional. Read it with a default so a missing key does not
// raise an "undefined array key" warning, and treat a blank value like 0
// ("cleanup disabled") instead of reporting it as an invalid integer.
$cleanupDays = trim((string)($_ENV['CLEANUP_DAYS'] ?? ''));
if ($cleanupDays === '' || $cleanupDays == 0) {
    $climate->yellow()->out('CLEANUP_DAYS variable not set or 0. No files will be cleaned.');
    exit(0);
}

$cleanupDays = (int)$cleanupDays;
if ($cleanupDays <= 0) {
    $climate->red()->out('CLEANUP_DAYS must be a positive integer. No files will be cleaned.');
    exit(1);
}

// Anything last modified before this Unix timestamp is eligible for deletion.
$cutoffTime = time() - ($cleanupDays * 86400);

// S3_CACHE_ENABLED selects the backend: S3 bucket when truthy, local disk otherwise.
$s3CacheEnabled = isset($_ENV['S3_CACHE_ENABLED']) && filter_var($_ENV['S3_CACHE_ENABLED'], FILTER_VALIDATE_BOOLEAN);
if ($s3CacheEnabled) {
    cleanS3Cache($climate, $cutoffTime, $cleanupDays);
} else {
    cleanDiskCache($climate, $cutoffTime, $cleanupDays);
}
/**
 * Delete cached .gz objects older than the cutoff from the configured S3 bucket.
 *
 * Reads S3_ACCESS_KEY, S3_SECRET_KEY and S3_BUCKET (required) plus S3_REGION,
 * S3_ENDPOINT and S3_FOLDER (optional) from $_ENV. Terminates the script with
 * exit code 1 when a required variable is missing or the listing fails.
 *
 * @param CLImate $climate CLImate instance for output
 * @param int $cutoffTime Objects last modified before this Unix timestamp are deleted
 * @param int $cleanupDays Number of days to keep files (used for messages only)
 * @return void
 */
function cleanS3Cache($climate, $cutoffTime, $cleanupDays) {
    $requiredVars = ['S3_ACCESS_KEY', 'S3_SECRET_KEY', 'S3_BUCKET'];
    foreach ($requiredVars as $var) {
        if (empty($_ENV[$var])) {
            $climate->red()->out("$var environment variable is required for S3 cache cleaning.");
            exit(1);
        }
    }

    $climate->out("S3 cache enabled. Cleaning S3 cache files older than {$cleanupDays} days...");

    $clientConfig = [
        'version' => 'latest',
        'region' => $_ENV['S3_REGION'] ?? 'us-east-1',
        'credentials' => [
            'key' => $_ENV['S3_ACCESS_KEY'],
            'secret' => $_ENV['S3_SECRET_KEY'],
        ]
    ];

    // A custom endpoint is addressed path-style (bucket in the path, not the host).
    if (!empty($_ENV['S3_ENDPOINT'])) {
        $clientConfig['endpoint'] = $_ENV['S3_ENDPOINT'];
        $clientConfig['use_path_style_endpoint'] = true;
    }

    try {
        $s3Client = new S3Client($clientConfig);
        $bucket = $_ENV['S3_BUCKET'];
        $prefix = $_ENV['S3_FOLDER'] ?? 'cache/';

        $climate->out("Listing objects in bucket: {$bucket} with prefix: {$prefix}");

        // Collect every .gz object first so the progress bar has a known total.
        $objects = [];
        $marker = null;
        do {
            $params = [
                'Bucket' => $bucket,
                'Prefix' => $prefix,
                'MaxKeys' => 1000
            ];
            if ($marker !== null) {
                $params['Marker'] = $marker;
            }

            $result = $s3Client->listObjects($params);

            // Copy the page into a plain local array: end() needs a real
            // variable, and the key may be absent on an empty page.
            $contents = $result['Contents'] ?? [];
            foreach ($contents as $object) {
                if (substr($object['Key'], -3) === '.gz') {
                    $objects[] = $object;
                }
            }

            // Prefer the service-provided marker; otherwise continue from the
            // last key of a truncated page.
            $marker = $result['NextMarker'] ?? null;
            if ($marker === null && !empty($result['IsTruncated']) && !empty($contents)) {
                $lastObject = end($contents);
                $marker = $lastObject['Key'];
            }
        } while ($marker !== null && $marker !== '');

        $totalObjects = count($objects);
        $climate->out("Found {$totalObjects} .gz objects in S3 bucket.");

        if ($totalObjects === 0) {
            $climate->out('No .gz objects found in S3 bucket.');
            return;
        }

        $progress = $climate->progress()->total($totalObjects);
        $deletedObjects = 0;

        foreach ($objects as $index => $object) {
            $progress->current($index + 1);

            $lastModified = $object['LastModified'];
            $timestamp = $lastModified instanceof \DateTimeInterface
                ? $lastModified->getTimestamp()
                : strtotime((string)$lastModified);

            // An unparseable date must never count as "old": false compares
            // lower than any cutoff and would delete the object.
            if ($timestamp === false) {
                $climate->yellow()->out("Skipping (unreadable LastModified): " . $object['Key']);
                continue;
            }

            if ($timestamp < $cutoffTime) {
                try {
                    $s3Client->deleteObject([
                        'Bucket' => $bucket,
                        'Key' => $object['Key']
                    ]);
                    $deletedObjects++;
                } catch (AwsException $e) {
                    // Keep going: one failed delete should not abort the whole run.
                    $climate->red()->out("Failed to delete: " . $object['Key'] . " - " . $e->getMessage());
                }
            }
        }

        $climate->br();
        $climate->green()->out("S3 cleanup complete: {$deletedObjects} objects deleted.");
    } catch (AwsException $e) {
        $climate->red()->out("AWS Error: " . $e->getMessage());
        exit(1);
    }
}
/**
 * Delete *.gz files older than the cutoff from the local cache directory.
 *
 * Only files directly inside CACHE_DIR are considered. Terminates the script
 * with exit code 1 when the directory is missing or cannot be listed.
 *
 * @param CLImate $climate CLImate instance for output
 * @param int $cutoffTime Files last modified before this Unix timestamp are deleted
 * @param int $cleanupDays Number of days to keep files (used for messages only)
 * @return void
 */
function cleanDiskCache($climate, $cutoffTime, $cleanupDays) {
    $cacheDir = CACHE_DIR;
    $climate->out("Cleaning cache files older than {$cleanupDays} days from: {$cacheDir}");

    if (!is_dir($cacheDir)) {
        $climate->red()->out("Cache directory not found: {$cacheDir}");
        exit(1);
    }

    // glob() returns false on error; count(false) is a TypeError on PHP 8.
    $gzFiles = glob($cacheDir . '/*.gz');
    if ($gzFiles === false) {
        $climate->red()->out("Failed to read cache directory: {$cacheDir}");
        exit(1);
    }

    $totalFiles = count($gzFiles);
    if ($totalFiles === 0) {
        $climate->out('No .gz files found in cache directory.');
        return;
    }

    $climate->out("Found {$totalFiles} .gz files in cache directory.");
    $progress = $climate->progress()->total($totalFiles);
    $deletedFiles = 0;

    foreach ($gzFiles as $index => $file) {
        $progress->current($index + 1);

        // filemtime() returns false on failure, which would compare as older
        // than any cutoff; never delete a file whose age is unknown.
        $fileTime = filemtime($file);
        if ($fileTime === false) {
            $climate->red()->out("Failed to read modification time: " . basename($file));
            continue;
        }

        if ($fileTime < $cutoffTime) {
            if (unlink($file)) {
                $deletedFiles++;
            } else {
                $climate->red()->out("Failed to delete: " . basename($file));
            }
        }
    }

    $climate->br();
    $climate->green()->out("Disk cleanup complete: {$deletedFiles} files deleted.");
}

196
app/bin/proxy Normal file
View file

@ -0,0 +1,196 @@
#!/usr/bin/env php
<?php
/**
* Proxy List Cache Updater
*
* Downloads proxy list from the URL specified in the PROXY_LIST environment variable
* and stores it in the cache directory for reuse.
* This script should be run daily via cron to keep the proxy list updated.
*
* Supported proxy list formats:
* 1. http://USER:PASSWORD@HOST:PORT
* 2. IP:PORT:USER:PASSWORD
*/
require_once __DIR__ . '/../vendor/autoload.php';
use League\CLImate\CLImate;
use Dotenv\Dotenv;
use Curl\Curl;
$climate = new CLImate();
$climate->bold()->out('Proxy List Cache Updater');
$climate->br();

// Load the project's .env file. If it cannot be loaded there is no
// configuration to act on, so warn and stop without signalling a failure.
try {
    $dotenv = Dotenv::createImmutable(__DIR__ . '/..');
    $dotenv->load();
    $climate->out('Environment variables loaded');
} catch (\Exception $e) {
    $climate->yellow()->out('Warning: ' . $e->getMessage());
    exit(0);
}

if (!defined('CACHE_DIR')) {
    define('CACHE_DIR', __DIR__ . '/../cache');
}

// PROXY_LIST holds the URL of the list; nothing to do when it is absent.
if (empty($_ENV['PROXY_LIST'])) {
    $climate->yellow()->out('PROXY_LIST environment variable not set. No proxies to cache.');
    exit(0);
}

$proxyListUrl = $_ENV['PROXY_LIST'];
$proxyCachePath = CACHE_DIR . '/proxy_list.json';

// Download proxy list from URL
$climate->out('Downloading proxy list from: ' . $proxyListUrl);
$proxyList = downloadProxyList($proxyListUrl, $climate);

// Anything other than a string body (false, or a decoded non-text response)
// cannot be parsed and would break strlen() below.
if (!is_string($proxyList)) {
    $climate->red()->out('Failed to download proxy list from URL: ' . $proxyListUrl);
    exit(1);
}

$climate->green()->out('Proxy list downloaded successfully (' . strlen($proxyList) . ' bytes)');

if (!is_dir(CACHE_DIR)) {
    if (!mkdir(CACHE_DIR, 0755, true)) {
        $climate->red()->out('Failed to create cache directory: ' . CACHE_DIR);
        exit(1);
    }
}

// The text being parsed is the downloaded body, not the environment variable.
$climate->out('Parsing downloaded proxy list...');
$proxies = parseProxyList($proxyList);

if (empty($proxies)) {
    $climate->red()->out('No valid proxies found in PROXY_LIST. Supported formats are:');
    $climate->red()->out('1. http://USER:PASSWORD@HOST:PORT');
    $climate->red()->out('2. IP:PORT:USER:PASSWORD');
    exit(1);
}

$climate->out('Found ' . count($proxies) . ' valid proxies.');

// json_encode() returns false on failure (e.g. invalid UTF-8); report that
// separately instead of writing an empty cache file.
$encodedProxies = json_encode($proxies);
if ($encodedProxies === false) {
    $climate->red()->out('Failed to encode proxy list as JSON: ' . json_last_error_msg());
    exit(1);
}

// file_put_contents() returns the byte count or false; compare strictly.
if (file_put_contents($proxyCachePath, $encodedProxies) !== false) {
    $climate->green()->out('Proxy list successfully cached to: ' . $proxyCachePath);
} else {
    $climate->red()->out('Failed to write proxy list to cache file: ' . $proxyCachePath);
    exit(1);
}
/**
 * Parse a proxy list into normalized proxy URLs.
 *
 * Entries are separated by newlines and/or commas. Two entry formats are
 * recognised; anything else is silently skipped:
 *   1. http://USER:PASSWORD@HOST:PORT (also https://) - kept verbatim
 *   2. IP:PORT:USER:PASSWORD - rewritten to http://USER:PASSWORD@IP:PORT
 *
 * For format 2 the IPv4 octets must be 0-255 and the port 1-65535. The
 * password may itself contain colons.
 * NOTE(review): credentials are interpolated as-is, not URL-encoded; confirm
 * whether consumers expect encoded credentials before changing that.
 *
 * @param string $proxyListString Raw proxy list text
 * @return array Array of valid proxy URLs, in input order
 */
function parseProxyList($proxyListString) {
    $proxies = [];
    $entries = preg_split('/[\r\n,]+/', (string)$proxyListString, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($entries as $entry) {
        $entry = trim($entry);
        if ($entry === '') {
            continue;
        }

        // Format 1: http://USER:PASSWORD@HOST:PORT
        if (preg_match('/^https?:\/\/[^:]+:[^@]+@[^:]+:\d+$/i', $entry)) {
            $proxies[] = $entry;
            continue;
        }

        // Format 2: IP:PORT:USER:PASSWORD
        if (!preg_match('/^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+):([^:]+):(.+)$/', $entry, $matches)) {
            continue;
        }

        list(, $ip, $port, $user, $password) = $matches;

        // The pattern only checks digit counts; reject octets such as 999.
        $octetsValid = true;
        foreach (explode('.', $ip) as $octet) {
            if ((int)$octet > 255) {
                $octetsValid = false;
                break;
            }
        }

        // Over-long digit runs saturate on the int cast and fail the upper bound.
        $portNumber = (int)$port;
        if (!$octetsValid || $portNumber < 1 || $portNumber > 65535) {
            continue;
        }

        // Convert to standard format
        $proxies[] = "http://{$user}:{$password}@{$ip}:{$port}";
    }

    return $proxies;
}
/**
 * Download proxy list from URL using php-curl-class
 *
 * Follows up to 3 redirects, with a 10 second connect timeout and a 30 second
 * overall timeout. Only an HTTP 200 response with a string body counts as
 * success. Progress and errors go to $climate when given; errors fall back to
 * error_log() otherwise.
 *
 * TLS certificate and host verification are enabled by default because the
 * list carries proxy credentials. Pass $verifyTls = false only for a trusted
 * source with a certificate that cannot be validated.
 *
 * @param string $url URL to download proxy list from
 * @param CLImate $climate CLImate instance for output
 * @param bool $verifyTls Whether to verify the server's TLS certificate and host name
 * @return string|false Downloaded content or false on failure
 */
function downloadProxyList($url, $climate = null, $verifyTls = true) {
    // Route failures to the console when available, otherwise to the PHP error log.
    $reportError = function ($message) use ($climate) {
        if ($climate) {
            $climate->red()->out($message);
        } else {
            error_log($message);
        }
    };

    $curl = new Curl();

    // Configure cURL options
    $curl->setTimeout(30);
    $curl->setConnectTimeout(10);
    $curl->setUserAgent('Marreta Proxy Updater/1.0');
    $curl->setHeader('Accept', 'text/plain, text/html, */*');
    $curl->setHeader('Accept-Encoding', 'gzip, deflate');
    $curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
    $curl->setOpt(CURLOPT_MAXREDIRS, 3);
    $curl->setOpt(CURLOPT_SSL_VERIFYPEER, (bool)$verifyTls);
    // libcurl uses 2 for full host name verification and 0 for none.
    $curl->setOpt(CURLOPT_SSL_VERIFYHOST, $verifyTls ? 2 : 0);

    try {
        if ($climate) {
            $climate->out('Making HTTP request with php-curl-class...');
        }

        $curl->get($url);

        if ($curl->error) {
            $reportError('cURL request failed: ' . $curl->errorMessage . ' (Code: ' . $curl->errorCode . ')');
            return false;
        }

        $statusCode = $curl->httpStatusCode;
        if ($climate) {
            $climate->out('HTTP Status Code: ' . $statusCode);
        }

        if ($statusCode !== 200) {
            if ($climate) {
                $climate->yellow()->out('Unexpected HTTP status code: ' . $statusCode);
            }
            return false;
        }

        $content = $curl->response;

        // Honour the string|false contract: callers pass the result to
        // strlen() and the parser, so a non-string body is a failure.
        if (!is_string($content)) {
            $reportError('Proxy list response is not plain text.');
            return false;
        }

        if ($climate) {
            $contentType = $curl->responseHeaders['Content-Type'] ?? 'unknown';
            $climate->out('Content-Type: ' . $contentType);
            $climate->out('Content-Length: ' . strlen($content) . ' bytes');
        }

        return $content;
    } catch (\Exception $e) {
        $reportError('Unexpected error during download: ' . $e->getMessage());
        return false;
    } finally {
        // Always release the cURL handle, on success and on every failure path.
        $curl->close();
    }
}

0
app/cache/database/.gitkeep vendored Normal file
View file

View file

@ -5,7 +5,8 @@
"php-curl-class/php-curl-class": "^11.0",
"php-webdriver/webdriver": "^1.15",
"monolog/monolog": "^3.8.1",
"nikic/fast-route": "^1.3"
"nikic/fast-route": "^1.3",
"league/climate": "^3.8"
},
"autoload": {
"psr-4": {

View file

@ -21,15 +21,11 @@ try {
'SITE_URL'
])->notEmpty();
// Validate URL format
if (!filter_var($_ENV['SITE_URL'], FILTER_VALIDATE_URL)) {
throw new Exception('SITE_URL must be a valid URL');
}
// Core system settings
define('SITE_NAME', $_ENV['SITE_NAME']);
define('SITE_DESCRIPTION', $_ENV['SITE_DESCRIPTION']);
define('SITE_URL', $_ENV['SITE_URL']);
define('CLEANUP_DAYS', $_ENV['CLEANUP_DAYS'] ?? 0);
// Optional settings with defaults
define('DNS_SERVERS', $_ENV['DNS_SERVERS'] ?? '1.1.1.1, 8.8.8.8');
@ -38,11 +34,6 @@ try {
define('CACHE_DIR', __DIR__ . '/cache');
define('LANGUAGE', $_ENV['LANGUAGE'] ?? 'pt-br');
// Redis connection settings
define('REDIS_HOST', $_ENV['REDIS_HOST'] ?? 'localhost');
define('REDIS_PORT', $_ENV['REDIS_PORT'] ?? 6379);
define('REDIS_PREFIX', $_ENV['REDIS_PREFIX'] ?? 'marreta:');
// Logging configuration
define('LOG_LEVEL', $_ENV['LOG_LEVEL'] ?? 'WARNING'); // DEBUG, INFO, WARNING, ERROR, CRITICAL
define('LOG_DAYS_TO_KEEP', 7);
@ -71,6 +62,16 @@ try {
define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php');
define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php');
// Load DMCA domains from JSON file
$dmcaDomainsFile = __DIR__ . '/cache/dmca_domains.json';
if (file_exists($dmcaDomainsFile)) {
$dmcaDomainsJson = file_get_contents($dmcaDomainsFile);
$dmcaDomains = json_decode($dmcaDomainsJson, true);
define('DMCA_DOMAINS', is_array($dmcaDomains) ? $dmcaDomains : []);
} else {
define('DMCA_DOMAINS', []);
}
} catch (Dotenv\Exception\ValidationException $e) {
die('Environment Error: ' . $e->getMessage());
} catch (Exception $e) {

View file

@ -6,7 +6,12 @@
* Defines domains that cannot be accessed by the system
* due to usage policies or technical restrictions
*/
$host = parse_url(defined('SITE_URL') ? SITE_URL : '', PHP_URL_HOST);
return [
$host,
'localhost',
'127.0.0.1',
// News sites
//-- Content behind login access/hard paywall
'wsj.com',
@ -27,6 +32,7 @@ return [
'mittelbayerische.de',
'josimarfootball.com',
'nordsee-zeitung.de',
'zorgvisie.nl',
// List of common blocked sites to avoid unnecessary requests
//-- Technical access blocking
'bloomberg.com',
@ -120,7 +126,6 @@ return [
'jusbrasil.com.br',
'glassdoor.com.br',
'gov.br',
'medium.com',
'stackoverflow.com',
'hoteis.com',
'amazon.com',

View file

@ -13,6 +13,7 @@
* - classAttrRemove: Array of classes to be removed from elements
* - customCode: String containing custom JavaScript code
* - customStyle: String containing custom CSS code
* - proxy: Enable proxy in Guzzle or Selenium requests
* - excludeGlobalRules: Associative array of global rules to exclude for this domain
* Example:
* 'excludeGlobalRules' => [
@ -27,18 +28,34 @@
* - fromGoogleBot: Adds simulation of request coming from Google Bot
* - removeElementsByTag: Remove specific elements via DOM
* - removeCustomAttr: Remove custom attributes from elements
* - urlMods: Modify the URL before fetching content.
* Example:
* 'urlMods' => [
* 'query' => [
* [
* 'key' => 'amp',
* 'value' => '1'
* ]
* ]
* ]
*/
return [
'nsctotal.com.br' => [
'userAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
],
'elcorreo.com' => [
'idElementRemove' => ['didomi-popup','engagement-top'],
'idElementRemove' => ['didomi-popup', 'engagement-top'],
'classElementRemove' => ['content-exclusive-bg'],
'classAttrRemove' => ['didomi-popup-open','paywall'],
'classAttrRemove' => ['didomi-popup-open', 'paywall'],
'fromGoogleBot' => true,
'removeElementsByTag' => ['style'],
'removeCustomAttr' => ['hidden','data-*']
'removeCustomAttr' => ['hidden', 'data-*']
],
'wired.com' => [
'scriptTagRemove' => ['.js'],
],
'newyorker.com' => [
'scriptTagRemove' => ['.js'],
],
'globo.com' => [
'idElementRemove' => ['cookie-banner-lgpd', 'paywall-cpt', 'mc-read-more-wrapper', 'paywall-cookie-content', 'paywall-cpt'],
@ -54,16 +71,19 @@ return [
'gauchazh.clicrbs.com.br' => [
'idElementRemove' => ['paywallTemplate'],
'classAttrRemove' => ['m-paid-content', 'paid-content-apply'],
'scriptTagRemove' => ['vendors-8'],
'scriptTagRemove' => ['vendors-9','vendors-10','vendors-11'],
'excludeGlobalRules' => [
'classElementRemove' => ['paid-content']
],
'fetchStrategies' => 'fetchFromSelenium',
'proxy' => true,
],
'reuters.com' => [
'classElementRemove' => ['leaderboard__container'],
'fetchStrategies' => 'fetchFromSelenium',
],
'cnn.com' => [
'fetchStrategies' => 'fetchFromSelenium',
],
'lepoint.fr' => [
'classElementRemove' => ['paywall'],
],
@ -77,7 +97,7 @@ return [
'removeElementsByTag' => ['a-gift']
],
'fortune.com' => [
'classElementRemove' => ['latest-popular-module','own','drawer-menu'],
'classElementRemove' => ['latest-popular-module', 'own', 'drawer-menu'],
'fetchStrategies' => 'fetchFromSelenium',
'browser' => 'chrome',
'scriptTagRemove' => ['queryly.com'],
@ -86,10 +106,6 @@ return [
'idElementRemove' => ['cboxOverlay'],
'fetchStrategies' => 'fetchFromSelenium',
],
'washingtonpost.com' => [
'classElementRemove' => ['paywall-overlay'],
'fetchStrategies' => 'fetchFromSelenium',
],
'oantagonista.com.br' => [
'fetchStrategies' => 'fetchFromSelenium',
],
@ -142,6 +158,21 @@ return [
'paywall_access' => 'true'
]
],
'ftm.nl' => [
'fetchStrategies' => 'fetchFromSelenium',
'removeCustomAttr' => ['dialog', 'iframe'],
'classElementRemove' => ['modal'],
'scriptTagRemove' => ['footer.min', 'diffuser.js', 'insight.ftm.nl'],
'classAttrRemove' => ['hasBlockingOverlay', 'localstorage']
],
'denikn.cz' => [
'idElementRemove' => ['e_lock__hard']
],
'dtest.cz' => [
'fetchStrategies' => 'fetchFromSelenium',
'classAttrRemove' => ['is-hidden-compare'],
'classElementRemove' => ['cc-window']
],
'uol.com.br' => [
'scriptTagRemove' => ['me.jsuol.com.br', 'c.jsuol.com.br'],
'classElementRemove' => ['header-top-wrapper'],
@ -155,6 +186,12 @@ return [
'nzherald.co.nz' => [
'fetchStrategies' => 'fetchFromSelenium',
],
'onetz.de' => [
'idElementRemove' => ['checkout-container'],
'classElementRemove' => ['tp-backdrop','dm-nobg'],
'classAttrRemove' => ['field-dnt-body-pp'],
'scriptTagRemove' => ['.js'],
],
'opovo.com.br' => [
'classElementRemove' => ['screen-loading', 'overlay-advise']
],
@ -164,7 +201,8 @@ return [
]
],
'theverge.com' => [
'fetchStrategies' => 'fetchFromSelenium',
'scriptTagRemove' => 'zephr',
'classElementRemove' => 'zephr'
],
'economist.com' => [
'cookies' => [
@ -183,15 +221,8 @@ return [
}
'
],
'ft.com' => [
'cookies' => [
'next-flags' => null,
'next:ads' => null
],
'fromGoogleBot' => true
],
'nytimes.com' => [
'idElementRemove' => ['gateway-content','site-index'],
'idElementRemove' => ['gateway-content', 'site-index', 'complianceOverlay'],
'customCode' => '
setTimeout(function() {
const walk = document.createTreeWalker(
@ -240,7 +271,7 @@ return [
position: relative !important;
}
',
'fetchStrategies' => 'fetchFromSelenium',
'fetchStrategies' => 'fetchFromWaybackMachine',
'excludeGlobalRules' => [
'scriptTagRemove' => [
'gtm.js',
@ -341,8 +372,358 @@ return [
'_pctx' => null
]
],
// Domain test
'thestar.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'niagarafallsreview.ca' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'thepeterboroughexaminer.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'therecord.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'thespec.com' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'wellandtribune.ca' => [
'classElementRemove' => ['subscriber-offers', 'subscriber-only', 'subscription-required', 'redacted-overlay', 'subscriber-hide', 'tnt-ads-container'],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll(\'div.subscriber-offers\');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll(\'div.subscriber-only\');
for (const elem of subscriber_only) {
if (elem.classList.contains(\'encrypted-content\') && typeof DOMPurify !== \'undefined\' && typeof unscramble !== \'undefined\') {
const parser = new DOMParser();
const doc = parser.parseFromString(\'<div>\' + DOMPurify.sanitize(unscramble(elem.innerText)) + \'</div>\', \'text/html\');
const content_new = doc.querySelector(\'div\');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute(\'style\');
elem.removeAttribute(\'class\');
}
const banners = document.querySelectorAll(\'div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container\');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll(\'div.tnt-ads-container, div[class*="adLabelWrapper"]\');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll(\'div[id^="tncms-region-article"]\');
recommendations.forEach(el => { el.remove(); });
});
'
],
'time.com' => [
'headers' => [
'User-Agent' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Cookie' => 'nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1',
'Referer' => 'https://www.google.com/'
],
'customCode' => '
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'div[data-testid="inline-message"], div[id^="ad-"], div[id^="leaderboard-"], div.expanded-dock, div.pz-ad-box, div[id="top-wrapper"], div[id="bottom-wrapper"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Once the DOM is parsed, remove the paywall bar and any element whose class
// starts with "MessageBannerWrapper-" (attribute selector now closed with "]").
'architecturaldigest.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Once the DOM is parsed, remove the paywall bar and any element whose class
// starts with "MessageBannerWrapper-" (attribute selector now closed with "]").
'bonappetit.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Once the DOM is parsed, remove the paywall bar and any element whose class
// starts with "MessageBannerWrapper-" (attribute selector now closed with "]").
'cntraveler.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Once the DOM is parsed, remove the paywall bar and any element whose class
// starts with "MessageBannerWrapper-" (attribute selector now closed with "]").
'epicurious.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Once the DOM is parsed, remove the paywall bar and any element whose class
// starts with "MessageBannerWrapper-" (attribute selector now closed with "]").
'gq.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Once the DOM is parsed, remove the paywall bar and any element whose class
// starts with "MessageBannerWrapper-" (attribute selector now closed with "]").
'vanityfair.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Once the DOM is parsed, remove the paywall bar and any element whose class
// starts with "MessageBannerWrapper-" (attribute selector now closed with "]").
'vogue.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'.paywall-bar, div[class^="MessageBannerWrapper-"]\');
banners.forEach(el => { el.remove(); });
});
'
],
'americanbanker.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const inlineGate = document.querySelector(\'.inline-gate\');
if (inlineGate) {
inlineGate.classList.remove(\'inline-gate\');
const inlineGated = document.querySelectorAll(\'.inline-gated\');
for (const elem of inlineGated) { elem.classList.remove(\'inline-gated\'); }
}
});
'
],
'washingtonpost.com' => [
'classElementRemove' => ['paywall-overlay'],
'fetchStrategies' => 'fetchFromSelenium',
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
let paywall = document.querySelectorAll(\'div[data-qa$="-ad"], div[id="leaderboard-wrapper"], div[data-qa="subscribe-promo"]\');
paywall.forEach(el => { el.remove(); });
const images = document.querySelectorAll(\'img\');
images.forEach(image => { image.parentElement.style.filter = \'\'; });
const headimage = document.querySelectorAll(\'div .aspect-custom\');
headimage.forEach(image => { image.style.filter = \'\'; });
});
',
'idElementRemove' => ['wall-bottom-drawer-container']
],
'usatoday.com' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll(\'div.roadblock-container, .gnt_nb, [aria-label="advertisement"], div[id="main-frame-error"]\');
banners.forEach(el => { el.remove(); });
});
'
],
// Fetched through the proxy. The removal rules are written as arrays, the
// form every other entry in this list uses, and the class name carries no
// leading space.
// NOTE(review): confirm the rule consumer does not depend on the old bare-string form.
'stcatharinesstandard.ca' => [
'proxy' => true,
'idElementRemove' => ['access-offers-modal'],
'classElementRemove' => ['modal-backdrop'],
'classAttrRemove' => ['modal-open']
],
'medium.com' => [
'headers' => [
'Referer' => 'https://t.co/x?amp=1',
'X-Forwarded-For' => 'none',
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
'Content-Security-Policy' => 'script-src \'self\';'
]
],
'tagesspiegel.de' => [
'headers' => [
'Content-Security-Policy' => 'script-src \'self\';',
'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
],
'urlMods' => [
'query' => [
[
'key' => 'amp',
'value' => '1'
]
]
]
],
'nzz.ch' => [
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelector(\'.dynamic-regwall\');
if (paywall) {
paywall.remove();
}
});
'
],
'demorgen.be' => [
'headers' => [
'Cookie' => 'isBot=true; authId=1',
'User-Agent' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; Googlebot-News; +http://www.google.com/bot.html) Chrome/121.0.6140.0 Safari/537.36',
'X-Forwarded-For' => 'none',
'Referer' => 'https://news.google.com'
],
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
// remove paywall items
let paywall = document.querySelectorAll(\'script[src*="advertising-cdn.dpgmedia.cloud"], div[data-temptation-position="ARTICLE_BOTTOM"]\');
paywall.forEach(el => { el.remove(); });
// remove empty advert
const advert = document.querySelector(\'div[data-advert-placeholder-collapses]\');
if (advert) {
advert.remove();
}
});
'
],
'ft.com' => [
'cookies' => [
'next-flags' => null,
'next:ads' => null
],
'fromGoogleBot' => true,
'headers' => [
'Referer' => 'https://t.co/x?amp=1'
],
'customCode' => '
document.addEventListener("DOMContentLoaded", () => {
const styleTags = document.querySelectorAll(\'link[rel="stylesheet"]\');
styleTags.forEach(el => {
const href = el.getAttribute(\'href\');
if (href && href.substring(0, 1) === \'/\') {
const updatedHref = href.substring(1).replace(/(https?:\\/\\/.+?)\\/{2,}/, \'$1/\');
el.setAttribute(\'href\', updatedHref);
}
});
setTimeout(() => {
const cookie = document.querySelectorAll(\'.o-cookie-message, .js-article-ribbon, .o-ads, .o-banner, .o-message, .article__content-sign-up\');
cookie.forEach(el => { el.remove(); });
}, 1000);
})
'
],
// Test domain
'altendorfme.github.io' => [
'userAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'headers' => [
@ -350,6 +731,7 @@ return [
'Cache-Control' => 'no-cache',
'Pragma' => 'no-cache'
],
'proxy' => true,
'idElementRemove' => ['test-id-1', 'paywall'],
'classElementRemove' => ['test-class-1'],
'scriptTagRemove' => ['analytics.js', 'test-script.js', 'paywall.js'],
@ -358,7 +740,7 @@ return [
'consent' => 'accepted',
'session_id' => null
],
'classAttrRemove' => ['test-attr-1','paywall'],
'classAttrRemove' => ['test-attr-1', 'paywall'],
'customCode' => '
console.log("worked");
',

View file

@ -7,6 +7,7 @@
* using the 'excludeGlobalRules' configuration in domain_rules.php
*/
return [
'proxy' => false,
// Classes to be removed from all pages:
'classElementRemove' => [
'subscription',
@ -68,6 +69,8 @@ return [
'getblue.io',
'smartocto.com',
'cdn.pn.vg',
'static.vocstatic.com'
'static.vocstatic.com',
'recaptcha',
'intercom'
]
];

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

3
app/dist/icons/moon.svg vendored Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-moon-fill" viewBox="0 0 16 16">
<path d="M6 .278a.77.77 0 0 1 .08.858 7.2 7.2 0 0 0-.878 3.46c0 4.021 3.278 7.277 7.318 7.277q.792-.001 1.533-.16a.79.79 0 0 1 .81.316.73.73 0 0 1-.031.893A8.35 8.35 0 0 1 8.344 16C3.734 16 0 12.286 0 7.71 0 4.266 2.114 1.312 5.124.06A.75.75 0 0 1 6 .278"/>
</svg>

After

Width:  |  Height:  |  Size: 394 B

3
app/dist/icons/paste.svg vendored Normal file
View file

@ -0,0 +1,3 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M3.33333 2.66667H3.45333C3.72667 3.44 4.46 4 5.33333 4H6.66667C7.53333 4 8.26667 3.44 8.54667 2.66667H8.66667C9.58 2.66667 10.3733 3.28 10.6067 4.16667C10.7 4.52667 11.06 4.74 11.42 4.64667C11.7733 4.55333 11.9933 4.19333 11.9 3.83333C11.52 2.36 10.1933 1.33333 8.67333 1.33333H8.56C8.28667 0.56 7.54667 0 6.67333 0H5.34C4.47333 0 3.72667 0.56 3.45333 1.33333H3.34C1.49333 1.33333 0 2.82667 0 4.66667V12.6667C0 14.5067 1.49333 16 3.33333 16H5.33333C5.7 16 6 15.7 6 15.3333C6 14.9667 5.7 14.6667 5.33333 14.6667H3.33333C2.23333 14.6667 1.33333 13.7667 1.33333 12.6667V4.66667C1.33333 3.56667 2.23333 2.66667 3.33333 2.66667ZM5.33333 1.33333H6.66667C7.03333 1.33333 7.33333 1.63333 7.33333 2C7.33333 2.36667 7.03333 2.66667 6.66667 2.66667H5.33333C4.96667 2.66667 4.66667 2.36667 4.66667 2C4.66667 1.63333 4.96667 1.33333 5.33333 1.33333ZM13.3333 6H10C8.52667 6 7.33333 7.19333 7.33333 8.66667V13.3333C7.33333 14.8067 8.52667 16 10 16H13.3333C14.8067 16 16 14.8067 16 13.3333V8.66667C16 7.19333 14.8067 6 13.3333 6ZM14.6667 13.3333C14.6667 14.0667 14.0667 14.6667 13.3333 14.6667H10C9.26667 14.6667 8.66667 14.0667 8.66667 13.3333V8.66667C8.66667 7.93333 9.26667 7.33333 10 7.33333H13.3333C14.0667 7.33333 14.6667 7.93333 14.6667 8.66667V13.3333ZM13.3333 9.33333C13.3333 9.7 13.0333 10 12.6667 10H10.6667C10.3 10 10 9.7 10 9.33333C10 8.96667 10.3 8.66667 10.6667 8.66667H12.6667C13.0333 8.66667 13.3333 8.96667 13.3333 9.33333ZM13.3333 12C13.3333 12.3667 13.0333 12.6667 12.6667 12.6667H10.6667C10.3 12.6667 10 12.3667 10 12C10 11.6333 10.3 11.3333 10.6667 11.3333H12.6667C13.0333 11.3333 13.3333 11.6333 13.3333 12Z" fill="black"/>
</svg>

After

Width:  |  Height:  |  Size: 1.7 KiB

3
app/dist/icons/sun.svg vendored Normal file
View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-brightness-high-fill" viewBox="0 0 16 16">
<path d="M12 8a4 4 0 1 1-8 0 4 4 0 0 1 8 0M8 0a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 0m0 13a.5.5 0 0 1 .5.5v2a.5.5 0 0 1-1 0v-2A.5.5 0 0 1 8 13m8-5a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2a.5.5 0 0 1 .5.5M3 8a.5.5 0 0 1-.5.5h-2a.5.5 0 0 1 0-1h2A.5.5 0 0 1 3 8m10.657-5.657a.5.5 0 0 1 0 .707l-1.414 1.415a.5.5 0 1 1-.707-.708l1.414-1.414a.5.5 0 0 1 .707 0m-9.193 9.193a.5.5 0 0 1 0 .707L3.05 13.657a.5.5 0 0 1-.707-.707l1.414-1.414a.5.5 0 0 1 .707 0m9.193 2.121a.5.5 0 0 1-.707 0l-1.414-1.414a.5.5 0 0 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .707M4.464 4.465a.5.5 0 0 1-.707 0L2.343 3.05a.5.5 0 1 1 .707-.707l1.414 1.414a.5.5 0 0 1 0 .708"/>
</svg>

After

Width:  |  Height:  |  Size: 791 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -1,2 +1,2 @@
"serviceWorker"in navigator&&window.addEventListener("load",()=>{navigator.serviceWorker.register("/service-worker.js").then(()=>{}).catch(()=>{})}),document.addEventListener("DOMContentLoaded",function(){let t=document.querySelector(".integration");var e=document.querySelector(".integration__toggle");let o=document.querySelector(".extension");var n=document.querySelector(".extension__toggle");let s=e=>{e!==t&&t.classList.remove("open"),e!==o&&o.classList.remove("open")};e.addEventListener("click",e=>{e.stopPropagation(),s(t),t.classList.toggle("open")}),n.addEventListener("click",e=>{e.stopPropagation(),s(o),o.classList.toggle("open")}),t.addEventListener("click",e=>{e.stopPropagation()}),o.addEventListener("click",e=>{e.stopPropagation()}),document.addEventListener("click",()=>{t.classList.remove("open"),o.classList.remove("open")}),document.addEventListener("click",e=>{e=e.target.closest(".toasty");e&&e.remove()}),document.addEventListener("click",e=>{e.target.closest(".open-nav")&&((e=document.querySelector("header")).classList.contains("open")?e.classList.remove("open"):e.classList.add("open"))})});
"serviceWorker"in navigator&&window.addEventListener("load",()=>{navigator.serviceWorker.register("/service-worker.js").then(()=>{}).catch(()=>{})}),document.addEventListener("DOMContentLoaded",function(){let t=document.querySelector(".integration");var e=document.querySelector(".integration__toggle");let o=document.querySelector(".extension");var n=document.querySelector(".extension__toggle");let a=e=>{e!==t&&t.classList.remove("open"),e!==o&&o.classList.remove("open")};e.addEventListener("click",e=>{e.stopPropagation(),a(t),t.classList.toggle("open")}),n.addEventListener("click",e=>{e.stopPropagation(),a(o),o.classList.toggle("open")}),t.addEventListener("click",e=>{e.stopPropagation()}),o.addEventListener("click",e=>{e.stopPropagation()}),document.addEventListener("click",()=>{t.classList.remove("open"),o.classList.remove("open")}),document.addEventListener("click",e=>{e=e.target.closest(".toasty");e&&e.remove()}),document.addEventListener("click",e=>{e.target.closest(".open-nav")&&((e=document.querySelector("header")).classList.contains("open")?e.classList.remove("open"):e.classList.add("open"))});e=document.getElementById("paste");let r=document.getElementById("url");e&&r&&e.addEventListener("click",async e=>{e.preventDefault();try{var t=await navigator.clipboard.readText();r.value=t.trim()}catch(e){console.error("Failed to read clipboard contents",e)}});n=document.getElementById("themeToggle");let c=document.documentElement;e=localStorage.getItem("theme")||"light";c.setAttribute("data-theme",e),n&&n.addEventListener("click",()=>{var e="dark"===c.getAttribute("data-theme")?"light":"dark";c.setAttribute("data-theme",e),localStorage.setItem("theme",e)})});
//# sourceMappingURL=scripts.js.map

File diff suppressed because one or more lines are too long

View file

@ -5,7 +5,7 @@ namespace Inc;
use Inc\Cache\CacheStorageInterface;
use Inc\Cache\DiskStorage;
use Inc\Cache\S3Storage;
use Inc\Cache\RedisStorage;
use Inc\Cache\SQLiteStorage;
/**
* System cache management with multiple storage backends (disk/S3)
@ -17,17 +17,17 @@ class Cache
/** @var CacheStorageInterface Cache storage implementation */
private $storage;
/** @var RedisStorage Redis instance for file counting */
private $redisStorage;
/** @var SQLiteStorage SQLite instance for file counting */
private $sqliteStorage;
/**
* Initializes storage based on configuration
* Uses S3Storage if configured and enabled
* Defaults to DiskStorage otherwise
* Defaults to SQLiteStorage otherwise (which delegates to DiskStorage)
*/
public function __construct()
{
$this->redisStorage = new RedisStorage(CACHE_DIR);
$this->sqliteStorage = new SQLiteStorage(CACHE_DIR);
if (defined('S3_CACHE_ENABLED') && S3_CACHE_ENABLED === true) {
$this->storage = new S3Storage([
@ -40,14 +40,14 @@ class Cache
'endpoint' => defined('S3_ENDPOINT') ? S3_ENDPOINT : null
]);
} else {
$this->storage = new DiskStorage(CACHE_DIR);
$this->storage = $this->sqliteStorage;
}
}
/** Gets total number of cached files */
public function getCacheFileCount(): int
{
return $this->redisStorage->countCacheFiles();
return $this->sqliteStorage->countCacheFiles();
}
/**

View file

@ -1,128 +0,0 @@
<?php
namespace Inc\Cache;
use Redis;
/**
 * Redis-based cache storage implementation
 *
 * Stores cache entries in Redis and keeps a cached count of the .gz files
 * found in the cache directory. Every operation degrades gracefully (returns
 * false/null or falls back to a directory scan) when Redis is unavailable.
 */
class RedisStorage implements CacheStorageInterface
{
    /** Redis key holding the cached file count */
    private const COUNT_KEY = 'cache_file_count';

    /**
     * @var \Redis|null Redis client instance, null when no connection could be made
     */
    private $redis = null;

    /**
     * @var string Cache directory for file counting
     */
    private $cacheDir;

    /**
     * Class constructor
     * @param string $cacheDir Base directory for cache storage
     */
    public function __construct(string $cacheDir)
    {
        $this->cacheDir = $cacheDir;

        // Try to initialize Redis connection
        try {
            $client = new \Redis();
            // connect() can signal failure through its return value instead of
            // throwing; only keep the client when the connection succeeded so
            // later calls never run against a disconnected instance.
            if ($client->connect(REDIS_HOST, REDIS_PORT, 2.5)) {
                $client->setOption(\Redis::OPT_PREFIX, REDIS_PREFIX);
                $this->redis = $client;
            }
        } catch (\Exception $e) {
            $this->redis = null;
        }
    }

    /**
     * Counts the number of files in the cache directory
     *
     * Returns the count stored in Redis when present; otherwise scans the
     * cache directory for .gz files and stores the result in Redis.
     *
     * @return int Number of files in the cache directory
     */
    public function countCacheFiles(): int
    {
        if ($this->redis !== null) {
            $cachedCount = $this->redis->get(self::COUNT_KEY);
            if ($cachedCount !== false) {
                return (int)$cachedCount;
            }
        }

        $fileCount = 0;
        $iterator = new \FilesystemIterator($this->cacheDir);
        foreach ($iterator as $file) {
            if ($file->isFile() && $file->getExtension() === 'gz') {
                $fileCount++;
            }
        }

        if ($this->redis !== null) {
            $this->redis->set(self::COUNT_KEY, $fileCount);
        }

        return $fileCount;
    }

    /**
     * Updates the file count in Redis
     * @param int $count Number of files
     */
    public function updateCacheFileCount(int $count): void
    {
        if ($this->redis !== null) {
            $this->redis->set(self::COUNT_KEY, $count);
        }
    }

    /**
     * Checks if cache exists for a given ID
     * @param string $id Cache ID
     * @return bool True if cache exists, false otherwise
     */
    public function exists(string $id): bool
    {
        if ($this->redis === null) {
            return false;
        }

        // Depending on the client version exists() yields an int or a bool;
        // normalise it to honour the declared return type.
        return (bool)$this->redis->exists($id);
    }

    /**
     * Retrieves cached content
     * @param string $id Cache ID
     * @return string|null Cached content or null if not found
     */
    public function get(string $id): ?string
    {
        if ($this->redis === null) {
            return null;
        }

        $content = $this->redis->get($id);
        return $content === false ? null : $content;
    }

    /**
     * Stores content in cache
     *
     * The file counter is only advanced when the ID was not stored before,
     * and it is advanced with an atomic increment so concurrent writers do
     * not overwrite each other's updates.
     *
     * @param string $id Cache ID
     * @param string $content Content to be stored
     * @return bool True if successful, false otherwise
     */
    public function set(string $id, string $content): bool
    {
        if ($this->redis === null) {
            return false;
        }

        $isNewEntry = !$this->exists($id);
        $result = (bool)$this->redis->set($id, $content);

        if ($result && $isNewEntry) {
            // incr() creates the key with value 1 when it does not exist yet
            $this->redis->incr(self::COUNT_KEY);
        }

        return $result;
    }
}

View file

@ -0,0 +1,177 @@
<?php
namespace Inc\Cache;
use PDO;
use PDOException;
/**
 * SQLite-based cache storage implementation
 *
 * Keeps a running count of cached files in a small SQLite database and
 * delegates the actual cache reads and writes to DiskStorage. When the
 * database cannot be opened the class still works: counts are then obtained
 * by scanning the cache directory.
 */
class SQLiteStorage implements CacheStorageInterface
{
    /**
     * @var PDO|null SQLite connection, null when the database is unavailable
     */
    private $db;

    /**
     * @var string Cache directory for file counting
     */
    private $cacheDir;

    /**
     * @var string Path to SQLite database file
     */
    private $dbPath;

    /**
     * @var DiskStorage Disk storage for cache entries
     */
    private $diskStorage;

    /**
     * Class constructor
     * @param string $cacheDir Base directory for cache storage
     */
    public function __construct(string $cacheDir)
    {
        $this->cacheDir = $cacheDir;
        $this->diskStorage = new DiskStorage($cacheDir);

        // Ensure database directory exists
        $dbDir = $cacheDir . '/database';
        if (!is_dir($dbDir)) {
            mkdir($dbDir, 0755, true);
        }

        $this->dbPath = $dbDir . '/.sqlite';

        // Decide whether the database is fresh BEFORE the schema is created:
        // once initDatabase() has run, the file exists and already holds the
        // table, so checking afterwards would never detect a new database.
        $isNewDatabase = !file_exists($this->dbPath) || filesize($this->dbPath) < 1024;

        // Try to initialize SQLite connection
        try {
            $this->db = new PDO('sqlite:' . $this->dbPath);
            $this->db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

            // Create tables if they don't exist
            $this->initDatabase();

            // Seed the stored count for a freshly created database
            if ($isNewDatabase) {
                $this->countCacheFiles();
            }
        } catch (PDOException $e) {
            $this->db = null;
        }
    }

    /**
     * Initialize database tables
     */
    private function initDatabase(): void
    {
        $this->db->exec("
            CREATE TABLE IF NOT EXISTS stats (
                key TEXT PRIMARY KEY,
                value INTEGER NOT NULL
            )
        ");
    }

    /**
     * Reads the stored file count from SQLite
     * @return int|null Stored count, or null when unavailable or not stored yet
     */
    private function readStoredCount(): ?int
    {
        if ($this->db === null) {
            return null;
        }

        try {
            $stmt = $this->db->query("SELECT value FROM stats WHERE key = 'count'");
            $row = $stmt->fetch(PDO::FETCH_ASSOC);
            return $row ? (int)$row['value'] : null;
        } catch (PDOException $e) {
            // Treated as "no stored count" so callers fall back to scanning
            return null;
        }
    }

    /**
     * Counts the .gz files directly inside the cache directory
     * @return int Number of .gz files found
     */
    private function scanCacheDirectory(): int
    {
        $fileCount = 0;
        foreach (new \FilesystemIterator($this->cacheDir) as $file) {
            if ($file->isFile() && $file->getExtension() === 'gz') {
                $fileCount++;
            }
        }

        return $fileCount;
    }

    /**
     * Counts the number of files in the cache directory
     *
     * Returns the count stored in SQLite when present; otherwise scans the
     * cache directory and stores the result.
     *
     * @return int Number of files in the cache directory
     */
    public function countCacheFiles(): int
    {
        $storedCount = $this->readStoredCount();
        if ($storedCount !== null) {
            return $storedCount;
        }

        $fileCount = $this->scanCacheDirectory();
        $this->updateCacheFileCount($fileCount);

        return $fileCount;
    }

    /**
     * Updates the file count in SQLite
     * @param int $count Number of files
     */
    public function updateCacheFileCount(int $count): void
    {
        if ($this->db === null) {
            return;
        }

        try {
            $stmt = $this->db->prepare("
                INSERT OR REPLACE INTO stats (key, value)
                VALUES ('count', :count)
            ");
            $stmt->bindValue(':count', $count, PDO::PARAM_INT);
            $stmt->execute();
        } catch (PDOException $e) {
            // Best effort: the count is only a statistic, never fail a request over it
        }
    }

    /**
     * Adds one to the stored file count after a new entry was written
     *
     * When no count is stored yet, the directory is scanned instead; that
     * scan already includes the entry that was just written, so nothing is
     * added on top of it.
     */
    private function incrementStoredCount(): void
    {
        if ($this->db === null) {
            return;
        }

        try {
            // Single-statement increment avoids a separate read-then-write
            $updatedRows = $this->db->exec("UPDATE stats SET value = value + 1 WHERE key = 'count'");
            if ($updatedRows === 0) {
                $this->updateCacheFileCount($this->scanCacheDirectory());
            }
        } catch (PDOException $e) {
            // Best effort: the count is only a statistic, never fail a request over it
        }
    }

    /**
     * Checks if cache exists for a given ID
     * Delegates to DiskStorage
     * @param string $id Cache ID
     * @return bool True if cache exists, false otherwise
     */
    public function exists(string $id): bool
    {
        return $this->diskStorage->exists($id);
    }

    /**
     * Retrieves cached content
     * Delegates to DiskStorage
     * @param string $id Cache ID
     * @return string|null Cached content or null if not found
     */
    public function get(string $id): ?string
    {
        return $this->diskStorage->get($id);
    }

    /**
     * Stores content in cache
     *
     * Delegates to DiskStorage and advances the file count, but only when
     * the write created an entry that was not present before; overwriting an
     * existing entry leaves the count untouched.
     *
     * @param string $id Cache ID
     * @param string $content Content to be stored
     * @return bool True if successful, false otherwise
     */
    public function set(string $id, string $content): bool
    {
        // Must be evaluated before writing, otherwise the entry always exists
        $alreadyCached = $this->diskStorage->exists($id);

        $result = $this->diskStorage->set($id, $content);

        if ($result && !$alreadyCached) {
            $this->incrementStoredCount();
        }

        return $result;
    }
}

View file

@ -31,7 +31,9 @@ class Rules
'fetchStrategies',
'fromGoogleBot',
'removeElementsByTag',
'removeCustomAttr'
'removeCustomAttr',
'urlMods',
'proxy'
];
/**
@ -91,6 +93,35 @@ class Rules
return $this->getGlobalRules();
}
/**
 * Checks whether any configured domain rule applies to a domain
 *
 * A match is reported when the base domain of $domain equals the base
 * domain of a configured pattern, or when one of the parts returned by
 * getDomainParts() equals the base domain of a configured pattern.
 *
 * @param string $domain Target domain
 * @return bool True if a domain-specific rule set matches, false otherwise
 */
public function hasDomainRules($domain)
{
    $domainParts = $this->getDomainParts($domain);
    $baseDomain = $this->getBaseDomain($domain);

    // Resolve every pattern's base domain once instead of once per comparison
    $patternBaseDomains = [];
    foreach ($this->domainRules as $pattern => $rules) {
        $patternBaseDomains[] = $this->getBaseDomain($pattern);
    }

    // Check for exact domain match first
    if (in_array($baseDomain, $patternBaseDomains, true)) {
        return true;
    }

    // Check for partial domain matches
    foreach ($domainParts as $part) {
        if (in_array($part, $patternBaseDomains, true)) {
            return true;
        }
    }

    return false;
}
/**
* Combines domain rules with global configuration
* @param array $rules Domain-specific rules
@ -108,12 +139,14 @@ class Rules
if (isset($excludeGlobalRules[$ruleType])) {
if (is_assoc_array($globalTypeRules)) {
$mergedRules[$ruleType] = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$result = array_diff_key($globalTypeRules, array_flip($excludeGlobalRules[$ruleType]));
$mergedRules[$ruleType] = is_array($result) ? $result : [];
} else {
$mergedRules[$ruleType] = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$result = array_diff($globalTypeRules, $excludeGlobalRules[$ruleType]);
$mergedRules[$ruleType] = is_array($result) ? $result : [];
}
} else {
$mergedRules[$ruleType] = $globalTypeRules;
$mergedRules[$ruleType] = is_array($globalTypeRules) ? $globalTypeRules : [];
}
}
@ -126,10 +159,13 @@ class Rules
}
if (in_array($ruleType, ['cookies', 'headers'])) {
$mergedRules[$ruleType] = array_merge($mergedRules[$ruleType], $domainTypeRules);
$mergedRules[$ruleType] = array_merge(
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
is_array($domainTypeRules) ? $domainTypeRules : []
);
} else {
$mergedRules[$ruleType] = array_values(array_unique(array_merge(
$mergedRules[$ruleType],
is_array($mergedRules[$ruleType]) ? $mergedRules[$ruleType] : [],
(array)$domainTypeRules
)));
}

View file

@ -50,6 +50,31 @@ class URLAnalyzer extends URLAnalyzerBase
*/
public function analyze($url)
{
// Extract and validate hostname
$host = parse_url($url, PHP_URL_HOST);
if (!$host) {
$this->error->throwError(self::ERROR_INVALID_URL, '');
}
// Check if URL contains restricted keywords
if ($this->isRestrictedUrl($url)) {
Logger::getInstance()->logUrl($url, 'RESTRICTED_URL');
$this->error->throwError(self::ERROR_RESTRICTED_URL, '');
}
$originalHost = parse_url($url, PHP_URL_HOST);
$host = preg_replace('/^www\./', '', $host);
// Check if domain is in DMCA list FIRST (before any HTTP requests)
foreach (DMCA_DOMAINS as $dmcaEntry) {
if (is_array($dmcaEntry) && isset($dmcaEntry['host'])) {
if ($dmcaEntry['host'] === $host || $dmcaEntry['host'] === $originalHost) {
Logger::getInstance()->logUrl($url, 'DMCA_DOMAIN');
$customMessage = isset($dmcaEntry['message']) ? $dmcaEntry['message'] : '';
$this->error->throwError(self::ERROR_DMCA_DOMAIN, $customMessage);
}
}
}
// Reset activated rules for new analysis
$this->activatedRules = [];
@ -60,20 +85,17 @@ class URLAnalyzer extends URLAnalyzerBase
return $this->process->processContent($rawContent, parse_url($url, PHP_URL_HOST), $url);
}
// Extract and validate hostname
$host = parse_url($url, PHP_URL_HOST);
if (!$host) {
$this->error->throwError(self::ERROR_INVALID_URL, '');
}
$host = preg_replace('/^www\./', '', $host);
// Check if domain is in blocked list
if (in_array($host, BLOCKED_DOMAINS)) {
Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN');
$this->error->throwError(self::ERROR_BLOCKED_DOMAIN, '');
}
// Check HTTP status and handle any errors
// Check if domain has specific rules by looking for domain-specific configurations
$hasCustomRules = $this->hasDomainRules($host);
// Check HTTP status and handle any errors only if domain doesn't have custom rules
if (!$hasCustomRules) {
$redirectInfo = $this->utils->checkStatus($url);
if ($redirectInfo['httpCode'] !== 200) {
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
@ -83,6 +105,7 @@ class URLAnalyzer extends URLAnalyzerBase
$this->error->throwError(self::ERROR_HTTP_ERROR, (string)$redirectInfo['httpCode']);
}
}
}
try {
// Get specific rules for this domain

View file

@ -22,23 +22,27 @@ class URLAnalyzerBase
/** @var string Error constants for different failure scenarios */
const ERROR_INVALID_URL = 'INVALID_URL';
const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN';
const ERROR_DMCA_DOMAIN = 'DMCA_DOMAIN';
const ERROR_NOT_FOUND = 'NOT_FOUND';
const ERROR_HTTP_ERROR = 'HTTP_ERROR';
const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR';
const ERROR_DNS_FAILURE = 'DNS_FAILURE';
const ERROR_CONTENT_ERROR = 'CONTENT_ERROR';
const ERROR_GENERIC_ERROR = 'GENERIC_ERROR';
const ERROR_RESTRICTED_URL = 'RESTRICTED_URL';
/** @var array Maps error types to HTTP codes and message keys */
protected $errorMap = [
self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'],
self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'],
self::ERROR_DMCA_DOMAIN => ['code' => 403, 'message_key' => 'DMCA_DOMAIN'],
self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'],
self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'],
self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'],
self::ERROR_DNS_FAILURE => ['code' => 504, 'message_key' => 'DNS_FAILURE'],
self::ERROR_CONTENT_ERROR => ['code' => 502, 'message_key' => 'CONTENT_ERROR'],
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR']
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR'],
self::ERROR_RESTRICTED_URL => ['code' => 403, 'message_key' => 'RESTRICTED_URL']
];
/** @var array List of user agents to rotate through, including Googlebot */
@ -113,4 +117,59 @@ class URLAnalyzerBase
{
return $this->rules->getDomainRules($domain);
}
/**
* Check if domain has specific rules
* Forwards the lookup to the rules object's hasDomainRules()
* @param string $domain The domain host to check
* @return bool True if domain has custom rules, false otherwise
*/
protected function hasDomainRules($domain)
{
return $this->rules->hasDomainRules($domain);
}
/**
 * Tells whether a URL mentions any restricted keyword
 *
 * The whole URL is lower-cased and searched for each keyword as a plain
 * substring, so a keyword also matches inside longer words (for example
 * 'auth' is found in 'author') and anywhere in the URL, host included.
 *
 * @param string $url The URL to check
 * @return bool True if URL contains restricted keywords, false otherwise
 */
protected function isRestrictedUrl($url)
{
    $haystack = strtolower($url);

    $blockedTerms = [
        'login', 'signin', 'sign-in',
        'signup', 'sign-up',
        'register', 'registration',
        'lost-password', 'forgot-password', 'reset-password', 'password',
        'auth', 'authentication',
        'account', 'profile', 'dashboard', 'admin', 'member',
        'subscription', 'subscribe', 'premium',
        'checkout', 'payment', 'billing'
    ];

    foreach ($blockedTerms as $term) {
        if (strpos($haystack, $term) === false) {
            continue;
        }
        return true;
    }

    return false;
}
}

View file

@ -14,10 +14,17 @@ class URLAnalyzerError extends URLAnalyzerBase
public function throwError($errorType, $additionalInfo = '')
{
$errorConfig = $this->errorMap[$errorType];
// For DMCA domains, use custom message if provided, otherwise use default
if ($errorType === self::ERROR_DMCA_DOMAIN && !empty($additionalInfo)) {
$message = $additionalInfo;
} else {
$message = Language::getMessage($errorConfig['message_key'])['message'];
if ($additionalInfo) {
if ($additionalInfo && $errorType !== self::ERROR_DMCA_DOMAIN) {
$message .= ': ' . $additionalInfo;
}
}
throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo);
}
}

View file

@ -21,16 +21,117 @@ class URLAnalyzerFetch extends URLAnalyzerBase
/** @var array List of available proxies */
private $proxyList = [];
/** @var string Path to proxy cache file */
private $proxyCachePath = '';
/**
* Sets up the fetch handler with error handling capability,
* points the proxy cache path at cache/proxy_list.json (two directories up)
* and loads the cached proxy list
*/
public function __construct()
{
parent::__construct();
$this->error = new URLAnalyzerError();
$this->proxyCachePath = __DIR__ . '/../../cache/proxy_list.json';
$this->loadProxyList();
}
/**
 * Loads proxy list from cache if available
 *
 * The cache file is only read when the PROXY_LIST environment entry is set
 * and the file exists. The list is replaced only when the file decodes to
 * an array; invalid JSON or a scalar payload leaves the current list
 * untouched so it always remains an array.
 */
private function loadProxyList()
{
    if (!isset($_ENV['PROXY_LIST']) || !file_exists($this->proxyCachePath)) {
        return;
    }

    $cachedList = file_get_contents($this->proxyCachePath);
    if (empty($cachedList)) {
        // Unreadable or empty file
        return;
    }

    $decodedList = json_decode($cachedList, true);
    if (is_array($decodedList)) {
        $this->proxyList = $decodedList;
    }
}
/**
 * Picks one entry of the loaded proxy list at random
 * @return string|null Random proxy URL or null if none available
 */
private function getRandomProxy()
{
    return empty($this->proxyList)
        ? null
        : $this->proxyList[array_rand($this->proxyList)];
}
/**
 * Modifies URL based on urlMods rules
 *
 * Currently only 'query' modifications are applied: each entry with both a
 * 'key' and a 'value' sets (or overrides) that query parameter. The URL is
 * then reassembled from its parsed components.
 *
 * @param string $url Original URL
 * @param array $domainRules Domain rules containing urlMods
 * @return string Modified URL, or the original URL when there is nothing to
 *                apply or the URL cannot be parsed
 */
private function applyUrlModifications($url, $domainRules)
{
    if (!isset($domainRules['urlMods'])) {
        return $url;
    }

    $urlParts = parse_url($url);
    if ($urlParts === false) {
        // Seriously malformed URL: rebuilding it would drop everything but
        // the query string, so hand it back unchanged
        return $url;
    }

    if (isset($domainRules['urlMods']['query']) && is_array($domainRules['urlMods']['query'])) {
        $queryParams = [];
        if (isset($urlParts['query'])) {
            parse_str($urlParts['query'], $queryParams);
        }

        foreach ($domainRules['urlMods']['query'] as $queryMod) {
            if (isset($queryMod['key']) && isset($queryMod['value'])) {
                $queryParams[$queryMod['key']] = $queryMod['value'];
            }
        }

        $urlParts['query'] = http_build_query($queryParams);
    }

    $modifiedUrl = '';

    if (isset($urlParts['scheme'])) {
        $modifiedUrl .= $urlParts['scheme'] . '://';
    }

    if (isset($urlParts['user'])) {
        $modifiedUrl .= $urlParts['user'];
        if (isset($urlParts['pass'])) {
            $modifiedUrl .= ':' . $urlParts['pass'];
        }
        $modifiedUrl .= '@';
    }

    if (isset($urlParts['host'])) {
        $modifiedUrl .= $urlParts['host'];
    }

    if (isset($urlParts['port'])) {
        $modifiedUrl .= ':' . $urlParts['port'];
    }

    if (isset($urlParts['path'])) {
        $modifiedUrl .= $urlParts['path'];
    }

    // Skip an empty query so the result never ends in a dangling '?'
    if (isset($urlParts['query']) && $urlParts['query'] !== '') {
        $modifiedUrl .= '?' . $urlParts['query'];
    }

    if (isset($urlParts['fragment'])) {
        $modifiedUrl .= '#' . $urlParts['fragment'];
    }

    return $modifiedUrl;
}

/**
 * Fetches content using cURL
 * Handles redirects and custom headers
 */
public function fetchContent($url)
{
$curl = new Curl();
@ -42,6 +143,8 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$host = preg_replace('/^www\./', '', $host);
$domainRules = $this->getDomainRules($host);
$url = $this->applyUrlModifications($url, $domainRules);
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
$curl->setOpt(CURLOPT_MAXREDIRS, 2);
$curl->setOpt(CURLOPT_TIMEOUT, 10);
@ -49,6 +152,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_DNS_SERVERS, implode(',', $this->dnsServers));
$curl->setOpt(CURLOPT_ENCODING, '');
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->setHeaders([
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
@ -97,6 +207,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
*/
public function fetchFromWaybackMachine($url)
{
$domainHost = parse_url($url, PHP_URL_HOST);
if ($domainHost) {
$domainHost = preg_replace('/^www\./', '', $domainHost);
$domainRules = $this->getDomainRules($domainHost);
$url = $this->applyUrlModifications($url, $domainRules);
}
$url = preg_replace('#^https?://#', '', $url);
$availabilityUrl = "https://archive.org/wayback/available?url=" . urlencode($url);
@ -106,6 +223,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setUserAgent($this->getRandomUserAgent());
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->get($availabilityUrl);
if ($curl->error) {
@ -130,6 +254,13 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setUserAgent($this->getRandomUserAgent());
if (isset($domainRules['proxy']) && $domainRules['proxy'] === true) {
$proxy = $this->getRandomProxy();
if ($proxy) {
$curl->setOpt(CURLOPT_PROXY, $proxy);
}
}
$curl->get($archiveUrl);
if ($curl->error || $curl->httpStatusCode !== 200 || empty($curl->response)) {
@ -152,16 +283,32 @@ class URLAnalyzerFetch extends URLAnalyzerBase
{
$host = 'http://'.SELENIUM_HOST.'/wd/hub';
$domainHost = parse_url($url, PHP_URL_HOST);
if ($domainHost) {
$domainHost = preg_replace('/^www\./', '', $domainHost);
$domainRules = $this->getDomainRules($domainHost);
$url = $this->applyUrlModifications($url, $domainRules);
}
$useProxy = isset($domainRules['proxy']) && $domainRules['proxy'] === true;
$proxy = $useProxy ? $this->getRandomProxy() : null;
if ($browser === 'chrome') {
$options = new ChromeOptions();
$options->addArguments([
$arguments = [
'--headless',
'--disable-gpu',
'--no-sandbox',
'--disable-dev-shm-usage',
'--disable-images',
'--blink-settings=imagesEnabled=false'
]);
];
if ($useProxy && $proxy) {
$arguments[] = '--proxy-server=' . $proxy;
}
$options->addArguments($arguments);
$capabilities = DesiredCapabilities::chrome();
$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
@ -174,6 +321,22 @@ class URLAnalyzerFetch extends URLAnalyzerBase
$profile->setPreference("network.http.referer.spoofSource", true);
$profile->setPreference("network.http.referer.trimmingPolicy", 0);
if ($useProxy && $proxy) {
$proxyParts = parse_url($proxy);
if (isset($proxyParts['host']) && isset($proxyParts['port'])) {
$profile->setPreference("network.proxy.type", 1);
$profile->setPreference("network.proxy.http", $proxyParts['host']);
$profile->setPreference("network.proxy.http_port", $proxyParts['port']);
$profile->setPreference("network.proxy.ssl", $proxyParts['host']);
$profile->setPreference("network.proxy.ssl_port", $proxyParts['port']);
if (isset($proxyParts['user']) && isset($proxyParts['pass'])) {
$profile->setPreference("network.proxy.username", $proxyParts['user']);
$profile->setPreference("network.proxy.password", $proxyParts['pass']);
}
}
}
$options = new FirefoxOptions();
$options->setProfile($profile);

View file

@ -18,7 +18,17 @@ class URLAnalyzerUtils extends URLAnalyzerBase
$curl->setOpt(CURLOPT_TIMEOUT, 5);
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
$curl->setOpt(CURLOPT_NOBODY, true);
$curl->setUserAgent($this->getRandomUserAgent());
// Google Public DNS resolvers (primary 8.8.8.8, secondary 8.8.4.4)
$curl->setOpt(CURLOPT_DNS_SERVERS, '8.8.8.8,8.8.4.4');
$curl->setHeaders([
'User-Agent' => 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language' => 'en-US,en;q=0.5',
'Cache-Control' => 'no-cache',
'Pragma' => 'no-cache',
'DNT' => '1',
'X-Forwarded-For' => '66.249.' . rand(64, 95) . '.' . rand(1, 254),
'From' => 'googlebot(at)googlebot.com'
]);
$curl->get($url);
if ($curl->error) {

View file

@ -21,6 +21,10 @@ return [
'message' => 'Diese Seite ist nicht erlaubt.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Die angeforderte Website kann aufgrund von Anfragen ihrer Eigentümer nicht angezeigt werden.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'DNS für die Domain konnte nicht aufgelöst werden. Bitte überprüfe, ob die URL korrekt ist.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Bei der Bearbeitung Ihrer Anfrage ist ein Fehler aufgetreten.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Diese URL enthält eingeschränkten Inhalt und kann aus Sicherheitsgründen nicht verarbeitet werden.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'This domain is blocked for extraction.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'The requested website cannot be displayed due to requests from its owners.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Failed to resolve DNS for the domain. Please verify if the URL is correct.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'An error occurred while processing your request.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'This URL contains restricted content and cannot be processed for security reasons.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este dominio está bloqueado para extracción.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'El sitio web solicitado no se puede mostrar debido a las solicitudes de sus propietarios.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Error al resolver DNS para el dominio. Verifique si la URL es correcta.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Ocurrió un error al procesar su solicitud.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Esta URL contiene contenido restringido y no se puede procesar por razones de seguridad.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Este domínio está bloqueado para extração.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'O site solicitado não pode ser exibido por exigência dos seus proprietários.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Falha ao resolver DNS para o domínio. Verifique se a URL está correta.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'Ocorreu um erro ao processar sua solicitação.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Esta URL contém conteúdo restrito e não pode ser processada por motivos de segurança.',
'type' => 'error'
]
]
];

View file

@ -21,6 +21,10 @@ return [
'message' => 'Этот домен заблокирован для извлечения.',
'type' => 'error'
],
'DMCA_DOMAIN' => [
'message' => 'Запрошенный веб-сайт не может быть отображен по запросу его владельцев.',
'type' => 'error'
],
'DNS_FAILURE' => [
'message' => 'Не удалось разрешить DNS для домена. Проверьте правильность URL.',
'type' => 'warning'
@ -48,6 +52,10 @@ return [
'GENERIC_ERROR' => [
'message' => 'При обработке вашего запроса произошла ошибка.',
'type' => 'warning'
],
'RESTRICTED_URL' => [
'message' => 'Этот URL-адрес содержит запрещенный контент и не может быть обработан по соображениям безопасности.',
'type' => 'error'
]
]
];

View file

@ -5,6 +5,7 @@ namespace App;
use Inc\Language;
use Inc\URLAnalyzer;
use Inc\URLAnalyzer\URLAnalyzerException;
use Inc\Cache;
/**
* URL Processor
@ -109,6 +110,19 @@ class URLProcessor
} else {
if ($errorType === URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) {
$this->redirect(trim($additionalInfo), $errorType);
} elseif ($errorType === URLAnalyzer::ERROR_DMCA_DOMAIN) {
// For DMCA domains, show the custom message directly instead of redirecting
Language::init(LANGUAGE);
$message = $e->getMessage();
$message_type = 'error';
$url = ''; // Initialize url variable for the view
// Initialize cache for counting
$cache = new \Inc\Cache();
$cache_folder = $cache->getCacheFileCount();
require __DIR__ . '/views/home.php';
exit;
}
$this->redirect(SITE_URL, $errorType);
}

View file

@ -60,6 +60,7 @@
</div>
</div>
</nav>
<div class="fast_buttons">
<div class="extension">
<button class="extension__toggle"><?php echo \Inc\Language::get('nav_extension'); ?></button>
<div class="extension__menu">
@ -67,6 +68,13 @@
<a target="_blank" href="https://chromewebstore.google.com/detail/marreta/ipelapagohjgjcgpncpbmaaacemafppe"><span class="name">Chrome</span><span class="icon icon--chrome"></span></a>
</div>
</div>
<div class="theme-controls">
<button class="theme-toggle" id="themeToggle">
<span class="icon icon--sun"></span>
<span class="icon icon--moon"></span>
</button>
</div>
</div>
</header>
<main>
@ -85,7 +93,9 @@
value="<?php echo htmlspecialchars($url); ?>"
required
pattern="https?://.+"
title="<?php echo \Inc\Language::getMessage('INVALID_URL')['message']; ?>">
title="<?php /* Escaped like the value attribute so a quote in a translation cannot break the markup. */ echo htmlspecialchars(\Inc\Language::getMessage('INVALID_URL')['message']); ?>"
autofocus>
<span class="paste" id="paste"><span class="icon icon--paste"></span></span>
</div>
<button type="submit" alt="<?php echo \Inc\Language::get('analyze_button'); ?>">
<span class="icon icon--marreta"></span>

View file

@ -1,4 +1,5 @@
services:
# Marreta
marreta:
container_name: marreta
image: ghcr.io/manualdousuario/marreta:latest
@ -11,11 +12,28 @@ services:
- SITE_NAME=${SITE_NAME:-}
- SITE_DESCRIPTION=${SITE_DESCRIPTION:-}
- SITE_URL=${SITE_URL:-https://marreta.localhost}
- DNS_SERVERS=${DNS_SERVER:-}
- LANGUAGE=${LANGUAGE:-}
# Read DNS_SERVERS from the host/.env like the other entries; the older
# DNS_SERVER spelling is still honored as a fallback before the default.
- DNS_SERVERS=${DNS_SERVERS:-${DNS_SERVER:-1.1.1.1,8.8.8.8}}
- LANGUAGE=${LANGUAGE:-pt-br}
- LOG_LEVEL=${LOG_LEVEL:-WARNING}
- SELENIUM_HOST=${SELENIUM_HOST:-selenium-hub:4444}
- CLEANUP_DAYS=7 # Optional
- PROXY_LIST=url # Optional
restart: unless-stopped
# Selenium
# Selenium Grid hub; the marreta service's default SELENIUM_HOST
# (selenium-hub:4444) points at this container.
selenium-hub:
image: selenium/hub:4.27.0-20241204
container_name: selenium-hub
environment:
- SE_ENABLE_TRACING=false
# NOTE(review): GRID_* names look like Selenium Grid 3 settings; confirm the
# 4.27 hub image still reads them.
- GRID_MAX_SESSION=10
- GRID_BROWSER_TIMEOUT=10
- GRID_TIMEOUT=10
# NOTE(review): this makes the hub start after marreta; confirm that order is
# intended.
depends_on:
- marreta
# Port mappings are quoted so every YAML parser keeps them as strings.
ports:
- "4442:4442"
- "4443:4443"
- "4444:4444"
selenium-chromium:
container_name: selenium-chromium
image: selenium/node-chromium:4.27.0-20241204
@ -44,17 +62,3 @@ services:
entrypoint: bash -c 'SE_OPTS="--host $$HOSTNAME" /opt/bin/entry_point.sh'
depends_on:
- selenium-hub
selenium-hub:
image: selenium/hub:4.27.0-20241204
container_name: selenium-hub
environment:
- SE_ENABLE_TRACING=false
- GRID_MAX_SESSION=10
- GRID_BROWSER_TIMEOUT=10
- GRID_TIMEOUT=10
depends_on:
- marreta
ports:
- 4442:4442
- 4443:4443
- 4444:4444

View file

@ -54,6 +54,7 @@ log_success "Environment variables configured"
log_info "Adjusting directory permissions..."
mkdir -p /app/cache /app/logs # Ensures directories exist
mkdir -p /app/cache/database
chown -R www-data:www-data /app/cache /app/logs
chmod -R 775 /app/cache /app/logs
@ -108,8 +109,21 @@ nginx -g "daemon off;" &
sleep 3
check_nginx
# Starting Cron
# Restarts the cron service so scheduled jobs run inside the container.
# NOTE(review): the success line below is logged without checking the exit
# status of `service cron restart` — confirm whether the script uses `set -e`.
log_info "Starting Cron..."
service cron restart
log_success "Cron started"
echo -e "\n${GREEN}=== Marreta initialized ===${NC}\n"
# Run proxy list updater
# One-off run of the PHP CLI script /app/bin/proxy during startup.
log_info "Running proxy list updater..."
# A non-zero exit is not treated as fatal: it is only reported at info level
# and the script carries on.
if php /app/bin/proxy; then
log_success "Proxy list updater completed successfully"
else
log_info "Proxy list updater finished (may not have been configured)"
fi
# Wait for any process to exit
# `wait -n` returns as soon as one background job ends (nginx was started
# with `&` earlier in this script).
wait -n