removida configuração de user agent

This commit is contained in:
Renan Bernordi 2025-01-09 21:41:28 -03:00
parent 1d1bc21ab7
commit 0514435f2a
5 changed files with 18 additions and 33 deletions

View file

@ -105,7 +105,6 @@ The configurations are organized in `data/`:
- `domain_rules.php`: Site-specific rules
- `global_rules.php`: Rules that apply to all sites
- `blocked_domains.php`: List of blocked sites
- `user_agents.php`: User agent configuration
### Translations

View file

@ -105,7 +105,6 @@ As configurações estão organizadas em `data/`:
- `domain_rules.php`: Regras específicas para cada site
- `global_rules.php`: Regras que se aplicam a todos os sites
- `blocked_domains.php`: Lista de sites bloqueados
- `user_agents.php`: Configurações de User Agents
### Traduções

View file

@ -99,7 +99,6 @@ try {
* Load system configurations
* Carrega as configurações do sistema
*/
define('USER_AGENTS', require __DIR__ . '/data/user_agents.php');
define('BLOCKED_DOMAINS', require __DIR__ . '/data/blocked_domains.php');
define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php');
define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php');

View file

@ -1,25 +0,0 @@
<?php
/**
* Bot configurations
* Configurações dos bots
*
* Defines user agents that can be used to make requests
* Define os user agents que podem ser utilizados para fazer requisições
*
* These user agents are used to simulate legitimate web crawlers
* Estes user agents são usados para simular crawlers web legítimos
*
* This file declares no symbols: it evaluates to a flat, zero-indexed list of
* User-Agent header strings, so whoever `require`s it receives the array.
*/
return [
// Google News bot
// Bot do Google News
'Googlebot-News',
// Mobile Googlebot
// Googlebot para dispositivos móveis
// NOTE(review): "Chrome/W.X.Y.Z" is sent verbatim; it looks like a version
// placeholder rather than a real Chrome version - confirm this is intended.
'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
// Desktop Googlebot
// Googlebot para desktop
// NOTE(review): same literal "Chrome/W.X.Y.Z" token as the mobile entry - confirm.
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
];

View file

@ -35,17 +35,31 @@ class URLAnalyzer
* @var array List of available User Agents for requests
* @var array Lista de User Agents disponíveis para requisições
*/
private $userAgents;
private $userAgents = [
// Flat list of User-Agent header strings, hard-coded on the class.
// Google News bot
// Bot do Google News
'Googlebot-News',
// Mobile Googlebot
// Googlebot para dispositivos móveis
// NOTE(review): "Chrome/W.X.Y.Z" is sent verbatim; it looks like a version
// placeholder rather than a real Chrome version - confirm this is intended.
'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
// Desktop Googlebot
// Googlebot para desktop
// NOTE(review): same literal "Chrome/W.X.Y.Z" token as the mobile entry - confirm.
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
];
/**
* @var array List of social media referrers
* @var array Lista de referenciadores de mídia social
*/
private $socialReferrers = [
// Twitter
'https://t.co/',
'https://www.google.com/',
'https://www.facebook.com/',
'https://www.twitter.com/',
// Google
'https://www.google.com/',
// Facebook
'https://www.facebook.com/',
// LinkedIn
'https://www.linkedin.com/'
];
@ -82,7 +96,6 @@ class URLAnalyzer
*/
public function __construct()
{
$this->userAgents = USER_AGENTS;
$this->dnsServers = explode(',', DNS_SERVERS);
$this->rules = new Rules();
$this->cache = new Cache();
@ -326,7 +339,7 @@ class URLAnalyzer
private function fetchContent($url)
{
$curl = new Curl();
$this->setupBasicCurlOptions($curl);
$this->setupBasicCurlOptions($curl, $url);
$host = parse_url($url, PHP_URL_HOST);
$host = preg_replace('/^www\./', '', $host);