removida configuração de user agent

This commit is contained in:
Renan Bernordi 2025-01-09 21:41:28 -03:00
parent 1d1bc21ab7
commit 0514435f2a
5 changed files with 18 additions and 33 deletions

View file

@ -105,7 +105,6 @@ The configurations are organized in `data/`:
- `domain_rules.php`: Site-specific rules - `domain_rules.php`: Site-specific rules
- `global_rules.php`: Rules that apply to all sites - `global_rules.php`: Rules that apply to all sites
- `blocked_domains.php`: List of blocked sites - `blocked_domains.php`: List of blocked sites
- `user_agents.php`: User Agents configurations
### Translations ### Translations

View file

@ -105,7 +105,6 @@ As configurações estão organizadas em `data/`:
- `domain_rules.php`: Regras específicas para cada site - `domain_rules.php`: Regras específicas para cada site
- `global_rules.php`: Regras que se aplicam a todos os sites - `global_rules.php`: Regras que se aplicam a todos os sites
- `blocked_domains.php`: Lista de sites bloqueados - `blocked_domains.php`: Lista de sites bloqueados
- `user_agents.php`: Configurações de User Agents
### Traduções ### Traduções

View file

@ -99,7 +99,6 @@ try {
* Load system configurations * Load system configurations
* Carrega as configurações do sistema * Carrega as configurações do sistema
*/ */
define('USER_AGENTS', require __DIR__ . '/data/user_agents.php');
define('BLOCKED_DOMAINS', require __DIR__ . '/data/blocked_domains.php'); define('BLOCKED_DOMAINS', require __DIR__ . '/data/blocked_domains.php');
define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php'); define('DOMAIN_RULES', require __DIR__ . '/data/domain_rules.php');
define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php'); define('GLOBAL_RULES', require __DIR__ . '/data/global_rules.php');

View file

@ -1,25 +0,0 @@
<?php
/**
* Bot configurations
* Configurações dos bots
*
* Defines user agents that can be used to make requests
* Define os user agents que podem ser utilizados para fazer requisições
*
* These user agents are used to simulate legitimate web crawlers
* Estes user agents são usados para simular crawlers web legítimos
*
* This file declares no symbols: its single statement returns a plain,
* integer-indexed array of three User-Agent strings, so the list is obtained
* as the value of a `require`/`include` expression on this file.
*
* NOTE(review): the two full Googlebot strings below contain the literal text
* "Chrome/W.X.Y.Z". That looks like the version placeholder used in Google's
* published crawler user-agent examples rather than a real Chrome version;
* confirm whether a concrete version number should be substituted before the
* strings are used in requests.
*
* @return string[] List of User-Agent header values
*/
return [
// Google News bot
// Bot do Google News
// Short product token only, not a full browser-style User-Agent string.
'Googlebot-News',
// Mobile Googlebot
// Googlebot para dispositivos móveis
// NOTE(review): contains the literal "W.X.Y.Z" version segment (see file header).
'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
// Desktop Googlebot
// Googlebot para desktop
// NOTE(review): contains the literal "W.X.Y.Z" version segment (see file header).
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
];

View file

@ -35,17 +35,31 @@ class URLAnalyzer
* @var array List of available User Agents for requests * @var array List of available User Agents for requests
* @var array Lista de User Agents disponíveis para requisições * @var array Lista de User Agents disponíveis para requisições
*/ */
private $userAgents; private $userAgents = [
// Google News bot
// Bot do Google News
'Googlebot-News',
// Mobile Googlebot
// Googlebot para dispositivos móveis
'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
// Desktop Googlebot
// Googlebot para desktop
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
];
/** /**
* @var array List of social media referrers * @var array List of social media referrers
* @var array Lista de referenciadores de mídia social * @var array Lista de referenciadores de mídia social
*/ */
private $socialReferrers = [ private $socialReferrers = [
// Twitter
'https://t.co/', 'https://t.co/',
'https://www.google.com/',
'https://www.facebook.com/',
'https://www.twitter.com/', 'https://www.twitter.com/',
// Google
'https://www.google.com/',
// Facebook
'https://www.facebook.com/',
// Linkedin
'https://www.linkedin.com/' 'https://www.linkedin.com/'
]; ];
@ -82,7 +96,6 @@ class URLAnalyzer
*/ */
public function __construct() public function __construct()
{ {
$this->userAgents = USER_AGENTS;
$this->dnsServers = explode(',', DNS_SERVERS); $this->dnsServers = explode(',', DNS_SERVERS);
$this->rules = new Rules(); $this->rules = new Rules();
$this->cache = new Cache(); $this->cache = new Cache();
@ -326,7 +339,7 @@ class URLAnalyzer
private function fetchContent($url) private function fetchContent($url)
{ {
$curl = new Curl(); $curl = new Curl();
$this->setupBasicCurlOptions($curl); $this->setupBasicCurlOptions($curl, $url);
$host = parse_url($url, PHP_URL_HOST); $host = parse_url($url, PHP_URL_HOST);
$host = preg_replace('/^www\./', '', $host); $host = preg_replace('/^www\./', '', $host);