mirror of
https://github.com/manualdousuario/marreta.git
synced 2025-09-01 10:10:14 +00:00
extends urlanalyzer
This commit is contained in:
parent
9ffd8260fd
commit
91f58e61c7
14 changed files with 772 additions and 781 deletions
|
@ -1,5 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
namespace Inc;
|
||||||
|
|
||||||
use Inc\Cache\CacheStorageInterface;
|
use Inc\Cache\CacheStorageInterface;
|
||||||
use Inc\Cache\DiskStorage;
|
use Inc\Cache\DiskStorage;
|
||||||
use Inc\Cache\S3Storage;
|
use Inc\Cache\S3Storage;
|
||||||
|
@ -88,4 +90,4 @@ class Cache
|
||||||
|
|
||||||
return $this->storage->set($this->generateId($url), $content);
|
return $this->storage->set($this->generateId($url), $content);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
namespace Inc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Manages language translations and localization
|
* Manages language translations and localization
|
||||||
* Loads language files based on system configuration
|
* Loads language files based on system configuration
|
||||||
|
@ -51,4 +53,4 @@ class Language {
|
||||||
public static function getCurrentLanguage() {
|
public static function getCurrentLanguage() {
|
||||||
return self::$currentLanguage;
|
return self::$currentLanguage;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
namespace Inc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Manages domain-specific content manipulation rules
|
* Manages domain-specific content manipulation rules
|
||||||
* Handles rule merging between global and domain-specific configurations
|
* Handles rule merging between global and domain-specific configurations
|
||||||
|
@ -146,4 +148,4 @@ class Rules
|
||||||
// Helper function for associative array check
|
// Helper function for associative array check
|
||||||
function is_assoc_array($array) {
|
function is_assoc_array($array) {
|
||||||
return array_keys($array) !== range(0, count($array) - 1);
|
return array_keys($array) !== range(0, count($array) - 1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,273 +1,100 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
|
namespace Inc;
|
||||||
* Class for URL analysis and processing
|
|
||||||
* URL analysis and cleaning
|
|
||||||
* Content caching
|
|
||||||
* DNS resolution
|
|
||||||
* HTTP requests with multiple attempts
|
|
||||||
* Content processing based on domain-specific rules
|
|
||||||
* Wayback Machine support
|
|
||||||
* Selenium extraction support
|
|
||||||
*/
|
|
||||||
|
|
||||||
require_once __DIR__ . '/Rules.php';
|
|
||||||
require_once __DIR__ . '/Cache.php';
|
|
||||||
require_once __DIR__ . '/Logger.php';
|
|
||||||
require_once __DIR__ . '/Language.php';
|
|
||||||
|
|
||||||
use Curl\Curl;
|
|
||||||
use Facebook\WebDriver\Remote\DesiredCapabilities;
|
|
||||||
use Facebook\WebDriver\Remote\RemoteWebDriver;
|
|
||||||
use Facebook\WebDriver\Firefox\FirefoxOptions;
|
|
||||||
use Facebook\WebDriver\Firefox\FirefoxProfile;
|
|
||||||
use Facebook\WebDriver\Chrome\ChromeOptions;
|
|
||||||
use Inc\Logger;
|
use Inc\Logger;
|
||||||
|
use Inc\URLAnalyzer\URLAnalyzerBase;
|
||||||
|
use Inc\URLAnalyzer\URLAnalyzerException;
|
||||||
|
use Inc\URLAnalyzer\URLAnalyzerFetch;
|
||||||
|
use Inc\URLAnalyzer\URLAnalyzerProcess;
|
||||||
|
use Inc\URLAnalyzer\URLAnalyzerError;
|
||||||
|
use Inc\URLAnalyzer\URLAnalyzerUtils;
|
||||||
|
|
||||||
/**
|
class URLAnalyzer extends URLAnalyzerBase
|
||||||
* Custom exception class for URL analysis errors
|
|
||||||
*/
|
|
||||||
class URLAnalyzerException extends Exception
|
|
||||||
{
|
{
|
||||||
private $errorType;
|
private $fetch;
|
||||||
private $additionalInfo;
|
private $process;
|
||||||
|
private $error;
|
||||||
|
private $utils;
|
||||||
|
|
||||||
public function __construct($message, $code, $errorType, $additionalInfo = '')
|
|
||||||
{
|
|
||||||
parent::__construct($message, $code);
|
|
||||||
$this->errorType = $errorType;
|
|
||||||
$this->additionalInfo = $additionalInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getErrorType()
|
|
||||||
{
|
|
||||||
return $this->errorType;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getAdditionalInfo()
|
|
||||||
{
|
|
||||||
return $this->additionalInfo;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class URLAnalyzer
|
|
||||||
{
|
|
||||||
// Error type constants
|
|
||||||
const ERROR_INVALID_URL = 'INVALID_URL';
|
|
||||||
const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN';
|
|
||||||
const ERROR_NOT_FOUND = 'NOT_FOUND';
|
|
||||||
const ERROR_HTTP_ERROR = 'HTTP_ERROR';
|
|
||||||
const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR';
|
|
||||||
const ERROR_DNS_FAILURE = 'DNS_FAILURE';
|
|
||||||
const ERROR_CONTENT_ERROR = 'CONTENT_ERROR';
|
|
||||||
const ERROR_GENERIC_ERROR = 'GENERIC_ERROR';
|
|
||||||
|
|
||||||
// Error mapping
|
|
||||||
private $errorMap = [
|
|
||||||
self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'],
|
|
||||||
self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'],
|
|
||||||
self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'],
|
|
||||||
self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'],
|
|
||||||
self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'],
|
|
||||||
self::ERROR_DNS_FAILURE => ['code' => 504, 'message_key' => 'DNS_FAILURE'],
|
|
||||||
self::ERROR_CONTENT_ERROR => ['code' => 502, 'message_key' => 'CONTENT_ERROR'],
|
|
||||||
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR']
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper method to throw errors
|
|
||||||
*/
|
|
||||||
private function throwError($errorType, $additionalInfo = '')
|
|
||||||
{
|
|
||||||
$errorConfig = $this->errorMap[$errorType];
|
|
||||||
$message = Language::getMessage($errorConfig['message_key'])['message'];
|
|
||||||
if ($additionalInfo) {
|
|
||||||
$message;
|
|
||||||
}
|
|
||||||
throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var array List of User Agents
|
|
||||||
*/
|
|
||||||
private $userAgents = [
|
|
||||||
// Google News bot
|
|
||||||
'Googlebot-News',
|
|
||||||
// Mobile Googlebot
|
|
||||||
'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
|
||||||
// Desktop Googlebot
|
|
||||||
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var array List of social media referrers
|
|
||||||
*/
|
|
||||||
private $socialReferrers = [
|
|
||||||
// Twitter
|
|
||||||
'https://t.co/',
|
|
||||||
'https://www.twitter.com/',
|
|
||||||
// Facebook
|
|
||||||
'https://www.facebook.com/',
|
|
||||||
// Linkedin
|
|
||||||
'https://www.linkedin.com/'
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var array List of DNS servers
|
|
||||||
*/
|
|
||||||
private $dnsServers;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var Rules Instance of rules class
|
|
||||||
*/
|
|
||||||
private $rules;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var Cache Instance of cache class
|
|
||||||
*/
|
|
||||||
private $cache;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var array List of activated rules
|
|
||||||
*/
|
|
||||||
private $activatedRules = [];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class constructor
|
|
||||||
* Initializes dependencies
|
|
||||||
*/
|
|
||||||
public function __construct()
|
|
||||||
{
|
|
||||||
$this->dnsServers = explode(',', DNS_SERVERS);
|
|
||||||
$this->rules = new Rules();
|
|
||||||
$this->cache = new Cache();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if a URL has redirects and return the final URL
|
|
||||||
* @param string $url URL to check redirects
|
|
||||||
* @return array Array with final URL and if there was a redirect
|
|
||||||
*/
|
|
||||||
public function checkStatus($url)
|
public function checkStatus($url)
|
||||||
{
|
{
|
||||||
$curl = new Curl();
|
return $this->utils->checkStatus($url);
|
||||||
$curl->setFollowLocation();
|
|
||||||
$curl->setOpt(CURLOPT_TIMEOUT, 5);
|
|
||||||
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
|
|
||||||
$curl->setOpt(CURLOPT_NOBODY, true);
|
|
||||||
$curl->setUserAgent($this->getRandomUserAgent());
|
|
||||||
$curl->get($url);
|
|
||||||
|
|
||||||
if ($curl->error) {
|
|
||||||
return [
|
|
||||||
'finalUrl' => $url,
|
|
||||||
'hasRedirect' => false,
|
|
||||||
'httpCode' => $curl->httpStatusCode
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
return [
|
|
||||||
'finalUrl' => $curl->effectiveUrl,
|
|
||||||
'hasRedirect' => ($curl->effectiveUrl !== $url),
|
|
||||||
'httpCode' => $curl->httpStatusCode
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public function __construct()
|
||||||
* Get a random user agent, with possibility of using Google bot
|
|
||||||
* @param bool $preferGoogleBot Whether to prefer Google bot user agents
|
|
||||||
* @return string Selected user agent
|
|
||||||
*/
|
|
||||||
private function getRandomUserAgent($preferGoogleBot = false)
|
|
||||||
{
|
{
|
||||||
if ($preferGoogleBot && rand(0, 100) < 70) {
|
parent::__construct();
|
||||||
return $this->userAgents[array_rand($this->userAgents)];
|
$this->fetch = new URLAnalyzerFetch();
|
||||||
}
|
$this->process = new URLAnalyzerProcess();
|
||||||
return $this->userAgents[array_rand($this->userAgents)];
|
$this->error = new URLAnalyzerError();
|
||||||
|
$this->utils = new URLAnalyzerUtils();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a random social media referrer
|
|
||||||
* @return string Selected referrer
|
|
||||||
*/
|
|
||||||
private function getRandomSocialReferrer()
|
|
||||||
{
|
|
||||||
return $this->socialReferrers[array_rand($this->socialReferrers)];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Main method for URL analysis
|
|
||||||
* @param string $url URL to be analyzed
|
|
||||||
* @return string Processed content
|
|
||||||
* @throws URLAnalyzerException In case of processing errors
|
|
||||||
*/
|
|
||||||
public function analyze($url)
|
public function analyze($url)
|
||||||
{
|
{
|
||||||
// Reset activated rules for new analysis
|
|
||||||
$this->activatedRules = [];
|
$this->activatedRules = [];
|
||||||
|
|
||||||
// 1. Check cache
|
// Get and process cached content if it exists
|
||||||
if ($this->cache->exists($url)) {
|
if ($this->cache->exists($url)) {
|
||||||
return $this->cache->get($url);
|
$rawContent = $this->cache->get($url);
|
||||||
|
// Process the raw content in real-time
|
||||||
|
return $this->process->processContent($rawContent, parse_url($url, PHP_URL_HOST), $url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Check blocked domains
|
|
||||||
$host = parse_url($url, PHP_URL_HOST);
|
$host = parse_url($url, PHP_URL_HOST);
|
||||||
if (!$host) {
|
if (!$host) {
|
||||||
$this->throwError(self::ERROR_INVALID_URL);
|
$this->error->throwError(self::ERROR_INVALID_URL, '');
|
||||||
}
|
}
|
||||||
$host = preg_replace('/^www\./', '', $host);
|
$host = preg_replace('/^www\./', '', $host);
|
||||||
|
|
||||||
if (in_array($host, BLOCKED_DOMAINS)) {
|
if (in_array($host, BLOCKED_DOMAINS)) {
|
||||||
Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN');
|
Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN');
|
||||||
$this->throwError(self::ERROR_BLOCKED_DOMAIN);
|
$this->error->throwError(self::ERROR_BLOCKED_DOMAIN, '');
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Check URL status code before proceeding
|
$redirectInfo = $this->utils->checkStatus($url);
|
||||||
$redirectInfo = $this->checkStatus($url);
|
|
||||||
if ($redirectInfo['httpCode'] !== 200) {
|
if ($redirectInfo['httpCode'] !== 200) {
|
||||||
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
|
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
|
||||||
if ($redirectInfo['httpCode'] === 404) {
|
if ($redirectInfo['httpCode'] === 404) {
|
||||||
$this->throwError(self::ERROR_NOT_FOUND);
|
$this->error->throwError(self::ERROR_NOT_FOUND, '');
|
||||||
} else {
|
} else {
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR, "HTTP {$redirectInfo['httpCode']}");
|
$this->error->throwError(self::ERROR_HTTP_ERROR, (string)$redirectInfo['httpCode']);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 4. Get domain rules and check fetch strategy
|
|
||||||
$domainRules = $this->getDomainRules($host);
|
$domainRules = $this->getDomainRules($host);
|
||||||
$fetchStrategy = isset($domainRules['fetchStrategies']) ? $domainRules['fetchStrategies'] : null;
|
$fetchStrategy = isset($domainRules['fetchStrategies']) ? $domainRules['fetchStrategies'] : null;
|
||||||
|
|
||||||
// If a specific fetch strategy is defined, use only that
|
|
||||||
if ($fetchStrategy) {
|
if ($fetchStrategy) {
|
||||||
try {
|
try {
|
||||||
$content = null;
|
$content = null;
|
||||||
switch ($fetchStrategy) {
|
switch ($fetchStrategy) {
|
||||||
case 'fetchContent':
|
case 'fetchContent':
|
||||||
$content = $this->fetchContent($url);
|
$content = $this->fetch->fetchContent($url);
|
||||||
break;
|
break;
|
||||||
case 'fetchFromWaybackMachine':
|
case 'fetchFromWaybackMachine':
|
||||||
$content = $this->fetchFromWaybackMachine($url);
|
$content = $this->fetch->fetchFromWaybackMachine($url);
|
||||||
break;
|
break;
|
||||||
case 'fetchFromSelenium':
|
case 'fetchFromSelenium':
|
||||||
$content = $this->fetchFromSelenium($url, isset($domainRules['browser']) ? $domainRules['browser'] : 'firefox');
|
$content = $this->fetch->fetchFromSelenium($url, isset($domainRules['browser']) ? $domainRules['browser'] : 'firefox');
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($content)) {
|
if (!empty($content)) {
|
||||||
$this->activatedRules[] = "fetchStrategy: $fetchStrategy";
|
$this->activatedRules[] = "fetchStrategy: $fetchStrategy";
|
||||||
$processedContent = $this->processContent($content, $host, $url);
|
// Cache the raw HTML content
|
||||||
$this->cache->set($url, $processedContent);
|
$this->cache->set($url, $content);
|
||||||
return $processedContent;
|
// Process content in real-time
|
||||||
|
return $this->process->processContent($content, $host, $url);
|
||||||
}
|
}
|
||||||
} catch (Exception $e) {
|
} catch (\Exception $e) {
|
||||||
Logger::getInstance()->logUrl($url, strtoupper($fetchStrategy) . '_ERROR', $e->getMessage());
|
Logger::getInstance()->logUrl($url, strtoupper($fetchStrategy) . '_ERROR', $e->getMessage());
|
||||||
throw $e;
|
throw $e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5. Try all strategies in sequence
|
|
||||||
$fetchStrategies = [
|
$fetchStrategies = [
|
||||||
['method' => 'fetchContent', 'args' => [$url]],
|
['method' => 'fetchContent', 'args' => [$url]],
|
||||||
['method' => 'fetchFromWaybackMachine', 'args' => [$url]],
|
['method' => 'fetchFromWaybackMachine', 'args' => [$url]],
|
||||||
|
@ -277,566 +104,50 @@ class URLAnalyzer
|
||||||
$lastError = null;
|
$lastError = null;
|
||||||
foreach ($fetchStrategies as $strategy) {
|
foreach ($fetchStrategies as $strategy) {
|
||||||
try {
|
try {
|
||||||
$content = call_user_func_array([$this, $strategy['method']], $strategy['args']);
|
$content = call_user_func_array([$this->fetch, $strategy['method']], $strategy['args']);
|
||||||
if (!empty($content)) {
|
if (!empty($content)) {
|
||||||
$this->activatedRules[] = "fetchStrategy: {$strategy['method']}";
|
$this->activatedRules[] = "fetchStrategy: {$strategy['method']}";
|
||||||
$processedContent = $this->processContent($content, $host, $url);
|
// Cache the raw HTML content
|
||||||
$this->cache->set($url, $processedContent);
|
$this->cache->set($url, $content);
|
||||||
return $processedContent;
|
// Process content in real-time
|
||||||
|
return $this->process->processContent($content, $host, $url);
|
||||||
}
|
}
|
||||||
} catch (Exception $e) {
|
} catch (\Exception $e) {
|
||||||
$lastError = $e;
|
$lastError = $e;
|
||||||
error_log("{$strategy['method']}_ERROR: " . $e->getMessage());
|
error_log("{$strategy['method']}_ERROR: " . $e->getMessage());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If all strategies failed
|
|
||||||
Logger::getInstance()->logUrl($url, 'GENERAL_FETCH_ERROR');
|
Logger::getInstance()->logUrl($url, 'GENERAL_FETCH_ERROR');
|
||||||
if ($lastError) {
|
if ($lastError) {
|
||||||
$message = $lastError->getMessage();
|
$message = $lastError->getMessage();
|
||||||
if (strpos($message, 'DNS') !== false) {
|
if (strpos($message, 'DNS') !== false) {
|
||||||
$this->throwError(self::ERROR_DNS_FAILURE);
|
$this->error->throwError(self::ERROR_DNS_FAILURE, '');
|
||||||
} elseif (strpos($message, 'CURL') !== false) {
|
} elseif (strpos($message, 'CURL') !== false) {
|
||||||
$this->throwError(self::ERROR_CONNECTION_ERROR);
|
$this->error->throwError(self::ERROR_CONNECTION_ERROR, '');
|
||||||
} elseif (strpos($message, 'HTTP') !== false) {
|
} elseif (strpos($message, 'HTTP') !== false) {
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR);
|
$this->error->throwError(self::ERROR_HTTP_ERROR, '');
|
||||||
} elseif (strpos($message, 'not found') !== false) {
|
} elseif (strpos($message, 'not found') !== false) {
|
||||||
$this->throwError(self::ERROR_NOT_FOUND);
|
$this->error->throwError(self::ERROR_NOT_FOUND, '');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$this->throwError(self::ERROR_CONTENT_ERROR);
|
$this->error->throwError(self::ERROR_CONTENT_ERROR, '');
|
||||||
} catch (URLAnalyzerException $e) {
|
} catch (URLAnalyzerException $e) {
|
||||||
throw $e;
|
throw $e;
|
||||||
} catch (Exception $e) {
|
} catch (\Exception $e) {
|
||||||
// Map exceptions to error types
|
|
||||||
$message = $e->getMessage();
|
$message = $e->getMessage();
|
||||||
if (strpos($message, 'DNS') !== false) {
|
if (strpos($message, 'DNS') !== false) {
|
||||||
$this->throwError(self::ERROR_DNS_FAILURE);
|
$this->error->throwError(self::ERROR_DNS_FAILURE, '');
|
||||||
} elseif (strpos($message, 'CURL') !== false) {
|
} elseif (strpos($message, 'CURL') !== false) {
|
||||||
$this->throwError(self::ERROR_CONNECTION_ERROR);
|
$this->error->throwError(self::ERROR_CONNECTION_ERROR, '');
|
||||||
} elseif (strpos($message, 'HTTP') !== false) {
|
} elseif (strpos($message, 'HTTP') !== false) {
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR);
|
$this->error->throwError(self::ERROR_HTTP_ERROR, '');
|
||||||
} elseif (strpos($message, 'not found') !== false) {
|
} elseif (strpos($message, 'not found') !== false) {
|
||||||
$this->throwError(self::ERROR_NOT_FOUND);
|
$this->error->throwError(self::ERROR_NOT_FOUND, '');
|
||||||
} else {
|
} else {
|
||||||
$this->throwError(self::ERROR_GENERIC_ERROR, $message);
|
$this->error->throwError(self::ERROR_GENERIC_ERROR, (string)$message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
/**
|
|
||||||
* Fetch content from URL
|
|
||||||
*/
|
|
||||||
private function fetchContent($url)
|
|
||||||
{
|
|
||||||
$curl = new Curl();
|
|
||||||
|
|
||||||
$host = parse_url($url, PHP_URL_HOST);
|
|
||||||
if (!$host) {
|
|
||||||
$this->throwError(self::ERROR_INVALID_URL);
|
|
||||||
}
|
|
||||||
$host = preg_replace('/^www\./', '', $host);
|
|
||||||
$domainRules = $this->getDomainRules($host);
|
|
||||||
|
|
||||||
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
|
|
||||||
$curl->setOpt(CURLOPT_MAXREDIRS, 2);
|
|
||||||
$curl->setOpt(CURLOPT_TIMEOUT, 10);
|
|
||||||
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
|
|
||||||
$curl->setOpt(CURLOPT_DNS_SERVERS, implode(',', $this->dnsServers));
|
|
||||||
$curl->setOpt(CURLOPT_ENCODING, '');
|
|
||||||
|
|
||||||
// Additional anti-detection headers
|
|
||||||
$curl->setHeaders([
|
|
||||||
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
||||||
'Accept-Language' => 'en-US,en;q=0.5',
|
|
||||||
'Cache-Control' => 'no-cache',
|
|
||||||
'Pragma' => 'no-cache',
|
|
||||||
'DNT' => '1'
|
|
||||||
]);
|
|
||||||
|
|
||||||
// Set Google bot specific headers
|
|
||||||
if (isset($domainRules['fromGoogleBot'])) {
|
|
||||||
$curl->setUserAgent($this->getRandomUserAgent(true));
|
|
||||||
$curl->setHeaders([
|
|
||||||
'X-Forwarded-For' => '66.249.' . rand(64, 95) . '.' . rand(1, 254),
|
|
||||||
'From' => 'googlebot(at)googlebot.com'
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add domain-specific headers
|
|
||||||
if (isset($domainRules['headers'])) {
|
|
||||||
$curl->setHeaders($domainRules['headers']);
|
|
||||||
}
|
|
||||||
|
|
||||||
$curl->get($url);
|
|
||||||
|
|
||||||
if ($curl->error) {
|
|
||||||
$errorMessage = $curl->errorMessage;
|
|
||||||
if (strpos($errorMessage, 'DNS') !== false) {
|
|
||||||
$this->throwError(self::ERROR_DNS_FAILURE);
|
|
||||||
} elseif (strpos($errorMessage, 'CURL') !== false) {
|
|
||||||
$this->throwError(self::ERROR_CONNECTION_ERROR);
|
|
||||||
} elseif ($curl->httpStatusCode === 404) {
|
|
||||||
$this->throwError(self::ERROR_NOT_FOUND);
|
|
||||||
} else {
|
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($curl->httpStatusCode !== 200 || empty($curl->response)) {
|
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $curl->response;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Try to get content from Wayback Machine
|
|
||||||
*/
|
|
||||||
private function fetchFromWaybackMachine($url)
|
|
||||||
{
|
|
||||||
$url = preg_replace('#^https?://#', '', $url);
|
|
||||||
$availabilityUrl = "https://archive.org/wayback/available?url=" . urlencode($url);
|
|
||||||
|
|
||||||
$curl = new Curl();
|
|
||||||
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
|
|
||||||
$curl->setOpt(CURLOPT_TIMEOUT, 10);
|
|
||||||
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
|
|
||||||
$curl->setUserAgent($this->getRandomUserAgent());
|
|
||||||
|
|
||||||
$curl->get($availabilityUrl);
|
|
||||||
|
|
||||||
if ($curl->error) {
|
|
||||||
if (strpos($curl->errorMessage, 'DNS') !== false) {
|
|
||||||
$this->throwError(self::ERROR_DNS_FAILURE);
|
|
||||||
} elseif (strpos($curl->errorMessage, 'CURL') !== false) {
|
|
||||||
$this->throwError(self::ERROR_CONNECTION_ERROR);
|
|
||||||
} else {
|
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$data = $curl->response;
|
|
||||||
if (!isset($data->archived_snapshots->closest->url)) {
|
|
||||||
$this->throwError(self::ERROR_NOT_FOUND);
|
|
||||||
}
|
|
||||||
|
|
||||||
$archiveUrl = $data->archived_snapshots->closest->url;
|
|
||||||
$curl = new Curl();
|
|
||||||
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
|
|
||||||
$curl->setOpt(CURLOPT_TIMEOUT, 10);
|
|
||||||
$curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
|
|
||||||
$curl->setUserAgent($this->getRandomUserAgent());
|
|
||||||
|
|
||||||
$curl->get($archiveUrl);
|
|
||||||
|
|
||||||
if ($curl->error || $curl->httpStatusCode !== 200 || empty($curl->response)) {
|
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
$content = $curl->response;
|
|
||||||
|
|
||||||
// Remove Wayback Machine toolbar and cache URLs
|
|
||||||
$content = preg_replace('/<!-- BEGIN WAYBACK TOOLBAR INSERT -->.*?<!-- END WAYBACK TOOLBAR INSERT -->/s', '', $content);
|
|
||||||
$content = preg_replace('/https?:\/\/web\.archive\.org\/web\/\d+im_\//', '', $content);
|
|
||||||
|
|
||||||
return $content;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Try to get content using Selenium
|
|
||||||
*/
|
|
||||||
private function fetchFromSelenium($url, $browser = 'firefox')
|
|
||||||
{
|
|
||||||
$host = 'http://'.SELENIUM_HOST.'/wd/hub';
|
|
||||||
|
|
||||||
if ($browser === 'chrome') {
|
|
||||||
$options = new ChromeOptions();
|
|
||||||
$options->addArguments([
|
|
||||||
'--headless',
|
|
||||||
'--disable-gpu',
|
|
||||||
'--no-sandbox',
|
|
||||||
'--disable-dev-shm-usage',
|
|
||||||
'--disable-images',
|
|
||||||
'--blink-settings=imagesEnabled=false'
|
|
||||||
]);
|
|
||||||
|
|
||||||
$capabilities = DesiredCapabilities::chrome();
|
|
||||||
$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
|
|
||||||
} else {
|
|
||||||
$profile = new FirefoxProfile();
|
|
||||||
$profile->setPreference("permissions.default.image", 2);
|
|
||||||
$profile->setPreference("javascript.enabled", true);
|
|
||||||
$profile->setPreference("network.http.referer.defaultPolicy", 0);
|
|
||||||
$profile->setPreference("network.http.referer.defaultReferer", "https://www.google.com");
|
|
||||||
$profile->setPreference("network.http.referer.spoofSource", true);
|
|
||||||
$profile->setPreference("network.http.referer.trimmingPolicy", 0);
|
|
||||||
|
|
||||||
$options = new FirefoxOptions();
|
|
||||||
$options->setProfile($profile);
|
|
||||||
|
|
||||||
$capabilities = DesiredCapabilities::firefox();
|
|
||||||
$capabilities->setCapability(FirefoxOptions::CAPABILITY, $options);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
$driver = RemoteWebDriver::create($host, $capabilities);
|
|
||||||
$driver->manage()->timeouts()->pageLoadTimeout(10);
|
|
||||||
$driver->manage()->timeouts()->setScriptTimeout(5);
|
|
||||||
|
|
||||||
$driver->get($url);
|
|
||||||
|
|
||||||
$htmlSource = $driver->executeScript("return document.documentElement.outerHTML;");
|
|
||||||
|
|
||||||
$driver->quit();
|
|
||||||
|
|
||||||
if (empty($htmlSource)) {
|
|
||||||
$this->throwError(self::ERROR_CONTENT_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $htmlSource;
|
|
||||||
} catch (Exception $e) {
|
|
||||||
if (isset($driver)) {
|
|
||||||
$driver->quit();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map Selenium errors to appropriate error types
|
|
||||||
$message = $e->getMessage();
|
|
||||||
if (strpos($message, 'DNS') !== false) {
|
|
||||||
$this->throwError(self::ERROR_DNS_FAILURE);
|
|
||||||
} elseif (strpos($message, 'timeout') !== false) {
|
|
||||||
$this->throwError(self::ERROR_CONNECTION_ERROR);
|
|
||||||
} elseif (strpos($message, 'not found') !== false) {
|
|
||||||
$this->throwError(self::ERROR_NOT_FOUND);
|
|
||||||
} else {
|
|
||||||
$this->throwError(self::ERROR_HTTP_ERROR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get specific rules for a domain
|
|
||||||
*/
|
|
||||||
private function getDomainRules($domain)
|
|
||||||
{
|
|
||||||
return $this->rules->getDomainRules($domain);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Process HTML content applying domain rules
|
|
||||||
*/
|
|
||||||
private function processContent($content, $host, $url)
|
|
||||||
{
|
|
||||||
if (strlen($content) < 5120) {
|
|
||||||
$this->throwError(self::ERROR_CONTENT_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
$dom = new DOMDocument();
|
|
||||||
$dom->preserveWhiteSpace = true;
|
|
||||||
libxml_use_internal_errors(true);
|
|
||||||
@$dom->loadHTML(mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
|
|
||||||
libxml_clear_errors();
|
|
||||||
|
|
||||||
$xpath = new DOMXPath($dom);
|
|
||||||
|
|
||||||
// Process canonical tags
|
|
||||||
$canonicalLinks = $xpath->query("//link[@rel='canonical']");
|
|
||||||
if ($canonicalLinks !== false) {
|
|
||||||
foreach ($canonicalLinks as $link) {
|
|
||||||
if ($link->parentNode) {
|
|
||||||
$link->parentNode->removeChild($link);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add new canonical tag
|
|
||||||
$head = $xpath->query('//head')->item(0);
|
|
||||||
if ($head) {
|
|
||||||
$newCanonical = $dom->createElement('link');
|
|
||||||
$newCanonical->setAttribute('rel', 'canonical');
|
|
||||||
$newCanonical->setAttribute('href', $url);
|
|
||||||
$head->appendChild($newCanonical);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fix relative URLs
|
|
||||||
$this->fixRelativeUrls($dom, $xpath, $url);
|
|
||||||
|
|
||||||
$domainRules = $this->getDomainRules($host);
|
|
||||||
|
|
||||||
// Apply domain rules
|
|
||||||
if (isset($domainRules['customStyle'])) {
|
|
||||||
$styleElement = $dom->createElement('style');
|
|
||||||
$styleElement->appendChild($dom->createTextNode($domainRules['customStyle']));
|
|
||||||
$dom->getElementsByTagName('head')[0]->appendChild($styleElement);
|
|
||||||
$this->activatedRules[] = 'customStyle';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($domainRules['customCode'])) {
|
|
||||||
$scriptElement = $dom->createElement('script');
|
|
||||||
$scriptElement->setAttribute('type', 'text/javascript');
|
|
||||||
$scriptElement->appendChild($dom->createTextNode($domainRules['customCode']));
|
|
||||||
$dom->getElementsByTagName('body')[0]->appendChild($scriptElement);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove unwanted elements
|
|
||||||
$this->removeUnwantedElements($dom, $xpath, $domainRules);
|
|
||||||
|
|
||||||
// Clean inline styles
|
|
||||||
$this->cleanInlineStyles($xpath);
|
|
||||||
|
|
||||||
// Add Brand bar
|
|
||||||
$this->addBrandBar($dom, $xpath);
|
|
||||||
|
|
||||||
// Add Debug panel
|
|
||||||
$this->addDebugBar($dom, $xpath);
|
|
||||||
|
|
||||||
return $dom->saveHTML();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Remove unwanted elements based on domain rules
|
|
||||||
*/
|
|
||||||
private function removeUnwantedElements($dom, $xpath, $domainRules)
|
|
||||||
{
|
|
||||||
if (isset($domainRules['classAttrRemove'])) {
|
|
||||||
foreach ($domainRules['classAttrRemove'] as $class) {
|
|
||||||
$elements = $xpath->query("//*[contains(@class, '$class')]");
|
|
||||||
if ($elements !== false && $elements->length > 0) {
|
|
||||||
foreach ($elements as $element) {
|
|
||||||
$this->removeClassNames($element, [$class]);
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "classAttrRemove: $class";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($domainRules['removeElementsByTag'])) {
|
|
||||||
$tagsToRemove = $domainRules['removeElementsByTag'];
|
|
||||||
foreach ($tagsToRemove as $tag) {
|
|
||||||
$tagElements = $xpath->query("//$tag");
|
|
||||||
if ($tagElements !== false) {
|
|
||||||
foreach ($tagElements as $element) {
|
|
||||||
if ($element->parentNode) {
|
|
||||||
$element->parentNode->removeChild($element);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "removeElementsByTag: $tag";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($domainRules['idElementRemove'])) {
|
|
||||||
foreach ($domainRules['idElementRemove'] as $id) {
|
|
||||||
$elements = $xpath->query("//*[@id='$id']");
|
|
||||||
if ($elements !== false && $elements->length > 0) {
|
|
||||||
foreach ($elements as $element) {
|
|
||||||
if ($element->parentNode) {
|
|
||||||
$element->parentNode->removeChild($element);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "idElementRemove: $id";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($domainRules['classElementRemove'])) {
|
|
||||||
foreach ($domainRules['classElementRemove'] as $class) {
|
|
||||||
$elements = $xpath->query("//*[contains(@class, '$class')]");
|
|
||||||
if ($elements !== false && $elements->length > 0) {
|
|
||||||
foreach ($elements as $element) {
|
|
||||||
if ($element->parentNode) {
|
|
||||||
$element->parentNode->removeChild($element);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "classElementRemove: $class";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($domainRules['scriptTagRemove'])) {
|
|
||||||
foreach ($domainRules['scriptTagRemove'] as $script) {
|
|
||||||
$scriptElements = $xpath->query("//script[contains(@src, '$script')] | //script[contains(text(), '$script')]");
|
|
||||||
if ($scriptElements !== false && $scriptElements->length > 0) {
|
|
||||||
foreach ($scriptElements as $element) {
|
|
||||||
if ($element->parentNode) {
|
|
||||||
$element->parentNode->removeChild($element);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "scriptTagRemove: $script";
|
|
||||||
}
|
|
||||||
|
|
||||||
$linkElements = $xpath->query("//link[@as='script' and contains(@href, '$script') and @type='application/javascript']");
|
|
||||||
if ($linkElements !== false && $linkElements->length > 0) {
|
|
||||||
foreach ($linkElements as $element) {
|
|
||||||
if ($element->parentNode) {
|
|
||||||
$element->parentNode->removeChild($element);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "scriptTagRemove: $script";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($domainRules['removeCustomAttr'])) {
|
|
||||||
foreach ($domainRules['removeCustomAttr'] as $attrPattern) {
|
|
||||||
if (strpos($attrPattern, '*') !== false) {
|
|
||||||
// For wildcard attributes (e.g. data-*)
|
|
||||||
$elements = $xpath->query('//*');
|
|
||||||
if ($elements !== false) {
|
|
||||||
$pattern = '/^' . str_replace('*', '.*', $attrPattern) . '$/';
|
|
||||||
foreach ($elements as $element) {
|
|
||||||
if ($element->hasAttributes()) {
|
|
||||||
$attrs = [];
|
|
||||||
foreach ($element->attributes as $attr) {
|
|
||||||
if (preg_match($pattern, $attr->name)) {
|
|
||||||
$attrs[] = $attr->name;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
foreach ($attrs as $attr) {
|
|
||||||
$element->removeAttribute($attr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "removeCustomAttr: $attrPattern";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// For non-wildcard attributes
|
|
||||||
$elements = $xpath->query("//*[@$attrPattern]");
|
|
||||||
if ($elements !== false && $elements->length > 0) {
|
|
||||||
foreach ($elements as $element) {
|
|
||||||
$element->removeAttribute($attrPattern);
|
|
||||||
}
|
|
||||||
$this->activatedRules[] = "removeCustomAttr: $attrPattern";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Clean inline styles
 *
 * Strips the max-height, height, overflow, position, display and visibility
 * declarations from every element carrying a style attribute.
 *
 * The property names are anchored with a lookbehind so that longer names
 * ending in one of them (line-height, min-height, background-position, ...)
 * are left intact instead of being cut down to "line-", "min-", etc.
 *
 * @param DOMXPath $xpath XPath handle bound to the document being cleaned
 * @return void
 */
private function cleanInlineStyles($xpath)
{
    $elements = $xpath->query("//*[@style]");
    if ($elements === false) {
        return;
    }

    $pattern = '/(?<![\w-])(?:max-height|height|overflow|position|display|visibility)\s*:\s*[^;]+;?/i';

    foreach ($elements as $element) {
        if (!($element instanceof DOMElement)) {
            continue;
        }

        $cleaned = preg_replace($pattern, '', $element->getAttribute('style'));
        if ($cleaned === null) {
            // Regex engine failure: keep the original style rather than writing null
            continue;
        }

        $cleaned = trim($cleaned);
        if ($cleaned === '') {
            // Nothing left to declare, so drop the empty attribute entirely
            $element->removeAttribute('style');
        } else {
            $element->setAttribute('style', $cleaned);
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * Add Brand Bar in pages
 *
 * Appends a fixed-position div to <body> containing a link to SITE_URL
 * labelled with SITE_DESCRIPTION. Does nothing when the document has no body.
 *
 * The link is built with DOM calls instead of an XML fragment string, so a
 * SITE_URL containing "&" or quotes can no longer produce an invalid fragment.
 *
 * @param DOMDocument $dom   Document that owns the new nodes
 * @param DOMXPath    $xpath XPath handle bound to $dom
 * @return void
 */
private function addBrandBar($dom, $xpath)
{
    $bodies = $xpath->query('//body');
    $body = $bodies !== false ? $bodies->item(0) : null;
    if (!$body) {
        return;
    }

    $brandDiv = $dom->createElement('div');
    $brandDiv->setAttribute('style', 'z-index: 99999; position: fixed; top: 0; right: 1rem; background: rgba(37,99,235, 0.9); backdrop-filter: blur(8px); color: #fff; font-size: 13px; line-height: 1em; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); padding: 8px 12px; margin: 0px; overflow: hidden; border-bottom-left-radius: 8px; border-bottom-right-radius: 8px; font-family: Tahoma, sans-serif;');

    $brandLink = $dom->createElement('a');
    $brandLink->setAttribute('href', SITE_URL);
    $brandLink->setAttribute('style', 'color: #fff; text-decoration: none; font-weight: bold;');
    $brandLink->setAttribute('target', '_blank');
    // A text node is escaped by the serializer, so no manual htmlspecialchars is needed
    $brandLink->appendChild($dom->createTextNode(SITE_DESCRIPTION));

    $brandDiv->appendChild($brandLink);
    $body->appendChild($brandDiv);
}
|
|
||||||
|
|
||||||
/**
 * Add debug panel if LOG_LEVEL is DEBUG
 *
 * Appends a fixed-position panel to <body> listing every entry of
 * $this->activatedRules, or a placeholder line when none were recorded.
 * Does nothing when LOG_LEVEL is undefined, is not 'DEBUG', or the document
 * has no body.
 *
 * @param DOMDocument $dom   Document that owns the new nodes
 * @param DOMXPath    $xpath XPath handle bound to $dom
 * @return void
 */
private function addDebugBar($dom, $xpath)
{
    // Reading an undefined constant throws on PHP 8, so check it exists first
    if (!defined('LOG_LEVEL') || LOG_LEVEL !== 'DEBUG') {
        return;
    }

    $body = $xpath->query('//body')->item(0);
    if (!$body) {
        return;
    }

    $debugDiv = $dom->createElement('div');
    $debugDiv->setAttribute('style', 'position: fixed; bottom: 1rem; right: 1rem; max-width: 400px; padding: 1rem; background: rgba(255, 255, 255, 0.9); backdrop-filter: blur(8px); border: 1px solid #e5e7eb; border-radius: 0.5rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); overflow: auto; max-height: 80vh; z-index: 9999; font-family: monospace; font-size: 13px; line-height: 1.4;');

    $lines = empty($this->activatedRules)
        ? ['No rules activated / Nenhuma regra ativada']
        : $this->activatedRules;

    foreach ($lines as $line) {
        $ruleElement = $dom->createElement('div');
        $ruleElement->textContent = $line;
        $debugDiv->appendChild($ruleElement);
    }

    $body->appendChild($debugDiv);
}
|
|
||||||
|
|
||||||
/**
 * Remove specific classes from an element
 *
 * Class tokens are split on any run of whitespace (spaces, tabs, newlines),
 * so a class list such as "a  b\tc" is handled token by token. When no class
 * remains, the class attribute itself is removed.
 *
 * @param DOMElement $element         Element whose class attribute is edited
 * @param string[]   $classesToRemove Exact class names to drop
 * @return void
 */
private function removeClassNames($element, $classesToRemove)
{
    if (!$element->hasAttribute('class')) {
        return;
    }

    $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
    if ($classes === false) {
        // Split failure: leave the attribute as it was
        return;
    }

    $newClasses = array_filter($classes, function ($class) use ($classesToRemove) {
        return !in_array($class, $classesToRemove, true);
    });

    if (empty($newClasses)) {
        $element->removeAttribute('class');
    } else {
        $element->setAttribute('class', implode(' ', $newClasses));
    }
}
|
|
||||||
|
|
||||||
/**
 * Fix relative URLs in a DOM document
 *
 * Rewrites every src and href attribute that is not already absolute so it
 * points at the origin (scheme, host and optional port) of $baseUrl.
 *
 * Left untouched: empty values, fragment-only values ("#..."),
 * protocol-relative values ("//...") and anything that starts with a URL
 * scheme (http:, https:, data:, mailto:, tel:, javascript:, ...).
 *
 * Note: as before, relative paths are resolved against the site root, not
 * against the directory of $baseUrl.
 *
 * @param DOMDocument $dom     Document being processed (not used directly)
 * @param DOMXPath    $xpath   XPath handle bound to $dom
 * @param string      $baseUrl URL the document was fetched from
 * @return void
 */
private function fixRelativeUrls($dom, $xpath, $baseUrl)
{
    $parsedBase = parse_url($baseUrl);
    if (!is_array($parsedBase) || empty($parsedBase['scheme']) || empty($parsedBase['host'])) {
        // Without an origin there is nothing to resolve against
        return;
    }

    $baseHost = $parsedBase['scheme'] . '://' . $parsedBase['host'];
    if (isset($parsedBase['port'])) {
        $baseHost .= ':' . $parsedBase['port'];
    }

    foreach (['src', 'href'] as $attribute) {
        $elements = $xpath->query("//*[@$attribute]");
        if ($elements === false) {
            continue;
        }

        foreach ($elements as $element) {
            if (!($element instanceof DOMElement)) {
                continue;
            }

            $value = trim($element->getAttribute($attribute));
            if ($value === ''
                || $value[0] === '#'
                || strpos($value, '//') === 0
                || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $value) === 1
            ) {
                continue;
            }

            $element->setAttribute($attribute, $baseHost . '/' . ltrim($value, '/'));
        }
    }
}
|
|
||||||
}
|
|
||||||
|
|
82
app/inc/URLAnalyzer/URLAnalyzerBase.php
Normal file
82
app/inc/URLAnalyzer/URLAnalyzerBase.php
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Inc\URLAnalyzer;
|
||||||
|
|
||||||
|
use Inc\Rules;
|
||||||
|
use Inc\Cache;
|
||||||
|
use Inc\Logger;
|
||||||
|
use Inc\Language;
|
||||||
|
use Curl\Curl;
|
||||||
|
use Facebook\WebDriver\Remote\DesiredCapabilities;
|
||||||
|
use Facebook\WebDriver\Remote\RemoteWebDriver;
|
||||||
|
use Facebook\WebDriver\Firefox\FirefoxOptions;
|
||||||
|
use Facebook\WebDriver\Firefox\FirefoxProfile;
|
||||||
|
use Facebook\WebDriver\Chrome\ChromeOptions;
|
||||||
|
|
||||||
|
/**
 * Shared foundation for the URL analyzer components.
 *
 * Holds the error-type constants with their HTTP code / message-key mapping,
 * the user agent and referrer pools, and the Rules and Cache collaborators
 * that subclasses reuse.
 */
class URLAnalyzerBase
{
    // Error type constants
    const ERROR_INVALID_URL = 'INVALID_URL';
    const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN';
    const ERROR_NOT_FOUND = 'NOT_FOUND';
    const ERROR_HTTP_ERROR = 'HTTP_ERROR';
    const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR';
    const ERROR_DNS_FAILURE = 'DNS_FAILURE';
    const ERROR_CONTENT_ERROR = 'CONTENT_ERROR';
    const ERROR_GENERIC_ERROR = 'GENERIC_ERROR';

    // Error mapping: error type => HTTP status code and translation key
    protected $errorMap = [
        self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'],
        self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'],
        self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'],
        self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'],
        self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'],
        self::ERROR_DNS_FAILURE => ['code' => 504, 'message_key' => 'DNS_FAILURE'],
        self::ERROR_CONTENT_ERROR => ['code' => 502, 'message_key' => 'CONTENT_ERROR'],
        self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR']
    ];

    // Pool of user agent strings sent with outgoing requests
    protected $userAgents = [
        'Googlebot-News',
        'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
    ];

    // Pool of referrer URLs
    protected $socialReferrers = [
        'https://t.co/',
        'https://www.twitter.com/',
        'https://www.facebook.com/',
        'https://www.linkedin.com/'
    ];

    protected $dnsServers;
    protected $rules;
    protected $cache;
    protected $activatedRules = [];

    /**
     * Reads the DNS server list from the DNS_SERVERS constant and creates the
     * Rules and Cache collaborators.
     */
    public function __construct()
    {
        // Tolerate "1.1.1.1, 8.8.8.8" and stray commas in the configured list
        $this->dnsServers = array_values(array_filter(
            array_map('trim', explode(',', DNS_SERVERS)),
            function ($server) {
                return $server !== '';
            }
        ));
        $this->rules = new Rules();
        $this->cache = new Cache();
    }

    /**
     * Returns a random entry from the user agent pool.
     *
     * @param bool $preferGoogleBot Kept for backward compatibility. Both
     *                              branches of the previous implementation
     *                              drew from the same pool, so the flag never
     *                              changed the selection.
     * @return string
     */
    protected function getRandomUserAgent($preferGoogleBot = false)
    {
        return $this->userAgents[array_rand($this->userAgents)];
    }

    /**
     * Returns a random entry from the social referrer pool.
     *
     * @return string
     */
    protected function getRandomSocialReferrer()
    {
        return $this->socialReferrers[array_rand($this->socialReferrers)];
    }

    /**
     * Returns the rules for a domain, as provided by Rules::getDomainRules().
     *
     * @param string $domain Host name to look up
     * @return mixed Whatever the Rules collaborator returns for the domain
     */
    protected function getDomainRules($domain)
    {
        return $this->rules->getDomainRules($domain);
    }
}
|
18
app/inc/URLAnalyzer/URLAnalyzerError.php
Normal file
18
app/inc/URLAnalyzer/URLAnalyzerError.php
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Inc\URLAnalyzer;
|
||||||
|
|
||||||
|
use Inc\Language;
|
||||||
|
|
||||||
|
/**
 * Turns an error type constant into a thrown URLAnalyzerException.
 */
class URLAnalyzerError extends URLAnalyzerBase
{
    /**
     * Looks up the HTTP code and translated message for an error type and
     * throws the matching exception.
     *
     * An error type missing from the error map is reported as
     * ERROR_GENERIC_ERROR instead of indexing the map with an unknown key.
     *
     * @param string $errorType      One of the ERROR_* constants
     * @param string $additionalInfo Optional detail appended to the message
     * @throws URLAnalyzerException Always
     */
    public function throwError($errorType, $additionalInfo = '')
    {
        if (!isset($this->errorMap[$errorType])) {
            $errorType = self::ERROR_GENERIC_ERROR;
        }

        $errorConfig = $this->errorMap[$errorType];
        $message = Language::getMessage($errorConfig['message_key'])['message'];
        if ($additionalInfo) {
            $message .= ': ' . $additionalInfo;
        }

        throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo);
    }
}
|
26
app/inc/URLAnalyzer/URLAnalyzerException.php
Normal file
26
app/inc/URLAnalyzer/URLAnalyzerException.php
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Inc\URLAnalyzer;
|
||||||
|
|
||||||
|
/**
 * Exception carrying an analyzer error type and optional extra detail in
 * addition to the usual message and code.
 */
class URLAnalyzerException extends \Exception
{
    /** Error type identifier passed to the constructor */
    private $type;

    /** Extra detail passed to the constructor */
    private $details;

    /**
     * @param string $message        Exception message
     * @param int    $code           Exception code
     * @param string $errorType      Error type identifier
     * @param string $additionalInfo Optional extra detail
     */
    public function __construct($message, $code, $errorType, $additionalInfo = '')
    {
        $this->type = $errorType;
        $this->details = $additionalInfo;

        parent::__construct($message, $code);
    }

    /**
     * @return string The error type given at construction
     */
    public function getErrorType()
    {
        return $this->type;
    }

    /**
     * @return string The extra detail given at construction
     */
    public function getAdditionalInfo()
    {
        return $this->details;
    }
}
|
197
app/inc/URLAnalyzer/URLAnalyzerFetch.php
Normal file
197
app/inc/URLAnalyzer/URLAnalyzerFetch.php
Normal file
|
@ -0,0 +1,197 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Inc\URLAnalyzer;
|
||||||
|
|
||||||
|
use Curl\Curl;
|
||||||
|
use Facebook\WebDriver\Remote\DesiredCapabilities;
|
||||||
|
use Facebook\WebDriver\Remote\RemoteWebDriver;
|
||||||
|
use Facebook\WebDriver\Firefox\FirefoxOptions;
|
||||||
|
use Facebook\WebDriver\Firefox\FirefoxProfile;
|
||||||
|
use Facebook\WebDriver\Chrome\ChromeOptions;
|
||||||
|
|
||||||
|
/**
 * Retrieves page content directly over HTTP, from the Wayback Machine, or
 * through a remote Selenium browser.
 *
 * Every failure is reported by throwing through URLAnalyzerError::throwError().
 */
class URLAnalyzerFetch extends URLAnalyzerBase
{
    private $error;

    public function __construct()
    {
        parent::__construct();
        $this->error = new URLAnalyzerError();
    }

    /**
     * Fetches a URL with cURL, applying the per-domain rules.
     *
     * @param string $url URL to fetch
     * @return mixed The response body as exposed by the Curl client
     * @throws URLAnalyzerException On invalid URL, transport error, non-200
     *                              status or empty response
     */
    public function fetchContent($url)
    {
        $host = parse_url($url, PHP_URL_HOST);
        if (!$host) {
            $this->error->throwError(self::ERROR_INVALID_URL);
        }
        $host = preg_replace('/^www\./', '', $host);
        $domainRules = $this->getDomainRules($host);

        $curl = new Curl();
        $curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
        $curl->setOpt(CURLOPT_MAXREDIRS, 2);
        $curl->setOpt(CURLOPT_TIMEOUT, 10);
        $curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
        $curl->setOpt(CURLOPT_DNS_SERVERS, implode(',', $this->dnsServers));
        $curl->setOpt(CURLOPT_ENCODING, '');

        $curl->setHeaders([
            'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language' => 'en-US,en;q=0.5',
            'Cache-Control' => 'no-cache',
            'Pragma' => 'no-cache',
            'DNT' => '1'
        ]);

        if (isset($domainRules['fromGoogleBot'])) {
            $curl->setUserAgent($this->getRandomUserAgent(true));
            $curl->setHeaders([
                'X-Forwarded-For' => '66.249.' . rand(64, 95) . '.' . rand(1, 254),
                'From' => 'googlebot(at)googlebot.com'
            ]);
        }

        if (isset($domainRules['headers'])) {
            $curl->setHeaders($domainRules['headers']);
        }

        $curl->get($url);

        if ($curl->error) {
            $this->throwRequestError($curl, true);
        }

        if ($curl->httpStatusCode !== 200 || empty($curl->response)) {
            $this->error->throwError(self::ERROR_HTTP_ERROR);
        }

        return $curl->response;
    }

    /**
     * Fetches the closest archived snapshot of a URL from the Wayback Machine.
     *
     * Queries the availability endpoint first, then downloads the snapshot
     * and strips the Wayback toolbar block and the archive prefix on image
     * URLs.
     *
     * @param string $url URL to look up (the scheme is removed before querying)
     * @return string Snapshot HTML
     * @throws URLAnalyzerException When no snapshot exists or a request fails
     */
    public function fetchFromWaybackMachine($url)
    {
        $url = preg_replace('#^https?://#', '', $url);
        $availabilityUrl = "https://archive.org/wayback/available?url=" . urlencode($url);

        $curl = $this->createArchiveClient();
        $curl->get($availabilityUrl);

        if ($curl->error) {
            $this->throwRequestError($curl, false);
        }

        $data = $curl->response;
        if (!isset($data->archived_snapshots->closest->url)) {
            $this->error->throwError(self::ERROR_NOT_FOUND);
        }

        $archiveUrl = $data->archived_snapshots->closest->url;
        $curl = $this->createArchiveClient();
        $curl->get($archiveUrl);

        if ($curl->error || $curl->httpStatusCode !== 200 || empty($curl->response)) {
            $this->error->throwError(self::ERROR_HTTP_ERROR);
        }

        $content = $curl->response;

        $content = preg_replace('/<!-- BEGIN WAYBACK TOOLBAR INSERT -->.*?<!-- END WAYBACK TOOLBAR INSERT -->/s', '', $content);
        $content = preg_replace('/https?:\/\/web\.archive\.org\/web\/\d+im_\//', '', $content);

        return $content;
    }

    /**
     * Loads a URL in a remote Selenium browser and returns the rendered HTML.
     *
     * The browser session is always closed in a finally block. The
     * empty-content check runs after the try/catch, so its CONTENT_ERROR is
     * no longer caught by this method's own handler and re-thrown as
     * HTTP_ERROR.
     *
     * @param string $url     URL to load
     * @param string $browser 'chrome' selects Chrome; any other value selects Firefox
     * @return string Outer HTML of the document element
     * @throws URLAnalyzerException On driver failure or empty content
     */
    public function fetchFromSelenium($url, $browser = 'firefox')
    {
        $host = 'http://'.SELENIUM_HOST.'/wd/hub';

        if ($browser === 'chrome') {
            $options = new ChromeOptions();
            $options->addArguments([
                '--headless',
                '--disable-gpu',
                '--no-sandbox',
                '--disable-dev-shm-usage',
                '--disable-images',
                '--blink-settings=imagesEnabled=false'
            ]);

            $capabilities = DesiredCapabilities::chrome();
            $capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
        } else {
            $profile = new FirefoxProfile();
            $profile->setPreference("permissions.default.image", 2);
            $profile->setPreference("javascript.enabled", true);
            $profile->setPreference("network.http.referer.defaultPolicy", 0);
            $profile->setPreference("network.http.referer.defaultReferer", "https://www.google.com");
            $profile->setPreference("network.http.referer.spoofSource", true);
            $profile->setPreference("network.http.referer.trimmingPolicy", 0);

            $options = new FirefoxOptions();
            $options->setProfile($profile);

            $capabilities = DesiredCapabilities::firefox();
            $capabilities->setCapability(FirefoxOptions::CAPABILITY, $options);
        }

        $driver = null;
        $htmlSource = null;

        try {
            $driver = RemoteWebDriver::create($host, $capabilities);
            $driver->manage()->timeouts()->pageLoadTimeout(10);
            $driver->manage()->timeouts()->setScriptTimeout(5);

            $driver->get($url);

            $htmlSource = $driver->executeScript("return document.documentElement.outerHTML;");
        } catch (\Exception $e) {
            $message = $e->getMessage();
            if (strpos($message, 'DNS') !== false) {
                $this->error->throwError(self::ERROR_DNS_FAILURE);
            } elseif (strpos($message, 'timeout') !== false) {
                $this->error->throwError(self::ERROR_CONNECTION_ERROR);
            } elseif (strpos($message, 'not found') !== false) {
                $this->error->throwError(self::ERROR_NOT_FOUND);
            } else {
                $this->error->throwError(self::ERROR_HTTP_ERROR);
            }
        } finally {
            if ($driver !== null) {
                try {
                    $driver->quit();
                } catch (\Exception $quitError) {
                    // Best-effort cleanup: a failed quit must not replace the fetch outcome
                }
            }
        }

        if (empty($htmlSource)) {
            $this->error->throwError(self::ERROR_CONTENT_ERROR);
        }

        return $htmlSource;
    }

    /**
     * Builds the Curl client used for both Wayback Machine requests.
     *
     * @return Curl
     */
    private function createArchiveClient()
    {
        $curl = new Curl();
        $curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
        $curl->setOpt(CURLOPT_TIMEOUT, 10);
        $curl->setOpt(CURLOPT_SSL_VERIFYPEER, false);
        $curl->setUserAgent($this->getRandomUserAgent());

        return $curl;
    }

    /**
     * Maps a failed Curl request to an analyzer error and throws it.
     *
     * Host-resolution failures are detected through the cURL error code as
     * well as the original "DNS" substring check, because the message text
     * for that failure does not necessarily contain "DNS".
     *
     * @param Curl $curl        Client whose last request failed
     * @param bool $mapNotFound Whether an HTTP 404 is reported as ERROR_NOT_FOUND
     * @throws URLAnalyzerException Always
     */
    private function throwRequestError($curl, $mapNotFound)
    {
        $errorMessage = (string) $curl->errorMessage;
        $resolveFailed = ($curl->curlErrorCode ?? 0) === CURLE_COULDNT_RESOLVE_HOST;

        if ($resolveFailed || strpos($errorMessage, 'DNS') !== false) {
            $this->error->throwError(self::ERROR_DNS_FAILURE);
        } elseif (strpos($errorMessage, 'CURL') !== false) {
            $this->error->throwError(self::ERROR_CONNECTION_ERROR);
        } elseif ($mapNotFound && $curl->httpStatusCode === 404) {
            $this->error->throwError(self::ERROR_NOT_FOUND);
        } else {
            $this->error->throwError(self::ERROR_HTTP_ERROR);
        }
    }
}
|
314
app/inc/URLAnalyzer/URLAnalyzerProcess.php
Normal file
314
app/inc/URLAnalyzer/URLAnalyzerProcess.php
Normal file
|
@ -0,0 +1,314 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Inc\URLAnalyzer;
|
||||||
|
|
||||||
|
use DOMDocument;
|
||||||
|
use DOMXPath;
|
||||||
|
use DOMElement;
|
||||||
|
|
||||||
|
/**
 * Post-processes fetched HTML: canonical link, URL fixing, per-domain
 * rules, inline style cleanup and the brand / debug overlays.
 */
class URLAnalyzerProcess extends URLAnalyzerBase
{
    private $error;

    public function __construct()
    {
        parent::__construct();
        $this->error = new URLAnalyzerError();
    }

    /**
     * Parses HTML into a DOMDocument, collecting libxml errors internally.
     *
     * Non-ASCII characters are converted to numeric entities before parsing.
     * This replaces mb_convert_encoding(..., 'HTML-ENTITIES', ...), which is
     * deprecated since PHP 8.2.
     *
     * @param string $content UTF-8 HTML source
     * @return DOMDocument
     */
    private function createDOM($content)
    {
        $dom = new DOMDocument();
        $dom->preserveWhiteSpace = true;
        libxml_use_internal_errors(true);
        $encoded = mb_encode_numericentity($content, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
        $dom->loadHTML($encoded, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        libxml_clear_errors();
        return $dom;
    }

    /**
     * Runs the full processing pipeline on a page.
     *
     * @param string $content Raw HTML; anything shorter than 5120 bytes is rejected
     * @param string $host    Host used to look up the domain rules
     * @param string $url     Original URL, used for the canonical link and URL fixing
     * @return string Processed HTML
     * @throws URLAnalyzerException With ERROR_CONTENT_ERROR when the content is too short
     */
    public function processContent($content, $host, $url)
    {
        if (strlen($content) < 5120) {
            $this->error->throwError(self::ERROR_CONTENT_ERROR);
        }

        $dom = $this->createDOM($content);
        $xpath = new DOMXPath($dom);

        // The overlays are added last, so the cleanup steps do not alter them
        $this->processCanonicalLinks($dom, $xpath, $url);
        $this->fixRelativeUrls($dom, $xpath, $url);
        $this->applyDomainRules($dom, $xpath, $host);
        $this->cleanInlineStyles($xpath);
        $this->addBrandBar($dom, $xpath);
        $this->addDebugBar($dom, $xpath);

        return $dom->saveHTML();
    }

    /**
     * Replaces every existing canonical link with one pointing at $url.
     * The new link is only added when the document has a <head>.
     */
    private function processCanonicalLinks($dom, $xpath, $url)
    {
        $this->removeNodes($xpath->query("//link[@rel='canonical']"));

        $head = $xpath->query('//head')->item(0);
        if ($head) {
            $newCanonical = $dom->createElement('link');
            $newCanonical->setAttribute('rel', 'canonical');
            $newCanonical->setAttribute('href', $url);
            $head->appendChild($newCanonical);
        }
    }

    /**
     * Applies the rules configured for $host: custom style, custom script
     * and the element / attribute removals.
     *
     * Custom style and custom code are skipped when the document has no
     * <head> / <body>, instead of dereferencing a missing node.
     */
    private function applyDomainRules($dom, $xpath, $host)
    {
        $domainRules = $this->getDomainRules($host);

        if (isset($domainRules['customStyle'])) {
            $head = $dom->getElementsByTagName('head')->item(0);
            if ($head) {
                $styleElement = $dom->createElement('style');
                $styleElement->appendChild($dom->createTextNode($domainRules['customStyle']));
                $head->appendChild($styleElement);
                $this->activatedRules[] = 'customStyle';
            }
        }

        if (isset($domainRules['customCode'])) {
            $body = $dom->getElementsByTagName('body')->item(0);
            if ($body) {
                $scriptElement = $dom->createElement('script');
                $scriptElement->setAttribute('type', 'text/javascript');
                $scriptElement->appendChild($dom->createTextNode($domainRules['customCode']));
                $body->appendChild($scriptElement);
            }
        }

        $this->removeUnwantedElements($dom, $xpath, $domainRules);
    }

    /**
     * Applies the removal rules (classAttrRemove, removeElementsByTag,
     * idElementRemove, classElementRemove, scriptTagRemove, removeCustomAttr)
     * and records each rule that matched in $this->activatedRules.
     */
    private function removeUnwantedElements($dom, $xpath, $domainRules)
    {
        if (isset($domainRules['classAttrRemove'])) {
            foreach ($domainRules['classAttrRemove'] as $class) {
                $elements = $xpath->query("//*[contains(@class, '$class')]");
                if ($elements !== false && $elements->length > 0) {
                    foreach ($elements as $element) {
                        $this->removeClassNames($element, [$class]);
                    }
                    $this->activatedRules[] = "classAttrRemove: $class";
                }
            }
        }

        if (isset($domainRules['removeElementsByTag'])) {
            foreach ($domainRules['removeElementsByTag'] as $tag) {
                // Recorded only when something matched, like the other rules
                if ($this->removeNodes($xpath->query("//$tag"))) {
                    $this->activatedRules[] = "removeElementsByTag: $tag";
                }
            }
        }

        if (isset($domainRules['idElementRemove'])) {
            foreach ($domainRules['idElementRemove'] as $id) {
                if ($this->removeNodes($xpath->query("//*[@id='$id']"))) {
                    $this->activatedRules[] = "idElementRemove: $id";
                }
            }
        }

        if (isset($domainRules['classElementRemove'])) {
            foreach ($domainRules['classElementRemove'] as $class) {
                if ($this->removeNodes($xpath->query("//*[contains(@class, '$class')]"))) {
                    $this->activatedRules[] = "classElementRemove: $class";
                }
            }
        }

        if (isset($domainRules['scriptTagRemove'])) {
            foreach ($domainRules['scriptTagRemove'] as $script) {
                $scriptElements = $xpath->query("//script[contains(@src, '$script')] | //script[contains(text(), '$script')]");
                if ($this->removeNodes($scriptElements)) {
                    $this->activatedRules[] = "scriptTagRemove: $script";
                }

                $linkElements = $xpath->query("//link[@as='script' and contains(@href, '$script') and @type='application/javascript']");
                if ($this->removeNodes($linkElements)) {
                    $this->activatedRules[] = "scriptTagRemove: $script";
                }
            }
        }

        if (isset($domainRules['removeCustomAttr'])) {
            foreach ($domainRules['removeCustomAttr'] as $attrPattern) {
                if (strpos($attrPattern, '*') !== false) {
                    // Wildcard attributes (e.g. data-*)
                    $elements = $xpath->query('//*');
                    if ($elements !== false) {
                        // Quote the pattern first so only "*" acts as a wildcard
                        $pattern = '/^' . str_replace('\*', '.*', preg_quote($attrPattern, '/')) . '$/';
                        foreach ($elements as $element) {
                            if ($element->hasAttributes()) {
                                // Collect names first: removing while iterating the live attribute map skips entries
                                $attrs = [];
                                foreach ($element->attributes as $attr) {
                                    if (preg_match($pattern, $attr->name)) {
                                        $attrs[] = $attr->name;
                                    }
                                }
                                foreach ($attrs as $attr) {
                                    $element->removeAttribute($attr);
                                }
                            }
                        }
                        $this->activatedRules[] = "removeCustomAttr: $attrPattern";
                    }
                } else {
                    // Exact attribute name
                    $elements = $xpath->query("//*[@$attrPattern]");
                    if ($elements !== false && $elements->length > 0) {
                        foreach ($elements as $element) {
                            $element->removeAttribute($attrPattern);
                        }
                        $this->activatedRules[] = "removeCustomAttr: $attrPattern";
                    }
                }
            }
        }
    }

    /**
     * Detaches every node of an XPath result from its parent.
     *
     * @param DOMNodeList|false $nodes Result of DOMXPath::query()
     * @return bool True when the result contained at least one node
     */
    private function removeNodes($nodes)
    {
        if ($nodes === false || $nodes->length === 0) {
            return false;
        }

        foreach ($nodes as $node) {
            if ($node->parentNode) {
                $node->parentNode->removeChild($node);
            }
        }

        return true;
    }

    /**
     * Strips the max-height, height, overflow, position, display and
     * visibility declarations from every inline style.
     *
     * The lookbehind keeps longer property names (line-height, min-height,
     * background-position, ...) from being cut down to "line-", "min-", etc.
     */
    private function cleanInlineStyles($xpath)
    {
        $elements = $xpath->query("//*[@style]");
        if ($elements === false) {
            return;
        }

        $pattern = '/(?<![\w-])(?:max-height|height|overflow|position|display|visibility)\s*:\s*[^;]+;?/i';

        foreach ($elements as $element) {
            if (!($element instanceof DOMElement)) {
                continue;
            }

            $cleaned = preg_replace($pattern, '', $element->getAttribute('style'));
            if ($cleaned === null) {
                // Regex engine failure: keep the original style
                continue;
            }

            $cleaned = trim($cleaned);
            if ($cleaned === '') {
                $element->removeAttribute('style');
            } else {
                $element->setAttribute('style', $cleaned);
            }
        }
    }

    /**
     * Appends the brand bar (a link to SITE_URL labelled with
     * SITE_DESCRIPTION) to <body>.
     *
     * Built with DOM calls, so special characters in SITE_URL cannot produce
     * an invalid XML fragment.
     */
    private function addBrandBar($dom, $xpath)
    {
        $body = $xpath->query('//body')->item(0);
        if (!$body) {
            return;
        }

        $brandDiv = $dom->createElement('div');
        $brandDiv->setAttribute('style', 'z-index: 99999; position: fixed; top: 0; right: 1rem; background: rgba(37,99,235, 0.9); backdrop-filter: blur(8px); color: #fff; font-size: 13px; line-height: 1em; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); padding: 8px 12px; margin: 0px; overflow: hidden; border-bottom-left-radius: 8px; border-bottom-right-radius: 8px; font-family: Tahoma, sans-serif;');

        $brandLink = $dom->createElement('a');
        $brandLink->setAttribute('href', SITE_URL);
        $brandLink->setAttribute('style', 'color: #fff; text-decoration: none; font-weight: bold;');
        $brandLink->setAttribute('target', '_blank');
        $brandLink->appendChild($dom->createTextNode(SITE_DESCRIPTION));

        $brandDiv->appendChild($brandLink);
        $body->appendChild($brandDiv);
    }

    /**
     * Appends the debug panel listing the activated rules when LOG_LEVEL is
     * defined and equals 'DEBUG'.
     */
    private function addDebugBar($dom, $xpath)
    {
        if (!defined('LOG_LEVEL') || LOG_LEVEL !== 'DEBUG') {
            return;
        }

        $body = $xpath->query('//body')->item(0);
        if (!$body) {
            return;
        }

        $debugDiv = $dom->createElement('div');
        $debugDiv->setAttribute('style', 'position: fixed; bottom: 1rem; right: 1rem; max-width: 400px; padding: 1rem; background: rgba(255, 255, 255, 0.9); backdrop-filter: blur(8px); border: 1px solid #e5e7eb; border-radius: 0.5rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); overflow: auto; max-height: 80vh; z-index: 9999; font-family: monospace; font-size: 13px; line-height: 1.4;');

        $lines = empty($this->activatedRules)
            ? ['No rules activated / Nenhuma regra ativada']
            : $this->activatedRules;

        foreach ($lines as $line) {
            $ruleElement = $dom->createElement('div');
            $ruleElement->textContent = $line;
            $debugDiv->appendChild($ruleElement);
        }

        $body->appendChild($debugDiv);
    }

    /**
     * Removes the given class names from an element's class attribute.
     * Tokens are split on any whitespace; the attribute is dropped when no
     * class remains.
     */
    private function removeClassNames($element, $classesToRemove)
    {
        if (!$element->hasAttribute('class')) {
            return;
        }

        $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
        if ($classes === false) {
            return;
        }

        $newClasses = array_filter($classes, function ($class) use ($classesToRemove) {
            return !in_array($class, $classesToRemove, true);
        });

        if (empty($newClasses)) {
            $element->removeAttribute('class');
        } else {
            $element->setAttribute('class', implode(' ', $newClasses));
        }
    }

    /**
     * Rewrites non-absolute src and href attributes to point at the origin
     * (scheme, host and optional port) of $baseUrl.
     *
     * Left untouched: empty values, "#..." fragments, "//..."
     * protocol-relative values and anything starting with a URL scheme
     * (http:, data:, mailto:, ...).
     *
     * Note: as before, relative paths are resolved against the site root,
     * not against the directory of $baseUrl.
     */
    private function fixRelativeUrls($dom, $xpath, $baseUrl)
    {
        $parsedBase = parse_url($baseUrl);
        if (!is_array($parsedBase) || empty($parsedBase['scheme']) || empty($parsedBase['host'])) {
            // Without an origin there is nothing to resolve against
            return;
        }

        $baseHost = $parsedBase['scheme'] . '://' . $parsedBase['host'];
        if (isset($parsedBase['port'])) {
            $baseHost .= ':' . $parsedBase['port'];
        }

        foreach (['src', 'href'] as $attribute) {
            $elements = $xpath->query("//*[@$attribute]");
            if ($elements === false) {
                continue;
            }

            foreach ($elements as $element) {
                if (!($element instanceof DOMElement)) {
                    continue;
                }

                $value = trim($element->getAttribute($attribute));
                if ($value === ''
                    || $value[0] === '#'
                    || strpos($value, '//') === 0
                    || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $value) === 1
                ) {
                    continue;
                }

                $element->setAttribute($attribute, $baseHost . '/' . ltrim($value, '/'));
            }
        }
    }
}
|
33
app/inc/URLAnalyzer/URLAnalyzerUtils.php
Normal file
33
app/inc/URLAnalyzer/URLAnalyzerUtils.php
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Inc\URLAnalyzer;
|
||||||
|
|
||||||
|
use Curl\Curl;
|
||||||
|
|
||||||
|
/**
 * Small network helpers for the URL analyzer.
 */
class URLAnalyzerUtils extends URLAnalyzerBase
{
    /**
     * Probes a URL and reports where it ends up and which HTTP status it returns.
     *
     * The request follows redirects, uses a 5 second timeout and a user agent
     * obtained from getRandomUserAgent() (presumably provided by the base
     * class - confirm).
     *
     * NOTE(review): TLS peer verification is disabled for this request.
     * NOTE(review): CURLOPT_NOBODY is set but the request is issued through
     * get(); confirm whether the client library keeps the body suppressed.
     *
     * @param string $url URL to probe
     * @return array Keys: 'finalUrl' (effective URL, or $url when the request
     *               failed), 'hasRedirect' (true when the effective URL differs
     *               from $url; always false on failure) and 'httpCode'
     */
    public function checkStatus($url)
    {
        $client = new Curl();
        $client->setFollowLocation();
        $client->setOpt(CURLOPT_TIMEOUT, 5);
        $client->setOpt(CURLOPT_SSL_VERIFYPEER, false);
        $client->setOpt(CURLOPT_NOBODY, true);
        $client->setUserAgent($this->getRandomUserAgent());
        $client->get($url);

        // On failure, report the requested URL unchanged and no redirect
        $finalUrl = $url;
        $hasRedirect = false;

        if (!$client->error) {
            $finalUrl = $client->effectiveUrl;
            $hasRedirect = ($finalUrl !== $url);
        }

        return [
            'finalUrl' => $finalUrl,
            'hasRedirect' => $hasRedirect,
            'httpCode' => $client->httpStatusCode
        ];
    }
}
|
|
@ -29,7 +29,7 @@ class Router
|
||||||
require_once __DIR__ . '/../inc/Cache.php';
|
require_once __DIR__ . '/../inc/Cache.php';
|
||||||
require_once __DIR__ . '/../inc/Language.php';
|
require_once __DIR__ . '/../inc/Language.php';
|
||||||
|
|
||||||
\Language::init(LANGUAGE);
|
\Inc\Language::init(LANGUAGE);
|
||||||
|
|
||||||
$message = '';
|
$message = '';
|
||||||
$message_type = '';
|
$message_type = '';
|
||||||
|
@ -38,7 +38,7 @@ class Router
|
||||||
// Sanitize and process query string messages
|
// Sanitize and process query string messages
|
||||||
if (isset($_GET['message'])) {
|
if (isset($_GET['message'])) {
|
||||||
$message_key = htmlspecialchars(trim($_GET['message']), ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
$message_key = htmlspecialchars(trim($_GET['message']), ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
||||||
$messageData = \Language::getMessage($message_key);
|
$messageData = \Inc\Language::getMessage($message_key);
|
||||||
$message = htmlspecialchars($messageData['message'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
$message = htmlspecialchars($messageData['message'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
||||||
$message_type = htmlspecialchars($messageData['type'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
$message_type = htmlspecialchars($messageData['type'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
|
||||||
}
|
}
|
||||||
|
@ -50,14 +50,14 @@ class Router
|
||||||
header('Location: ' . SITE_URL . '/p/' . $url);
|
header('Location: ' . SITE_URL . '/p/' . $url);
|
||||||
exit;
|
exit;
|
||||||
} else {
|
} else {
|
||||||
$messageData = \Language::getMessage('INVALID_URL');
|
$messageData = \Inc\Language::getMessage('INVALID_URL');
|
||||||
$message = $messageData['message'];
|
$message = $messageData['message'];
|
||||||
$message_type = $messageData['type'];
|
$message_type = $messageData['type'];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize cache for counting
|
// Initialize cache for counting
|
||||||
$cache = new \Cache();
|
$cache = new \Inc\Cache();
|
||||||
$cache_folder = $cache->getCacheFileCount();
|
$cache_folder = $cache->getCacheFileCount();
|
||||||
|
|
||||||
require __DIR__ . '/views/home.php';
|
require __DIR__ . '/views/home.php';
|
||||||
|
@ -201,4 +201,4 @@ class Router
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,10 @@
|
||||||
|
|
||||||
namespace App;
|
namespace App;
|
||||||
|
|
||||||
|
use Inc\Language;
|
||||||
|
use Inc\URLAnalyzer;
|
||||||
|
use Inc\URLAnalyzer\URLAnalyzerException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* URL Processor
|
* URL Processor
|
||||||
* Combines functionality for URL processing, handling both web and API responses
|
* Combines functionality for URL processing, handling both web and API responses
|
||||||
|
@ -20,15 +24,13 @@ class URLProcessor
|
||||||
public function __construct(string $url = '', bool $isApi = false)
|
public function __construct(string $url = '', bool $isApi = false)
|
||||||
{
|
{
|
||||||
require_once __DIR__ . '/../config.php';
|
require_once __DIR__ . '/../config.php';
|
||||||
require_once __DIR__ . '/../inc/URLAnalyzer.php';
|
|
||||||
require_once __DIR__ . '/../inc/Language.php';
|
|
||||||
|
|
||||||
$this->url = $url;
|
$this->url = $url;
|
||||||
$this->isApi = $isApi;
|
$this->isApi = $isApi;
|
||||||
$this->analyzer = new \URLAnalyzer();
|
$this->analyzer = new URLAnalyzer();
|
||||||
|
|
||||||
if ($isApi) {
|
if ($isApi) {
|
||||||
\Language::init(LANGUAGE);
|
Language::init(LANGUAGE);
|
||||||
header('Content-Type: application/json');
|
header('Content-Type: application/json');
|
||||||
header('Access-Control-Allow-Origin: *');
|
header('Access-Control-Allow-Origin: *');
|
||||||
header('Access-Control-Allow-Methods: GET');
|
header('Access-Control-Allow-Methods: GET');
|
||||||
|
@ -87,7 +89,7 @@ class URLProcessor
|
||||||
} else {
|
} else {
|
||||||
echo $content;
|
echo $content;
|
||||||
}
|
}
|
||||||
} catch (\URLAnalyzerException $e) {
|
} catch (URLAnalyzerException $e) {
|
||||||
$errorType = $e->getErrorType();
|
$errorType = $e->getErrorType();
|
||||||
$additionalInfo = $e->getAdditionalInfo();
|
$additionalInfo = $e->getAdditionalInfo();
|
||||||
|
|
||||||
|
@ -105,7 +107,7 @@ class URLProcessor
|
||||||
]
|
]
|
||||||
], $e->getCode());
|
], $e->getCode());
|
||||||
} else {
|
} else {
|
||||||
if ($errorType === \URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) {
|
if ($errorType === URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) {
|
||||||
$this->redirect(trim($additionalInfo), $errorType);
|
$this->redirect(trim($additionalInfo), $errorType);
|
||||||
}
|
}
|
||||||
$this->redirect(SITE_URL, $errorType);
|
$this->redirect(SITE_URL, $errorType);
|
||||||
|
@ -114,13 +116,13 @@ class URLProcessor
|
||||||
if ($this->isApi) {
|
if ($this->isApi) {
|
||||||
$this->sendApiResponse([
|
$this->sendApiResponse([
|
||||||
'error' => [
|
'error' => [
|
||||||
'type' => \URLAnalyzer::ERROR_GENERIC_ERROR,
|
'type' => URLAnalyzer::ERROR_GENERIC_ERROR,
|
||||||
'message' => \Language::getMessage('GENERIC_ERROR')['message']
|
'message' => Language::getMessage('GENERIC_ERROR')['message']
|
||||||
]
|
]
|
||||||
], 500);
|
], 500);
|
||||||
} else {
|
} else {
|
||||||
$this->redirect(SITE_URL, \URLAnalyzer::ERROR_GENERIC_ERROR);
|
$this->redirect(SITE_URL, URLAnalyzer::ERROR_GENERIC_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="<?php echo Language::getCurrentLanguage(); ?>">
|
<html lang="<?php echo \Inc\Language::getCurrentLanguage(); ?>">
|
||||||
|
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
|
@ -36,7 +36,7 @@
|
||||||
<a target="_blank" href="https://github.com/manualdousuario/marreta/wiki/API-Rest">API Rest</a>
|
<a target="_blank" href="https://github.com/manualdousuario/marreta/wiki/API-Rest">API Rest</a>
|
||||||
<a target="_blank" href="https://github.com/manualdousuario/marreta/">Github</a>
|
<a target="_blank" href="https://github.com/manualdousuario/marreta/">Github</a>
|
||||||
<div class="integration">
|
<div class="integration">
|
||||||
<button class="integration__toggle"><?php echo Language::get('nav_integration'); ?><span class="arrow"></span></button>
|
<button class="integration__toggle"><?php echo \Inc\Language::get('nav_integration'); ?><span class="arrow"></span></button>
|
||||||
<div class="integration__menu">
|
<div class="integration__menu">
|
||||||
<a target="_blank" href="https://bsky.app/profile/marreta.pcdomanual.com"><span class="name">Bsky</span><span class="icon icon--bsky"></span></a>
|
<a target="_blank" href="https://bsky.app/profile/marreta.pcdomanual.com"><span class="name">Bsky</span><span class="icon icon--bsky"></span></a>
|
||||||
<a target="_blank" href="https://t.me/leissoai_bot"><span class="name">Telegram</span><span class="icon icon--telegram"></span></a>
|
<a target="_blank" href="https://t.me/leissoai_bot"><span class="name">Telegram</span><span class="icon icon--telegram"></span></a>
|
||||||
|
@ -45,10 +45,10 @@
|
||||||
</div>
|
</div>
|
||||||
</nav>
|
</nav>
|
||||||
<div class="extension">
|
<div class="extension">
|
||||||
<button class="extension__toggle"><?php echo Language::get('nav_extension'); ?></button>
|
<button class="extension__toggle"><?php echo \Inc\Language::get('nav_extension'); ?></button>
|
||||||
<div class="extension__menu">
|
<div class="extension__menu">
|
||||||
<a target="_blank" href="https://addons.mozilla.org/pt-BR/firefox/addon/marreta/"><span class="name">Firefox</span><span class="icon icon--firefox"></span></a>
|
<a target="_blank" href="https://addons.mozilla.org/pt-BR/firefox/addon/marreta/"><span class="name">Firefox</span><span class="icon icon--firefox"></span></a>
|
||||||
<a target="_blank" href="#"><span class="name">Chrome</span><span class="icon icon--chrome"></span></a>
|
<a target="_blank" href="https://chromewebstore.google.com/detail/marreta/ipelapagohjgjcgpncpbmaaacemafppe"><span class="name">Chrome</span><span class="icon icon--chrome"></span></a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</header>
|
</header>
|
||||||
|
@ -56,7 +56,7 @@
|
||||||
<main>
|
<main>
|
||||||
<h2 class="description"><?php echo SITE_DESCRIPTION; ?></h2>
|
<h2 class="description"><?php echo SITE_DESCRIPTION; ?></h2>
|
||||||
<p class="walls_destroyed">
|
<p class="walls_destroyed">
|
||||||
<strong><?php echo number_format($cache_folder, 0, ',', '.'); ?></strong> <span><?php echo Language::get('walls_destroyed'); ?></span>
|
<strong><?php echo number_format($cache_folder, 0, ',', '.'); ?></strong> <span><?php echo \Inc\Language::get('walls_destroyed'); ?></span>
|
||||||
</p>
|
</p>
|
||||||
<form id="urlForm" method="POST" onsubmit="return validateForm()" class="space-y-6">
|
<form id="urlForm" method="POST" onsubmit="return validateForm()" class="space-y-6">
|
||||||
<?php if ($message): ?>
|
<?php if ($message): ?>
|
||||||
|
@ -83,31 +83,31 @@
|
||||||
<input type="url"
|
<input type="url"
|
||||||
name="url"
|
name="url"
|
||||||
id="url"
|
id="url"
|
||||||
placeholder="<?php echo Language::get('url_placeholder'); ?>"
|
placeholder="<?php echo \Inc\Language::get('url_placeholder'); ?>"
|
||||||
value="<?php echo htmlspecialchars($url); ?>"
|
value="<?php echo htmlspecialchars($url); ?>"
|
||||||
required
|
required
|
||||||
pattern="https?://.+"
|
pattern="https?://.+"
|
||||||
title="<?php echo Language::getMessage('INVALID_URL')['message']; ?>">
|
title="<?php echo \Inc\Language::getMessage('INVALID_URL')['message']; ?>">
|
||||||
</div>
|
</div>
|
||||||
<button type="submit" alt="<?php echo Language::get('analyze_button'); ?>">
|
<button type="submit" alt="<?php echo \Inc\Language::get('analyze_button'); ?>">
|
||||||
<span class="icon icon--marreta"></span>
|
<span class="icon icon--marreta"></span>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<p class="adblock"><?php echo str_replace('{site_name}', SITE_NAME, Language::get('adblocker_warning')); ?></p>
|
<p class="adblock"><?php echo str_replace('{site_name}', SITE_NAME, \Inc\Language::get('adblocker_warning')); ?></p>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
<div class="plus">
|
<div class="plus">
|
||||||
<div class="add_as_app">
|
<div class="add_as_app">
|
||||||
<h2>
|
<h2>
|
||||||
<span class="icon icon--android"></span><?php echo Language::get('add_as_app'); ?>
|
<span class="icon icon--android"></span><?php echo \Inc\Language::get('add_as_app'); ?>
|
||||||
</h2>
|
</h2>
|
||||||
<div class="text">
|
<div class="text">
|
||||||
<div>
|
<div>
|
||||||
<ol>
|
<ol>
|
||||||
<li><?php echo Language::get('add_as_app_step1'); ?></li>
|
<li><?php echo \Inc\Language::get('add_as_app_step1'); ?></li>
|
||||||
<li><?php echo Language::get('add_as_app_step2'); ?></li>
|
<li><?php echo \Inc\Language::get('add_as_app_step2'); ?></li>
|
||||||
<li><?php echo Language::get('add_as_app_step3'); ?></li>
|
<li><?php echo \Inc\Language::get('add_as_app_step3'); ?></li>
|
||||||
<li><?php echo str_replace('{site_name}', SITE_NAME, Language::get('add_as_app_step4')); ?></li>
|
<li><?php echo str_replace('{site_name}', SITE_NAME, \Inc\Language::get('add_as_app_step4')); ?></li>
|
||||||
</ol>
|
</ol>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -115,16 +115,16 @@
|
||||||
|
|
||||||
<div class="bookmarklet">
|
<div class="bookmarklet">
|
||||||
<h2>
|
<h2>
|
||||||
<span class="icon icon--bookmark"></span><?php echo Language::get('bookmarklet_title'); ?>
|
<span class="icon icon--bookmark"></span><?php echo \Inc\Language::get('bookmarklet_title'); ?>
|
||||||
</h2>
|
</h2>
|
||||||
<div class="text">
|
<div class="text">
|
||||||
<p>
|
<p>
|
||||||
<?php echo str_replace('{site_name}', SITE_NAME, Language::get('bookmarklet_description')); ?>
|
<?php echo str_replace('{site_name}', SITE_NAME, \Inc\Language::get('bookmarklet_description')); ?>
|
||||||
</p>
|
</p>
|
||||||
<div>
|
<div>
|
||||||
<a href="javascript:(function(){let currentUrl=window.location.href;window.location.href='<?php echo SITE_URL; ?>/p/'+encodeURIComponent(currentUrl);})()"
|
<a href="javascript:(function(){let currentUrl=window.location.href;window.location.href='<?php echo SITE_URL; ?>/p/'+encodeURIComponent(currentUrl);})()"
|
||||||
onclick="return false;">
|
onclick="return false;">
|
||||||
<?php echo str_replace('{site_name}', SITE_NAME, Language::get('open_in')); ?>
|
<?php echo str_replace('{site_name}', SITE_NAME, \Inc\Language::get('open_in')); ?>
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -143,4 +143,4 @@
|
||||||
?>
|
?>
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
* Generates the Web App Manifest (manifest.json) for Progressive Web App (PWA) functionality
|
* Generates the Web App Manifest (manifest.json) for Progressive Web App (PWA) functionality
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
use Inc\Language;
|
||||||
|
|
||||||
require_once __DIR__ . '/../../config.php';
|
require_once __DIR__ . '/../../config.php';
|
||||||
require_once __DIR__ . '/../../inc/Language.php';
|
require_once __DIR__ . '/../../inc/Language.php';
|
||||||
|
|
||||||
|
@ -50,4 +52,4 @@ $manifest = [
|
||||||
'dir' => 'ltr'
|
'dir' => 'ltr'
|
||||||
];
|
];
|
||||||
|
|
||||||
echo json_encode($manifest, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
echo json_encode($manifest, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||||
|
|
Loading…
Add table
Reference in a new issue