nova classe para erros

This commit is contained in:
Renan Bernordi 2025-01-23 19:13:40 -03:00
parent 897c634f38
commit bf892f9c5c
3 changed files with 155 additions and 128 deletions

View file

@ -69,11 +69,10 @@ if (strpos($path, $prefix) === 0) {
// Basic URL validation // Basic URL validation
// Validação básica da URL // Validação básica da URL
if (!$url || !filter_var($url, FILTER_VALIDATE_URL)) { if (!$url || !filter_var($url, FILTER_VALIDATE_URL)) {
$errorMessage = Language::getMessage('INVALID_URL');
sendResponse([ sendResponse([
'error' => [ 'error' => [
'code' => 'INVALID_URL', 'type' => URLAnalyzer::ERROR_INVALID_URL,
'message' => $errorMessage['message'] 'message' => Language::getMessage('INVALID_URL')['message']
] ]
], 400); ], 400);
} }
@ -92,59 +91,43 @@ if (strpos($path, $prefix) === 0) {
sendResponse([ sendResponse([
'url' => SITE_URL . '/p/' . $url 'url' => SITE_URL . '/p/' . $url
], 200); ], 200);
} catch (Exception $e) { } catch (URLAnalyzerException $e) {
// Get error code from exception or default to 400 // Get error details from the exception
// Obtém o código de erro da exceção ou usa 400 como padrão // Obtém detalhes do erro da exceção
$statusCode = $e->getCode() ?: 400; $errorType = $e->getErrorType();
$message = $e->getMessage(); $additionalInfo = $e->getAdditionalInfo();
// Map error codes to error types
// Mapeia códigos de erro para tipos de erro
switch ($statusCode) {
case 400:
$errorCode = 'INVALID_URL';
break;
case 403:
$errorCode = 'BLOCKED_DOMAIN';
break;
case 404:
$errorCode = 'NOT_FOUND';
break;
case 502:
$errorCode = 'HTTP_ERROR';
break;
case 503:
$errorCode = 'CONNECTION_ERROR';
break;
case 504:
$errorCode = 'DNS_FAILURE';
break;
default:
$errorCode = 'GENERIC_ERROR';
break;
}
$errorMessage = Language::getMessage($errorCode);
// Add error header for better client-side handling // Add error header for better client-side handling
// Adiciona header de erro para melhor tratamento no cliente // Adiciona header de erro para melhor tratamento no cliente
header('X-Error-Message: ' . $message); header('X-Error-Type: ' . $errorType);
if ($additionalInfo) {
header('X-Error-Info: ' . $additionalInfo);
}
sendResponse([ sendResponse([
'error' => [ 'error' => [
'code' => $errorCode, 'type' => $errorType,
'message' => $errorMessage['message'] 'message' => $e->getMessage(),
'details' => $additionalInfo ?: null
] ]
], $statusCode); ], $e->getCode());
} catch (Exception $e) {
// Handle any other unexpected errors
// Trata quaisquer outros erros inesperados
sendResponse([
'error' => [
'type' => URLAnalyzer::ERROR_GENERIC_ERROR,
'message' => Language::getMessage('GENERIC_ERROR')['message']
]
], 500);
} }
} else { } else {
// Return 404 error for endpoints not found // Return 404 error for endpoints not found
// Retorna erro 404 para endpoints não encontrados // Retorna erro 404 para endpoints não encontrados
$errorMessage = Language::getMessage('NOT_FOUND');
sendResponse([ sendResponse([
'error' => [ 'error' => [
'code' => 'NOT_FOUND', 'type' => URLAnalyzer::ERROR_NOT_FOUND,
'message' => $errorMessage['message'] 'message' => Language::getMessage('NOT_FOUND')['message']
] ]
], 404); ], 404);
} }

View file

@ -29,8 +29,71 @@ use Facebook\WebDriver\Firefox\FirefoxProfile;
use Facebook\WebDriver\Chrome\ChromeOptions; use Facebook\WebDriver\Chrome\ChromeOptions;
use Inc\Logger; use Inc\Logger;
/**
 * Exception raised when URL analysis fails.
 *
 * In addition to the usual message and numeric code, it carries a
 * machine-readable error type and an optional detail string, so handlers
 * can branch on the type instead of parsing the message or the code.
 */
class URLAnalyzerException extends Exception
{
    /** @var string Machine-readable error type (e.g. a URLAnalyzer::ERROR_* value) */
    private $errorType;

    /** @var string Extra detail about the failure; empty string when there is none */
    private $additionalInfo;

    /**
     * @param string         $message        Human-readable error message
     * @param int            $code           Numeric code; handlers in this project use it as the HTTP status
     * @param string         $errorType      Machine-readable error type
     * @param string         $additionalInfo Optional extra detail about the failure
     * @param Throwable|null $previous       Optional underlying cause, kept for exception chaining
     */
    public function __construct($message, $code, $errorType, $additionalInfo = '', $previous = null)
    {
        // Forward the cause so getPrevious() exposes the original failure.
        parent::__construct($message, $code, $previous);
        $this->errorType = $errorType;
        $this->additionalInfo = $additionalInfo;
    }

    /**
     * @return string The error type given at construction
     */
    public function getErrorType()
    {
        return $this->errorType;
    }

    /**
     * @return string The extra detail given at construction ('' when none)
     */
    public function getAdditionalInfo()
    {
        return $this->additionalInfo;
    }
}
class URLAnalyzer class URLAnalyzer
{ {
// Error type constants.
// String identifiers for each failure category; the same strings are used
// as the message keys in $errorMap below.
const ERROR_INVALID_URL = 'INVALID_URL';
const ERROR_BLOCKED_DOMAIN = 'BLOCKED_DOMAIN';
const ERROR_NOT_FOUND = 'NOT_FOUND';
const ERROR_HTTP_ERROR = 'HTTP_ERROR';
const ERROR_CONNECTION_ERROR = 'CONNECTION_ERROR';
const ERROR_DNS_FAILURE = 'DNS_FAILURE';
const ERROR_CONTENT_ERROR = 'CONTENT_ERROR';
const ERROR_GENERIC_ERROR = 'GENERIC_ERROR';
// Error mapping.
// For each error type: 'code' is the numeric code given to the thrown
// exception and 'message_key' is the key passed to Language::getMessage().
// Note that HTTP_ERROR and CONTENT_ERROR both map to code 502.
private $errorMap = [
self::ERROR_INVALID_URL => ['code' => 400, 'message_key' => 'INVALID_URL'],
self::ERROR_BLOCKED_DOMAIN => ['code' => 403, 'message_key' => 'BLOCKED_DOMAIN'],
self::ERROR_NOT_FOUND => ['code' => 404, 'message_key' => 'NOT_FOUND'],
self::ERROR_HTTP_ERROR => ['code' => 502, 'message_key' => 'HTTP_ERROR'],
self::ERROR_CONNECTION_ERROR => ['code' => 503, 'message_key' => 'CONNECTION_ERROR'],
self::ERROR_DNS_FAILURE => ['code' => 504, 'message_key' => 'DNS_FAILURE'],
self::ERROR_CONTENT_ERROR => ['code' => 502, 'message_key' => 'CONTENT_ERROR'],
self::ERROR_GENERIC_ERROR => ['code' => 500, 'message_key' => 'GENERIC_ERROR']
];
/**
 * Throw a standardized URLAnalyzerException for the given error type.
 *
 * Looks up the numeric code and message key for $errorType in
 * $this->errorMap, fetches the message text via Language::getMessage()
 * and throws. This method never returns normally.
 *
 * @param string $errorType      One of the self::ERROR_* constants; an unmapped
 *                               value is reported as self::ERROR_GENERIC_ERROR
 * @param string $additionalInfo Optional detail (e.g. "HTTP 503"). It is carried
 *                               on the exception via getAdditionalInfo() and is
 *                               not merged into the message
 * @throws URLAnalyzerException Always
 */
private function throwError($errorType, $additionalInfo = '')
{
    // An unmapped type would produce a null config, and with it a null code
    // and message key; degrade to the generic error instead.
    if (!isset($this->errorMap[$errorType])) {
        $errorType = self::ERROR_GENERIC_ERROR;
    }

    $errorConfig = $this->errorMap[$errorType];
    $message = Language::getMessage($errorConfig['message_key'])['message'];

    // The former `if ($additionalInfo) { $message; }` was a no-op expression
    // statement and has been removed; behavior is unchanged.
    throw new URLAnalyzerException($message, $errorConfig['code'], $errorType, $additionalInfo);
}
/** /**
* @var array List of available User Agents for requests * @var array List of available User Agents for requests
* @var array Lista de User Agents disponíveis para requisições * @var array Lista de User Agents disponíveis para requisições
@ -163,7 +226,7 @@ class URLAnalyzer
* *
* @param string $url URL to be analyzed / URL a ser analisada * @param string $url URL to be analyzed / URL a ser analisada
* @return string Processed content / Conteúdo processado * @return string Processed content / Conteúdo processado
* @throws Exception In case of processing errors / Em caso de erros durante o processamento * @throws URLAnalyzerException In case of processing errors / Em caso de erros durante o processamento
*/ */
public function analyze($url) public function analyze($url)
{ {
@ -174,7 +237,7 @@ class URLAnalyzer
// 1. Clean URL / Limpa a URL // 1. Clean URL / Limpa a URL
$cleanUrl = $this->cleanUrl($url); $cleanUrl = $this->cleanUrl($url);
if (!$cleanUrl) { if (!$cleanUrl) {
throw new Exception(Language::getMessage('INVALID_URL')['message'], 400); $this->throwError(self::ERROR_INVALID_URL);
} }
// 2. Check cache / Verifica cache // 2. Check cache / Verifica cache
@ -185,13 +248,13 @@ class URLAnalyzer
// 3. Check blocked domains / Verifica domínios bloqueados // 3. Check blocked domains / Verifica domínios bloqueados
$host = parse_url($cleanUrl, PHP_URL_HOST); $host = parse_url($cleanUrl, PHP_URL_HOST);
if (!$host) { if (!$host) {
throw new Exception(Language::getMessage('INVALID_URL')['message'], 400); $this->throwError(self::ERROR_INVALID_URL);
} }
$host = preg_replace('/^www\./', '', $host); $host = preg_replace('/^www\./', '', $host);
if (in_array($host, BLOCKED_DOMAINS)) { if (in_array($host, BLOCKED_DOMAINS)) {
Logger::getInstance()->log($cleanUrl, 'BLOCKED_DOMAIN'); Logger::getInstance()->log($cleanUrl, 'BLOCKED_DOMAIN');
throw new Exception(Language::getMessage('BLOCKED_DOMAIN')['message'], 403); $this->throwError(self::ERROR_BLOCKED_DOMAIN);
} }
// Check URL status code before proceeding // Check URL status code before proceeding
@ -199,9 +262,9 @@ class URLAnalyzer
if ($redirectInfo['httpCode'] !== 200) { if ($redirectInfo['httpCode'] !== 200) {
Logger::getInstance()->log($cleanUrl, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}"); Logger::getInstance()->log($cleanUrl, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
if ($redirectInfo['httpCode'] === 404) { if ($redirectInfo['httpCode'] === 404) {
throw new Exception(Language::getMessage('NOT_FOUND')['message'], 404); $this->throwError(self::ERROR_NOT_FOUND);
} else { } else {
throw new Exception(Language::getMessage('HTTP_ERROR')['message'], $redirectInfo['httpCode']); $this->throwError(self::ERROR_HTTP_ERROR, "HTTP {$redirectInfo['httpCode']}");
} }
} }
@ -265,34 +328,34 @@ class URLAnalyzer
// If we get here, all strategies failed // If we get here, all strategies failed
Logger::getInstance()->log($cleanUrl, 'GENERAL_FETCH_ERROR'); Logger::getInstance()->log($cleanUrl, 'GENERAL_FETCH_ERROR');
if ($lastError) { if ($lastError) {
// Map the error type based on the last error message $message = $lastError->getMessage();
if (strpos($lastError->getMessage(), 'DNS') !== false) { if (strpos($message, 'DNS') !== false) {
throw new Exception(Language::getMessage('DNS_FAILURE')['message'], 504); $this->throwError(self::ERROR_DNS_FAILURE);
} elseif (strpos($lastError->getMessage(), 'CURL') !== false) { } elseif (strpos($message, 'CURL') !== false) {
throw new Exception(Language::getMessage('CONNECTION_ERROR')['message'], 503); $this->throwError(self::ERROR_CONNECTION_ERROR);
} elseif (strpos($lastError->getMessage(), 'HTTP') !== false) { } elseif (strpos($message, 'HTTP') !== false) {
throw new Exception(Language::getMessage('HTTP_ERROR')['message'], 502); $this->throwError(self::ERROR_HTTP_ERROR);
} elseif (strpos($lastError->getMessage(), 'not found') !== false) { } elseif (strpos($message, 'not found') !== false) {
throw new Exception(Language::getMessage('NOT_FOUND')['message'], 404); $this->throwError(self::ERROR_NOT_FOUND);
} }
} }
throw new Exception(Language::getMessage('CONTENT_ERROR')['message'], 502); $this->throwError(self::ERROR_CONTENT_ERROR);
} catch (Exception $e) { } catch (URLAnalyzerException $e) {
// Re-throw with appropriate error code if not already set
if (!$e->getCode()) {
$code = 502; // Default to bad gateway
if (strpos($e->getMessage(), 'DNS') !== false) {
$code = 504;
} elseif (strpos($e->getMessage(), 'CURL') !== false) {
$code = 503;
} elseif (strpos($e->getMessage(), 'HTTP') !== false) {
$code = 502;
} elseif (strpos($e->getMessage(), 'not found') !== false) {
$code = 404;
}
throw new Exception($e->getMessage(), $code);
}
throw $e; throw $e;
} catch (Exception $e) {
// Map generic exceptions to appropriate error types
$message = $e->getMessage();
if (strpos($message, 'DNS') !== false) {
$this->throwError(self::ERROR_DNS_FAILURE);
} elseif (strpos($message, 'CURL') !== false) {
$this->throwError(self::ERROR_CONNECTION_ERROR);
} elseif (strpos($message, 'HTTP') !== false) {
$this->throwError(self::ERROR_HTTP_ERROR);
} elseif (strpos($message, 'not found') !== false) {
$this->throwError(self::ERROR_NOT_FOUND);
} else {
$this->throwError(self::ERROR_GENERIC_ERROR, $message);
}
} }
} }
@ -306,7 +369,7 @@ class URLAnalyzer
$host = parse_url($url, PHP_URL_HOST); $host = parse_url($url, PHP_URL_HOST);
if (!$host) { if (!$host) {
throw new Exception(Language::getMessage('INVALID_URL')['message'], 400); $this->throwError(self::ERROR_INVALID_URL);
} }
$host = preg_replace('/^www\./', '', $host); $host = preg_replace('/^www\./', '', $host);
$domainRules = $this->getDomainRules($host); $domainRules = $this->getDomainRules($host);
@ -346,18 +409,18 @@ class URLAnalyzer
if ($curl->error) { if ($curl->error) {
$errorMessage = $curl->errorMessage; $errorMessage = $curl->errorMessage;
if (strpos($errorMessage, 'DNS') !== false) { if (strpos($errorMessage, 'DNS') !== false) {
throw new Exception(Language::getMessage('DNS_FAILURE')['message'], 504); $this->throwError(self::ERROR_DNS_FAILURE);
} elseif (strpos($errorMessage, 'CURL') !== false) { } elseif (strpos($errorMessage, 'CURL') !== false) {
throw new Exception(Language::getMessage('CONNECTION_ERROR')['message'], 503); $this->throwError(self::ERROR_CONNECTION_ERROR);
} elseif ($curl->httpStatusCode === 404) { } elseif ($curl->httpStatusCode === 404) {
throw new Exception(Language::getMessage('NOT_FOUND')['message'], 404); $this->throwError(self::ERROR_NOT_FOUND);
} else { } else {
throw new Exception(Language::getMessage('HTTP_ERROR')['message'], 502); $this->throwError(self::ERROR_HTTP_ERROR);
} }
} }
if ($curl->httpStatusCode !== 200 || empty($curl->response)) { if ($curl->httpStatusCode !== 200 || empty($curl->response)) {
throw new Exception(Language::getMessage('HTTP_ERROR')['message'], 502); $this->throwError(self::ERROR_HTTP_ERROR);
} }
return $curl->response; return $curl->response;
@ -382,17 +445,17 @@ class URLAnalyzer
if ($curl->error) { if ($curl->error) {
if (strpos($curl->errorMessage, 'DNS') !== false) { if (strpos($curl->errorMessage, 'DNS') !== false) {
throw new Exception(Language::getMessage('DNS_FAILURE')['message'], 504); $this->throwError(self::ERROR_DNS_FAILURE);
} elseif (strpos($curl->errorMessage, 'CURL') !== false) { } elseif (strpos($curl->errorMessage, 'CURL') !== false) {
throw new Exception(Language::getMessage('CONNECTION_ERROR')['message'], 503); $this->throwError(self::ERROR_CONNECTION_ERROR);
} else { } else {
throw new Exception(Language::getMessage('HTTP_ERROR')['message'], 502); $this->throwError(self::ERROR_HTTP_ERROR);
} }
} }
$data = $curl->response; $data = $curl->response;
if (!isset($data->archived_snapshots->closest->url)) { if (!isset($data->archived_snapshots->closest->url)) {
throw new Exception(Language::getMessage('NOT_FOUND')['message'], 404); $this->throwError(self::ERROR_NOT_FOUND);
} }
$archiveUrl = $data->archived_snapshots->closest->url; $archiveUrl = $data->archived_snapshots->closest->url;
@ -405,7 +468,7 @@ class URLAnalyzer
$curl->get($archiveUrl); $curl->get($archiveUrl);
if ($curl->error || $curl->httpStatusCode !== 200 || empty($curl->response)) { if ($curl->error || $curl->httpStatusCode !== 200 || empty($curl->response)) {
throw new Exception(Language::getMessage('HTTP_ERROR')['message'], 502); $this->throwError(self::ERROR_HTTP_ERROR);
} }
$content = $curl->response; $content = $curl->response;
@ -466,7 +529,7 @@ class URLAnalyzer
$driver->quit(); $driver->quit();
if (empty($htmlSource)) { if (empty($htmlSource)) {
throw new Exception(Language::getMessage('CONTENT_ERROR')['message'], 502); $this->throwError(self::ERROR_CONTENT_ERROR);
} }
return $htmlSource; return $htmlSource;
@ -475,16 +538,16 @@ class URLAnalyzer
$driver->quit(); $driver->quit();
} }
// Map Selenium errors to appropriate HTTP status codes // Map Selenium errors to appropriate error types
$message = $e->getMessage(); $message = $e->getMessage();
if (strpos($message, 'DNS') !== false) { if (strpos($message, 'DNS') !== false) {
throw new Exception(Language::getMessage('DNS_FAILURE')['message'], 504); $this->throwError(self::ERROR_DNS_FAILURE);
} elseif (strpos($message, 'timeout') !== false) { } elseif (strpos($message, 'timeout') !== false) {
throw new Exception(Language::getMessage('CONNECTION_ERROR')['message'], 503); $this->throwError(self::ERROR_CONNECTION_ERROR);
} elseif (strpos($message, 'not found') !== false) { } elseif (strpos($message, 'not found') !== false) {
throw new Exception(Language::getMessage('NOT_FOUND')['message'], 404); $this->throwError(self::ERROR_NOT_FOUND);
} else { } else {
throw new Exception(Language::getMessage('HTTP_ERROR')['message'], 502); $this->throwError(self::ERROR_HTTP_ERROR);
} }
} }
} }
@ -535,7 +598,7 @@ class URLAnalyzer
private function processContent($content, $host, $url) private function processContent($content, $host, $url)
{ {
if (strlen($content) < 5120) { if (strlen($content) < 5120) {
throw new Exception(Language::getMessage('CONTENT_ERROR')['message'], 502); $this->throwError(self::ERROR_CONTENT_ERROR);
} }
$dom = new DOMDocument(); $dom = new DOMDocument();

View file

@ -60,55 +60,36 @@ if (strpos($path, $prefix) === 0) {
// Exibe o conteúdo processado // Exibe o conteúdo processado
echo $content; echo $content;
exit; exit;
} catch (Exception $e) { } catch (URLAnalyzerException $e) {
// Get error code from exception or default to 400 // Get error type and additional info from exception
// Obtém o código de erro da exceção ou usa 400 como padrão // Obtém o tipo de erro e informações adicionais da exceção
$statusCode = $e->getCode() ?: 400; $errorType = $e->getErrorType();
$additionalInfo = $e->getAdditionalInfo();
// Map error codes to error types // Handle blocked domain with redirect URL
// Mapeia códigos de erro para tipos de erro // Trata domínio bloqueado com URL de redirecionamento
switch ($statusCode) { if ($errorType === URLAnalyzer::ERROR_BLOCKED_DOMAIN && $additionalInfo) {
case 400: header('Location: ' . trim($additionalInfo) . '?message=' . $errorType);
$errorType = 'INVALID_URL'; exit;
break;
case 403:
$errorType = 'BLOCKED_DOMAIN';
// Extract redirect URL from error message if present
$parts = explode('|', $e->getMessage());
if (count($parts) > 1) {
header('Location: ' . trim($parts[1]) . '?message=' . $errorType);
exit;
}
break;
case 404:
$errorType = 'NOT_FOUND';
break;
case 502:
$errorType = 'HTTP_ERROR';
break;
case 503:
$errorType = 'CONNECTION_ERROR';
break;
case 504:
$errorType = 'DNS_FAILURE';
break;
default:
$errorType = 'GENERIC_ERROR';
break;
} }
// Redirect to home page with error message // Redirect to home page with error message
// Redireciona para a página inicial com mensagem de erro // Redireciona para a página inicial com mensagem de erro
header('Location: /?message=' . $errorType); header('Location: /?message=' . $errorType);
exit; exit;
} catch (Exception $e) {
// Handle any other unexpected errors
// Trata quaisquer outros erros inesperados
header('Location: /?message=' . URLAnalyzer::ERROR_GENERIC_ERROR);
exit;
} }
} else { } else {
// Invalid URL / URL inválida // Invalid URL / URL inválida
header('Location: /?message=INVALID_URL'); header('Location: /?message=' . URLAnalyzer::ERROR_INVALID_URL);
exit; exit;
} }
} else { } else {
// Invalid path / Path inválido // Invalid path / Path inválido
header('Location: /?message=NOT_FOUND'); header('Location: /?message=' . URLAnalyzer::ERROR_NOT_FOUND);
exit; exit;
} }