mirror of
https://github.com/manualdousuario/marreta.git
synced 2026-04-29 20:10:24 +00:00
adicionada documentação na nova estrutura do urlanalyzer
This commit is contained in:
parent
91f58e61c7
commit
db4e512e63
7 changed files with 120 additions and 3 deletions
|
|
@@ -1,4 +1,8 @@
|
|||
<?php
|
||||
/**
|
||||
* Fetches content using multiple strategies
|
||||
* Uses cURL, Wayback Machine, and Selenium
|
||||
*/
|
||||
|
||||
namespace Inc\URLAnalyzer;
|
||||
|
||||
|
|
@@ -11,14 +15,22 @@ use Facebook\WebDriver\Chrome\ChromeOptions;
|
|||
|
||||
class URLAnalyzerFetch extends URLAnalyzerBase
|
||||
{
|
||||
/** @var URLAnalyzerError Handler for throwing formatted errors */
|
||||
private $error;
|
||||
|
||||
/**
|
||||
* Sets up the fetch handler with error handling capability
|
||||
*/
|
||||
public function __construct()
|
||||
{
|
||||
parent::__construct();
|
||||
$this->error = new URLAnalyzerError();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches content using cURL
|
||||
* Handles redirects and custom headers
|
||||
*/
|
||||
public function fetchContent($url)
|
||||
{
|
||||
$curl = new Curl();
|
||||
|
|
@@ -79,6 +91,10 @@ class URLAnalyzerFetch extends URLAnalyzerBase
|
|||
return $curl->response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches from Wayback Machine archive
|
||||
* Used when direct access fails
|
||||
*/
|
||||
public function fetchFromWaybackMachine($url)
|
||||
{
|
||||
$url = preg_replace('#^https?://#', '', $url);
|
||||
|
|
@@ -128,6 +144,10 @@ class URLAnalyzerFetch extends URLAnalyzerBase
|
|||
return $content;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches using Selenium for JS-heavy sites
|
||||
* Supports Firefox and Chrome
|
||||
*/
|
||||
public function fetchFromSelenium($url, $browser = 'firefox')
|
||||
{
|
||||
$host = 'http://'.SELENIUM_HOST.'/wd/hub';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue