error = new URLAnalyzerError(); }

    /**
     * Processes and modifies HTML content.
     *
     * Parses the markup and then applies every transformation in a fixed
     * order: canonical link, URL absolutisation, domain rules, inline-style
     * cleanup, brand bar and (in debug mode only) the debug bar. The style
     * cleanup runs before the two bars are added, so their own
     * `position: fixed` styling is not stripped.
     *
     * @param string $content Raw HTML. Anything shorter than 5120 bytes is
     *                        reported through `$this->error->throwError()`.
     * @param string $host    Host passed to the domain-rule lookup.
     * @param string $url     Page URL; used as canonical href and as the base
     *                        for resolving relative URLs.
     *
     * @return string The serialized, modified document.
     */
    public function processContent($content, $host, $url)
    {
        if (strlen($content) < 5120) {
            $this->error->throwError(self::ERROR_CONTENT_ERROR);
        }

        $dom = HTMLDocument::createFromString($content, LIBXML_NOERROR);

        // Order matters: see the method description.
        $this->processCanonicalLinks($dom, $url);
        $this->fixRelativeUrls($dom, $url);
        $this->applyDomainRules($dom, $host);
        $this->cleanInlineStyles($dom);
        $this->addBrandBar($dom, $url);
        $this->addDebugBar($dom);

        return $dom->saveHTML();
    }

    /**
     * Replaces every canonical link of the document with a single one
     * pointing at `$url`.
     *
     * Existing `<link rel="canonical">` elements are always removed; the new
     * one is only added when the document has a `<head>`.
     *
     * @param object $dom Parsed document.
     * @param string $url Value for the new canonical `href`.
     */
    private function processCanonicalLinks($dom, $url): void
    {
        foreach ($dom->querySelectorAll("link[rel='canonical']") as $staleLink) {
            $staleLink->remove();
        }

        $head = $dom->querySelector('head');
        if ($head === null) {
            return;
        }

        $canonical = $dom->createElement('link');
        $canonical->setAttribute('rel', 'canonical');
        $canonical->setAttribute('href', $url);
        $head->append($canonical);
    }

    /**
     * Applies the rules configured for `$host` to the document.
     *
     * Injects the optional custom stylesheet (into `<head>`) and custom
     * script (at the end of `<body>`), then runs the element/attribute
     * removal rules. Both injections are recorded in `$this->activatedRules`
     * so that the debug bar lists them.
     *
     * @param object $dom  Parsed document.
     * @param string $host Host handed to `getDomainRules()`.
     */
    private function applyDomainRules($dom, $host): void
    {
        $domainRules = $this->getDomainRules($host);

        if (isset($domainRules['customStyle'])) {
            $styleElement = $dom->createElement('style');
            $styleElement->textContent = $domainRules['customStyle'];
            $dom->querySelector('head')?->append($styleElement);
            $this->activatedRules[] = 'customStyle';
        }

        if (isset($domainRules['customCode'])) {
            $scriptElement = $dom->createElement('script');
            $scriptElement->textContent = $domainRules['customCode'];
            $scriptElement->setAttribute('type', 'text/javascript');
            $dom->querySelector('body')?->append($scriptElement);
            // Recorded like customStyle so the debug bar reports it too.
            $this->activatedRules[] = 'customCode';
        }

        $this->removeUnwantedElements($dom, $domainRules);
    }

    /**
     * Removes classes, elements, scripts and attributes according to the
     * domain rules.
     *
     * Rule keys read here: `classAttrRemove`, `removeElementsByTag`,
     * `idElementRemove`, `classElementRemove`, `scriptTagRemove` and
     * `removeCustomAttr`. Every rule value that actually matched something is
     * appended to `$this->activatedRules` as "<ruleName>: <value>".
     *
     * @param object     $dom         Parsed document.
     * @param array|null $domainRules Rule set; missing keys are skipped.
     */
    private function removeUnwantedElements($dom, $domainRules): void
    {
        // Detaches every node of a list and reports whether there was any.
        $detach = static function (iterable $nodes): bool {
            $removedAny = false;
            foreach ($nodes as $node) {
                $node->remove();
                $removedAny = true;
            }

            return $removedAny;
        };

        // Strip a class name but keep the element.
        foreach ($domainRules['classAttrRemove'] ?? [] as $class) {
            $matches = $dom->querySelectorAll("*[class~='$class']");
            if ($matches->length === 0) {
                continue;
            }
            foreach ($matches as $element) {
                $this->removeClassNames($element, [$class]);
            }
            $this->activatedRules[] = "classAttrRemove: $class";
        }

        // Remove whole elements selected by tag name.
        foreach ($domainRules['removeElementsByTag'] ?? [] as $tag) {
            if ($detach($dom->querySelectorAll($tag))) {
                $this->activatedRules[] = "removeElementsByTag: $tag";
            }
        }

        // Remove the first element carrying the given id.
        foreach ($domainRules['idElementRemove'] ?? [] as $id) {
            $element = $dom->querySelector("#$id");
            if ($element !== null) {
                $element->remove();
                $this->activatedRules[] = "idElementRemove: $id";
            }
        }

        // Remove whole elements selected by class.
        foreach ($domainRules['classElementRemove'] ?? [] as $class) {
            if ($detach($dom->querySelectorAll(".$class"))) {
                $this->activatedRules[] = "classElementRemove: $class";
            }
        }

        // Remove scripts: external ones by src, preload hints by href and
        // inline ones by their text.
        foreach ($domainRules['scriptTagRemove'] ?? [] as $script) {
            $found = $detach($dom->querySelectorAll("script[src*='$script']"));
            $found = $detach($dom->querySelectorAll("link[as='script'][href*='$script']")) || $found;

            // Plain substring search on the remaining scripts: unlike an
            // interpolated XPath expression, quotes in the rule text cannot
            // break it.
            foreach ($dom->querySelectorAll('script') as $inlineScript) {
                if (str_contains((string) $inlineScript->textContent, (string) $script)) {
                    $inlineScript->remove();
                    $found = true;
                }
            }

            if ($found) {
                $this->activatedRules[] = "scriptTagRemove: $script";
            }
        }

        // Remove attributes, either by exact name or by `*` wildcard.
        foreach ($domainRules['removeCustomAttr'] ?? [] as $attrPattern) {
            $found = false;

            if (str_contains((string) $attrPattern, '*')) {
                // Quote the rule first so that only `*` acts as a wildcard.
                $pattern = '/^' . str_replace('\*', '.*', preg_quote((string) $attrPattern, '/')) . '$/';

                foreach ($dom->querySelectorAll('*') as $element) {
                    // Iterate over a snapshot: removing from the live
                    // attribute map while walking it skips entries.
                    foreach (iterator_to_array($element->attributes, false) as $attr) {
                        if (preg_match($pattern, $attr->name) === 1) {
                            $element->removeAttribute($attr->name);
                            $found = true;
                        }
                    }
                }
            } else {
                foreach ($dom->querySelectorAll("[$attrPattern]") as $element) {
                    $element->removeAttribute($attrPattern);
                    $found = true;
                }
            }

            if ($found) {
                $this->activatedRules[] = "removeCustomAttr: $attrPattern";
            }
        }
    }

    /**
     * Strips layout-affecting declarations from inline `style` attributes.
     *
     * Removed properties: max-height, height, overflow, position, display
     * and visibility. Only whole property names are matched, so e.g.
     * `line-height` or `min-height` are left intact. An attribute that ends
     * up empty is removed altogether.
     *
     * @param object $dom Parsed document.
     */
    private function cleanInlineStyles($dom): void
    {
        // The lookbehind rejects matches inside longer property names.
        $pattern = '/(?<![\w-])(max-height|height|overflow|position|display|visibility)\s*:\s*[^;]+;?/i';

        foreach ($dom->querySelectorAll('[style]') as $element) {
            $original = (string) $element->getAttribute('style');
            $cleaned = trim(preg_replace($pattern, '', $original) ?? $original);

            if ($cleaned === '') {
                $element->removeAttribute('style');
            } else {
                $element->setAttribute('style', $cleaned);
            }
        }
    }

    /**
     * Appends the fixed-position brand bar container to `<body>`.
     *
     * Does nothing when the document has no `<body>`.
     *
     * @param object $dom Parsed document.
     * @param string $url Page URL (not used by the current markup).
     */
    private function addBrandBar($dom, $url): void
    {
        $body = $dom->querySelector('body');
        if ($body === null) {
            return;
        }

        $brandDiv = $dom->createElement('div');
        $brandDiv->setAttribute('style', 'z-index: 2147483647; position: fixed; top: 0; right: 1rem; display: flex; gap: 8px;');

        // NOTE(review): both fragments are empty here, so the bar is rendered
        // without any content - confirm this is intended.
        $linkHtml = '';
        $siteHtml = '';
        $brandDiv->innerHTML = $linkHtml . $siteHtml;

        $body->append($brandDiv);
    }

    /**
     * Appends a panel listing the activated rules when debugging.
     *
     * Only active when the `LOG_LEVEL` constant is defined and equals
     * 'DEBUG', and only when the document has a `<body>`. Each entry of
     * `$this->activatedRules` becomes one line; a placeholder line is shown
     * when the list is empty.
     *
     * @param object $dom Parsed document.
     */
    private function addDebugBar($dom): void
    {
        if (!defined('LOG_LEVEL') || LOG_LEVEL !== 'DEBUG') {
            return;
        }

        $body = $dom->querySelector('body');
        if ($body === null) {
            return;
        }

        $debugDiv = $dom->createElement('div');
        $debugDiv->setAttribute('style', 'z-index: 2147483647; position: fixed; bottom: 1rem; right: 1rem; max-width: 400px; padding: 1rem; color: #000; background: rgba(255, 255, 255, 0.9); backdrop-filter: blur(8px); border: 1px solid #e5e7eb; border-radius: 0.5rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); overflow: auto; max-height: 80vh; z-index: 2147483647; font-family: monospace; font-size: 13px; line-height: 1.4;');

        $lines = empty($this->activatedRules)
            ? ['No rules activated / Nenhuma regra ativada']
            : $this->activatedRules;

        foreach ($lines as $line) {
            $ruleElement = $dom->createElement('div');
            $ruleElement->textContent = $line;
            $debugDiv->append($ruleElement);
        }

        $body->append($debugDiv);
    }

    /**
     * Removes the given class names from an element's `class` attribute.
     *
     * The attribute is split on any whitespace (spaces, tabs, newlines) and
     * rebuilt with single spaces. When no class is left the attribute itself
     * is removed. Elements without a `class` attribute are left untouched.
     *
     * @param object       $element         Element to modify.
     * @param list<string> $classesToRemove Class names to drop.
     */
    private function removeClassNames($element, $classesToRemove): void
    {
        if (!$element->hasAttribute('class')) {
            return;
        }

        $unwanted = array_map(strval(...), $classesToRemove);
        $current = preg_split('/\s+/', (string) $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $remaining = array_filter(
            $current,
            static fn (string $class): bool => !in_array($class, $unwanted, true),
        );

        if ($remaining === []) {
            $element->removeAttribute('class');
        } else {
            $element->setAttribute('class', implode(' ', $remaining));
        }
    }

    /**
     * Converts relative `src` and `href` values to absolute URLs.
     *
     * Values are resolved against `$baseUrl`:
     *  - "/path"  -> origin + path
     *  - "?query" -> origin + base path + query
     *  - "path"   -> origin + directory of the base path + path
     *    (dot segments such as "../" are kept as written)
     *
     * Left untouched: empty values, fragment-only references ("#..."),
     * protocol-relative URLs ("//...") and anything that already carries a
     * scheme (http:, https:, data:, mailto:, tel:, javascript:, ...).
     * Nothing is rewritten when `$baseUrl` has no host to resolve against.
     *
     * @param object $dom     Parsed document.
     * @param string $baseUrl Absolute URL of the page.
     */
    private function fixRelativeUrls($dom, $baseUrl): void
    {
        $base = parse_url($baseUrl);
        if (!is_array($base) || empty($base['host'])) {
            return;
        }

        $origin = ($base['scheme'] ?? 'http') . '://' . $base['host']
            . (isset($base['port']) ? ':' . $base['port'] : '');

        $basePath = $base['path'] ?? '/';
        if ($basePath === '') {
            $basePath = '/';
        }
        $lastSlash = strrpos($basePath, '/');
        $baseDirectory = $lastSlash === false ? '/' : substr($basePath, 0, $lastSlash + 1);

        foreach (['src', 'href'] as $attribute) {
            foreach ($dom->querySelectorAll("[{$attribute}]") as $element) {
                $value = trim((string) $element->getAttribute($attribute));

                $alreadyUsable = $value === ''
                    || $value[0] === '#'
                    || str_starts_with($value, '//')
                    || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $value) === 1;
                if ($alreadyUsable) {
                    continue;
                }

                $absolute = match ($value[0]) {
                    '/' => $origin . $value,
                    '?' => $origin . $basePath . $value,
                    default => $origin . $baseDirectory . $value,
                };

                $element->setAttribute($attribute, $absolute);
            }
        }
    }
}