diff --git a/app/data/domain_rules.php b/app/data/domain_rules.php
index 6460a00..2d327ce 100644
--- a/app/data/domain_rules.php
+++ b/app/data/domain_rules.php
@@ -204,23 +204,6 @@ return [
'scriptTagRemove' => 'zephr',
'classElementRemove' => 'zephr'
],
- 'economist.com' => [
- 'cookies' => [
- 'ec_limit' => 'allow'
- ],
- 'scriptTagRemove' => ['wrapperMessagingWithoutDetection.js'],
- 'customCode' => '
- var artBodyContainer = document.querySelector("article.article");
- var artBody = artBodyContainer.innerHTML;
- checkPaywall();
- function checkPaywall() {
- let paywallBox = document.querySelector(".layout-article-regwall");
- if (paywallBox) {
- artBodyContainer.innerHTML = artBody;
- }
- }
- '
- ],
'nytimes.com' => [
'idElementRemove' => ['gateway-content', 'site-index', 'complianceOverlay'],
'customCode' => '
@@ -722,41 +705,5 @@ return [
}, 1000);
})
'
- ],
- // Test domain
- 'altendorfme.github.io' => [
- 'userAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
- 'headers' => [
- 'Accept-Language' => 'en-US,en;q=0.9',
- 'Cache-Control' => 'no-cache',
- 'Pragma' => 'no-cache'
- ],
- 'proxy' => true,
- 'idElementRemove' => ['test-id-1', 'paywall'],
- 'classElementRemove' => ['test-class-1'],
- 'scriptTagRemove' => ['analytics.js', 'test-script.js', 'paywall.js'],
- 'cookies' => [
- 'visited' => 'true',
- 'consent' => 'accepted',
- 'session_id' => null
- ],
- 'classAttrRemove' => ['test-attr-1', 'paywall'],
- 'customCode' => '
- console.log("worked");
- ',
- 'customStyle' => '
- .test-style {
- background: red;
- }
- ',
- 'excludeGlobalRules' => [
- 'scriptTagRemove' => ['excluded-script.js'],
- 'classElementRemove' => ['excluded-class']
- ],
- 'fetchStrategies' => 'fetchContent',
- 'socialReferrers' => true,
- 'fromGoogleBot' => true,
- 'removeElementsByTag' => ['iframe'],
- 'removeCustomAttr' => ['data-*']
]
];
diff --git a/app/inc/URLAnalyzer/URLAnalyzerProcess.php b/app/inc/URLAnalyzer/URLAnalyzerProcess.php
index 1a78475..01a678d 100644
--- a/app/inc/URLAnalyzer/URLAnalyzerProcess.php
+++ b/app/inc/URLAnalyzer/URLAnalyzerProcess.php
@@ -33,69 +33,63 @@ class URLAnalyzerProcess extends URLAnalyzerBase
}
$dom = HTMLDocument::createFromString($content, LIBXML_NOERROR);
- $xpath = new XPath($dom);
// Process all modifications in real-time
- $this->processCanonicalLinks($dom, $xpath, $url);
- $this->fixRelativeUrls($dom, $xpath, $url);
- $this->applyDomainRules($dom, $xpath, $host);
- $this->cleanInlineStyles($xpath);
- $this->addBrandBar($dom, $xpath, $url);
- $this->addDebugBar($dom, $xpath);
+ $this->processCanonicalLinks($dom, $url);
+ $this->fixRelativeUrls($dom, $url);
+ $this->applyDomainRules($dom, $host);
+ $this->cleanInlineStyles($dom);
+ $this->addBrandBar($dom, $url);
+ $this->addDebugBar($dom);
return $dom->saveHTML();
}
/** Updates canonical link tags */
- private function processCanonicalLinks($dom, $xpath, $url)
+ private function processCanonicalLinks($dom, $url)
{
- $canonicalLinks = $xpath->query("//link[@rel='canonical']");
- if ($canonicalLinks !== false) {
- foreach ($canonicalLinks as $link) {
- if ($link->parentNode) {
- $link->parentNode->removeChild($link);
- }
- }
+ foreach ($dom->querySelectorAll("link[rel='canonical']") as $link) {
+ $link->parentNode->removeChild($link);
}
- $head = $xpath->query('//head')->item(0);
+ $head = $dom->querySelector('head');
if ($head) {
$newCanonical = $dom->createElement('link');
$newCanonical->setAttribute('rel', 'canonical');
$newCanonical->setAttribute('href', $url);
- $head->appendChild($newCanonical);
+ $head->append($newCanonical);
}
}
/** Applies domain rules to content */
- private function applyDomainRules($dom, $xpath, $host)
+ private function applyDomainRules($dom, $host)
{
$domainRules = $this->getDomainRules($host);
if (isset($domainRules['customStyle'])) {
$styleElement = $dom->createElement('style');
- $styleElement->appendChild($dom->createTextNode($domainRules['customStyle']));
- $dom->getElementsByTagName('head')[0]->appendChild($styleElement);
+ $styleElement->textContent = $domainRules['customStyle'];
+ $dom->querySelector('head')?->append($styleElement);
$this->activatedRules[] = 'customStyle';
}
if (isset($domainRules['customCode'])) {
$scriptElement = $dom->createElement('script');
+ $scriptElement->textContent = $domainRules['customCode'];
$scriptElement->setAttribute('type', 'text/javascript');
- $scriptElement->appendChild($dom->createTextNode($domainRules['customCode']));
- $dom->getElementsByTagName('body')[0]->appendChild($scriptElement);
+ $dom->querySelector('body')?->append($scriptElement);
}
- $this->removeUnwantedElements($dom, $xpath, $domainRules);
+ $this->removeUnwantedElements($dom, $domainRules);
}
/** Removes unwanted elements by rules */
- private function removeUnwantedElements($dom, $xpath, $domainRules)
+ private function removeUnwantedElements($dom, $domainRules)
{
if (isset($domainRules['classAttrRemove'])) {
foreach ($domainRules['classAttrRemove'] as $class) {
- $elements = $xpath->query("//*[contains(@class, '$class')]");
- if ($elements !== false && $elements->length > 0) {
+ $elements = $dom->querySelectorAll("*[class~='$class']");
+ if ($elements->length > 0) {
foreach ($elements as $element) {
$this->removeClassNames($element, [$class]);
}
@@ -105,14 +99,11 @@ class URLAnalyzerProcess extends URLAnalyzerBase
}
if (isset($domainRules['removeElementsByTag'])) {
- $tagsToRemove = $domainRules['removeElementsByTag'];
- foreach ($tagsToRemove as $tag) {
- $tagElements = $xpath->query("//$tag");
- if ($tagElements !== false) {
- foreach ($tagElements as $element) {
- if ($element->parentNode) {
- $element->parentNode->removeChild($element);
- }
+ foreach ($domainRules['removeElementsByTag'] as $tag) {
+ $elements = $dom->querySelectorAll($tag);
+ if ($elements->length > 0) {
+ foreach ($elements as $element) {
+ $element->parentNode->removeChild($element);
}
$this->activatedRules[] = "removeElementsByTag: $tag";
}
@@ -121,13 +112,9 @@ class URLAnalyzerProcess extends URLAnalyzerBase
if (isset($domainRules['idElementRemove'])) {
foreach ($domainRules['idElementRemove'] as $id) {
- $elements = $xpath->query("//*[@id='$id']");
- if ($elements !== false && $elements->length > 0) {
- foreach ($elements as $element) {
- if ($element->parentNode) {
- $element->parentNode->removeChild($element);
- }
- }
+ $element = $dom->querySelector("#$id");
+ if ($element) {
+ $element->parentNode->removeChild($element);
$this->activatedRules[] = "idElementRemove: $id";
}
}
@@ -135,12 +122,10 @@ class URLAnalyzerProcess extends URLAnalyzerBase
if (isset($domainRules['classElementRemove'])) {
foreach ($domainRules['classElementRemove'] as $class) {
- $elements = $xpath->query("//*[contains(@class, '$class')]");
- if ($elements !== false && $elements->length > 0) {
+ $elements = $dom->querySelectorAll(".$class");
+ if ($elements->length > 0) {
foreach ($elements as $element) {
- if ($element->parentNode) {
- $element->parentNode->removeChild($element);
- }
+ $element->parentNode->removeChild($element);
}
$this->activatedRules[] = "classElementRemove: $class";
}
@@ -149,23 +134,33 @@ class URLAnalyzerProcess extends URLAnalyzerBase
if (isset($domainRules['scriptTagRemove'])) {
foreach ($domainRules['scriptTagRemove'] as $script) {
- $scriptElements = $xpath->query("//script[contains(@src, '$script')] | //script[contains(text(), '$script')]");
- if ($scriptElements !== false && $scriptElements->length > 0) {
- foreach ($scriptElements as $element) {
- if ($element->parentNode) {
- $element->parentNode->removeChild($element);
- }
+ $found = false;
+ $elements = $dom->querySelectorAll("script[src*='$script']");
+ if ($elements->length > 0) {
+ $found = true;
+ foreach ($elements as $element) {
+ $element->parentNode->removeChild($element);
}
- $this->activatedRules[] = "scriptTagRemove: $script";
}
- $linkElements = $xpath->query("//link[@as='script' and contains(@href, '$script') and @type='application/javascript']");
- if ($linkElements !== false && $linkElements->length > 0) {
- foreach ($linkElements as $element) {
- if ($element->parentNode) {
- $element->parentNode->removeChild($element);
- }
+ $elements = $dom->querySelectorAll("link[as='script'][href*='$script']");
+ if ($elements->length > 0) {
+ $found = true;
+ foreach ($elements as $element) {
+ $element->parentNode->removeChild($element);
}
+ }
+
+ $xpath = new XPath($dom);
+ $elements = $xpath->query("//script[contains(text(), '$script')]");
+ if ($elements->length > 0) {
+ $found = true;
+ foreach ($elements as $element) {
+ $element->parentNode->removeChild($element);
+ }
+ }
+
+ if ($found) {
$this->activatedRules[] = "scriptTagRemove: $script";
}
}
@@ -173,72 +168,63 @@ class URLAnalyzerProcess extends URLAnalyzerBase
if (isset($domainRules['removeCustomAttr'])) {
foreach ($domainRules['removeCustomAttr'] as $attrPattern) {
+ $found = false;
if (strpos($attrPattern, '*') !== false) {
- $elements = $xpath->query('//*');
- if ($elements !== false) {
- $pattern = '/^' . str_replace('*', '.*', $attrPattern) . '$/';
- foreach ($elements as $element) {
- if ($element->hasAttributes()) {
- $attrs = [];
- foreach ($element->attributes as $attr) {
- if (preg_match($pattern, $attr->name)) {
- $attrs[] = $attr->name;
- }
- }
- foreach ($attrs as $attr) {
- $element->removeAttribute($attr);
- }
+ $pattern = '/^' . str_replace('*', '.*', $attrPattern) . '$/';
+ foreach ($dom->querySelectorAll('*') as $element) {
+ foreach (iterator_to_array($element->attributes, false) as $attr) { // iterate a snapshot: the attribute map is live, so removing entries while walking it directly skips or ends the traversal
+ if (preg_match($pattern, $attr->name)) {
+ $element->removeAttribute($attr->name);
+ $found = true;
}
}
- $this->activatedRules[] = "removeCustomAttr: $attrPattern";
}
} else {
- $elements = $xpath->query("//*[@$attrPattern]");
- if ($elements !== false && $elements->length > 0) {
+ $elements = $dom->querySelectorAll("[$attrPattern]");
+ if ($elements->length > 0) {
+ $found = true;
foreach ($elements as $element) {
$element->removeAttribute($attrPattern);
}
- $this->activatedRules[] = "removeCustomAttr: $attrPattern";
}
}
+ if ($found) {
+ $this->activatedRules[] = "removeCustomAttr: $attrPattern";
+ }
}
}
}
/** Cleans problematic inline styles */
- private function cleanInlineStyles($xpath)
+ private function cleanInlineStyles($dom)
{
- $elements = $xpath->query("//*[@style]");
- if ($elements !== false) {
- foreach ($elements as $element) {
- if ($element instanceof Element) {
- $style = $element->getAttribute('style');
- $style = preg_replace('/(max-height|height|overflow|position|display|visibility)\s*:\s*[^;]+;?/', '', $style);
- $element->setAttribute('style', $style);
- }
- }
+ $elements = $dom->querySelectorAll("[style]");
+ foreach ($elements as $element) {
+ $style = $element->getAttribute('style');
+ $style = preg_replace('/(?<![\w-])(max-height|height|overflow|position|display|visibility)\s*:\s*[^;]+;?/', '', $style); // lookbehind: only strip whole property names, so line-height, min-height, background-position etc. are not cut in half
+ $element->setAttribute('style', $style);
}
}
/** Adds branded bar to page */
- private function addBrandBar($dom, $xpath, $url)
+ private function addBrandBar($dom, $url)
{
- $body = $dom->getElementsByTagName('body')[0];
+ $body = $dom->querySelector('body');
if ($body) {
$brandDiv = $dom->createElement('div');
$brandDiv->setAttribute('style', 'z-index: 2147483647; position: fixed; top: 0; right: 1rem; display: flex; gap: 8px;');
$linkHtml = '';
$siteHtml = '';
$brandDiv->innerHTML = $linkHtml . $siteHtml;
- $body->appendChild($brandDiv);
+ $body->append($brandDiv);
}
}
/** Adds debug info bar in debug mode */
- private function addDebugBar($dom, $xpath)
+ private function addDebugBar($dom)
{
if (defined('LOG_LEVEL') && LOG_LEVEL === 'DEBUG') {
- $body = $dom->getElementsByTagName('body')[0];
+ $body = $dom->querySelector('body');
if ($body) {
$debugDiv = $dom->createElement('div');
$debugDiv->setAttribute('style', 'z-index: 2147483647; position: fixed; bottom: 1rem; right: 1rem; max-width: 400px; padding: 1rem; color: #000; background: rgba(255, 255, 255, 0.9); backdrop-filter: blur(8px); border: 1px solid #e5e7eb; border-radius: 0.5rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); overflow: auto; max-height: 80vh; z-index: 2147483647; font-family: monospace; font-size: 13px; line-height: 1.4;');
@@ -246,16 +232,16 @@ class URLAnalyzerProcess extends URLAnalyzerBase
if (empty($this->activatedRules)) {
$ruleElement = $dom->createElement('div');
$ruleElement->textContent = 'No rules activated / Nenhuma regra ativada';
- $debugDiv->appendChild($ruleElement);
+ $debugDiv->append($ruleElement);
} else {
foreach ($this->activatedRules as $rule) {
$ruleElement = $dom->createElement('div');
$ruleElement->textContent = $rule;
- $debugDiv->appendChild($ruleElement);
+ $debugDiv->append($ruleElement);
}
}
- $body->appendChild($debugDiv);
+ $body->append($debugDiv);
}
}
}
@@ -280,45 +266,33 @@ class URLAnalyzerProcess extends URLAnalyzerBase
}
/** Converts relative URLs to absolute */
- private function fixRelativeUrls($dom, $xpath, $baseUrl)
+ private function fixRelativeUrls($dom, $baseUrl)
{
$parsedBase = parse_url($baseUrl);
- $baseHost = $parsedBase['scheme'] . '://' . $parsedBase['host'];
+ $baseHost = ($parsedBase['scheme'] ?? 'http') . '://' . $parsedBase['host'];
- $elements = $xpath->query("//*[@src]");
- if ($elements !== false) {
- foreach ($elements as $element) {
- if ($element instanceof Element) {
- $src = $element->getAttribute('src');
- if (strpos($src, 'base64') !== false) {
- continue;
- }
- if (strpos($src, 'http') !== 0 && strpos($src, '//') !== 0) {
- $src = ltrim($src, '/');
- $element->setAttribute('src', $baseHost . '/' . $src);
- }
- }
+ foreach ($dom->querySelectorAll('[src]') as $element) {
+ $src = $element->getAttribute('src');
+ if (str_starts_with($src, 'data:')) {
+ continue;
+ }
+ if (!str_starts_with($src, 'http') && !str_starts_with($src, '//')) {
+ $element->setAttribute('src', $baseHost . '/' . ltrim($src, '/'));
}
}
- $elements = $xpath->query("//*[@href]");
- if ($elements !== false) {
- foreach ($elements as $element) {
- if ($element instanceof Element) {
- $href = $element->getAttribute('href');
- if (
- strpos($href, 'mailto:') === 0 ||
- strpos($href, 'tel:') === 0 ||
- strpos($href, 'javascript:') === 0 ||
- strpos($href, '#') === 0
- ) {
- continue;
- }
- if (strpos($href, 'http') !== 0 && strpos($href, '//') !== 0) {
- $href = ltrim($href, '/');
- $element->setAttribute('href', $baseHost . '/' . $href);
- }
- }
+ foreach ($dom->querySelectorAll('[href]') as $element) {
+ $href = $element->getAttribute('href');
+ if (
+ str_starts_with($href, 'mailto:') ||
+ str_starts_with($href, 'tel:') ||
+ str_starts_with($href, 'javascript:') ||
+ str_starts_with($href, '#')
+ ) {
+ continue;
+ }
+ if (!str_starts_with($href, 'http') && !str_starts_with($href, '//')) {
+ $element->setAttribute('href', $baseHost . '/' . ltrim($href, '/'));
}
}
}