From 26fe7537878ddef0e4a45f380a0c9715b024ffad Mon Sep 17 00:00:00 2001 From: Mitchell Krog Date: Tue, 27 Jun 2017 11:07:23 +0200 Subject: [PATCH] Test new updated generate-google-exclude file generator --- travisCI/_backup/generate-google-exclude.php | 98 ++++++++++++++++++++ travisCI/generate-google-exclude.php | 49 +++++----- 2 files changed, 123 insertions(+), 24 deletions(-) create mode 100755 travisCI/_backup/generate-google-exclude.php mode change 100755 => 100644 travisCI/generate-google-exclude.php diff --git a/travisCI/_backup/generate-google-exclude.php b/travisCI/_backup/generate-google-exclude.php new file mode 100755 index 000000000..0cd6f63a7 --- /dev/null +++ b/travisCI/_backup/generate-google-exclude.php @@ -0,0 +1,98 @@ +domainWorker(); + $this->createGoogleExclude($lines); + } + /** + * @return array + */ + public function domainWorker() + { + $domainsFile = "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/_generator_lists/bad-referrers.list"; + $handle = fopen($domainsFile, "r"); + if (!$handle) { + throw new \RuntimeException('Error opening file ' . $domainsFile); + } + $lines = array(); + while (($line = fgets($handle)) !== false) { + $line = trim(preg_replace('/\s\s+/', ' ', $line)); + // convert russian domains + if (preg_match('/[А-Яа-яЁё]/u', $line)) { + $IDN = new IdnaConvert(); + $line = $IDN->encode($line); + } + if (empty($line)) { + continue; + } + $lines[] = $line; + } + fclose($handle); + $uniqueLines = array_unique($lines, SORT_STRING); + sort($uniqueLines, SORT_STRING); + if (is_writable($domainsFile)) { + file_put_contents($domainsFile, implode("\n", $uniqueLines)); + } else { + trigger_error("Permission denied"); + } + return $lines; + } + /** + * @param $file + * @param $data + */ + protected function writeToFile($file, $data) + { + if (is_writable($file)) { + file_put_contents($file, $data); + if (!chmod($file, 0755)) { + trigger_error("Couldn't not set " . basename($file) . " permissions to 755"); + } + } else { + trigger_error("Permission denied"); + } + } + public function createGoogleExclude(array $lines) + { + $regexLines = []; + foreach ($lines as $line) { + $regexLines[] = preg_quote($line); + } + $data = implode('|', $regexLines); + $googleLimit = 30000; + $dataLength = strlen($data); + // keep track of the last split + $lastPosition = 0; + for ($x = 1; $lastPosition < $dataLength; $x++) { + // already in the boundary limits? + if( ($dataLength-$lastPosition) >= $googleLimit){ + // search for the last occurence of | in the boundary limits + $pipePosition = strrpos(substr($data, $lastPosition, $googleLimit), '|'); + $dataSplit = substr($data, $lastPosition, $pipePosition); + // without trailing pipe at the beginning of next round + $lastPosition = $lastPosition + $pipePosition+1; + }else{ + // Rest of the regex (no pipe at the end) + $dataSplit = substr($data, $lastPosition); + $lastPosition = $dataLength; // Break + } + $file = '/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/google-exclude-0' . $x . '.txt'; + $this->writeToFile($file, $dataSplit); + } + } +} +$generator = new Generate(); +$generator->generateFiles(); \ No newline at end of file diff --git a/travisCI/generate-google-exclude.php b/travisCI/generate-google-exclude.php old mode 100755 new mode 100644 index 0cd6f63a7..6e328d3c9 --- a/travisCI/generate-google-exclude.php +++ b/travisCI/generate-google-exclude.php @@ -6,11 +6,15 @@ * MIT License * Copyright (c) 2017 Mitchell Krog - mitchellkrog@gmail.com */ -class Generate +namespace mitchellkrogza; + +use Mso\IdnaConvert\IdnaConvert; + +class Generator { - private $projectUrl = "https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker"; - public function generateFiles() + + public function generateFiles() { date_default_timezone_set('Africa/Johannesburg'); $date = date('Y-m-d H:i:s'); @@ -18,11 +22,12 @@ class Generate $this->createGoogleExclude($lines); } /** + * Open our input domain list and create our array * @return array */ public function domainWorker() { - $domainsFile = "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/_generator_lists/bad-referrers.list"; + $domainsFile = __DIR__ . "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/_generator_lists/bad-referrers.list"; $handle = fopen($domainsFile, "r"); if (!$handle) { throw new \RuntimeException('Error opening file ' . $domainsFile); @@ -50,22 +55,24 @@ class Generate } return $lines; } - /** - * @param $file + + /** + * Write to File Function + * @param $filename * @param $data */ - protected function writeToFile($file, $data) + protected function writeToFile($filename, $data) { - if (is_writable($file)) { - file_put_contents($file, $data); - if (!chmod($file, 0755)) { - trigger_error("Couldn't not set " . basename($file) . " permissions to 755"); - } - } else { - trigger_error("Permission denied"); - } + $file = __DIR__ . "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/$filename"; + $handle = fopen($file, 'w') or die('Cannot open file: '.$file); + fwrite($handle, $data); } - public function createGoogleExclude(array $lines) + + /** + * Create Google Exclude Files Splitting them at Google's 30,000 Character Limit + * @param $lines + */ + public function createGoogleExclude(array $lines) { $regexLines = []; foreach ($lines as $line) { @@ -74,25 +81,19 @@ class Generate $data = implode('|', $regexLines); $googleLimit = 30000; $dataLength = strlen($data); - // keep track of the last split $lastPosition = 0; for ($x = 1; $lastPosition < $dataLength; $x++) { - // already in the boundary limits? if( ($dataLength-$lastPosition) >= $googleLimit){ - // search for the last occurence of | in the boundary limits $pipePosition = strrpos(substr($data, $lastPosition, $googleLimit), '|'); $dataSplit = substr($data, $lastPosition, $pipePosition); - // without trailing pipe at the beginning of next round $lastPosition = $lastPosition + $pipePosition+1; }else{ - // Rest of the regex (no pipe at the end) $dataSplit = substr($data, $lastPosition); $lastPosition = $dataLength; // Break } - $file = '/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/google-exclude-0' . $x . '.txt'; - $this->writeToFile($file, $dataSplit); + $this->writeToFile('google-exclude-0' . $x . '.txt', $dataSplit); } } } -$generator = new Generate(); +$generator = new Generator(); $generator->generateFiles(); \ No newline at end of file