mirror of
https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker.git
synced 2025-09-02 18:50:13 +00:00
Test new updated generate-google-exclude file generator
This commit is contained in:
parent
1dcae2a913
commit
26fe753787
2 changed files with 123 additions and 24 deletions
98
travisCI/_backup/generate-google-exclude.php
Executable file
98
travisCI/_backup/generate-google-exclude.php
Executable file
|
@ -0,0 +1,98 @@
|
||||||
|
<?php
|
||||||
|
/* Google Exclude File Generator Script for the Nginx Ultimate Bad Bot Blocker
|
||||||
|
* Created by: Mitchell Krog (mitchellkrog@gmail.com)
|
||||||
|
* Copyright: Mitchell Krog - https://github.com/mitchellkrogza
|
||||||
|
* Repo Url: https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker
|
||||||
|
* MIT License
|
||||||
|
* Copyright (c) 2017 Mitchell Krog - mitchellkrog@gmail.com
|
||||||
|
*/
|
||||||
|
/**
 * Builds the google-exclude-0N.txt files from the bad-referrers list.
 *
 * The referrer list is read, normalised, de-duplicated and sorted, then
 * joined into one '|'-separated regular expression which is split into
 * files that each stay within Google's 30,000 character limit.
 */
class Generate
{
    private $projectUrl = "https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker";

    /**
     * Entry point: load the referrer list and write the exclude files.
     *
     * @throws \RuntimeException when the referrer list cannot be opened
     */
    public function generateFiles()
    {
        date_default_timezone_set('Africa/Johannesburg');
        $lines = $this->domainWorker();
        $this->createGoogleExclude($lines);
    }

    /**
     * Read the referrer list, clean every entry and rewrite the list file
     * de-duplicated and sorted.
     *
     * Runs of whitespace are collapsed, entries containing Cyrillic letters
     * are passed through IdnaConvert::encode(), and empty entries are dropped.
     *
     * @return array the de-duplicated, sorted entries (the same content that
     *               is written back to the list file)
     *
     * @throws \RuntimeException when the referrer list cannot be opened
     */
    public function domainWorker()
    {
        $domainsFile = "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/_generator_lists/bad-referrers.list";
        $handle = fopen($domainsFile, "r");
        if (!$handle) {
            throw new \RuntimeException('Error opening file ' . $domainsFile);
        }

        $lines = array();
        // Created on first use only, then reused for every following entry.
        // NOTE(review): IdnaConvert is neither imported nor required in this
        // file as shown; it has to be loaded by the environment - confirm.
        $idn = null;
        while (($line = fgets($handle)) !== false) {
            $line = trim(preg_replace('/\s\s+/', ' ', $line));
            // convert russian domains
            if (preg_match('/[А-Яа-яЁё]/u', $line)) {
                if ($idn === null) {
                    $idn = new IdnaConvert();
                }
                $line = $idn->encode($line);
            }
            if (empty($line)) {
                continue;
            }
            $lines[] = $line;
        }
        fclose($handle);

        $uniqueLines = array_unique($lines, SORT_STRING);
        sort($uniqueLines, SORT_STRING);
        if (is_writable($domainsFile)) {
            file_put_contents($domainsFile, implode("\n", $uniqueLines));
        } else {
            trigger_error("Permission denied");
        }

        // Return the cleaned list rather than the raw one so duplicate
        // entries do not eat into the character budget of the exclude files.
        return $uniqueLines;
    }

    /**
     * Write $data to $file and make the file mode 0755.
     *
     * Failures are reported through trigger_error() instead of exceptions so
     * one unwritable file does not abort the whole run.
     *
     * @param $file
     * @param $data
     */
    protected function writeToFile($file, $data)
    {
        // is_writable() is false for a file that does not exist yet, so for
        // a new file the containing directory is what has to be writable.
        $target = file_exists($file) ? $file : dirname($file);
        if (!is_writable($target)) {
            trigger_error("Permission denied");
            return;
        }

        if (file_put_contents($file, $data) === false) {
            trigger_error("Could not write " . basename($file));
            return;
        }

        if (!chmod($file, 0755)) {
            trigger_error("Couldn't set " . basename($file) . " permissions to 755");
        }
    }

    /**
     * Create the Google exclude files, splitting the combined regular
     * expression at Google's 30,000 character limit.
     *
     * Every entry is escaped with preg_quote() and the entries are joined
     * with '|'. Empty entries are skipped because an empty alternative would
     * make the whole expression match everything. Nothing is written when
     * there are no entries.
     *
     * @param array $lines
     */
    public function createGoogleExclude(array $lines)
    {
        $regexLines = [];
        foreach ($lines as $line) {
            if ($line === '' || $line === null) {
                continue;
            }
            $regexLines[] = preg_quote($line);
        }

        $googleLimit = 30000;
        $chunks = $this->splitAtPipes(implode('|', $regexLines), $googleLimit);

        // Files are numbered from 1: google-exclude-01.txt, -02.txt, ...
        foreach ($chunks as $index => $dataSplit) {
            $file = '/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/google-exclude-0' . ($index + 1) . '.txt';
            $this->writeToFile($file, $dataSplit);
        }
    }

    /**
     * Split a '|'-separated expression into pieces cut only at a '|'.
     *
     * A piece is cut at the last '|' inside the next $limit characters, and
     * that separator is dropped, so no piece starts or ends with '|'.
     *
     * @param string $data  the complete expression
     * @param int    $limit maximum piece length in bytes
     *
     * @return array the pieces, in order
     */
    private function splitAtPipes($data, $limit)
    {
        $chunks = [];
        $length = strlen($data);
        $offset = 0;

        while ($offset < $length) {
            // Rest of the regex (no pipe at the end)
            if (($length - $offset) < $limit) {
                $chunks[] = substr($data, $offset);
                break;
            }

            // search for the last occurrence of | in the boundary limits
            $pipe = strrpos(substr($data, $offset, $limit), '|');
            if ($pipe === false) {
                // A single entry is longer than the limit and cannot be cut
                // without corrupting it: emit it whole and say so.
                trigger_error("Entry longer than " . $limit . " characters", E_USER_WARNING);
                $next = strpos($data, '|', $offset);
                $pipe = ($next === false) ? $length - $offset : $next - $offset;
            }

            $chunks[] = substr($data, $offset, $pipe);
            // skip the separator so the next piece does not start with it
            $offset += $pipe + 1;
        }

        return $chunks;
    }
}
|
||||||
|
// Script entry point: build the generator and produce the exclude files.
$generator = new Generate;
$generator->generateFiles();
|
43
travisCI/generate-google-exclude.php
Executable file → Normal file
43
travisCI/generate-google-exclude.php
Executable file → Normal file
|
@ -6,10 +6,14 @@
|
||||||
* MIT License
|
* MIT License
|
||||||
* Copyright (c) 2017 Mitchell Krog - mitchellkrog@gmail.com
|
* Copyright (c) 2017 Mitchell Krog - mitchellkrog@gmail.com
|
||||||
*/
|
*/
|
||||||
class Generate
|
namespace mitchellkrogza;
|
||||||
{
|
|
||||||
|
|
||||||
|
use Mso\IdnaConvert\IdnaConvert;
|
||||||
|
|
||||||
|
class Generator
|
||||||
|
{
|
||||||
private $projectUrl = "https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker";
|
private $projectUrl = "https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker";
|
||||||
|
|
||||||
public function generateFiles()
|
public function generateFiles()
|
||||||
{
|
{
|
||||||
date_default_timezone_set('Africa/Johannesburg');
|
date_default_timezone_set('Africa/Johannesburg');
|
||||||
|
@ -18,11 +22,12 @@ class Generate
|
||||||
$this->createGoogleExclude($lines);
|
$this->createGoogleExclude($lines);
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
|
* Open our input domain list and create our array
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
public function domainWorker()
|
public function domainWorker()
|
||||||
{
|
{
|
||||||
$domainsFile = "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/_generator_lists/bad-referrers.list";
|
$domainsFile = __DIR__ . "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/_generator_lists/bad-referrers.list";
|
||||||
$handle = fopen($domainsFile, "r");
|
$handle = fopen($domainsFile, "r");
|
||||||
if (!$handle) {
|
if (!$handle) {
|
||||||
throw new \RuntimeException('Error opening file ' . $domainsFile);
|
throw new \RuntimeException('Error opening file ' . $domainsFile);
|
||||||
|
@ -50,21 +55,23 @@ class Generate
|
||||||
}
|
}
|
||||||
return $lines;
|
return $lines;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $file
|
* Write to File Function
|
||||||
|
* @param $filename
|
||||||
* @param $data
|
* @param $data
|
||||||
*/
|
*/
|
||||||
protected function writeToFile($file, $data)
|
protected function writeToFile($filename, $data)
|
||||||
{
|
{
|
||||||
if (is_writable($file)) {
|
$file = __DIR__ . "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/$filename";
|
||||||
file_put_contents($file, $data);
|
$handle = fopen($file, 'w') or die('Cannot open file: '.$file);
|
||||||
if (!chmod($file, 0755)) {
|
fwrite($handle, $data);
|
||||||
trigger_error("Couldn't not set " . basename($file) . " permissions to 755");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
trigger_error("Permission denied");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Google Exclude Files Splitting them at Google's 30,000 Character Limit
|
||||||
|
* @param $lines
|
||||||
|
*/
|
||||||
public function createGoogleExclude(array $lines)
|
public function createGoogleExclude(array $lines)
|
||||||
{
|
{
|
||||||
$regexLines = [];
|
$regexLines = [];
|
||||||
|
@ -74,25 +81,19 @@ class Generate
|
||||||
$data = implode('|', $regexLines);
|
$data = implode('|', $regexLines);
|
||||||
$googleLimit = 30000;
|
$googleLimit = 30000;
|
||||||
$dataLength = strlen($data);
|
$dataLength = strlen($data);
|
||||||
// keep track of the last split
|
|
||||||
$lastPosition = 0;
|
$lastPosition = 0;
|
||||||
for ($x = 1; $lastPosition < $dataLength; $x++) {
|
for ($x = 1; $lastPosition < $dataLength; $x++) {
|
||||||
// already in the boundary limits?
|
|
||||||
if( ($dataLength-$lastPosition) >= $googleLimit){
|
if( ($dataLength-$lastPosition) >= $googleLimit){
|
||||||
// search for the last occurence of | in the boundary limits
|
|
||||||
$pipePosition = strrpos(substr($data, $lastPosition, $googleLimit), '|');
|
$pipePosition = strrpos(substr($data, $lastPosition, $googleLimit), '|');
|
||||||
$dataSplit = substr($data, $lastPosition, $pipePosition);
|
$dataSplit = substr($data, $lastPosition, $pipePosition);
|
||||||
// without trailing pipe at the beginning of next round
|
|
||||||
$lastPosition = $lastPosition + $pipePosition+1;
|
$lastPosition = $lastPosition + $pipePosition+1;
|
||||||
}else{
|
}else{
|
||||||
// Rest of the regex (no pipe at the end)
|
|
||||||
$dataSplit = substr($data, $lastPosition);
|
$dataSplit = substr($data, $lastPosition);
|
||||||
$lastPosition = $dataLength; // Break
|
$lastPosition = $dataLength; // Break
|
||||||
}
|
}
|
||||||
$file = '/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/google-exclude-0' . $x . '.txt';
|
$this->writeToFile('google-exclude-0' . $x . '.txt', $dataSplit);
|
||||||
$this->writeToFile($file, $dataSplit);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$generator = new Generate();
|
$generator = new Generator();
|
||||||
$generator->generateFiles();
|
$generator->generateFiles();
|
Loading…
Add table
Reference in a new issue