mirror of
https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker.git
synced 2025-09-04 11:40:22 +00:00
Clean up generate-blacklist script.
This commit is contained in:
parent
14168949a8
commit
84be6acbe4
2 changed files with 0 additions and 123 deletions
|
@ -1,98 +0,0 @@
|
|||
<?php
|
||||
/* Google Exclude File Generator Script for the Nginx Ultimate Bad Bot Blocker
|
||||
* Created by: Mitchell Krog (mitchellkrog@gmail.com)
|
||||
* Copyright: Mitchell Krog - https://github.com/mitchellkrogza
|
||||
* Repo Url: https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker
|
||||
* MIT License
|
||||
* Copyright (c) 2017 Mitchell Krog - mitchellkrog@gmail.com
|
||||
*/
|
||||
/**
 * Builds the "google-exclude" files from the bad referrer list.
 *
 * The referrer list is read, normalised, de-duplicated, sorted and written
 * back in place; the resulting domains are then escaped, joined with "|" and
 * split into numbered text files that each stay under a fixed length limit.
 */
class Generate
{
    private $projectUrl = "https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker";

    /**
     * Runs the whole generation: cleans the referrer list, then writes the
     * exclude files derived from it.
     *
     * @throws \RuntimeException when the referrer list cannot be opened
     */
    public function generateFiles()
    {
        // Kept for backward compatibility: callers may rely on the process
        // time zone being set even though no timestamp is produced here.
        date_default_timezone_set('Africa/Johannesburg');
        $lines = $this->domainWorker();
        $this->createGoogleExclude($lines);
    }

    /**
     * Reads the bad referrer list, normalises every entry and rewrites the
     * list de-duplicated and sorted.
     *
     * Runs of whitespace are collapsed, surrounding whitespace is trimmed and
     * entries containing Cyrillic letters are passed through
     * IdnaConvert::encode(). Blank entries are dropped.
     *
     * @return array the de-duplicated, sorted entries (the same content that
     *               is written back to the list file)
     *
     * @throws \RuntimeException when the referrer list cannot be opened
     */
    public function domainWorker()
    {
        $domainsFile = "/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/_generator_lists/bad-referrers.list";
        $handle = fopen($domainsFile, "r");
        if (!$handle) {
            throw new \RuntimeException('Error opening file ' . $domainsFile);
        }

        $lines = array();
        // Created lazily and reused, instead of once per Cyrillic line.
        $idn = null;
        while (($line = fgets($handle)) !== false) {
            $line = trim(preg_replace('/\s\s+/', ' ', $line));
            // convert russian domains
            if (preg_match('/[А-Яа-яЁё]/u', $line)) {
                if ($idn === null) {
                    $idn = new IdnaConvert();
                }
                $line = $idn->encode($line);
            }
            // Strict emptiness test: empty() would also discard the entry "0".
            if (!is_string($line) || $line === '') {
                continue;
            }
            $lines[] = $line;
        }
        fclose($handle);

        $uniqueLines = array_unique($lines, SORT_STRING);
        sort($uniqueLines, SORT_STRING);

        if (is_writable($domainsFile)) {
            file_put_contents($domainsFile, implode("\n", $uniqueLines));
        } else {
            trigger_error("Permission denied");
        }

        // Return what was written, so duplicates never reach the exclude
        // files and eat into their length budget.
        return $uniqueLines;
    }

    /**
     * Writes $data to $file and sets the file mode to 0755.
     *
     * Failures are reported through trigger_error() rather than exceptions,
     * so one unwritable file does not abort the remaining ones.
     *
     * @param $file path of the file to write
     * @param $data content to store
     */
    protected function writeToFile($file, $data)
    {
        // is_writable() is false for a path that does not exist yet, so for a
        // new file the containing directory is what has to be writable.
        $target = file_exists($file) ? $file : dirname($file);
        if (!is_writable($target)) {
            trigger_error("Permission denied");
            return;
        }

        if (file_put_contents($file, $data) === false) {
            trigger_error("Could not write " . basename($file));
            return;
        }

        if (!chmod($file, 0755)) {
            trigger_error("Could not set " . basename($file) . " permissions to 755");
        }
    }

    /**
     * Escapes the given entries, joins them with "|" and writes the result to
     * google-exclude-01.txt, google-exclude-02.txt, ... so that no file cuts
     * an entry in half.
     *
     * Nothing is written when $lines is empty.
     *
     * @param array $lines entries to turn into a regex alternation
     */
    public function createGoogleExclude(array $lines)
    {
        $regexLines = [];
        foreach ($lines as $line) {
            $regexLines[] = preg_quote($line);
        }
        $data = implode('|', $regexLines);

        // NOTE(review): presumably the maximum pattern length Google accepts
        // for an exclude filter - confirm against current Google limits.
        $googleLimit = 30000;
        $dataLength = strlen($data);
        // keep track of the last split
        $lastPosition = 0;

        for ($x = 1; $lastPosition < $dataLength; $x++) {
            if (($dataLength - $lastPosition) >= $googleLimit) {
                // search for the last occurrence of | inside the window
                $pipePosition = strrpos(substr($data, $lastPosition, $googleLimit), '|');
                if ($pipePosition === false) {
                    // A single entry is longer than the window. Emit it whole
                    // (up to the next separator or the end of the data)
                    // instead of producing empty chunks one byte at a time.
                    $nextPipe = strpos($data, '|', $lastPosition);
                    $pipePosition = ($nextPipe === false ? $dataLength : $nextPipe) - $lastPosition;
                }
                $dataSplit = substr($data, $lastPosition, $pipePosition);
                // skip the separator so the next chunk does not start with |
                $lastPosition = $lastPosition + $pipePosition + 1;
            } else {
                // Rest of the regex (no pipe at the end)
                $dataSplit = substr($data, $lastPosition);
                $lastPosition = $dataLength; // ends the loop
            }

            $file = '/home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/google-exclude-0' . $x . '.txt';
            $this->writeToFile($file, $dataSplit);
        }
    }
}
|
||||
// Script entry point: build the generator and produce the exclude files
// as soon as this file is executed.
$generator = new Generate;
$generator->generateFiles();
|
|
@ -49,7 +49,6 @@ _input1=$TRAVIS_BUILD_DIR/_generator_lists/good-user-agents.list
|
|||
_input2=$TRAVIS_BUILD_DIR/_generator_lists/allowed-user-agents.list
|
||||
_input3=$TRAVIS_BUILD_DIR/_generator_lists/limited-user-agents.list
|
||||
_input4=$TRAVIS_BUILD_DIR/_generator_lists/bad-user-agents.list
|
||||
#_input5=$TRAVIS_BUILD_DIR/_generator_lists/bad-referrers.list
|
||||
_input5=$TRAVIS_BUILD_DIR/travisCI/referrers-regex-format.txt
|
||||
_input6=$TRAVIS_BUILD_DIR/_generator_lists/google-ip-ranges.list
|
||||
_input7=$TRAVIS_BUILD_DIR/_generator_lists/bing-ip-ranges.list
|
||||
|
@ -244,30 +243,6 @@ rm $_inputdb4
|
|||
# BAD REFERERS - Create and Insert
|
||||
# ********************************
|
||||
|
||||
#printf '%s\n' "$_start5" >> "$_tmpnginx5"
|
||||
#while IFS= read -r LINE
|
||||
#do
|
||||
#printf '\t"~*%s"\t\t%s\n' "${LINE}" "$_action2" >> "$_tmpnginx5"
|
||||
#done < $_input5
|
||||
#printf '%s\n' "$_end5" >> "$_tmpnginx5"
|
||||
#mv $_tmpnginx5 $_inputdb5
|
||||
#ed -s $_inputdb5<<\IN
|
||||
#1,/# START BAD REFERRERS ### DO NOT EDIT THIS LINE AT ALL ###/d
|
||||
#/# END BAD REFERRERS ### DO NOT EDIT THIS LINE AT ALL ###/,$d
|
||||
#,d
|
||||
#.r /home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/travisCI/globalblacklist.template
|
||||
#/# START BAD REFERRERS ### DO NOT EDIT THIS LINE AT ALL ###/x
|
||||
#.t.
|
||||
#.,/# END BAD REFERRERS ### DO NOT EDIT THIS LINE AT ALL ###/-d
|
||||
#w /home/travis/build/mitchellkrogza/nginx-ultimate-bad-bot-blocker/travisCI/globalblacklist.template
|
||||
#q
|
||||
#IN
|
||||
#rm $_inputdb5
|
||||
|
||||
# ********************************
|
||||
# BAD REFERERS - Create and Insert
|
||||
# ********************************
|
||||
|
||||
printf '%s\n' "$_start5" >> "$_tmpnginx5"
|
||||
while IFS= read -r LINE
|
||||
do
|
||||
|
|
Loading…
Add table
Reference in a new issue