mirror of
https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker.git
synced 2025-09-10 15:34:24 +00:00
Fix robots.txt generator.
This commit is contained in:
parent
bc958f5a8c
commit
1224b15c51
6 changed files with 29 additions and 1 deletions
0
.dev-tools/_robots_input/.keep
Normal file
0
.dev-tools/_robots_input/.keep
Normal file
0
.dev-tools/_robots_input/robots-input.txt
Normal file
0
.dev-tools/_robots_input/robots-input.txt
Normal file
|
@ -33,7 +33,7 @@
|
|||
# Set Input Files
|
||||
# ***************
|
||||
|
||||
_input1=$TRAVIS_BUILD_DIR/_generator_lists/bad-user-agents.list
|
||||
_input1=$TRAVIS_BUILD_DIR/.dev-tools/_robots_input/robots-input.txt
|
||||
_tmprobots=/tmp/robots.txt
|
||||
|
||||
# ******************
|
||||
|
|
|
@ -125,6 +125,7 @@ sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/generate-regex-format-referrers.php
|
|||
sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/modify-config-readme-files.sh
|
||||
sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/modify-files-and-commit.sh
|
||||
sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/run-curl-tests.sh
|
||||
sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/prepare-robots-input.sh
|
||||
|
||||
# *****************************************************************************************
|
||||
# Travis now moves into running the rest of the tests in the script: section of .travis.yml
|
||||
|
|
|
@ -75,6 +75,7 @@ git checkout master
|
|||
php ./.dev-tools/generate-regex-format-referrers.php
|
||||
sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-blacklist.sh
|
||||
sudo $TRAVIS_BUILD_DIR/.dev-tools/modify-config-readme-files.sh
|
||||
sudo $TRAVIS_BUILD_DIR/.dev-tools/prepare-robots-input.sh
|
||||
sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-robots.sh
|
||||
sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-google-disavow.sh
|
||||
php ./.dev-tools/generate-google-exclude.php
|
||||
|
|
26
.dev-tools/prepare-robots-input.sh
Executable file
26
.dev-tools/prepare-robots-input.sh
Executable file
|
@ -0,0 +1,26 @@
|
|||
#!/bin/bash
# Prepare the input file for the robots.txt generator.
#
# Reads the master bad-user-agents list, replaces every backslash
# (the list stores regex-escaped names) with a space, then writes a
# sorted, de-duplicated copy to .dev-tools/_robots_input/robots-input.txt
# for generate-robots.sh to consume.
#
# Requires: TRAVIS_BUILD_DIR set to the repository checkout root.
#
# Created by: Mitchell Krog (mitchellkrog@gmail.com)
# Copyright: Mitchell Krog - https://github.com/mitchellkrogza

# Fail fast: abort on any command error or on an unset variable
# (e.g. TRAVIS_BUILD_DIR missing) instead of silently producing an
# empty/partial robots input file that downstream steps would accept.
set -euo pipefail

# Specify Input and Output File
# *****************************
_input="${TRAVIS_BUILD_DIR}/_generator_lists/bad-user-agents.list"
_output="${TRAVIS_BUILD_DIR}/.dev-tools/_robots_input/robots-input.txt"

# Truncate our output file
# (sudo: the file may be root-owned from earlier CI steps)
# ********************************************************
sudo truncate -s 0 "$_output"

# Strip the regex escapes: replace every backslash with a space so the
# entries read as plain user-agent names suitable for robots.txt
# ********************************************************************
sed 's/\\/ /g' "$_input" > "$_output"

# Sort our output file and remove dupes
# *************************************
sort -u "$_output" -o "$_output"
|
Loading…
Add table
Add a link
Reference in a new issue