diff --git a/.dev-tools/_robots_input/.keep b/.dev-tools/_robots_input/.keep
new file mode 100644
index 000000000..e69de29bb
diff --git a/.dev-tools/_robots_input/robots-input.txt b/.dev-tools/_robots_input/robots-input.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/.dev-tools/generate-robots.sh b/.dev-tools/generate-robots.sh
index e9b3a4a73..abeb87115 100755
--- a/.dev-tools/generate-robots.sh
+++ b/.dev-tools/generate-robots.sh
@@ -33,7 +33,7 @@
 # Set Input Files
 # ***************
 
-_input1=$TRAVIS_BUILD_DIR/_generator_lists/bad-user-agents.list
+_input1=$TRAVIS_BUILD_DIR/.dev-tools/_robots_input/robots-input.txt
 _tmprobots=/tmp/robots.txt
 
 # ******************
diff --git a/.dev-tools/install-nginx.sh b/.dev-tools/install-nginx.sh
index 314f9a191..d2ac456ff 100755
--- a/.dev-tools/install-nginx.sh
+++ b/.dev-tools/install-nginx.sh
@@ -125,6 +125,7 @@ sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/generate-regex-format-referrers.php
 sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/modify-config-readme-files.sh
 sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/modify-files-and-commit.sh
 sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/run-curl-tests.sh
+sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/prepare-robots-input.sh
 
 # *****************************************************************************************
 # Travis now moves into running the rest of the tests in the script: section of .travis.yml
diff --git a/.dev-tools/modify-files-and-commit.sh b/.dev-tools/modify-files-and-commit.sh
index fed0209fc..b69e48447 100755
--- a/.dev-tools/modify-files-and-commit.sh
+++ b/.dev-tools/modify-files-and-commit.sh
@@ -75,6 +75,7 @@ git checkout master
 php ./.dev-tools/generate-regex-format-referrers.php
 sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-blacklist.sh
 sudo $TRAVIS_BUILD_DIR/.dev-tools/modify-config-readme-files.sh
+sudo $TRAVIS_BUILD_DIR/.dev-tools/prepare-robots-input.sh
 sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-robots.sh
 sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-google-disavow.sh
 php ./.dev-tools/generate-google-exclude.php
diff --git a/.dev-tools/prepare-robots-input.sh b/.dev-tools/prepare-robots-input.sh
new file mode 100755
index 000000000..366002085
--- /dev/null
+++ b/.dev-tools/prepare-robots-input.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Prepare the input file read by generate-robots.sh: a copy of the bad
+# user-agents list with every \ removed, sorted and free of duplicates
+# Created by: Mitchell Krog (mitchellkrog@gmail.com)
+# Copyright: Mitchell Krog - https://github.com/mitchellkrogza
+# Repo Url: https://github.com/mitchellkrogza/The-Big-List-of-Hacked-Malware-Web-Sites
+
+# Abort on any failed command, unset variable or failed pipeline stage
+# ********************************************************************
+set -euo pipefail
+
+# Specify Input and Output File
+# *****************************
+: "${TRAVIS_BUILD_DIR:?TRAVIS_BUILD_DIR must be set}"
+_input="$TRAVIS_BUILD_DIR/_generator_lists/bad-user-agents.list"
+_output="$TRAVIS_BUILD_DIR/.dev-tools/_robots_input/robots-input.txt"
+
+# Stop early when the input list is missing or unreadable
+# *******************************************************
+[[ -r "$_input" ]] || { printf 'Cannot read %s\n' "$_input" >&2; exit 1; }
+
+# Use sed to strip the \ out of the input file (removed outright, not
+# replaced by a space), then sort and remove dupes. sort -o replaces any
+# previous content of the output file, so no separate truncate is needed
+# **********************************************************************
+sed 's/\\//g' "$_input" | sort -u -o "$_output"