Fix robots.txt generator.

This commit is contained in:
Mitchell Krog 2017-07-21 14:55:53 +02:00
parent bc958f5a8c
commit 1224b15c51
6 changed files with 29 additions and 1 deletion


@@ -33,7 +33,7 @@
 # Set Input Files
 # ***************
-_input1=$TRAVIS_BUILD_DIR/_generator_lists/bad-user-agents.list
+_input1=$TRAVIS_BUILD_DIR/.dev-tools/_robots_input/robots-input.txt
 _tmprobots=/tmp/robots.txt
 # ******************

@@ -125,6 +125,7 @@ sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/generate-regex-format-referrers.php
 sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/modify-config-readme-files.sh
 sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/modify-files-and-commit.sh
 sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/run-curl-tests.sh
+sudo chmod +x $TRAVIS_BUILD_DIR/.dev-tools/prepare-robots-input.sh
 # *****************************************************************************************
 # Travis now moves into running the rest of the tests in the script: section of .travis.yml

@@ -75,6 +75,7 @@ git checkout master
 php ./.dev-tools/generate-regex-format-referrers.php
 sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-blacklist.sh
 sudo $TRAVIS_BUILD_DIR/.dev-tools/modify-config-readme-files.sh
+sudo $TRAVIS_BUILD_DIR/.dev-tools/prepare-robots-input.sh
 sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-robots.sh
 sudo $TRAVIS_BUILD_DIR/.dev-tools/generate-google-disavow.sh
 php ./.dev-tools/generate-google-exclude.php
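With this ordering, prepare-robots-input.sh must have written the prepared list before generate-robots.sh reads it as its new _input1. A minimal sanity check along these lines could be placed ahead of the generator call; it is only a sketch and is not part of this commit:

_robots_input=$TRAVIS_BUILD_DIR/.dev-tools/_robots_input/robots-input.txt
# Hypothetical guard (not in the repo): abort early if the prepared robots input is missing or empty
if [ ! -s "$_robots_input" ]; then
    echo "robots input missing or empty: $_robots_input" >&2
    exit 1
fi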

@@ -0,0 +1,26 @@
+#!/bin/bash
+# Prepare the robots.txt input list from the bad user agents list
+# Created by: Mitchell Krog (mitchellkrog@gmail.com)
+# Copyright: Mitchell Krog - https://github.com/mitchellkrogza
+# Repo Url: https://github.com/mitchellkrogza/The-Big-List-of-Hacked-Malware-Web-Sites
+# Specify Input and Output Files
+# ******************************
+_input=$TRAVIS_BUILD_DIR/_generator_lists/bad-user-agents.list
+_output=$TRAVIS_BUILD_DIR/.dev-tools/_robots_input/robots-input.txt
+# Truncate our output file
+# ************************
+sudo truncate -s 0 $_output
+# Use sed to replace each \ in the input file with a space
+# *********************************************************
+sed 's/\\/ /g' $_input > $_output
+# Use cut to strip the domains out of the url strings
+# ***************************************************
+#cut -d'\' -f3 $_input > $_output
+# Sort our output file and remove dupes
+# *************************************
+sort -u $_output -o $_output
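Taken together, the new script strips the escaping backslashes out of the user agent entries and de-duplicates the result before generate-robots.sh consumes it. As an illustration only (the entry below is invented, not taken from bad-user-agents.list), the sed step behaves like this:

# Hypothetical input line, piped through the same substitution the script uses
printf 'Some\\ Bad\\ Bot\n' | sed 's/\\/ /g'
# prints: Some  Bad  Bot   (each backslash is replaced by a space)

The final sort -u then removes any duplicate lines the substitution may produce, writing the cleaned list back to robots-input.txt in place.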