nginx-ultimate-bad-bot-blocker/.dev-tools/prepare-robots-input.sh
2017-08-21 11:34:24 +02:00

47 lines
2.1 KiB
Bash
Executable file

#!/bin/bash
# Sort Domain into a plain text file with domain names only
# Created by: Mitchell Krog (mitchellkrog@gmail.com)
# Copyright: Mitchell Krog - https://github.com/mitchellkrogza
# Repo Url: https://github.com/mitchellkrogza/The-Big-List-of-Hacked-Malware-Web-Sites
##############################################################################
#        _  __     _                                                         #
#       / |/ /__ _(_)__ __ __                                                #
#      /    / _ `/ / _ \\ \ /                                                #
#     /_/|_/\_, /_/_//_/_\_\                                                 #
#      __/___/ __  ___  ___  __  ___  __     __                              #
#     / _ )___ ____/ / / _ )___ / /_  / _ )/ /__  ____/ /_____ ____          #
#    / _  / _ `/ _  / / _  / _ \/ __/ / _  / / _ \/ __/ '_/ -_) __/          #
#   /____/\_,_/\_,_/ /____/\___/\__/ /____/_/\___/\__/_/\_\\__/_/            #
#                                                                            #
##############################################################################

# Abort on errors, unset variables and failures anywhere in a pipeline,
# so a failed sed can never feed stale data into the sort step below.
set -euo pipefail

# ***************************************************************
# prepare_robots_input <input> <output>
# Strip the backslash escapes out of <input> ("bad\ bot" becomes
# "bad bot"), then sort the result and remove duplicate lines,
# writing the final list to <output>.
# ***************************************************************
prepare_robots_input() {
  local input=$1
  local output=$2

  # Truncate/create the output file. No sudo required: the file lives
  # inside the build tree and is owned by the current user. (The sed
  # redirection below truncates it anyway; this just guarantees the
  # file exists even if sed fails.)
  : > "$output"

  # Use sed to strip the "\ " escape sequences out of the input file.
  sed 's/\\ / /g' "$input" > "$output"

  # Sort the output file in place and remove dupes.
  sort -u "$output" -o "$output"
}

# Run automatically only inside the CI environment. Previously an unset
# TRAVIS_BUILD_DIR made the paths collapse to bogus root-level locations
# ("/.dev-tools/...") that sudo then operated on silently; now we warn
# on stderr and do nothing instead.
if [[ -n "${TRAVIS_BUILD_DIR:-}" ]]; then
  prepare_robots_input \
    "$TRAVIS_BUILD_DIR/_generator_lists/bad-user-agents.list" \
    "$TRAVIS_BUILD_DIR/.dev-tools/_robots_input/robots-input.txt"
else
  printf 'TRAVIS_BUILD_DIR is not set - nothing to do\n' >&2
fi