From 24df6f7b78c025d5d1952c4d710b7966d13561a6 Mon Sep 17 00:00:00 2001 From: Mitchell Krog Date: Fri, 21 Jun 2019 08:45:37 +0200 Subject: [PATCH] Fix Regex and User-Agent Logic Order of Test Unit REF: #264 --- .../generate-blacklist-testing-of-changes.sh | 2 +- .dev-tools/globalblacklist-testing.template | 164 +++++++++--------- .travis.yml | 4 +- 3 files changed, 87 insertions(+), 83 deletions(-) diff --git a/.dev-tools/generate-blacklist-testing-of-changes.sh b/.dev-tools/generate-blacklist-testing-of-changes.sh index 80bbffda9..60a8e0e82 100755 --- a/.dev-tools/generate-blacklist-testing-of-changes.sh +++ b/.dev-tools/generate-blacklist-testing-of-changes.sh @@ -237,7 +237,7 @@ rm ${_inputdb3} printf '%s\n' "$_start4" >> ${_tmpnginx4} while IFS= read -r LINE do -printf '\t"~*%s%s"\t\t%s\n' "/${LINE}" "?(\b)/" "$_action4" >> ${_tmpnginx4} +printf '\t"~*%s%s"\t\t%s\n' "${LINE}" "(?:\.|\b|\/)" "$_action4" >> ${_tmpnginx4} #printf '\t"~*%s"\t\t%s\n' "${LINE}" "$_action4" >> ${_tmpnginx4} done < ${_input4} printf '%s\n' "$_end4" >> ${_tmpnginx4} diff --git a/.dev-tools/globalblacklist-testing.template b/.dev-tools/globalblacklist-testing.template index ab897a1ec..80f3d0a59 100644 --- a/.dev-tools/globalblacklist-testing.template +++ b/.dev-tools/globalblacklist-testing.template @@ -130,86 +130,6 @@ map $http_user_agent $bad_bot { # END CUSTOM BLACKLISTED USER AGENTS ### DO NOT EDIT OR REMOVE THIS LINE AT ALL ### # ********************************************************************************* - -# *********************************************** -# Allow Good User-Agent Strings We Know and Trust -# *********************************************** - -# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*\badidxbot\b" 0; - "~*\bAdsBot-Google\b" 0; - "~*\baolbuild\b" 0; - "~*\bbingbot\b" 0; - "~*\bbingpreview\b" 0; - "~*\bDoCoMo\b" 0; - "~*\bduckduckgo\b" 0; - "~*\bfacebookexternalhit\b" 0; - "~*\bFeedfetcher-Google\b" 0; - "~*\bGooglebot\b" 0; - "~*\bGooglebot-Image\b" 0; - "~*\bGooglebot-Mobile\b" 0; - "~*\bGooglebot-News\b" 0; - "~*\bGooglebot/Test\b" 0; - "~*\bGooglebot-Video\b" 0; - "~*\bGoogle-HTTP-Java-Client\b" 0; - "~*\bGravityscan\b" 0; - "~*\bgsa-crawler\b" 0; - "~*\bJakarta\ Commons\b" 0; - "~*\bKraken/0.1\b" 0; - "~*\bLinkedInBot\b" 0; - "~*\bMediapartners-Google\b" 0; - "~*\bmsnbot\b" 0; - "~*\bmsnbot-media\b" 0; - "~*\bSAMSUNG\b" 0; - "~*\bSlackbot\b" 0; - "~*\bSlackbot-LinkExpanding\b" 0; - "~*\bslurp\b" 0; - "~*\bteoma\b" 0; - "~*\bTwitterBot\b" 0; - "~*\bWordpress\b" 0; - "~*\byahoo\b" 0; -# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### - -# *************************************************** -# User-Agent Strings Allowed Through but Rate Limited -# *************************************************** - -# Some people block libwww-perl, it used widely in many valid (non rogue) agents -# I allow libwww-perl as I use it for monitoring systems with Munin but it is rate limited - -# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*\bjetmon\b" 1; - "~*\blibwww-perl\b" 1; - "~*\bLynx\b" 1; - "~*\bmunin\b" 1; - "~*\bPresto\b" 1; - "~*\bWget/1.15\b" 1; -# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - -# ************************************************************** -# Rate Limited User-Agents who get a bit aggressive on bandwidth -# ************************************************************** - -# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*\bAlexa\b" 2; - "~*\barchive.org\b" 2; - "~*\bBaidu\b" 2; - "~*\bBUbiNG\b" 2; - "~*\bFlipboardProxy\b" 2; - "~*\bia_archiver\b" 2; - "~*\bMSIE\ 7.0\b" 2; - "~*\bProximic\b" 2; - "~*\bR6_CommentReader\b" 2; - "~*\bR6_FeedFetcher\b" 2; - "~*\bRED/1\b" 2; - "~*\bRPT-HTTPClient\b" 2; - "~*\bsfFeedReader/0.9\b" 2; - "~*\bSpaidu\b" 2; - "~*\bUptimeRobot/2.0\b" 2; - "~*\bYandexBot\b" 2; - "~*\bYandexImages\b" 2; -# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - # ********************************************* # Bad User-Agent Strings That We Block Outright # ********************************************* @@ -776,6 +696,90 @@ map $http_user_agent $bad_bot { "~*/ZumBot?(\b)/" 3; "~*/ZyBorg?(\b)/" 3; # END BAD BOTS ### DO NOT EDIT THIS LINE AT ALL ### + +# *********************************************** +# Allow Good User-Agent Strings We Know and Trust +# *********************************************** + +# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*\badidxbot\b" 0; + "~*\bAdsBot-Google\b" 0; + "~*\baolbuild\b" 0; + "~*\bbingbot\b" 0; + "~*\bbingpreview\b" 0; + "~*\bDoCoMo\b" 0; + "~*\bduckduckgo\b" 0; + "~*\bfacebookexternalhit\b" 0; + "~*\bFeedfetcher-Google\b" 0; + "~*\bGooglebot\b" 0; + "~*\bGooglebot-Image\b" 0; + "~*\bGooglebot-Mobile\b" 0; + "~*\bGooglebot-News\b" 0; + "~*\bGooglebot/Test\b" 0; + "~*\bGooglebot-Video\b" 0; + "~*\bGoogle-HTTP-Java-Client\b" 0; + "~*\bGravityscan\b" 0; + "~*\bgsa-crawler\b" 0; + "~*\bJakarta\ Commons\b" 0; + "~*\bKraken/0.1\b" 0; + "~*\bLinkedInBot\b" 0; + "~*\bMediapartners-Google\b" 0; + "~*\bmsnbot\b" 0; + "~*\bmsnbot-media\b" 0; + "~*\bSAMSUNG\b" 0; + "~*\bSlackbot\b" 0; + "~*\bSlackbot-LinkExpanding\b" 0; + "~*\bslurp\b" 0; + "~*\bteoma\b" 0; + "~*\bTwitterBot\b" 0; + "~*\bWordpress\b" 0; + "~*\byahoo\b" 0; +# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### + +# *************************************************** +# User-Agent Strings Allowed Through but Rate Limited +# *************************************************** + +# Some people block libwww-perl, it used widely in many valid (non rogue) agents +# I allow libwww-perl as I use it for monitoring systems with Munin but it is rate limited + +# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*\bjetmon\b" 1; + "~*\blibwww-perl\b" 1; + "~*\bLynx\b" 1; + "~*\bmunin\b" 1; + "~*\bPresto\b" 1; + "~*\bWget/1.15\b" 1; +# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + +# ************************************************************** +# Rate Limited User-Agents who get a bit aggressive on bandwidth +# ************************************************************** + +# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*\bAlexa\b" 2; + "~*\barchive.org\b" 2; + "~*\bBaidu\b" 2; + "~*\bBUbiNG\b" 2; + "~*\bFlipboardProxy\b" 2; + "~*\bia_archiver\b" 2; + "~*\bMSIE\ 7.0\b" 2; + "~*\bProximic\b" 2; + "~*\bR6_CommentReader\b" 2; + "~*\bR6_FeedFetcher\b" 2; + "~*\bRED/1\b" 2; + "~*\bRPT-HTTPClient\b" 2; + "~*\bsfFeedReader/0.9\b" 2; + "~*\bSpaidu\b" 2; + "~*\bUptimeRobot/2.0\b" 2; + "~*\bYandexBot\b" 2; + "~*\bYandexImages\b" 2; +# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + + + + + } diff --git a/.travis.yml b/.travis.yml index 4bf51bc50..616998b94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,8 +42,8 @@ script: - bash .dev-tools/run-curl-tests-1.sh - bash .dev-tools/install-nginx-testing-of-changes.sh - bash .dev-tools/run-curl-tests-testing-of-changes.sh - - bash .dev-tools/install-nginx-2.sh - - bash .dev-tools/run-curl-tests-2.sh + #- bash .dev-tools/install-nginx-2.sh + #- bash .dev-tools/run-curl-tests-2.sh #- bash .dev-tools/install-nginx-3.sh #- bash .dev-tools/run-curl-tests-3.sh #- bash .dev-tools/install-nginx-4.sh