Fix regex and User-Agent logic order of the test unit (REF: #264)

This commit is contained in:
Mitchell Krog 2019-06-21 08:45:37 +02:00
parent 8ebc9af274
commit 24df6f7b78
No known key found for this signature in database
GPG key ID: C243C388553EDE5D
3 changed files with 87 additions and 83 deletions

View file

@ -237,7 +237,7 @@ rm ${_inputdb3}
# Emit one section of the generated nginx file: a start marker, one quoted
# "~*<pattern>" entry per input line, then an end marker. All output is
# appended to ${_tmpnginx4}.
printf '%s\n' "$_start4" >> ${_tmpnginx4}
# IFS= together with read -r keeps leading/trailing whitespace and backslashes
# of each input line intact.
while IFS= read -r LINE
do
# NOTE(review): this view is a diff with its +/- markers stripped; the first
# printf below is presumably the removed (old) statement and the second its
# replacement — confirm against the actual script before relying on both.
# Produces: <TAB>"~*/<LINE>?(\b)/"<TAB><TAB><action>
printf '\t"~*%s%s"\t\t%s\n' "/${LINE}" "?(\b)/" "$_action4" >> ${_tmpnginx4}
# Produces: <TAB>"~*<LINE>(?:\.|\b|\/)"<TAB><TAB><action>
# The regex suffix is passed as a %s argument, so printf writes its
# backslashes literally instead of interpreting them as escapes.
printf '\t"~*%s%s"\t\t%s\n' "${LINE}" "(?:\.|\b|\/)" "$_action4" >> ${_tmpnginx4}
# Disabled variant below: the bare entry with no suffix appended.
#printf '\t"~*%s"\t\t%s\n' "${LINE}" "$_action4" >> ${_tmpnginx4}
done < ${_input4}
# Close the section with its end marker.
printf '%s\n' "$_end4" >> ${_tmpnginx4}

View file

@ -130,86 +130,6 @@ map $http_user_agent $bad_bot {
# END CUSTOM BLACKLISTED USER AGENTS ### DO NOT EDIT OR REMOVE THIS LINE AT ALL ###
# *********************************************************************************
# ***********************************************
# Allow Good User-Agent Strings We Know and Trust
# ***********************************************
# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
"~*\badidxbot\b" 0;
"~*\bAdsBot-Google\b" 0;
"~*\baolbuild\b" 0;
"~*\bbingbot\b" 0;
"~*\bbingpreview\b" 0;
"~*\bDoCoMo\b" 0;
"~*\bduckduckgo\b" 0;
"~*\bfacebookexternalhit\b" 0;
"~*\bFeedfetcher-Google\b" 0;
"~*\bGooglebot\b" 0;
"~*\bGooglebot-Image\b" 0;
"~*\bGooglebot-Mobile\b" 0;
"~*\bGooglebot-News\b" 0;
"~*\bGooglebot/Test\b" 0;
"~*\bGooglebot-Video\b" 0;
"~*\bGoogle-HTTP-Java-Client\b" 0;
"~*\bGravityscan\b" 0;
"~*\bgsa-crawler\b" 0;
"~*\bJakarta\ Commons\b" 0;
"~*\bKraken/0.1\b" 0;
"~*\bLinkedInBot\b" 0;
"~*\bMediapartners-Google\b" 0;
"~*\bmsnbot\b" 0;
"~*\bmsnbot-media\b" 0;
"~*\bSAMSUNG\b" 0;
"~*\bSlackbot\b" 0;
"~*\bSlackbot-LinkExpanding\b" 0;
"~*\bslurp\b" 0;
"~*\bteoma\b" 0;
"~*\bTwitterBot\b" 0;
"~*\bWordpress\b" 0;
"~*\byahoo\b" 0;
# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
# ***************************************************
# User-Agent Strings Allowed Through but Rate Limited
# ***************************************************
# Some people block libwww-perl, but it is used widely by many valid (non-rogue) agents.
# I allow libwww-perl because I use it for monitoring systems with Munin, but it is rate limited.
# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
"~*\bjetmon\b" 1;
"~*\blibwww-perl\b" 1;
"~*\bLynx\b" 1;
"~*\bmunin\b" 1;
"~*\bPresto\b" 1;
"~*\bWget/1.15\b" 1;
# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
# **************************************************************
# Rate Limited User-Agents who get a bit aggressive on bandwidth
# **************************************************************
# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
"~*\bAlexa\b" 2;
"~*\barchive.org\b" 2;
"~*\bBaidu\b" 2;
"~*\bBUbiNG\b" 2;
"~*\bFlipboardProxy\b" 2;
"~*\bia_archiver\b" 2;
"~*\bMSIE\ 7.0\b" 2;
"~*\bProximic\b" 2;
"~*\bR6_CommentReader\b" 2;
"~*\bR6_FeedFetcher\b" 2;
"~*\bRED/1\b" 2;
"~*\bRPT-HTTPClient\b" 2;
"~*\bsfFeedReader/0.9\b" 2;
"~*\bSpaidu\b" 2;
"~*\bUptimeRobot/2.0\b" 2;
"~*\bYandexBot\b" 2;
"~*\bYandexImages\b" 2;
# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
# *********************************************
# Bad User-Agent Strings That We Block Outright
# *********************************************
@ -776,6 +696,90 @@ map $http_user_agent $bad_bot {
"~*/ZumBot?(\b)/" 3;
"~*/ZyBorg?(\b)/" 3;
# END BAD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
# ***********************************************
# Allow Good User-Agent Strings We Know and Trust
# ***********************************************
# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
"~*\badidxbot\b" 0;
"~*\bAdsBot-Google\b" 0;
"~*\baolbuild\b" 0;
"~*\bbingbot\b" 0;
"~*\bbingpreview\b" 0;
"~*\bDoCoMo\b" 0;
"~*\bduckduckgo\b" 0;
"~*\bfacebookexternalhit\b" 0;
"~*\bFeedfetcher-Google\b" 0;
"~*\bGooglebot\b" 0;
"~*\bGooglebot-Image\b" 0;
"~*\bGooglebot-Mobile\b" 0;
"~*\bGooglebot-News\b" 0;
"~*\bGooglebot/Test\b" 0;
"~*\bGooglebot-Video\b" 0;
"~*\bGoogle-HTTP-Java-Client\b" 0;
"~*\bGravityscan\b" 0;
"~*\bgsa-crawler\b" 0;
"~*\bJakarta\ Commons\b" 0;
"~*\bKraken/0.1\b" 0;
"~*\bLinkedInBot\b" 0;
"~*\bMediapartners-Google\b" 0;
"~*\bmsnbot\b" 0;
"~*\bmsnbot-media\b" 0;
"~*\bSAMSUNG\b" 0;
"~*\bSlackbot\b" 0;
"~*\bSlackbot-LinkExpanding\b" 0;
"~*\bslurp\b" 0;
"~*\bteoma\b" 0;
"~*\bTwitterBot\b" 0;
"~*\bWordpress\b" 0;
"~*\byahoo\b" 0;
# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
# ***************************************************
# User-Agent Strings Allowed Through but Rate Limited
# ***************************************************
# Some people block libwww-perl, but it is used widely by many valid (non-rogue) agents.
# I allow libwww-perl because I use it for monitoring systems with Munin, but it is rate limited.
# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
"~*\bjetmon\b" 1;
"~*\blibwww-perl\b" 1;
"~*\bLynx\b" 1;
"~*\bmunin\b" 1;
"~*\bPresto\b" 1;
"~*\bWget/1.15\b" 1;
# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
# **************************************************************
# Rate Limited User-Agents who get a bit aggressive on bandwidth
# **************************************************************
# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
"~*\bAlexa\b" 2;
"~*\barchive.org\b" 2;
"~*\bBaidu\b" 2;
"~*\bBUbiNG\b" 2;
"~*\bFlipboardProxy\b" 2;
"~*\bia_archiver\b" 2;
"~*\bMSIE\ 7.0\b" 2;
"~*\bProximic\b" 2;
"~*\bR6_CommentReader\b" 2;
"~*\bR6_FeedFetcher\b" 2;
"~*\bRED/1\b" 2;
"~*\bRPT-HTTPClient\b" 2;
"~*\bsfFeedReader/0.9\b" 2;
"~*\bSpaidu\b" 2;
"~*\bUptimeRobot/2.0\b" 2;
"~*\bYandexBot\b" 2;
"~*\bYandexImages\b" 2;
# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
}

View file

@ -42,8 +42,8 @@ script:
- bash .dev-tools/run-curl-tests-1.sh
- bash .dev-tools/install-nginx-testing-of-changes.sh
- bash .dev-tools/run-curl-tests-testing-of-changes.sh
- bash .dev-tools/install-nginx-2.sh
- bash .dev-tools/run-curl-tests-2.sh
#- bash .dev-tools/install-nginx-2.sh
#- bash .dev-tools/run-curl-tests-2.sh
#- bash .dev-tools/install-nginx-3.sh
#- bash .dev-tools/run-curl-tests-3.sh
#- bash .dev-tools/install-nginx-4.sh