mirror of
https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker.git
synced 2025-09-02 10:40:36 +00:00
Fix Regex and User-Agent Logic Order of Test Unit REF: #264
This commit is contained in:
parent
8ebc9af274
commit
24df6f7b78
3 changed files with 87 additions and 83 deletions
|
@ -237,7 +237,7 @@ rm ${_inputdb3}
|
|||
printf '%s\n' "$_start4" >> ${_tmpnginx4}
|
||||
while IFS= read -r LINE
|
||||
do
|
||||
printf '\t"~*%s%s"\t\t%s\n' "/${LINE}" "?(\b)/" "$_action4" >> ${_tmpnginx4}
|
||||
printf '\t"~*%s%s"\t\t%s\n' "${LINE}" "(?:\.|\b|\/)" "$_action4" >> ${_tmpnginx4}
|
||||
#printf '\t"~*%s"\t\t%s\n' "${LINE}" "$_action4" >> ${_tmpnginx4}
|
||||
done < ${_input4}
|
||||
printf '%s\n' "$_end4" >> ${_tmpnginx4}
|
||||
|
|
|
@ -130,86 +130,6 @@ map $http_user_agent $bad_bot {
|
|||
# END CUSTOM BLACKLISTED USER AGENTS ### DO NOT EDIT OR REMOVE THIS LINE AT ALL ###
|
||||
# *********************************************************************************
|
||||
|
||||
|
||||
# ***********************************************
|
||||
# Allow Good User-Agent Strings We Know and Trust
|
||||
# ***********************************************
|
||||
|
||||
# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
"~*\badidxbot\b" 0;
|
||||
"~*\bAdsBot-Google\b" 0;
|
||||
"~*\baolbuild\b" 0;
|
||||
"~*\bbingbot\b" 0;
|
||||
"~*\bbingpreview\b" 0;
|
||||
"~*\bDoCoMo\b" 0;
|
||||
"~*\bduckduckgo\b" 0;
|
||||
"~*\bfacebookexternalhit\b" 0;
|
||||
"~*\bFeedfetcher-Google\b" 0;
|
||||
"~*\bGooglebot\b" 0;
|
||||
"~*\bGooglebot-Image\b" 0;
|
||||
"~*\bGooglebot-Mobile\b" 0;
|
||||
"~*\bGooglebot-News\b" 0;
|
||||
"~*\bGooglebot/Test\b" 0;
|
||||
"~*\bGooglebot-Video\b" 0;
|
||||
"~*\bGoogle-HTTP-Java-Client\b" 0;
|
||||
"~*\bGravityscan\b" 0;
|
||||
"~*\bgsa-crawler\b" 0;
|
||||
"~*\bJakarta\ Commons\b" 0;
|
||||
"~*\bKraken/0.1\b" 0;
|
||||
"~*\bLinkedInBot\b" 0;
|
||||
"~*\bMediapartners-Google\b" 0;
|
||||
"~*\bmsnbot\b" 0;
|
||||
"~*\bmsnbot-media\b" 0;
|
||||
"~*\bSAMSUNG\b" 0;
|
||||
"~*\bSlackbot\b" 0;
|
||||
"~*\bSlackbot-LinkExpanding\b" 0;
|
||||
"~*\bslurp\b" 0;
|
||||
"~*\bteoma\b" 0;
|
||||
"~*\bTwitterBot\b" 0;
|
||||
"~*\bWordpress\b" 0;
|
||||
"~*\byahoo\b" 0;
|
||||
# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
|
||||
# ***************************************************
|
||||
# User-Agent Strings Allowed Through but Rate Limited
|
||||
# ***************************************************
|
||||
|
||||
# Some people block libwww-perl, but it is used widely in many valid (non-rogue) agents
|
||||
# I allow libwww-perl as I use it for monitoring systems with Munin but it is rate limited
|
||||
|
||||
# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
"~*\bjetmon\b" 1;
|
||||
"~*\blibwww-perl\b" 1;
|
||||
"~*\bLynx\b" 1;
|
||||
"~*\bmunin\b" 1;
|
||||
"~*\bPresto\b" 1;
|
||||
"~*\bWget/1.15\b" 1;
|
||||
# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
|
||||
# **************************************************************
|
||||
# Rate Limited User-Agents who get a bit aggressive on bandwidth
|
||||
# **************************************************************
|
||||
|
||||
# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
"~*\bAlexa\b" 2;
|
||||
"~*\barchive.org\b" 2;
|
||||
"~*\bBaidu\b" 2;
|
||||
"~*\bBUbiNG\b" 2;
|
||||
"~*\bFlipboardProxy\b" 2;
|
||||
"~*\bia_archiver\b" 2;
|
||||
"~*\bMSIE\ 7.0\b" 2;
|
||||
"~*\bProximic\b" 2;
|
||||
"~*\bR6_CommentReader\b" 2;
|
||||
"~*\bR6_FeedFetcher\b" 2;
|
||||
"~*\bRED/1\b" 2;
|
||||
"~*\bRPT-HTTPClient\b" 2;
|
||||
"~*\bsfFeedReader/0.9\b" 2;
|
||||
"~*\bSpaidu\b" 2;
|
||||
"~*\bUptimeRobot/2.0\b" 2;
|
||||
"~*\bYandexBot\b" 2;
|
||||
"~*\bYandexImages\b" 2;
|
||||
# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
|
||||
# *********************************************
|
||||
# Bad User-Agent Strings That We Block Outright
|
||||
# *********************************************
|
||||
|
@ -776,6 +696,90 @@ map $http_user_agent $bad_bot {
|
|||
"~*/ZumBot?(\b)/" 3;
|
||||
"~*/ZyBorg?(\b)/" 3;
|
||||
# END BAD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
|
||||
# ***********************************************
|
||||
# Allow Good User-Agent Strings We Know and Trust
|
||||
# ***********************************************
|
||||
|
||||
# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
"~*\badidxbot\b" 0;
|
||||
"~*\bAdsBot-Google\b" 0;
|
||||
"~*\baolbuild\b" 0;
|
||||
"~*\bbingbot\b" 0;
|
||||
"~*\bbingpreview\b" 0;
|
||||
"~*\bDoCoMo\b" 0;
|
||||
"~*\bduckduckgo\b" 0;
|
||||
"~*\bfacebookexternalhit\b" 0;
|
||||
"~*\bFeedfetcher-Google\b" 0;
|
||||
"~*\bGooglebot\b" 0;
|
||||
"~*\bGooglebot-Image\b" 0;
|
||||
"~*\bGooglebot-Mobile\b" 0;
|
||||
"~*\bGooglebot-News\b" 0;
|
||||
"~*\bGooglebot/Test\b" 0;
|
||||
"~*\bGooglebot-Video\b" 0;
|
||||
"~*\bGoogle-HTTP-Java-Client\b" 0;
|
||||
"~*\bGravityscan\b" 0;
|
||||
"~*\bgsa-crawler\b" 0;
|
||||
"~*\bJakarta\ Commons\b" 0;
|
||||
"~*\bKraken/0.1\b" 0;
|
||||
"~*\bLinkedInBot\b" 0;
|
||||
"~*\bMediapartners-Google\b" 0;
|
||||
"~*\bmsnbot\b" 0;
|
||||
"~*\bmsnbot-media\b" 0;
|
||||
"~*\bSAMSUNG\b" 0;
|
||||
"~*\bSlackbot\b" 0;
|
||||
"~*\bSlackbot-LinkExpanding\b" 0;
|
||||
"~*\bslurp\b" 0;
|
||||
"~*\bteoma\b" 0;
|
||||
"~*\bTwitterBot\b" 0;
|
||||
"~*\bWordpress\b" 0;
|
||||
"~*\byahoo\b" 0;
|
||||
# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
|
||||
# ***************************************************
|
||||
# User-Agent Strings Allowed Through but Rate Limited
|
||||
# ***************************************************
|
||||
|
||||
# Some people block libwww-perl, but it is used widely in many valid (non-rogue) agents
|
||||
# I allow libwww-perl as I use it for monitoring systems with Munin but it is rate limited
|
||||
|
||||
# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
"~*\bjetmon\b" 1;
|
||||
"~*\blibwww-perl\b" 1;
|
||||
"~*\bLynx\b" 1;
|
||||
"~*\bmunin\b" 1;
|
||||
"~*\bPresto\b" 1;
|
||||
"~*\bWget/1.15\b" 1;
|
||||
# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
|
||||
# **************************************************************
|
||||
# Rate Limited User-Agents who get a bit aggressive on bandwidth
|
||||
# **************************************************************
|
||||
|
||||
# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
"~*\bAlexa\b" 2;
|
||||
"~*\barchive.org\b" 2;
|
||||
"~*\bBaidu\b" 2;
|
||||
"~*\bBUbiNG\b" 2;
|
||||
"~*\bFlipboardProxy\b" 2;
|
||||
"~*\bia_archiver\b" 2;
|
||||
"~*\bMSIE\ 7.0\b" 2;
|
||||
"~*\bProximic\b" 2;
|
||||
"~*\bR6_CommentReader\b" 2;
|
||||
"~*\bR6_FeedFetcher\b" 2;
|
||||
"~*\bRED/1\b" 2;
|
||||
"~*\bRPT-HTTPClient\b" 2;
|
||||
"~*\bsfFeedReader/0.9\b" 2;
|
||||
"~*\bSpaidu\b" 2;
|
||||
"~*\bUptimeRobot/2.0\b" 2;
|
||||
"~*\bYandexBot\b" 2;
|
||||
"~*\bYandexImages\b" 2;
|
||||
# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ###
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -42,8 +42,8 @@ script:
|
|||
- bash .dev-tools/run-curl-tests-1.sh
|
||||
- bash .dev-tools/install-nginx-testing-of-changes.sh
|
||||
- bash .dev-tools/run-curl-tests-testing-of-changes.sh
|
||||
- bash .dev-tools/install-nginx-2.sh
|
||||
- bash .dev-tools/run-curl-tests-2.sh
|
||||
#- bash .dev-tools/install-nginx-2.sh
|
||||
#- bash .dev-tools/run-curl-tests-2.sh
|
||||
#- bash .dev-tools/install-nginx-3.sh
|
||||
#- bash .dev-tools/run-curl-tests-3.sh
|
||||
#- bash .dev-tools/install-nginx-4.sh
|
||||
|
|
Loading…
Add table
Reference in a new issue