diff --git a/_generator_lists/allowed-user-agents.list b/_generator_lists/allowed-user-agents.list new file mode 100755 index 000000000..b8df05b02 --- /dev/null +++ b/_generator_lists/allowed-user-agents.list @@ -0,0 +1,5 @@ +jetmon +libwww-perl +Lynx +munin +Wget/1.15 diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list new file mode 100755 index 000000000..12e2a2d7f --- /dev/null +++ b/_generator_lists/bad-user-agents.list @@ -0,0 +1,467 @@ +360Spider +80legs +Abonti +Aboundex +Acunetix +AhrefsBot +AIBOT +AiHitBot +Aipbot +Alexibot +Alligator +AllSubmitter +Anarchie +Apexoo +ASPSeek +Asterias +Attach +autoemailspider +BackDoorBot +BackStreet +BackWeb +Badass +Bandit +BatchFTP +Battleztar\ Bazinga +BBBike +BDFetch +BetaBot +Bigfoot +Bitacle +Black\ Hole +BlackWidow +BLEXBot +Blow +BlowFish +Boardreader +Bolt +BotALot +Brandprotect +Brandwatch +Bubing +Buddy +BuiltBotTough +BuiltWith +Bullseye +BunnySlippers +BuzzSumo +Calculon +CazoodleBot +CCBot +Cegbfeieh +CheeseBot +CherryPicker +ChinaClaw +Chlooe +Claritybot +Cliqzbot +Cogentbot +Collector +Copier +CopyRightCheck +Copyscape +Cosmos +Craftbot +CrazyWebCrawler +Crescent +CSHttp +Curious +Custo +DBLBot +Demon +Deusu +Devil +DIIbot +Dirbuster +Disco +Discobot +Discoverybot +DittoSpyder +DomainAppender +DomainSigmaCrawler +Dotbot +Download\ Demon +Download\ Devil +Download\ Wonder +Dragonfly +Drip +DTS\ Agent +EasyDL +Ebingbong +eCatch +ECCP/1.0 +Ecxi +EirGrabber +EMail\ Collector +EMail\ Extractor +EMail\ Siphon +EMail\ Wolf +EroCrawler +Evil +Exabot +Express\ WebPictures +Extractor +ExtractorPro +EyeNetIE +Ezooms +FHscan +Fimap +Findxbot +Firefox/7.0 +FlashGet +Flunky +Foobot +Freeuploader +FrontPage +GalaxyBot +Genieo +Getintent +GetRight +GetWeb +Gigablast +Gigabot +Go-Ahead-Got-It +Gotit +GoZilla +Go!Zilla +Grabber +GrabNet +Grafula +GrapeFX +GrapeshotCrawler +GridBot +GT::WWW +GuzzleHttp +HaosouSpider +Harvest +Havij +Heritrix +Hloader +HMView +HTMLparser +HTTP::Lite +HTTrack +Humanlinks +Iblog +IDBot +Id-search +IlseBot +Image\ Fetch +Image\ Stripper +Image\ Sucker +Indy\ Library +InfoNaviRobot +InfoTekies +Intelliseek +InterGET +Internet\ Ninja +InternetSeer +internetVista\ monitor +Iria +IRLbot +Iskanie +JamesBOT +Jbrofuzz +JennyBot +JetCar +JikeSpider +JOC\ Web\ Spider +Joomla +JustView +Jyxobot +Kenjin\ Spider +Keyword\ Density +Lanshanbot +Larbin +LeechFTP +LeechGet +LexiBot +Lftp +LibWeb +Libwhisker +Lightspeedsystems +Likse +Linkdexbot +LinkextractorPro +LinkpadBot +LinkScan +LinksManager +LinkWalker +LinqiaMetadataDownloaderBot +LinqiaRSSBot +LinqiaScrapeBot +Lipperhey +Litemage_walker +Lmspider +LNSpiderguy +Ltx71 +lwp-request +LWP::Simple +lwp-trivial +Magnet +Mag-Net +magpie-crawler +Mail.ru +Majestic12 +MarkMonitor +MarkWatch +Masscan +Mass\ Downloader +Mata\ Hari +Meanpathbot +MegaIndex.ru +Metauri +MFC_Tear_Sample +Microsoft\ Data\ Access +Microsoft\ URL\ Control +MIDown\ tool +MIIxpc +Mister\ PiX +MJ12bot +Mojeek +MSFrontPage +MSIE\ 6.0 +MSIECrawler +Msrabot +MS\ Web\ Services\ Client\ Protocol +Musobot +Name\ Intelligence +Nameprotect +Navroad +NearSite +Needle +Nessus +NetAnts +Netcraft +netEstate\ NE\ Crawler +NetLyzer +NetMechanic +NetSpider +Nettrack +Net\ Vampire +NetZIP +NextGenSearchBot +Nibbler +NICErsPRO +Niki-bot +Nikto +NimbleCrawler +Ninja +Nmap +NPbot +Nutch +Octopus +Offline\ Explorer +Offline\ Navigator +Openfind +OpenLinkProfiler +Openvas +OrangeBot +OrangeSpider +OutfoxBot +PageAnalyzer +Page\ Analyzer +PageGrabber +Page\ Grabber +page\ scorer +PageScorer +Panscient +Papa\ Foto +Pavuk +pcBrowser +PECL::HTTP +PeoplePal +PHPCrawl +Picscout +Picsearch +Pimonster +Pi-Monster +Pixray +PleaseCrawl +Pockey +POE-Component-Client-HTTP +Probethenet +ProPowerBot +ProWebWalker +Proximic +Psbot +Pump +PyCurl +QueryN\ Metasearch +Qwantify +RankActiveLinkBot +Rankivabot +RealDownload +Reaper +Recorder +RedesScrapy +ReGet +RepoMonkey +Ripper +RocketCrawler +Rogerbot +SBIder +ScanAlert +Scanbot +Scrapy +Screaming\ Frog\ SEO\ Spider +ScreenerBot +Searchestate +SearchmetricsBot +Semrush +SemrushBot +SEOkicks +SEOkicks-Robot +SEOlyticsCrawler +Seomoz +SEOprofiler +SEOstats +Siphon +SISTRIX +SISTRIX\ Crawler +Sitebeam +SiteExplorer +Siteimprove +SiteLockSpider +SiteSnagger +SiteSucker +Site\ Sucker +Sitevigil +Slackbot-LinkExpanding +SlySearch +SmartDownload +Snake +Snapbot +Snoopy +SocialRankIOBot +Sogou\ web\ spider +Sosospider +SpaceBison +Spammen +SpankBot +Spanner +Spbot +Spinn3r +SputnikBot +Sqlmap +Sqlworm +Sqworm +Steeler +Stripper +Sucker +Sucuri +SuperBot +SuperHTTP +Surfbot +SurveyBot +Suzuran +Swiftbot +Szukacz +T0PHackTeam +T8Abot +tAkeOut +Teleport +TeleportPro +Telesoft +Telesphoreo +Telesphorep +The\ Intraformant +TheNomad +TightTwatBot +Titan +Toata +Toweyabot +Trendictionbot +True_Robot +Turingos +TurnitinBot +Turnitin\ Bot +Turnitin\ Robot +TwengaBot +Twice +Typhoeus +URLy.Warning +URLy\ Warning +Vacuum +Vagabondo +VB\ Project +VCI +VeriCiteCrawler +VidibleScraper +VoidEYE +Voil +Voltron +WallpapersHD +WASALive-Bot +WBSearchBot +Webalta +WebAuto +Web\ Auto +WebBandit +Web\ Bandit +WebCollage +Web\ Collage +WebCopier +Web\ Copier +WEBDAV +WEBDAV\ Client +WebEnhancer +Web\ Enhancer +WebFetch +Web\ Fetch +WebFuck +Web\ Fuck +WebGo\ IS +WebImageCollector +Web\ Image\ Collector +WebLeacher +WebmasterWorldForumBot +webmeup-crawler +WebPix +Web\ Pix +WebReaper +Web\ Reaper +WebSauger +Web\ Sauger +Webshag +WebsiteExtractor +Website\ Extractor +WebsiteQuester +Website\ Quester +Webster +WebStripper +Web\ Stripper +WebSucker +Web\ Sucker +WebWhacker +Web\ Whacker +WebZIP +WeSEE +Whack +Whacker +Whatweb +Widow +WinHTTrack +WiseGuys\ Robot +WISENutbot +Wonderbot +Woobot +Wotbox +Wprecon +WPScan +WWW-Collector-E +WWW-Mechanize +WWW::Mechanize +WWWOFFLE +Xaldon_WebSpider +Xaldon\ WebSpider +Xenu +YoudaoBot +Zade +Zermelo +Zeus +Zgrab +Zitebot +ZmEu +ZumBot +ZyBorg diff --git a/_generator_lists/good-user-agents.list b/_generator_lists/good-user-agents.list new file mode 100755 index 000000000..6538b6885 --- /dev/null +++ b/_generator_lists/good-user-agents.list @@ -0,0 +1,29 @@ +adidxbot +AdsBot-Google +aolbuild +bingbot +bingpreview +DoCoMo +duckduckgo +facebookexternalhit +Feedfetcher-Google +Googlebot +Googlebot-Image +Googlebot-Mobile +Googlebot-News +Googlebot/Test +Googlebot-Video +Google-HTTP-Java-Client +gsa-crawler +Jakarta\ Commons +Kraken/0.1 +LinkedInBot +Mediapartners-Google +msnbot +msnbot-media +SAMSUNG +slurp +teoma +TwitterBot +Wordpress +yahoo diff --git a/_generator_lists/limited-user-agents.list b/_generator_lists/limited-user-agents.list new file mode 100755 index 000000000..24473b90d --- /dev/null +++ b/_generator_lists/limited-user-agents.list @@ -0,0 +1,17 @@ +Alexa +archive.org +Baidu +FlipboardProxy +ia_archiver +Mozilla/4.0 +MSIE\ 7.0 +Presto +R6_CommentReader +R6_FeedFetcher +RPT-HTTPClient +SeznamBot +sfFeedReader/0.9 +Spaidu +UptimeRobot/2.0 +YandexBot +YandexImages