diff --git a/Rakefile b/Rakefile
index c638c0b9..5a2cfba8 100644
--- a/Rakefile
+++ b/Rakefile
@@ -16,7 +16,7 @@ task :default => [:test, :test_app]
namespace :impressionist do
require File.dirname(__FILE__) + "/lib/impressionist/bots"
- desc "output the list of bots from http://www.user-agents.org/"
+ desc "output the list of bots from https://github.com/monperrus/crawler-user-agents"
task :bots do
p Impressionist::Bots.consume
diff --git a/app/models/impressionist/bots.rb b/app/models/impressionist/bots.rb
index aee7cf9c..783a4740 100644
--- a/app/models/impressionist/bots.rb
+++ b/app/models/impressionist/bots.rb
@@ -8,1461 +8,12 @@ def self.bot?(user_agent = nil)
WILD_CARDS = ["bot","yahoo","slurp","google","msn","crawler"]
- LIST = [" UnChaos From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)",
- " UnChaos Bot Hybrid Web Search Engine. (vadim_gonchar@unchaos.com)",
- " UnChaosBot From Chaos To Order UnChaos Hybrid Web Search Engine at www.unchaos.com (info@unchaos.com)",
- " http://www.sygol.com",
- "*/Nutch-0.9-dev",
- "+SitiDi.net/SitiDiBot/1.0 (+Have Good Day)",
- "-DIE-KRAEHE- META-SEARCH-ENGINE/1.1 http://www.die-kraehe.de",
- "192.comAgent",
- "4anything.com LinkChecker v2.0",
- "8484 Boston Project v 1.0",
- ":robot/1.0 (linux) ( admin e-mail: undefined http://www.neofonie.de/loesungen/search/robot.html )",
- "A-Online Search",
- "A1 Sitemap Generator/1.0 (+http://www.micro-sys.dk/products/sitemap-generator/) miggibot/2006.01.24",
- "aardvark-crawler",
- "AbachoBOT",
- "AbachoBOT (Mozilla compatible)",
- "ABCdatos BotLink/5.xx.xxx#BBL",
- "Aberja Checkomat",
- "abot/0.1 (abot; http://www.abot.com; abot@abot.com)",
- "About/0.1libwww-perl/5.47",
- "Accelatech RSSCrawler/0.4",
- "accoona",
- "Accoona-AI-Agent/1.1.1 (crawler at accoona dot com)",
- "Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)",
- "Ack (http://www.ackerm.com/)",
- "AcoiRobot",
- "Acoon Robot v1.50.001",
- "Acoon Robot v1.52 (http://www.acoon.de)",
- "Acoon-Robot 4.0.x.[xx] (http://www.acoon.de)",
- "Acoon-Robot v3.xx (http://www.acoon.de and http://www.acoon.com)",
- "Acorn/Nutch-0.9 (Non-Profit Search Engine; acorn.isara.org; acorn at isara dot org)",
- "AESOP_com_SpiderMan",
- "agadine/1.x.x (+http://www.agada.de)",
- "Agent-SharewarePlazaFileCheckBot/2.0+(+http://www.SharewarePlaza.com)",
- "AgentName/0.1 libwww-perl/5.48",
- "AIBOT/2.1 By +(www.21seek.com A Real artificial intelligence search engine China)",
- "aipbot/1.0 (aipbot; http://www.aipbot.com; aipbot@aipbot.com)",
- "aipbot/2-beta (aipbot dev; http://aipbot.com; aipbot@aipbot.com)",
- "Aladin/3.324",
- "Aleksika Spider/1.0 (+http://www.aleksika.com/)",
- "AlkalineBOT/1.3",
- "AlkalineBOT/1.4 (1.4.0326.0 RTM)",
- "Allesklar/0.1 libwww-perl/5.46",
- "Allrati/1.1 (+)",
- "AltaVista Intranet V2.0 AVS EVAL search@freeit.com",
- "AltaVista Intranet V2.0 Compaq Altavista Eval sveand@altavista.net",
- "AltaVista Intranet V2.0 evreka.com crawler@evreka.com",
- "AltaVista V2.0B crawler@evreka.com",
- "AmfibiBOT",
- "Amfibibot/0.06 (Amfibi Web Search; http://www.amfibi.com; agent@amfibi.com)",
- "Amfibibot/0.07 (Amfibi Robot; http://www.amfibi.com; agent@amfibi.com)",
- "amibot",
- "AnnoMille spider 0.1 alpha - http://www.annomille.it",
- "AnswerBus (http://www.answerbus.com/)",
- "antibot-V1.1.5/i586-linux-2.2",
- "AnzwersCrawl/2.0 (anzwerscrawl@anzwers.com.au;Engine)",
- "Apexoo Spider 1.x",
- "Aport",
- "appie 1.1 (www.walhello.com)",
- "ArabyBot (compatible; Mozilla/5.0; GoogleBot; FAST Crawler 6.4; http://www.araby.com;)",
- "ArachBot",
- "Arachnoidea (arachnoidea@euroseek.com)",
- "ArchitextSpider",
- "archive.org_bot",
- "Arikus_Spider",
- "Arquivo-web-crawler (compatible; heritrix/1.12.1 +http://arquivo-web.fccn.pt)",
- "ASAHA Search Engine Turkey V.001 (http://www.asaha.com/)",
- "Asahina-Antenna/1.x",
- "Asahina-Antenna/1.x (libhina.pl/x.x ; libtime.pl/x.x)",
- "ask.24x.info",
- "AskAboutOil/0.06-rcp (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@askaboutoil.com)",
- "asked/Nutch-0.8 (web crawler; http://asked.jp; epicurus at gmail dot com)",
- "ASPSeek/1.2.5",
- "ASPseek/1.2.9d",
- "ASPSeek/1.2.x",
- "ASPSeek/1.2.xa",
- "ASPseek/1.2.xx",
- "ASPSeek/1.2.xxpre",
- "ASSORT/0.10",
- "asterias/2.0",
- "AtlocalBot/1.1 +(http://www.atlocal.com/local-web-site-owner.html)",
- "Atomic_Email_Hunter/4.0",
- "Atomz/1.0",
- "atSpider/1.0",
- "Attentio/Nutch-0.9-dev (Attentio's beta blog crawler; www.attentio.com; info@attentio.com)",
- "augurfind",
- "augurnfind V-1.x",
- "autoemailspider",
- "autowebdir 1.1 (www.autowebdir.com)",
- "AV Fetch 1.0",
- "AVSearch-1.0(peter.turney@nrc.ca)",
- "AVSearch-3.0(AltaVista/AVC)",
- "axadine/ (Axadine Crawler; http://www.axada.de/; )",
- "AxmoRobot - Crawling your site for better indexing on www.axmo.com search engine.",
- "BabalooSpider/1.3 (BabalooSpider; http://www.babaloo.si; spider@babaloo.si)",
- "BaboomBot/1.x.x (+http://www.baboom.us)",
- "BaiduImagespider+(+http://www.baidu.jp/search/s308.html)",
- "BaiDuSpider",
- "Baiduspider+(+http://help.baidu.jp/system/05.html)",
- "Baiduspider+(+http://www.baidu.com/search/spider.htm)",
- "Baiduspider+(+http://www.baidu.com/search/spider_jp.html)",
- "Balihoo/Nutch-1.0-dev (Crawler for Balihoo.com search engine - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)",
- "BarraHomeCrawler (albertof@barrahome.org)",
- "bdcindexer_2.6.2 (research@bdc)",
- "BDFetch",
- "BDNcentral Crawler v2.3 [en] (http://www.bdncentral.com/robot.html) (X11; I; Linux 2.0.44 i686)",
- "beautybot/1.0 (+http://www.uchoose.de/crawler/beautybot/)",
- "BebopBot/2.5.1 ( crawler http://www.apassion4jazz.net/bebopbot.html )",
- "BigCliqueBOT/1.03-dev (bigclicbot; http://www.bigclique.com; bot@bigclique.com)",
- "BIGLOTRON (Beta 2;GNU/Linux)",
- "Bigsearch.ca/Nutch-x.x-dev (Bigsearch.ca Internet Spider; http://www.bigsearch.ca/; info@enhancededge.com)",
- "BilgiBetaBot/0.8-dev (bilgi.com (Beta) ; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
- "BilgiBot/1.0(beta) (http://www.bilgi.com/; bilgi at bilgi dot com)",
- "Bitacle bot/1.1",
- "Bitacle Robot (V:1.0;) (http://www.bitacle.com)",
- "BlackWidow",
- "Blaiz-Bee/1.0 (+http://www.blaiz.net)",
- "Blaiz-Bee/2.00.8222 (BE Internet Search Engine http://www.rawgrunt.com)",
- "Blaiz-Bee/2.00.xxxx (+http://www.blaiz.net)",
- "BlitzBOT@tricus.net",
- "BlitzBOT@tricus.net (Mozilla compatible)",
- "BlogBot/1.x",
- "Bloglines Title Fetch/1.0 (http://www.bloglines.com)",
- "Bloglines-Images/0.1 (http://www.bloglines.com)",
- "Bloglines/3.1 (http://www.bloglines.com)",
- "Blogpulse (info@blogpulse.com)",
- "BlogPulseLive (support@blogpulse.com)",
- "BlogSearch/1.x +http://www.icerocket.com/",
- "blogsearchbot-pumpkin-3",
- "BlogsNowBot, V 2.01 (+http://www.blogsnow.com/)",
- "BlogVibeBot-v1.1 (spider@blogvibe.nl)",
- "blogWatcher_Spider/0.1 (http://www.lr.pi.titech.ac.jp/blogWatcher/)",
- "BlogzIce/1.0 (+http://icerocket.com; rhodes@icerocket.com)",
- "BlogzIce/1.0 +http://www.icerocket.com/",
- "BloobyBot",
- "Bloodhound/Nutch-0.9 (Testing Crawler for Research - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)",
- "boitho.com-dc/0.xx (http://www.boitho.com/dcbot.html)",
- "boitho.com-robot/1.x",
- "boitho.com-robot/1.x (http://www.boitho.com/bot.html)",
- "BPImageWalker/2.0 (www.bdbrandprotect.com)",
- "BravoBrian SpiderEngine MarcoPolo",
- "BruinBot (+http://webarchive.cs.ucla.edu/bruinbot.html) ",
- "BSDSeek/1.0",
- "BTbot/0.x (+http://www.btbot.com/btbot.html)",
- "BuildCMS crawler (http://www.buildcms.com/crawler)",
- "BullsEye",
- "bumblebee@relevare.com",
- "BurstFindCrawler/1.1 (crawler.burstfind.com; http://crawler.burstfind.com; crawler@burstfind.com)",
- "Buscaplus Robi/1.0 (http://www.buscaplus.com/robi/)",
- "bwh3_user_agent",
- "Cabot/Nutch-0.9 (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)",
- "Cabot/Nutch-1.0-dev (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)",
- "carleson/1.0",
- "Carnegie_Mellon_University_Research_WebBOT-->PLEASE READ-->http://www.andrew.cmu.edu/~brgordon/webbot/index.html http://www.andrew.cmu.edu/~brgordon/webbot/index.html",
- "Carnegie_Mellon_University_WebCrawler http://www.andrew.cmu.edu/~brgordon/webbot/index.html",
- "Catall Spider",
- "CazoodleBot/CazoodleBot-0.1 (CazoodleBot Crawler; http://www.cazoodle.com/cazoodlebot; cazoodlebot@cazoodle.com)",
- "CCBot/1.0 (+http://www.commoncrawl.org/bot.html)",
- "ccubee/x.x",
- "Ceramic Tile Installation Guide (http://www.floorstransformed.com)",
- "cfetch/1.0",
- "China Local Browse 2.6",
- "ChristCRAWLER 2.0",
- "CipinetBot (http://www.cipinet.com/bot.html)",
- "ClariaBot/1.0",
- "Claymont.com",
- "CloakDetect/0.9 (+http://fulltext.seznam.cz/)",
- "Clushbot/2.x (+http://www.clush.com/bot.html)",
- "Clushbot/3.x-BinaryFury (+http://www.clush.com/bot.html)",
- "Clushbot/3.xx-Ajax (+http://www.clush.com/bot.html)",
- "Clushbot/3.xx-Hector (+http://www.clush.com/bot.html)",
- "Clushbot/3.xx-Peleus (+http://www.clush.com/bot.html)",
- "Cogentbot/1.X (+http://www.cogentsoftwaresolutions.com/bot.html)",
- "combine/0.0",
- "Combine/2.0 http://combine.it.lth.se/",
- "Combine/3 http://combine.it.lth.se/",
- "Combine/x.0",
- "cometrics-bot, http://www.cometrics.de",
- "Computer_and_Automation_Research_Institute_Crawler crawler@ilab.sztaki.hu",
- "Comrite/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
- "ContactBot/0.2",
- "ContentSmartz",
- "Convera Internet Spider V6.x",
- "ConveraCrawler/0.2",
- "ConveraCrawler/0.9d (+http://www.authoritativeweb.com/crawl)",
- "ConveraMultiMediaCrawler/0.1 (+http://www.authoritativeweb.com/crawl)",
- "CoolBot",
- "cosmos/0.8_(robot@xyleme.com)",
- "cosmos/0.9_(robot@xyleme.com)",
- "CougarSearch/0.x (+http://www.cougarsearch.com/faq.shtml)",
- "Covac TexAs Arachbot",
- "Cowbot-0.1 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
- "Cowbot-0.1.x (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
- "CrawlConvera0.1 (CrawlConvera@yahoo.com)",
- "Crawler (cometsearch@cometsystems.com)",
- "Crawler admin@crawler.de",
- "Crawler V 0.2.x admin@crawler.de",
- "crawler@alexa.com",
- "CrawlerBoy Pinpoint.com",
- "Crawllybot/0.1 (Crawllybot; +http://www.crawlly.com; crawler@crawlly.com)",
- "CreativeCommons/0.06-dev (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@lists.sourceforge.net)",
- "CrocCrawler vx.3 [en] (http://www.croccrawler.com) (X11; I; Linux 2.0.44 i686)",
- "csci_b659/0.13",
- "Cuasarbot/0.9b http://www.cuasar.com/spider_beta/ ",
- "CurryGuide SiteScan 1.1",
- "Custom Spider www.bisnisseek.com /1.0",
- "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)",
- "CydralSpider/1.x (Cydral Web Image Search; http://www.cydral.com)",
- "CydralSpider/3.0 (Cydral Image Search; http://www.cydral.com)",
- "DataCha0s/2.0",
- "DataCha0s/2.0",
- "DataFountains/DMOZ Downloader",
- "DataFountains/Dmoz Downloader (http://ivia.ucr.edu/useragents.shtml)",
- "DataFountains/DMOZ Feature Vector Corpus Creator (http://ivia.ucr.edu/useragents.shtml)",
- "DataparkSearch/4.47 (+http://dataparksearch.org/bot)",
- "DataparkSearch/4.xx (http://www.dataparksearch.org/)",
- "DataSpear/1.0 (Spider; http://www.dataspear.com/spider.html; spider@dataspear.com)",
- "DataSpearSpiderBot/0.2 (DataSpear Spider Bot; http://dssb.dataspear.com/bot.html; dssb@dataspear.com)",
- "DatenBot( http://www.sicher-durchs-netz.de/bot.html)",
- "DaviesBot/1.7 (www.wholeweb.net)",
- "daypopbot/0.x",
- "dbDig(http://www.prairielandconsulting.com)",
- "DBrowse 1.4b",
- "DBrowse 1.4d",
- "dCSbot/1.1",
- "de.searchengine.comBot 1.2 (http://de.searchengine.com/spider)",
- "deepak-USC/ISI",
- "DeepIndex",
- "DeepIndex ( http://www.zetbot.com )",
- "DeepIndex (www.en.deepindex.com)",
- "DeepIndexer.ca",
- "Demo Bot DOT 16b",
- "Demo Bot Z 16b",
- "Denmex websearch (http://search.denmex.com)",
- "dev-spider2.searchpsider.com/1.3b",
- "DiaGem/1.1 (http://www.skyrocket.gr.jp/diagem.html)",
- "Diamond/x.0",
- "DiamondBot",
- "Digger/1.0 JDK/1.3.0rc3",
- "DigOut4U",
- "DIIbot/1.2",
- "disco/Nutch-0.9 (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)",
- "disco/Nutch-1.0-dev (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)",
- "DittoSpyder",
- "dloader(NaverRobot)/1.0",
- "DoCoMo/1.0/Nxxxi/c10",
- "DoCoMo/1.0/Nxxxi/c10/TB",
- "DoCoMo/2.0 P900iV(c100;TB;W24H11) ",
- "DoCoMo/2.0 SH902i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)",
- "DoCoMo/2.0/SO502i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)",
- "dodgebot/experimental",
- "Download-Tipp Linkcheck (http://download-tipp.de/)",
- "Drecombot/1.0 (http://career.drecom.jp/bot.html)",
- "DSurf15a 01",
- "DSurf15a 71",
- "DSurf15a 81",
- "DSurf15a VA",
- "dtSearchSpider",
- "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
- "Dumbot(version 0.1 beta - dumbfind.com)",
- "Dumbot(version 0.1 beta - http://www.dumbfind.com/dumbot.html)",
- "Dumbot(version 0.1 beta)",
- "e-sense 1.0 ea(www.vigiltech.com/esensedisclaim.html)",
- "e-SocietyRobot(http://www.yama.info.waseda.ac.jp/~yamana/es/)",
- "eApolloBot/2.0 (compatible; heritrix/2.0.0-SNAPSHOT-20071024.170148 +http://www.eapollo-opto.com)",
- "EARTHCOM.info/1.x [www.earthcom.info]",
- "EARTHCOM.info/1.xbeta [www.earthcom.info]",
- "EasyDL/3.xx",
- "EasyDL/3.xx http://keywen.com/Encyclopedia/Bot",
- "EBrowse 1.4b",
- "EchO!/2.0",
- "Educate Search VxB",
- "egothor/3.0a (+http://www.xdefine.org/robot.html)",
- "EgotoBot/4.8 (+http://www.egoto.com/about.htm)",
- "ejupiter.com",
- "elfbot/1.0 (+http://www.uchoose.de/crawler/elfbot/)",
- "ELI/20070402:2.0 (DAUM RSS Robot, Daum Communications Corp.; +http://ws.daum.net/aboutkr.html)",
- "EmailSiphon",
- "EmailSpider",
- "EmailWolf 1.00",
- "EnaBot/1.x (http://www.enaball.com/crawler.html)",
- "Enfish Tracker",
- "Enterprise_Search/1.0",
- "Enterprise_Search/1.0.xxx",
- "Enterprise_Search/1.00.xxx;MSSQL (http://www.innerprise.net/es-spider.asp)",
- "envolk/1.7 (+http://www.envolk.com/envolkspiderinfo.php)",
- "envolk[ITS]spider/1.6(+http://www.envolk.com/envolkspider.html)",
- "EroCrawler",
- "ES.NET_Crawler/2.0 (http://search.innerprise.net/)",
- "eseek-larbin_2.6.2 (crawler@exactseek.com)",
- "ESISmartSpider",
- "eStyleSearch 4 (compatible; MSIE 6.0; Windows NT 5.0)",
- "ESurf15a 15",
- "EuripBot/0.x (+http://www.eurip.com) GetFile",
- "EuripBot/0.x (+http://www.eurip.com) GetRobots",
- "EuripBot/0.x (+http://www.eurip.com) PreCheck",
- "Eurobot/1.0 (http://www.ayell.eu)",
- "EvaalSE - bot@evaal.com",
- "eventax/1.3 (eventax; http://www.eventax.de/; info@eventax.de)",
- "Everest-Vulcan Inc./0.1 (R&D project; host=e-1-24; http://everest.vulcan.com/crawlerhelp)",
- "Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)",
- "Exabot-Images/1.0",
- "Exabot-Test/1.0",
- "Exabot/2.0",
- "Exabot/3.0",
- "ExactSeek Crawler/0.1",
- "exactseek-crawler-2.63 (crawler@exactseek.com)",
- "exactseek-pagereaper-2.63 (crawler@exactseek.com)",
- "exactseek.com",
- "Exalead NG/MimeLive Client (convert/http/0.120)",
- "Excalibur Internet Spider V6.5.4",
- "Execrawl/1.0 (Execrawl; http://www.execrawl.com/; bot@execrawl.com)",
- "exooba crawler/exooba crawler (crawler for exooba.com; http://www.exooba.com/; info at exooba dot com)",
- "exooba/exooba crawler (exooba; exooba)",
- "ExperimentalHenrytheMiragoRobot",
- "ExtractorPro",
- "EyeCatcher (Download-tipp.de)/1.0",
- "Factbot 1.09 (see http://www.factbites.com/webmasters.php)",
- "factbot : http://www.factbites.com/robots",
- "Fast Crawler Gold Edition",
- "FAST Enterprise Crawler 6 (Experimental)",
- "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/",
- "FAST Enterprise Crawler 6 used by Cobra Development (admin@fastsearch.com)",
- "FAST Enterprise Crawler 6 used by Comperio AS (sts@comperio.no)",
- "FAST Enterprise Crawler 6 used by FAST (FAST)",
- "FAST Enterprise Crawler 6 used by Pages Jaunes (pvincent@pagesjaunes.fr)",
- "FAST Enterprise Crawler 6 used by Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
- "FAST Enterprise Crawler 6 used by Singapore Press Holdings (crawler@sphsearch.sg)",
- "FAST Enterprise Crawler/6 (www.fastsearch.com)",
- "FAST Enterprise Crawler/6.4 (helpdesk at fast.no)",
- "FAST FirstPage retriever (compatible; MSIE 5.5; Mozilla/4.0)",
- "FAST MetaWeb Crawler (helpdesk at fastsearch dot com)",
- "Fast PartnerSite Crawler",
- "FAST-WebCrawler/2.2.10 (Multimedia Search) (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)",
- "FAST-WebCrawler/2.2.6 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)",
- "FAST-WebCrawler/2.2.7 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no",
- "FAST-WebCrawler/2.2.8 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no",
- "FAST-WebCrawler/3.2 test",
- "FAST-WebCrawler/3.3 (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
- "FAST-WebCrawler/3.4/Nirvana (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
- "FAST-WebCrawler/3.4/PartnerSite (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
- "FAST-WebCrawler/3.5 (atw-crawler at fast dot no; http://fast.no/support.php?c=faqs/crawler)",
- "FAST-WebCrawler/3.6 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
- "FAST-WebCrawler/3.6/FirstPage (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
- "FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
- "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)",
- "FAST-WebCrawler/3.8 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
- "FAST-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
- "FAST-WebCrawler/3.x Multimedia",
- "FAST-WebCrawler/3.x Multimedia (mm dash crawler at fast dot no)",
- "fastbot crawler beta 2.0 (+http://www.fastbot.de)",
- "FastBug http://www.ay-up.com",
- "FastCrawler 3.0.1 (crawler@1klik.dk)",
- "FastSearch Web Crawler for Verizon SuperPages (kevin.watters@fastsearch.com)",
- "Favcollector/2.0 (info@favcollector.com http://www.favcollector.com/)",
- "favo.eu crawler/0.6 (http://www.favo.eu)",
- "Faxobot/1.0",
- "Feed Seeker Bot (RSS Feed Seeker http://www.MyNewFavoriteThing.com/fsb.php)",
- "Feed24.com",
- "FeedChecker/0.01",
- "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)",
- "FeedHub FeedDiscovery/1.0 (http://www.feedhub.com)",
- "FeedHub MetaDataFetcher/1.0 (http://www.feedhub.com)",
- "Feedjit Favicon Crawler 1.0",
- "Feedster Crawler/3.0; Feedster, Inc.",
- "Felix - Mixcat Crawler (+http://mixcat.com)",
- "FFC Trap Door Spider",
- "Filtrbox/1.0",
- "Findexa Crawler (http://www.findexa.no/gulesider/article26548.ece)",
- "findlinks/x.xxx (+http://wortschatz.uni-leipzig.de/findlinks/) ",
- "FineBot",
- "Firefly/1.0",
- "Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)",
- "Firefox (kastaneta03@hotmail.com)",
- "Firefox_1.0.6 (kasparek@naparek.cz)",
- "FirstGov.gov Search - POC:firstgov.webmasters@gsa.gov",
- "firstsbot",
- "Flapbot/0.7.2 (Flaptor Crawler; http://www.flaptor.com; crawler at flaptor period com)",
- "Flexum spider",
- "Flexum/2.0",
- "FlickBot 2.0 RPT-HTTPClient/0.3-3",
- "flunky",
- "FnooleBot/2.5.2 (+http://www.fnoole.com/addurl.html)",
- "FocusedSampler/1.0",
- "Folkd.com Spider/0.1 beta 1 (www.folkd.com)",
- "Fooky.com/ScorpionBot/ScoutOut; http://www.fooky.com/scorpionbots",
- "Francis/1.0 (francis@neomo.de http://www.neomo.de/)",
- "Franklin Locator 1.8",
- "FreeFind.com-SiteSearchEngine/1.0 (http://freefind.com; spiderinfo@freefind.com)",
- "FreshNotes crawler< report problems to crawler-at-freshnotes-dot-com",
- "FSurf15a 01",
- "FTB-Bot http://www.findthebest.co.uk/",
- "Full Web Bot 0416B",
- "Full Web Bot 0516B",
- "Full Web Bot 2816B",
- "FuseBulb.Com",
- "FyberSpider (+http://www.fybersearch.com/fyberspider.php)",
- "GAIS Robot/1.0B2",
- "Gaisbot/3.0 (indexer@gais.cs.ccu.edu.tw; http://gais.cs.ccu.edu.tw/robot.php)",
- "Gaisbot/3.0+(robot06@gais.cs.ccu.edu.tw;+http://gais.cs.ccu.edu.tw/robot.php)",
- "GalaxyBot/1.0 (http://www.galaxy.com/galaxybot.html)",
- "Gallent Search Spider v1.4 Robot 2 (http://robot.GallentSearch.com)",
- "gamekitbot/1.0 (+http://www.uchoose.de/crawler/gamekitbot/)",
- "GammaSpider/1.0",
- "gazz/x.x (gazz@nttrd.com)",
- "generic_crawler/01.0217/",
- "genieBot (",
- "geniebot wgao@genieknows.com",
- "GeonaBot 1.x; http://www.geona.com/",
- "gigabaz/3.1x (baz@gigabaz.com; http://gigabaz.com/gigabaz/)",
- "Gigabot/2.0 (gigablast.com)",
- "Gigabot/2.0/gigablast.com/spider.html",
- "Gigabot/2.0; http://www.gigablast.com/spider.html",
- "Gigabot/2.0att",
- "Gigabot/3.0 (http://www.gigablast.com/spider.html)",
- "Gigabot/x.0",
- "GigabotSiteSearch/2.0 (sitesearch.gigablast.com)",
- "GNODSPIDER (www.gnod.net)",
- "Goblin/0.9 (http://www.goguides.org/)",
- "Goblin/0.9.x (http://www.goguides.org/goblin-info.html)",
- "GoForIt.com",
- "GOFORITBOT ( http://www.goforit.com/about/ )",
- "gonzo1[P] +http://www.suchen.de/popups/faq.jsp",
- "gonzo2[P] +http://www.suchen.de/faq.html",
- "Goofer/0.2",
- "Googlebot-Image/1.0",
- "Googlebot-Image/1.0 ( http://www.googlebot.com/bot.html)",
- "Googlebot/2.1 ( http://www.google.com/bot.html)",
- "Googlebot/2.1 ( http://www.googlebot.com/bot.html)",
- "Googlebot/Test ( http://www.googlebot.com/bot.html)",
- "GrapeFX/0.3 libwww/5.4.0",
- "great-plains-web-spider/flatlandbot (Flatland Industries Web Spider; http://www.flatlandindustries.com/flatlandbot.php; jason@flatlandindustries.com)",
- "GrigorBot 0.8 (http://www.grigor.biz/bot.html)",
- "Gromit/1.0",
- "grub crawler(http://www.grub.org)",
- "grub-client",
- "gsa-crawler (Enterprise; GID-01422; jplastiras@google.com)",
- "gsa-crawler (Enterprise; GID-01742;gsatesting@rediffmail.com)",
- "gsa-crawler (Enterprise; GIX-02057; dm@enhesa.com)",
- "gsa-crawler (Enterprise; GIX-03519; cknuetter@stubhub.com)",
- "gsa-crawler (Enterprise; GIX-0xxxx; enterprise-training@google.com)",
- "Guestbook Auto Submitter",
- "Gulliver/1.3",
- "Gulper Web Bot 0.2.4 (www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
- "Gungho/0.08004 (http://code.google.com/p/gungho-crawler/wiki/Index)",
- "GurujiBot/1.0 (+http://www.guruji.com/WebmasterFAQ.html)",
- "GurujiImageBot/1.0 (+http://www.guruji.com/en/WebmasterFAQ.html)",
- "HappyFunBot/1.1",
- "Harvest-NG/1.0.2",
- "Hatena Antenna/0.4 (http://a.hatena.ne.jp/help#robot)",
- "Hatena Pagetitle Agent/1.0",
- "Hatena RSS/0.3 (http://r.hatena.ne.jp)",
- "hbtronix.spider.2 -- http://hbtronix.de/spider.php",
- "HeinrichderMiragoRobot",
- "HeinrichderMiragoRobot (http://www.miragorobot.com/scripts/deinfo.asp)",
- "Helix/1.x ( http://www.sitesearch.ca/helix/)",
- "HenriLeRobotMirago (http://www.miragorobot.com/scripts/frinfo.asp)",
- "HenrytheMiragoRobot",
- "HenryTheMiragoRobot (http://www.miragorobot.com/scripts/mrinfo.asp)",
- "Hi! I'm CsCrawler my homepage: http://www.kde.cs.uni-kassel.de/lehre/ss2005/googlespam/crawler.html RPT-HTTPClient/0.3-3",
- "Hippias/0.9 Beta",
- "HitList",
- "Hitwise Spider v1.0 http://www.hitwise.com",
- "holmes/3.11 (http://morfeo.centrum.cz/bot)",
- "holmes/3.9 (onet.pl)",
- "holmes/3.xx (OnetSzukaj/5.0; +http://szukaj.onet.pl)",
- "holmes/x.x",
- "HolmesBot (http://holmes.ge)",
- "HomePageSearch(hpsearch.uni-trier.de)",
- "Homerbot: www.homerweb.com",
- "Honda-Search/0.7.2 (Nutch; http://lucene.apache.org/nutch/bot.html; search@honda-search.com)",
- "HooWWWer/2.1.3 (debugging run) (+http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-infohiit.fi)",
- "HooWWWer/2.1.x ( http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-infohiit.fi)",
- "HPL/Nutch-0.9 -",
- "htdig/3.1.6 (http://computerorgs.com)",
- "htdig/3.1.6 (unconfigured@htdig.searchengine.maintainer)",
- "htdig/3.1.x (root@localhost)",
- "http://Ask.24x.Info/ (http://narres.it/)",
- "http://hilfe.acont.de/bot.html ACONTBOT",
- "http://www.almaden.ibm.com/cs/crawler",
- "http://www.almaden.ibm.com/cs/crawler [rc1.wf.ibm.com]",
- "http://www.almaden.ibm.com/cs/crawler [wf216]",
- "http://www.istarthere.com_spider@istarthere.com",
- "http://www.monogol.de",
- "http://www.trendtech.dk/spider.asp)",
- "i1searchbot/2.0 (i1search web crawler; http://www.i1search.com; crawler@i1search.com)",
- "IAArchiver-1.0",
- "iaskspider2 (iask@staff.sina.com.cn)",
- "ia_archiver",
- "ia_archiver-web.archive.org",
- "ia_archiver/1.6",
- "ICC-Crawler(Mozilla-compatible; http://kc.nict.go.jp/icc/crawl.html; icc-crawl(at)ml(dot)nict(dot)go(dot)jp)",
- "ICC-Crawler(Mozilla-compatible;http://kc.nict.go.jp/icc/crawl.html;icc-crawl-contact(at)ml(dot)nict(dot)go(dot)jp)",
- "iCCrawler (http://www.iccenter.net)",
- "ICCrawler - ICjobs (http://www.icjobs.de/bot.htm)",
- "ichiro/x.0 (http://help.goo.ne.jp/door/crawler.html)",
- "ichiro/x.0 (ichiro@nttr.co.jp)",
- "IconSurf/2.0 favicon finder (see http://iconsurf.com/robot.html)",
- "IconSurf/2.0 favicon monitor (see http://iconsurf.com/robot.html)",
- "ICRA_label_spider/x.0",
- "icsbot-0.1",
- "ideare - SignSite/1.x",
- "iFeed.jp/2.0 (www.psychedelix.com/agents/agents.rss; 0 subscribers)",
- "igdeSpyder (compatible; igde.ru; +http://igde.ru/doc/tech.html)",
- "IIITBOT/1.1 (Indian Language Web Search Engine; http://webkhoj.iiit.net; pvvpr at iiit dot ac dot in)",
- "ilial/Nutch-0.9 (Ilial, Inc. is a Los Angeles based Internet startup company. For more information please visit http://www.ilial.com/crawler; http://www.ilial.com/crawler; crawl@ilial.com)",
- "ilial/Nutch-0.9-dev",
- "IlseBot/1.x",
- "IlTrovatore-Setaccio ( http://www.iltrovatore.it)",
- "Iltrovatore-Setaccio/0.3-dev (Indexing; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)",
- "IlTrovatore-Setaccio/1.2 ( http://www.iltrovatore.it/aiuto/faq.html)",
- "Iltrovatore-Setaccio/1.2 (It-bot; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)",
- "iltrovatore-setaccio/1.2-dev (spidering; http://www.iltrovatore.it/aiuto/.....)",
- "IlTrovatore/1.2 (IlTrovatore; http://www.iltrovatore.it/bot.html; bot@iltrovatore.it)",
- "ImageWalker/2.0 (www.bdbrandprotect.com)",
- "IncyWincy data gatherer(webmaster@loopimprovements.com",
- "IncyWincy page crawler(webmaster@loopimprovements.com",
- "IncyWincy(http://www.look.com)",
- "IncyWincy(http://www.loopimprovements.com/robot.html)",
- "IncyWincy/2.1(loopimprovements.com/robot.html)",
- "IndexTheWeb.com Crawler7",
- "Industry Program 1.0.x",
- "Inet library",
- "info@pubblisito.com- (http://www.pubblisito.com) il Sud dei Motori di Ricerca",
- "InfoFly/1.0 (http://www.versions-project.org/)",
- "INFOMINE/8.0 Adders",
- "INFOMINE/8.0 RemoteServices",
- "INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)",
- "InfoNaviRobot(F107)",
- "InfoSeek Sidewinder/0.9",
- "InfoSeek Sidewinder/1.0A",
- "InfoSeek Sidewinder/1.1A",
- "Infoseek SideWinder/1.45 (Compatible; MSIE 10.0; UNIX)",
- "Infoseek SideWinder/2.0B (Linux 2.4 i686)",
- "INGRID/3.0 MT (webcrawler@NOSPAMexperimental.net; http://webmaster.ilse.nl/jsp/webmaster.jsp)",
- "Inktomi Search",
- "InnerpriseBot/1.0 (http://www.innerprise.com/)",
- "Insitor.com search and find world wide!",
- "Insitornaut",
- "Internet Ninja x.0",
- "InternetArchive/0.8-dev(Nutch;http://lucene.apache.org/nutch/bot.html;nutch-agent@lucene.apache",
- "InternetSeer.com",
- "IOI/2.0 (ISC Open Index crawler; http://index.isc.org/; bot@index.isc.org)",
- "IPiumBot laurion(dot)com",
- "IpselonBot/0.xx-beta (Ipselon; http://www.ipselon.com; ipselonbot@ipselon.com)",
- "IRLbot/1.0 ( http://irl.cs.tamu.edu/crawler)",
- "IRLbot/3.0 (compatible; MSIE 6.0; http://irl.cs.tamu.edu/crawler/)",
- "ISC Systems iRc Search 2.1",
- "IUPUI Research Bot v 1.9a",
- "IWAgent/ 1.0 - www.brandprotect.com",
- "Jabot/6.x (http://odin.ingrid.org/)",
- "Jabot/7.x.x (http://odin.ingrid.org/)",
- "Jack",
- "Jambot/0.1.x (Jambot; http://www.jambot.com/blog; crawler@jambot.com)",
- "Jambot/0.2.1 (Jambot; http://www.jambot.com/blog/static.php?page=webmaster-robot; crawler@jambot.com)",
- "Jayde Crawler. http://www.jayde.com",
- "Jetbot/1.0",
- "JobSpider_BA/1.1",
- "Jyxobot/x",
- "k2spider",
- "KAIST AITrc Crawler",
- "KakleBot - www.kakle.com/0.1 (KakleBot - www.kakle.com; http:// www.kakle.com/bot.html; support@kakle.com)",
- "kalooga/kalooga-4.0-dev-datahouse (Kalooga; http://www.kalooga.com; info@kalooga.com)",
- "kalooga/KaloogaBot (Kalooga; http://www.kalooga.com/info.html?page=crawler; crawler@kalooga.com)",
- "Kenjin Spider",
- "Kevin http://dznet.com/kevin/",
- "Kevin http://websitealert.net/kevin/",
- "KE_1.0/2.0 libwww/5.2.8",
- "KFSW-Bot (Version: 1.01 powered by KFSW www.kfsw.de)",
- "kinja-imagebot (http://www.kinja.com/)",
- "kinjabot (http://www.kinja.com)",
- "KIT-Fireball/2.0",
- "KIT-Fireball/2.0 (compatible; Mozilla 4.0; MSIE 5.5)",
- "KnowItAll(knowitall@cs.washington.edu)",
- "Knowledge.com/0.x",
- "Krugle/Krugle,Nutch/0.8+ (Krugle web crawler; http://www.krugle.com/crawler/info.html; webcrawler@krugle.com)",
- "KSbot/1.0 (KnowledgeStorm crawler; http://www.knowledgestorm.com/resources/content/crawler/index.html; crawleradmin@knowledgestorm.com)",
- "kuloko-bot/0.x",
- "kulokobot www.kuloko.com kuloko@backweave.com",
- "kulturarw3/0.1",
- "LapozzBot/1.4 ( http://robot.lapozz.com)",
- "LapozzBot/1.5 (+http://robot.lapozz.hu)",
- "larbin (samualt9@bigfoot.com)",
- "LARBIN-EXPERIMENTAL (efp@gmx.net)",
- "larbin_2.1.1 larbin2.1.1@somewhere.com",
- "larbin_2.2.0 (crawl@compete.com)",
- "larbin_2.2.1_de_Viennot (Laurent.Viennot@inria.fr)",
- "larbin_2.2.2 (sugayama@lab7.kuis.kyoto-u.ac.jp)",
- "larbin_2.2.2_guillaume (guillaume@liafa.jussieu.fr)",
- "larbin_2.6.0 (larbin2.6.0@unspecified.mail)",
- "larbin_2.6.1 (larbin2.6.1@unspecified.mail)",
- "larbin_2.6.2 (hamasaki@grad.nii.ac.jp)",
- "larbin_2.6.2 (larbin2.6.2@unspecified.mail)",
- "larbin_2.6.2 (listonATccDOTgatechDOTedu)",
- "larbin_2.6.2 (pimenas@systems.tuc.gr)",
- "larbin_2.6.2 (tom@lemurconsulting.com)",
- "larbin_2.6.2 (vitalbox1@hotmail.com)",
- "larbin_2.6.3 (ltaa_web_crawler@groupes.epfl.ch)",
- "larbin_2.6.3 (wgao@genieknows.com)",
- "larbin_2.6.3_for_(http://cosco.hiit.fi/search/) tsilande@hiit.fi",
- "larbin_2.6_basileocaml (basile.starynkevitch@cea.fr)",
- "larbin_devel (http://pauillac.inria.fr/~ailleret/prog/larbin/)",
- "lawinfo-crawler/Nutch-0.9-dev (Crawler for lawinfo.com pages; http://www.lawinfo.com; webmaster@lawinfo.com)",
- "LECodeChecker/3.0 libgetdoc/1.0",
- "LEIA/2.90",
- "LEIA/3.01pr (LEIAcrawler; [SNIP])",
- "LetsCrawl.com/1.0 +http://letscrawl.com/",
- "LexiBot/1.00",
- "Libby_1.1/libwww-perl/5.47",
- "LibertyW (+http://www.lw01.com)",
- "libWeb/clsHTTP -- hiongun@kt.co.kr",
- "libwww-perl/5.41",
- "libwww-perl/5.45",
- "libwww-perl/5.48",
- "libwww-perl/5.52 FP/2.1",
- "libwww-perl/5.52 FP/4.0",
- "libwww-perl/5.65",
- "libwww-perl/5.800",
- "libwww/5.3.2",
- "LijitSpider/Nutch-0.9 (Reports crawler; http://www.lijit.com/; info(a)lijit(d)com)",
- "Lincoln State Web Browser",
- "linkbot",
- "linknzbot",
- "Links 2.0 (http://gossamer-threads.com/scripts/links/)",
- "Links SQL (http://gossamer-threads.com/scripts/links-sql/)",
- "LinkScan/11.0beta2 UnixShareware robot from Elsop.com (used by Indiafocus/Indiainfo)",
- "LinkScan/9.0g Unix",
- "LinkScan/x.x Unix",
- "LiveTrans/Nutch-0.9 (maintainer: cobain at iis dot sinica dot edu dot tw; http://wkd.iis.sinica.edu.tw/LiveTrans/)",
- "Llaut/1.0 (http://mnm.uib.es/~gallir/llaut/bot.html)",
- "LMQueueBot/0.2",
- "lmspider (lmspider@scansoft.com)",
- "LNSpiderguy",
- "LocalBot/1.0 ( http://www.localbot.co.uk/)",
- "LocalcomBot/1.2.x ( http://www.local.com/bot.htm)",
- "Lockstep Spider/1.0",
- "Look.com",
- "Lovel as 1.0 ( +http://www.everatom.com)",
- "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (lti crawler for CMU; http://www.lti.cs.cmu.edu; changkuk at cmu dot edu)",
- "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (Research spider using Nutch; http://www.lemurproject.org; mhoy@cs.cmu.edu)",
- "lwp-trivial/1.32",
- "lwp-trivial/1.34",
- "lwp-trivial/1.34",
- "LWP::Simple/5.22",
- "LWP::Simple/5.36",
- "LWP::Simple/5.48",
- "LWP::Simple/5.50",
- "LWP::Simple/5.51",
- "LWP::Simple/5.53",
- "LWP::Simple/5.63",
- "LWP::Simple/5.803",
- "Lycos_Spider_(modspider)",
- "Lycos_Spider_(T-Rex)",
- "Lynx/2.8.4rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6c (human-guided@lerly.net)",
- "Mac Finder 1.0.xx",
- "Mackster( http://www.ukwizz.com )",
- "Mahiti.Com/Mahiti Crawler-1.0 (Mahiti.Com; http://mahiti.com ; mahiti.com)",
- "Mail.Ru/1.0",
- "mailto:webcraft@bea.com",
- "mammoth/1.0 ( http://www.sli-systems.com/)",
- "MantraAgent",
- "MapoftheInternet.com ( http://MapoftheInternet.com)",
- "Mariner/5.1b [de] (Win95; I ;Kolibri gncwebbot)",
- "Marketwave Hit List",
- "Martini",
- "Marvin v0.3",
- "MaSagool/1.0 (MaSagool; http://sagool.jp/; info@sagool.jp)",
- "MasterSeek",
- "Mata Hari/2.00 ",
- "Matrix S.p.A. - FAST Enterprise Crawler 6 (Unknown admin e-mail address)",
- "maxomobot/dev-20051201 (maxomo;; maxomobot@maxomo.com)",
- "MDbot/1.0 (+http://www.megadownload.net/bot.html)",
- "MediaCrawler-1.0 (Experimental)",
- "Mediapartners-Google/2.1 ( http://www.googlebot.com/bot.html)",
- "MediaSearch/0.1",
- "MegaSheep v1.0 (www.searchuk.com internet sheep)",
- "Megite2.0 (http://www.megite.com)",
- "Mercator-1.x",
- "Mercator-2.0",
- "Mercator-Scrub-1.1",
- "Metaeuro Web Crawler/0.2 (MetaEuro Web Search Clustering Engine; http://www.metaeuro.com; crawler at metaeuro dot com)",
- "MetaGer-LinkChecker",
- "MetagerBot/0.8-dev (MetagerBot; http://metager.de; )",
- "MetaGer_PreChecker0.1",
- "Metaspinner/0.01 (Metaspinner; http://www.meta-spinner.de/; support@meta-spinner.de/)",
- "metatagsdir/0.7 (+http://metatagsdir.com/directory/)",
- "MFC Foundation Class Library 4.0",
- "MicroBaz",
- "Microsoft Small Business Indexer",
- "Microsoft URL Control - 6.00.8xxx",
- "MicrosoftPrototypeCrawler (How's my crawling? mailto:newbiecrawler@hotmail.com)",
- "Missauga Locate 1.0.0",
- "Missigua Locator 1.9",
- "Missouri College Browse",
- "Misterbot-Nutch/0.7.1 (Misterbot-Nutch; http://www.misterbot.fr; admin@misterbot.fr)",
- "Miva (AlgoFeedback@miva.com)",
- "Mizzu Labs 2.2",
- "MJ12bot/vx.x.x (http://majestic12.co.uk/bot.php?+)",
- "MJ12bot/vx.x.x (http://www.majestic12.co.uk/projects/dsearch/mj12bot.php)",
- "MJBot (SEO assessment)",
- "MLBot (www.metadatalabs.com)",
- "MnogoSearch/3.2.xx",
- "Mo College 1.9",
- "moget/x.x (moget@goo.ne.jp)",
- "mogimogi/1.0",
- "MojeekBot/0.x (archi; http://www.mojeek.com/bot.html)",
- "Morris - Mixcat Crawler ( http://mixcat.com)",
- "Mouse-House/7.4 (spider_monkey spider info at www.mobrien.com/sm.shtml)",
- "mozDex/0.xx-dev (mozDex; http://www.mozdex.com/en/bot.html; spider@mozdex.com)",
- "Mozilla (Mozilla@somewhere.com)",
- "Mozilla 4.0(compatible; BotSeer/1.0; +http://botseer.ist.psu.edu)",
- "Mozilla/2.0 (compatible; Ask Jeeves)",
- "Mozilla/2.0 (compatible; Ask Jeeves/Teoma)",
- "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml) ",
- "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://sp.ask.com/docs/about/tech_crawling.html)",
- "Mozilla/2.0 (compatible; EZResult -- Internet Search Engine)",
- "Mozilla/2.0 (compatible; NEWT ActiveX; Win32)",
- "Mozilla/2.0 (compatible; T-H-U-N-D-E-R-S-T-O-N-E)",
- "Mozilla/3.0 (compatible; Fluffy the spider; http://www.searchhippo.com/; info@searchhippo.com)",
- "Mozilla/3.0 (compatible; Indy Library)",
- "Mozilla/3.0 (compatible; MuscatFerret/1.5.4; claude@euroferret.com)",
- "Mozilla/3.0 (compatible; MuscatFerret/1.5; olly@muscat.co.uk)",
- "Mozilla/3.0 (compatible; MuscatFerret/1.6.x; claude@euroferret.com)",
- "Mozilla/3.0 (compatible; scan4mail (advanced version) http://www.peterspages.net/?scan4mail)",
- "Mozilla/3.0 (compatible; ScollSpider; http://www.webwobot.com)",
- "Mozilla/3.0 (compatible; Webinator-DEV01.home.iprospect.com/2.56)",
- "Mozilla/3.0 (compatible; Webinator-indexer.cyberalert.com/2.56)",
- "Mozilla/3.0 (INGRID/3.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)",
- "Mozilla/3.0 (Slurp.so/Goo; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Mozilla/3.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Mozilla/3.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Mozilla/3.0 (Vagabondo/1.1 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)",
- "Mozilla/3.0 (Vagabondo/1.x MT; webagent@wise-guys.nl; http://webagent.wise-guys.nl/)",
- "Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)",
- "Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)",
- "Mozilla/3.01 (Compatible; Links2Go Similarity Engine)",
- "Mozilla/4.0",
- "Mozilla/4.0 (agadine3.0) www.agada.de",
- "Mozilla/4.0 (compatible: AstraSpider V.2.1 : astrafind.com)",
- "Mozilla/4.0 (compatible; Vagabondo/2.2; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
- "Mozilla/4.0 (compatible; Vagabondo/4.0Beta; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
- "Mozilla/4.0 (compatible; Advanced Email Extractor v2.xx)",
- "Mozilla/4.0 (compatible; B_L_I_T_Z_B_O_T)",
- "Mozilla/4.0 (compatible; ChristCrawler.com ChristCrawler@ChristCENTRAL.com)",
- "Mozilla/4.0 (compatible; crawlx, crawler@trd.overture.com)",
- "Mozilla/4.0 (compatible; DAUMOA-video; +http://ws.daum.net/aboutkr.html)",
- "Mozilla/4.0 (compatible; FastCrawler3 support-fastcrawler3@fast.no)",
- "Mozilla/4.0 (compatible; FDSE robot)",
- "Mozilla/4.0 (compatible; GPU p2p crawler http://gpu.sourceforge.net/search_engine.php)",
- "Mozilla/4.0 (compatible; grub-client-0.2.x; Crawl your stuff with http://grub.org)",
- "Mozilla/4.0 (compatible; grub-client-0.3.x; Crawl your own stuff with http://grub.org)",
- "Mozilla/4.0 (compatible; grub-client-2.x)",
- "Mozilla/4.0 (compatible; Iplexx Spider/1.0 http://www.iplexx.at)",
- "Mozilla/4.0 (compatible; MSIE 4.01; Vonna.com b o t)",
- "Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; PPC; 240x320; SPV M700; OpVer OrangeBot-Mobile 2008.0 (mobilesearch.support@orange-ftgroup.com)",
- "Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) Indonesia Interactive",
- "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) (samualt9@bigfoot.com)",
- "Mozilla/4.0 (compatible; MSIE 5.0; NetNose-Crawler 2.0; A New Search Experience: http://www.netnose.com)",
- "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) TrueRobot; 1.5",
- "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)",
- "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot; 1.6",
- "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent",
- "Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com)",
- "Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com/; http://www.galaxy.com/info/crawler.html)",
- "Mozilla/4.0 (compatible; MSIE 5.0; YANDEX)",
- "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; obot)",
- "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; QXW03018)",
- "Mozilla/4.0 (compatible; MSIE 6.0 compatible; Asterias Crawler v4; +http://www.singingfish.com/help/spider.html; webmaster@singingfish.com); SpiderThread Revision: 3.10",
- "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Skampy/0.9.x [en]",
- "Mozilla/4.0 (compatible; MSIE 6.0; TargetSeek/1.0; +http://www.targetgroups.net/TargetSeek.html)",
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP entries t_st; http://tuezilla.de/t_st-odp-entries-agent.html)",
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP links test; http://tuezilla.de/test-odp-links-agent.html)",
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)",
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; heritrix/1.3.0 http://www.cs.washington.edu/research/networking/websys/)",
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 qihoobot@qihoo.net)",
- "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)",
- "Mozilla/4.0 (compatible; MSIE enviable; DAUMOA 2.0; DAUM Web Robot; Daum Communications Corp., Korea; +http://ws.daum.net/aboutkr.html)",
- "Mozilla/4.0 (compatible; MSIE is not me; DAUMOA/1.0.1; DAUM Web Robot; Daum Communications Corp., Korea)",
- "Mozilla/4.0 (compatible; NaverBot/1.0; http://help.naver.com/delete_main.asp)",
- "Mozilla/4.0 (compatible; SpeedySpider; www.entireweb.com)",
- "Mozilla/4.0 (compatible; www.galaxy.com)",
- "Mozilla/4.0 (compatible; Y!J; for robot study; keyoshid)",
- "Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)",
- "Mozilla/4.0 (JemmaTheTourist;http://www.activtourist.com)",
- "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
- "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 FAKE (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
- "Mozilla/4.0 (Mozilla; http://www.mozilla.org/docs/en/bot.html; master@mozilla.com)",
- "Mozilla/4.0 (Sleek Spider/1.2)",
- "Mozilla/4.0 compatible FurlBot/Furl Search 2.0 (FurlBot; http://www.furl.net; wn.furlbot@looksmart.net)",
- "Mozilla/4.0 compatible ZyBorg/1.0 (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)",
- "Mozilla/4.0 compatible ZyBorg/1.0 (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)",
- "Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)",
- "Mozilla/4.0 compatible ZyBorg/1.0 for Homepage (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)",
- "Mozilla/4.0 efp@gmx.net",
- "Mozilla/4.0 [en] (Ask Jeeves Corporate Spider)",
- "Mozilla/4.0(compatible; Zealbot 1.0)",
- "Mozilla/4.04 (compatible; Dulance bot; +http://www.dulance.com/bot.jsp)",
- "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_TrueRobot/1.4 libwww/5.2.8",
- "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_VoilaBot/1.6 libwww/5.3.2",
- "Mozilla/4.6 [en] (http://www.cnet.com/)",
- "Mozilla/4.7",
- "Mozilla/4.7 (compatible; http://eidetica.com/spider)",
- "Mozilla/4.7 (compatible; Intelliseek; http://www.intelliseek.com)",
- "Mozilla/4.7 (compatible; Whizbang)",
- "Mozilla/4.7 (compatible; WhizBang; http://www.whizbang.com/crawler)",
- "Mozilla/4.7 [en](BecomeBot@exava.com)",
- "Mozilla/4.7 [en](Exabot@exava.com)",
- "Mozilla/4.72 [en] (BACS http://www.ba.be)",
- "Mozilla/5.0",
- "Mozilla/5.0 (+http://www.eurekster.com/mammoth) Mammoth/0.1",
- "Mozilla/5.0 (+http://www.sli-systems.com/) Mammoth/0.1",
- "Mozilla/5.0 (Clustered-Search-Bot/1.0; support@clush.com; http://www.clush.com/)",
- "Mozilla/5.0 (compatible; +http://www.evri.com/evrinid)",
- "Mozilla/5.0 (compatible; 008/0.83; http://www.80legs.com/spider.html;) Gecko/2008032620",
- "Mozilla/5.0 (compatible; Abonti/0.8 - http://www.abonti.com)",
- "Mozilla/5.0 (compatible; aiHitBot/1.0; +http://www.aihit.com/)",
- "Mozilla/5.0 (compatible; AnsearchBot/1.x; +http://www.ansearch.com.au/)",
- "Mozilla/5.0 (compatible; archive.org_bot/1.10.0 +http://www.loc.gov/minerva/crawl.html)",
- "Mozilla/5.0 (compatible; archive.org_bot/1.13.1x http://crawler.archive.org)",
- "Mozilla/5.0 (compatible; archive.org_bot/1.5.0-200506132127 http://crawler.archive.org) Hurricane Katrina",
- "Mozilla/5.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml)",
- "Mozilla/5.0 (compatible; BecomeBot/1.23; http://www.become.com/webmasters.html)",
- "Mozilla/5.0 (compatible; BecomeBot/1.xx; MSIE 6.0 compatible; http://www.become.com/webmasters.html)",
- "Mozilla/5.0 (compatible; BecomeBot/2.0beta; http://www.become.com/webmasters.html)",
- "Mozilla/5.0 (compatible; BecomeBot/2.x; MSIE 6.0 compatible; http://www.become.com/site_owners.html)",
- "Mozilla/5.0 (compatible; BecomeJPBot/2.3; MSIE 6.0 compatible; +http://www.become.co.jp/site_owners.html)",
- "Mozilla/5.0 (compatible; BlogRefsBot/0.1; http://www.blogrefs.com/about/bloggers)",
- "Mozilla/5.0 (compatible; Bot; +http://pressemitteilung.ws/spamfilter",
- "Mozilla/5.0 (compatible; BuzzRankingBot/1.0; +http://www.buzzrankingbot.com/)",
- "Mozilla/5.0 (compatible; Charlotte/1.0b; charlotte@betaspider.com)",
- "Mozilla/5.0 (compatible; Charlotte/1.0b; http://www.searchme.com/support/)",
- "Mozilla/5.0 (compatible; Crawling jpeg; http://www.yama.info.waseda.ac.jp)",
- "Mozilla/5.0 (compatible; de/1.13.2 +http://www.de.com)",
- "Mozilla/5.0 (compatible; Diffbot/0.1; +http://www.diffbot.com)",
- "Mozilla/5.0 (compatible; DNS-Digger-Explorer/1.0; +http://www.dnsdigger.com)",
- "Mozilla/5.0 (compatible; DNS-Digger/1.0; +http://www.dnsdigger.com)",
- "Mozilla/5.0 (compatible; EARTHCOM.info/2.01; http://www.earthcom.info)",
- "Mozilla/5.0 (compatible; EARTHCOM/2.2; +http://enter4u.eu)",
- "Mozilla/5.0 (compatible; Exabot Test/3.0; +http://www.exabot.com/go/robot)",
- "Mozilla/5.0 (compatible; FatBot 2.0; http://www.thefind.com/main/CrawlerFAQs.fhtml)",
- "Mozilla/5.0 (compatible; Galbot/1.0; +http://www.galbot.com/bot.html)",
- "mozilla/5.0 (compatible; genevabot http://www.healthdash.com)",
- "Mozilla/5.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
- "mozilla/5.0 (compatible; heritrix/1.0.4 http://innovationblog.com)",
- "Mozilla/5.0 (compatible; heritrix/1.10.2 +http://i.stanford.edu/)",
- "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://newstin.com/)",
- "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com)",
- "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com) [email:paul@page-store.com]",
- "mozilla/5.0 (compatible; heritrix/1.3.0 http://archive.crawler.org)",
- "Mozilla/5.0 (compatible; heritrix/1.4.0 +http://www.chepi.net)",
- "Mozilla/5.0 (compatible; heritrix/1.4t http://www.truveo.com/)",
- "Mozilla/5.0 (compatible; heritrix/1.5.0 http://www.l3s.de/~kohlschuetter/projects/crawling/)",
- "Mozilla/5.0 (compatible; heritrix/1.5.0-200506231921 http://pandora.nla.gov.au/crawl.html)",
- "Mozilla/5.0 (compatible; heritrix/1.6.0 http://www.worio.com/)",
- "Mozilla/5.0 (compatible; heritrix/1.7.0 +http://www.greaterera.com/)",
- "Mozilla/5.0 (compatible; heritrix/1.x.x +http://www.accelobot.com)",
- "Mozilla/5.0 (compatible; heritrix/2.0.0-RC1 +http://www.aol.com)",
- "Mozilla/5.0 (compatible; Hermit Search. Com; +http://www.hermitsearch.com)",
- "Mozilla/5.0 (compatible; HyperixScoop/1.3; +http://www.hyperix.com)",
- "Mozilla/5.0 (compatible; IDBot/1.0; +http://www.id-search.org/bot.html)",
- "Mozilla/5.0 (compatible; InterseekWeb/3.x)",
- "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)",
- "Mozilla/5.0 (compatible; LemSpider 0.1)",
- "Mozilla/5.0 (compatible; MojeekBot/2.0; http://www.mojeek.com/bot.html)",
- "Mozilla/5.0 (compatible; MSIE 6.0; Podtech Network; crawler_admin@podtech.net)",
- "Mozilla/5.0 (compatible; OnetSzukaj/5.0; http://szukaj.onet.pl)",
- "Mozilla/5.0 (compatible; PalmeraBot; http://www.links24h.com/help/palmera) Version 0.001",
- "Mozilla/5.0 (compatible; pogodak.ba/3.x)",
- "Mozilla/5.0 (compatible; Pogodak.hr/3.1)",
- "Mozilla/5.0 (compatible; PWeBot/3.1; http://www.programacionweb.net/robot.php)",
- "Mozilla/5.0 (compatible; Quantcastbot/1.0; www.quantcast.com)",
- "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)",
- "Mozilla/5.0 (compatible; Scrubby/2.2; http://www.scrubtheweb.com/)",
- "Mozilla/5.0 (compatible; ShunixBot/1.x.x +http://www.shunix.com/robot.htm)",
- "Mozilla/5.0 (compatible; ShunixBot/1.x; http://www.shunix.com/bot.htm)",
- "Mozilla/5.0 (compatible; SkreemRBot +http://skreemr.com)",
- "Mozilla/5.0 (compatible; SummizeBot +http://www.summize.com)",
- "Mozilla/5.0 (compatible; Synoobot/0.9; http://www.synoo.com/search/bot.html)",
- "Mozilla/5.0 (compatible; Theophrastus/x.x; http://users.cs.cf.ac.uk/N.A.Smith/theophrastus.php)",
- "Mozilla/5.0 (compatible; TridentSpider/3.1)",
- "Mozilla/5.0 (compatible; Vagabondo/2.1; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
- "Mozilla/5.0 (compatible; Webduniabot/1.0; +http://search.webdunia.com/bot.aspx)",
- "Mozilla/5.0 (compatible; worio bot heritrix/1.10.0 +http://worio.com)",
- "Mozilla/5.0 (compatible; WoW Lemmings Kathune/2.0;http://www.wowlemmings.com/kathune.html)",
- "Mozilla/5.0 (compatible; Yahoo! DE Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
- "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
- "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
- "Mozilla/5.0 (compatible; Yoono; http://www.yoono.com/)",
- "Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/; )",
- "Mozilla/5.0 (compatible; Zenbot/1.3; +http://zen.co.za/webmasters/)",
- "Mozilla/5.0 (compatible; zermelo +http://www.powerset.com) [email:paul@page-store.com,crawl@powerset.com]",
- "Mozilla/5.0 (compatible;archive.org_bot/1.7.1; collectionId=316; Archive-It; +http://www.archive-it.org)",
- "Mozilla/5.0 (compatible;archive.org_bot/heritrix-1.9.0-200608171144 +http://pandora.nla.gov.au/crawl.html)",
- "Mozilla/5.0 (compatible;MAINSEEK_BOT)",
- "Mozilla/5.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Mozilla/5.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Mozilla/5.0 (Twiceler-0.9 http://www.cuill.com/twiceler/robot.html)",
- "Mozilla/5.0 (Version: xxxx Type:xx)",
- "Mozilla/5.0 (wgao@genieknows.com)",
- "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.7) NimbleCrawler 1.11 obeys UserAgent NimbleCrawler For problems contact: crawler_at_dataalchemy.com",
- "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
- "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
- "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@health",
- "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@healthline.com",
- "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:Spinn3r (Spinn3r 3.1); http://spinn3r.com/robot) Gecko/20021130",
- "Mozilla/5.0 URL-Spider",
- "Mozilla/5.0 usww.com-Spider-for-w8.net",
- "Mozilla/5.0 wgao@genieknows.com",
- "Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
- "MQbot metaquerier.cs.uiuc.edu/crawler",
- "MQBOT/Nutch-0.9-dev (MQBOT Nutch Crawler; http://falcon.cs.uiuc.edu; mqbot@cs.uiuc.edu)",
- "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)",
- "msnbot-Products/1.0 (+http://search.msn.com/msnbot.htm)",
- "MSNBOT/0.xx (http://search.msn.com/msnbot.htm)",
- "msnbot/x.xx ( http://search.msn.com/msnbot.htm)",
- "MSNBOT_Mobile MSMOBOT Mozilla/2.0 (compatible; MSIE 4.02; Windows CE; Default)",
- "MSNPTC/1.0",
- "MSRBOT (http://research.microsoft.com/research/sv/msrbot)",
- "multicrawler ( http://sw.deri.org/2006/04/multicrawler/robots.html)",
- "MultiText/0.1",
- "MusicWalker2.0 ( http://www.somusical.com)",
- "MVAClient",
- "Mylinea.com Crawler 2.0",
- "Naamah 1.0.1/Blogbot (http://blogbot.de/)",
- "Naamah 1.0a/Blogbot (http://blogbot.de/)",
- "NABOT/5.0",
- "nabot_1.0",
- "NameOfAgent (CMS Spider)",
- "NASA Search 1.0",
- "NationalDirectory-WebSpider/1.3",
- "NationalDirectoryAddURL/1.0",
- "NaverBot-1.0 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
- "NaverBot_dloader/1.5",
- "NavissoBot",
- "NavissoBot/1.7 (+http://navisso.com/)",
- "NCSA Beta 1 (http://vias.ncsa.uiuc.edu/viasarchivinginformation.html)",
- "Nebullabot/2.2 (http://bot.nebulla.info)",
- "NEC Research Agent -- compuman at research.nj.nec.com",
- "Net-Seekr Bot/Net-Seekr Bot V1 (http://www.net-seekr.com)",
- "NetinfoBot/1.0 (http://netinfo.bg/netinfobot.html)",
- "NetLookout/2.24",
- "Netluchs/0.8-dev ( ; http://www.netluchs.de/; ___don't___spam_me_@netluchs.de)",
- "NetNoseCrawler/v1.0",
- "Netprospector JavaCrawler",
- "NetResearchServer(http://www.look.com)",
- "NetResearchServer/x.x(loopimprovements.com/robot.html)",
- "NetSeer/Nutch-0.9 (NetSeer Crawler; http://www.netseer.com; crawler@netseer.com)",
- "NetSprint -- 2.0",
- "NetWhatCrawler/0.06-dev (NetWhatCrawler from NetWhat.com; http://www.netwhat.com; support@netwhat.com)",
- "NetZippy",
- "NextGenSearchBot 1 (for information visit http://www.eliyon.com/NextGenSearchBot)",
- "NextopiaBOT (+http://www.nextopia.com) distributed crawler client beta v0.x",
- "NG-Search/0.90 (NG-SearchBot; http://www.ng-search.com; )",
- "NG/1.0",
- "NG/4.0.1229",
- "NITLE Blog Spider/0.01",
- "Noago Spider",
- "Nokia-WAPToolkit/1.2 googlebot(at)googlebot.com",
- "Nokia6610/1.0 (3.09) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible;YahooSeeker/M1A1-R2D2; http://help.yahoo.com/help/us/ysearch/crawling/crawling-01.html)",
- "NokodoBot/1.x (+http://nokodo.com/bot.htm)",
- "Norbert the Spider(Burf.com)",
- "noxtrumbot/1.0 (crawler@noxtrum.com)",
- "noyona_0_1",
- "NP/0.1 (NP; http://www.nameprotect.com; npbot@nameprotect.com)",
- "NPBot (http://www.nameprotect.com/botinfo.html)",
- "NPBot-1/2.0",
- "Nsauditor/1.x",
- "nsyght.com/Nutch-1.0-dev (nsyght.com; Nsyght.com)",
- "nsyght.com/Nutch-x.x (nsyght.com; search.nsyght.com)",
- "nttdirectory_robot/0.9 (super-robot@super.navi.ocn.ne.jp)",
- "nuSearch Spider www.nusearch.com (compatible; MSIE 4.01)",
- "NuSearch Spider (compatible; MSIE 6.0)",
- "NuSearch Spider www.nusearch.com",
- "Nutch",
- "Nutch crawler/Nutch-0.9 (picapage.com; admin@picapage.com)",
- "Nutch/Nutch-0.9 (Eurobot; http://www.ayell.eu )",
- "NutchCVS/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)",
- "NutchCVS/0.7.1 (Nutch running at UW; http://www.nutch.org/docs/en/bot.html; sycrawl@cs.washington.edu)",
- "NutchEC2Test/Nutch-0.9-dev (Testing Nutch on Amazon EC2.; http://lucene.apache.org/nutch/bot.html; ec2test at lucene.com)",
- "NutchOrg/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)",
- "nutchsearch/Nutch-0.9 (Nutch Search 1.0; herceg_novi at yahoo dot com)",
- "NutchVinegarCrawl/Nutch-0.8.1 (Vinegar; http://www.cs.washington.edu; eytanadar at gmail dot com)",
- "obidos-bot (just looking for books.)",
- "ObjectsSearch/0.01-dev (ObjectsSearch;http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)",
- "ObjectsSearch/0.0x (ObjectsSearch; http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)",
- "oBot ((compatible;Win32))",
- "Ocelli/1.x (http://www.globalspec.com/Ocelli)",
- "Octora Beta - www.octora.com",
- "Octora Beta Bot - www.octora.com",
- "OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Internet CategorizerOmniExplorer http://www.omni-explorer.com/ car & shopping search (64.62.175.xxx)",
- "OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Job Crawler",
- "OmniExplorer_Bot/1.1x (+http://www.omni-explorer.com) Torrent Crawler",
- "OmniExplorer_Bot/x.xx (+http://www.omni-explorer.com) WorldIndexer",
- "Onet.pl SA- http://szukaj.onet.pl",
- "OntoSpider/1.0 libwww-perl/5.65",
- "OOZBOT/0.20 ( http://www.setooz.com/oozbot.html ; agentname at setooz dot_com )",
- "OpenAcoon v4.0.x (www.openacoon.de)",
- "Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)",
- "Openfind data gatherer- Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)",
- "Openfind Robot/1.1A2",
- "OpenISearch/1.x (www.openisearch.com)",
- "OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)",
- "OpenTextSiteCrawler/2.9.2",
- "OpenWebSpider/0.x.x (http://www.openwebspider.org)",
- "OpenWebSpider/x",
- "OpidooBOT (larbin2.6.3@unspecified.mail)",
- "Oracle Ultra Search",
- "OrangeSpider",
- "Orbiter/T-2.0 (+http://www.dailyorbit.com/bot.htm)",
- "Overture-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
- "ozelot/2.7.3 (Search engine indexer; www.flying-cat.de/ozelot; ozelot@flying-cat.de)",
- "PADLibrary Spider",
- "PageBitesHyperBot/600 (http://www.pagebites.com/)",
- "Pagebull http://www.pagebull.com/",
- "page_verifier (http://www.securecomputing.com/goto/pv)",
- "parallelContextFocusCrawler1.1parallelContextFocusCrawler1.1",
- "ParaSite/1.0b (http://www.ianett.com/parasite/)",
- "Patwebbot (http://www.herz-power.de/technik.html)",
- "PBrowse 1.4b",
- "pd02_1.0.0 pd02_1.0.0@dzimi@post.sk",
- "PEERbot www.peerbot.com",
- "PEval 1.4b",
- "PicoSearch/1.0",
- "Piffany_Web_Scraper_v0.x",
- "Piffany_Web_Spider_v0.x",
- "pipeLiner/0.3a (PipeLine Spider;http://www.pipeline-search.com/webmaster.html; webmaster'at'pipeline-search.com)",
- "pipeLiner/0.xx (PipeLine Spider; http://www.pipeline-search.com/webmaster.html)",
- "Pita",
- "PJspider/3.0 (pjspider@portaljuice.com; http://www.portaljuice.com)",
- "PlagiarBot/1.0",
- "PluckFeedCrawler/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://www.pluck.com; 1 subscribers)",
- "Pluggd/Nutch-0.9 (automated crawler http://www.pluggd.com;support at pluggd dot com)",
- "Poirot",
- "polybot 1.0 (http://cis.poly.edu/polybot/)",
- "Pompos/1.x http://dir.com/pompos.html",
- "Pompos/1.x pompos@iliad.fr",
- "Popdexter/1.0",
- "Port Huron Labs",
- "PortalBSpider/2.0 (spider@portalb.com)",
- "potbot 1.0",
- "PRCrawler/Nutch-0.9 (data mining development project; crawler@projectrialto.com)",
- "PrivacyFinder Cache Bot v1.0",
- "PrivacyFinder/1.1",
- "Production Bot 0116B",
- "Production Bot 2016B",
- "Production Bot DOT 3016B",
- "Program Shareware 1.0.2",
- "Project XP5 [2.03.07-111203]",
- "PROve AnswerBot 4.0",
- "ProWebGuide Link Checker (http://www.prowebguide.com)",
- "psbot/0.1 (+http://www.picsearch.com/bot.html)",
- "PSurf15a 11",
- "PSurf15a 51",
- "PSurf15a VA",
- "psycheclone",
- "PubCrawl (pubcrawl.stanford.edu)",
- "pulseBot (pulse Web Miner)",
- "PWeBot/1.2 Inspector (http://www.programacionweb.net/robot.php)",
- "PycURL",
- "Python-urllib/1.1x",
- "Python-urllib/2.0a1",
- "Qango.com Web Directory (http://www.qango.com/)",
- "QEAVis Agent/Nutch-0.9 (Quantitative Evaluation of Academic Websites Visibility; http://nlp.uned.es/qeavis",
- "QPCreep Test Rig ( We are not indexing- just testing )",
- "QuepasaCreep ( crawler@quepasacorp.com )",
- "QuepasaCreep v0.9.1x",
- "QueryN Metasearch",
- "QweeryBot/3.01 ( http://qweerybot.qweery.nl)",
- "Qweery_robot.txt_CheckBot/3.01 (http://qweerybot.qweery.com)",
- "R6_CommentReader_(www.radian6.com/crawler)",
- "R6_FeedFetcher_(www.radian6.com/crawler)",
- "rabaz (rabaz at gigabaz dot com)",
- "RaBot/1.0 Agent-admin/phortse@hanmail.net",
- "ramBot xtreme x.x",
- "RAMPyBot - www.giveRAMP.com/0.1 (RAMPyBot - www.giveRAMP.com; http://www.giveramp.com/bot.html; support@giveRAMP.com)",
- "RAMPyBot/0.8-dev (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
- "Rankivabot/3.2 (www.rankiva.com; 3.2; vzmxikn)",
- "Rational SiteCheck (Windows NT)",
- "Reaper [2.03.10-031204] (http://www.sitesearch.ca/reaper/)",
- "Reaper/2.0x (+http://www.sitesearch.ca/reaper)",
- "RedCarpet/1.2 (http://www.redcarpet-inc.com/robots.html)",
- "RedCell/0.1 (InfoSec Search Bot (Coming Soon); http://www.telegenetic.net/bot.html; lhall@telegenetic.net)",
- "RedCell/0.1 (RedCell; telegenetic.net/bot.html; lhall_at_telegenetic.net)",
- "RedKernel WWW-Spider 2/0 (+http://www-spider.redkernel-softwares.com/)",
- "rico/0.1",
- "RixBot (http://babelserver.org/rix)",
- "RoboCrawl (http://www.canadiancontent.net)",
- "RoboCrawl (www.canadiancontent.net)",
- "RoboPal (http://www.findpal.com/)",
- "Robot/www.pj-search.com",
- "Robot: NutchCrawler- Owner: wdavies@acm.org",
- "Robot@SuperSnooper.Com",
- "Robozilla/1.0",
- "Rotondo/3.1 libwww/5.3.1",
- "RRC (crawler_admin@bigfoot.com)",
- "RSSMicro.com RSS/Atom Feed Robot",
- "RSurf15a 41",
- "RSurf15a 51",
- "RSurf15a 81",
- "RufusBot (Rufus Web Miner;",
- "RufusBot (Rufus Web Miner; http://www.webaroo.com/rooSiteOwners.html)",
- "sait/Nutch-0.9 (SAIT Research; http://www.samsung.com)",
- "SandCrawler - Compatibility Testing",
- "SapphireWebCrawler/1.0 (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)",
- "SapphireWebCrawler/Nutch-1.0-dev (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)",
- "savvybot/0.2",
- "SBIder/0.7 (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)",
- "SBIder/0.8-dev (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)",
- "ScanWeb",
- "ScholarUniverse/0.8 (Nutch;+http://scholaruniverse.com/bot.jsp; fetch-agent@scholaruniverse.com)",
- "schwarzmann.biz-Spider_for_paddel.org+(http://www.innerprise.net/usp-spider.asp)",
- "ScollSpider/2.0 (+http://www.webwobot.com/ScollSpider.php)",
- "Scooter-3.0.EU",
- "Scooter-3.0.FS",
- "Scooter-3.0.HD",
- "Scooter-3.0.VNS",
- "Scooter-3.0QI",
- "Scooter-3.2",
- "Scooter-3.2.BT",
- "Scooter-3.2.DIL",
- "Scooter-3.2.EX",
- "Scooter-3.2.JT",
- "Scooter-3.2.NIV",
- "Scooter-3.2.SF0",
- "Scooter-3.2.snippet",
- "Scooter-3.3dev",
- "Scooter-ARS-1.1",
- "Scooter-ARS-1.1-ih",
- "scooter-venus-3.0.vns",
- "Scooter-W3-1.0",
- "Scooter-W3.1.2",
- "Scooter/1.0",
- "Scooter/1.0 scooter@pa.dec.com",
- "Scooter/1.1 (custom)",
- "Scooter/2.0 G.R.A.B. V1.1.0",
- "Scooter/2.0 G.R.A.B. X2.0",
- "Scooter/3.3",
- "Scooter/3.3.QA.pczukor",
- "Scooter/3.3.vscooter",
- "Scooter/3.3_SF",
- "Scooter2_Mercator_x-x.0",
- "Scooter_bh0-3.0.3",
- "Scooter_trk3-3.0.3",
- "ScoutAbout",
- "ScoutAnt/0.1; +http://www.ant.com/what_is_ant.com/",
- "scoutmaster",
- "Scrubby/2.x (http://www.scrubtheweb.com/)",
- "Scrubby/3.0 (+http://www.scrubtheweb.com/help/technology.html)",
- "Search+",
- "Search-Engine-Studio",
- "search.ch V1.4",
- "search.ch V1.4.2 (spiderman@search.ch; http://www.search.ch)",
- "Search/1.0 (http://www.innerprise.net/es-spider.asp)",
- "searchbot admin@google.com",
- "SearchByUsa/2 (SearchByUsa; http://www.SearchByUsa.com/bot.html; info@SearchByUsa.com)",
- "SearchdayBot",
- "SearchExpress Spider0.99",
- "SearchGuild/DMOZ/Experiment (searchguild@gmail.com)",
- "SearchGuild_DMOZ_Experiment (chris@searchguild.com)",
- "Searchit-Now Robot/2.2 (+http://www.searchit-now.co.uk)",
- "Searchmee! Spider v0.98a",
- "SearchSight/2.0 (http://SearchSight.com/)",
- "SearchSpider.com/1.1",
- "Searchspider/1.2 (SearchSpider; http://www.searchspider.com; webmaster@searchspider.com)",
- "SearchTone2.0 - IDEARE",
- "Seekbot/1.0 (http://www.seekbot.net/bot.html) HTTPFetcher/0.3",
- "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.0 (XDF)",
- "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2",
- "Seeker.lookseek.com",
- "Semager/1.1 (http://www.semager.de/blog/semager-bots/)",
- "Semager/1.x (http://www.semager.de)",
- "Sensis Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
- "Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
- "SeznamBot/1.0",
- "SeznamBot/1.0 (+http://fulltext.seznam.cz/)",
- "SeznamBot/2.0-test (+http://fulltext.sblog.cz/)",
- "ShablastBot 1.0",
- "Shim Crawler",
- "Shim-Crawler(Mozilla-compatible; http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp)",
- "ShopWiki/1.0 ( +http://www.shopwiki.com/)",
- "ShopWiki/1.0 ( +http://www.shopwiki.com/wiki/Help:Bot)",
- "Shoula.com Crawler 2.0",
- "SietsCrawler/1.1 (+http://www.siets.biz)",
- "Sigram/Nutch-1.0-dev (Test agent for Nutch development; http://www.sigram.com/bot.html; bot at sigram dot com)",
- "Siigle Orumcex v.001 Turkey (http://www.siigle.com)",
- "silk/1.0",
- "silk/1.0 (+http://www.slider.com/silk.htm)/3.7",
- "Sirketcebot/v.01 (http://www.sirketce.com/bot.html)",
- "SiteSpider +(http://www.SiteSpider.com/)",
- "SiteTruth.com site rating system",
- "SiteXpert",
- "Skampy/0.9.x (http://www.skaffe.com/skampy-info.html)",
- "Skimpy/0.x (http://www.skaffe.com/skampy-info.html)",
- "Skywalker/0.1 (Skywalker; anonymous; anonymous)",
- "Slarp/0.1",
- "Slider_Search_v1-de",
- "Slurp/2.0 (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Slurp/2.0-KiteWeekly (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Slurp/si (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
- "Slurpy Verifier/1.0",
- "SlySearch (slysearch@slysearch.com)",
- "SlySearch/1.0 http://www.plagiarism.org/crawler/robotinfo.html",
- "SlySearch/1.x http://www.slysearch.com",
- "smartwit.com",
- "SmiffyDCMetaSpider/1.0",
- "snap.com beta crawler v0",
- "Snapbot/1.0",
- "Snapbot/1.0 (Snap Shots, +http://www.snap.com)",
- "SnykeBot/0.6 (http://www.snyke.com)",
- "SocSciBot ()",
- "SoftHypermarketFileCheckBot/1.0+(+http://www.softhypermaket.com)",
- "sogou develop spider",
- "Sogou Orion spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
- "sogou spider",
- "Sogou web spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
- "sohu agent",
- "sohu-search",
- "Sosospider+(+http://help.soso.com/webspider.htm)",
- "speedfind ramBot xtreme 8.1",
- "Speedy Spider (Beta/x.x; speedy@entireweb.com)",
- "Speedy Spider (Entireweb; Beta/1.0; http://www.entireweb.com/about/search_tech/speedyspider/)",
- "Speedy_Spider (http://www.entireweb.com)",
- "Sphere Scout&v4.0 - scout at sphere dot com",
- "Sphider",
- "Spida/0.1",
- "Spider-Sleek/2.0 (+http://search-info.com/linktous.html)",
- "spider.batsch.com",
- "spider.yellopet.com - www.yellopet.com",
- "Spider/maxbot.com admin@maxbot.com",
- "SpiderKU/0.x",
- "SpiderMan",
- "SpiderMonkey/7.0x (SpiderMonkey.ca info at http://spidermonkey.ca/sm.shtml)",
- "Spinne/2.0",
- "Spinne/2.0 med",
- "Spinne/2.0 med_AH",
- "Spock Crawler (http://www.spock.com/crawler)",
- "sportsuchmaschine.de-Robot (Version: 1.02- powered by www.sportsuchmaschine.de)",
- "sproose/0.1-alpha (sproose crawler; http://www.sproose.com/bot.html; crawler@sproose.com)",
- "Sqworm/2.9.81-BETA (beta_release; 20011102-760; i686-pc-linux-gnu)",
- "Sqworm/2.9.85-BETA (beta_release; 20011115-775; i686-pc-linux-gnu)",
- "SSurf15a 11 ",
- "StackRambler/x.x ",
- "stat statcrawler@gmail.com",
- "Steeler/1.x (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)",
- "Steeler/3.3 (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)",
- "Strategic Board Bot (+http://www.strategicboard.com)",
- "Strategic Board Bot (+http://www.strategicboard.com)",
- "Submission Spider at surfsafely.com",
- "suchbaer.de",
- "suchbaer.de (CrawlerAgent v0.103)",
- "suchbot",
- "Suchknecht.at-Robot",
- "suchpadbot/1.0 (+http://www.suchpad.de)",
- "SurferF3 1/0",
- "suzuran",
- "Swooglebot/2.0. (+http://swoogle.umbc.edu/swooglebot.htm)",
- "SWSBot-Images/1.2 http://www.smartwaresoft.com/swsbot12.html",
- "SygolBot http://www.sygol.net",
- "SynoBot",
- "Syntryx ANT Scout Chassis Pheromone; Mozilla/4.0 compatible crawler",
- "Szukacz/1.x",
- "Szukacz/1.x (robot; www.szukacz.pl/jakdzialarobot.html; szukacz@proszynski.pl)",
- "tags2dir.com/0.8 (+http://tags2dir.com/directory/)",
- "Tagword (http://tagword.com/dmoz_survey.php)",
- "Talkro Web-Shot/1.0 (E-mail: webshot@daumsoft.com- Home:",
- "TCDBOT/Nutch-0.8 (PhD student research;http://www.tcd.ie; mcgettrs at t c d dot IE)",
- "TECOMAC-Crawler/0.x",
- "Tecomi Bot (http://www.tecomi.com/bot.htm)",
- "Teemer (NetSeer, Inc. is a Los Angeles based Internet startup company.; http://www.netseer.com/crawler.html; crawler@netseer.com)",
- "Teoma MP",
- "teomaagent crawler-admin@teoma.com",
- "teomaagent1 [crawler-admin@teoma.com]",
- "teoma_agent1",
- "Teradex Mapper; mapper@teradex.com; http://www.teradex.com",
- "terraminds-bot/1.0 (support@terraminds.de)",
- "TerrawizBot/1.0 (+http://www.terrawiz.com/bot.html)",
- "Test spider",
- "TestCrawler/Nutch-0.9 (Testing Crawler for Research ; http://balihoo.com/index.aspx; tgautier at balihoo dot com)",
- "TheRarestParser/0.2a (http://therarestwords.com/)",
- "TheSuBot/0.1 (www.thesubot.de)",
- "thumbshots-de-Bot (Version: 1.02- powered by www.thumbshots.de)",
- "timboBot/0.9 http://www.breakingblogs.com/timbo_bot.html",
- "TinEye/1.1 (http://tineye.com/crawler.html)",
- "tivraSpider/1.0 (crawler@tivra.com)",
- "TJG/Spider",
- "Tkensaku/x.x(http://www.tkensaku.com/q.html)",
- "Topodia/1.2-dev (Topodia - Crawler for HTTP content indexing; http://www.topodia.com/; support@topodia.com)",
- "Toutatis x-xx.x (hoppa.com)",
- "Toutatis x.x (hoppa.com)",
- "Toutatis x.x-x",
- "traazibot/testengine (+http://www.traazi.de)",
- "Trampelpfad-Spider",
- "Trampelpfad-Spider-v0.1",
- "TSurf15a 11",
- "Tumblr/1.0 RSS syndication (+http://www.tumblr.com/) (support@tumblr.com)",
- "TurnitinBot/x.x (http://www.turnitin.com/robot/crawlerinfo.html)",
- "Turnpike Emporium LinkChecker/0.1",
- "TutorGig/1.5 (+http://www.tutorgig.com/crawler)",
- "Tutorial Crawler 1.4 (http://www.tutorgig.com/crawler)",
- "Twiceler www.cuill.com/robots.html",
- "Twiceler-0.9 http://www.cuill.com/twiceler/robot.html",
- "Tycoon Agent/Nutch-1.0-dev",
- "TygoBot",
- "TygoProwler",
- "UIowaCrawler/1.0",
- "UKWizz/Nutch-0.8.1 (UKWizz Nutch crawler; http://www.ukwizz.com/)",
- "Ultraseek",
- "Under the Rainbow 2.2",
- "UofTDB_experiment (leehyun@cs.toronto.edu)",
- "updated/0.1-alpha (updated crawler; http://www.updated.com; crawler@updated.com)",
- "updated/0.1beta (updated.com; http://www.updated.com; crawler@updated.om)",
- "Uptimebot",
- "UptimeBot(www.uptimebot.com)",
- "URL Spider Pro/x.xx (innerprise.net)",
- "urlfan-bot/1.0; +http://www.urlfan.com/site/bot/350.html",
- "URL_Spider_Pro/x.x",
- "URL_Spider_Pro/x.x+(http://www.innerprise.net/usp-spider.asp)",
- "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
- "User-Agent: Mozilla/4.0 (SKIZZLE! Distributed Internet Spider v1.0 - www.SKIZZLE.com)",
- "USyd-NLP-Spider (http://www.it.usyd.edu.au/~vinci/bot.html)",
- "VadixBot",
- "Vagabondo-WAP/2.0 (webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)/1.0 Profile",
- "Vagabondo/1.x MT (webagent@wise-guys.nl)",
- "Vagabondo/2.0 MT",
- "Vagabondo/2.0 MT (webagent at wise-guys dot nl)",
- "Vagabondo/2.0 MT (webagent@NOSPAMwise-guys.nl)",
- "Vagabondo/3.0 (webagent at wise-guys dot nl)",
- "Vakes/0.01 (Vakes; http://www.vakes.com/; search@vakes.com)",
- "versus 0.2 (+http://versus.integis.ch)",
- "versus crawler eda.baykan@epfl.ch",
- "VeryGoodSearch.com.DaddyLongLegs",
- "verzamelgids.nl - Networking4all Bot/x.x",
- "Verzamelgids/2.2 (http://www.verzamelgids.nl)",
- "Vespa Crawler",
- "VisBot/2.0 (Visvo.com Crawler; http://www.visvo.com/bot.html; bot@visvo.com)",
- "Vision Research Lab image spider at vision.ece.ucsb.edu",
- "VMBot/0.x.x (VMBot; http://www.VerticalMatch.com/; vmbot@tradedot.com)",
- "Vortex/2.2 (+http://marty.anstey.ca/robots/vortex/)",
- "voyager-hc/1.0",
- "voyager/1.0",
- "voyager/2.0 (http://www.kosmix.com/html/crawler.html)",
- "VSE/1.0 (testcrawler@hotmail.com)",
- "VSE/1.0 (testcrawler@vivisimo.com)",
- "vspider",
- "vspider/3.x",
- "VWBOT/Nutch-0.9-dev (VWBOT Nutch Crawler; http://vwbot.cs.uiuc.edu;+vwbot@cs.uiuc.edu",
- "W3SiteSearch Crawler_v1.1 http://www.w3sitesearch.de",
- "wadaino.jp-crawler 0.2 (http://wadaino.jp/)",
- "Wavefire/0.8-dev (Wavefire; http://www.wavefire.com; info@wavefire.com)",
- "Waypath development crawler - info at waypath dot com",
- "Waypath Scout v2.x - info at waypath dot com",
- "Web Snooper",
- "web2express.org/Nutch-0.9-dev (leveled playing field; http://web2express.org/; info at web2express.org)",
- "WebAlta Crawler/1.2.1 (http://www.webalta.ru/bot.html)",
- "WebarooBot (Webaroo Bot;",
- "WebarooBot (Webaroo Bot; http://www.webaroo.com/rooSiteOwners.html)",
- "webbandit/4.xx.0",
- "Webclipping.com",
- "WebCompass 2.0",
- "WebCorp/1.0",
- "webcrawl.net",
- "WebFindBot(http://www.web-find.com)",
- "Webglimpse 2.xx.x (http://webglimpse.net)",
- "Weblog Attitude Diffusion 1.0",
- "webmeasurement-bot, http://rvs.informatik.uni-leipzig.de",
- "WebRankSpider/1.37 (+http://ulm191.server4you.de/crawler/)",
- "WebSearch.COM.AU/3.0.1 (The Australian Search Engine; http://WebSearch.COM.AU; Search@WebSearch.COM.AU)",
- "WebSearchBench WebCrawler v0.1(Experimental)",
- "WebsiteWorth v1.0",
- "Webspinne/1.0 webmaster@webspinne.de",
- "Websquash.com (Add url robot)",
- "WebStat/1.0 (Unix; beta; 20040314)",
- "Webster v0.3 ( http://webster.healeys.net/ )",
- "WebVac (webmaster@pita.stanford.edu)",
- "Webverzeichnis.de - Telefon: 01908 / 26005",
- "WebVulnCrawl.unknown/1.0 libwww-perl/5.803",
- "Wells Search II",
- "WEP Search 00",
- "WFARC",
- "whatUseek_winona/3.0",
- "WhizBang! Lab",
- "Willow Internet Crawler by Twotrees V2.1",
- "WinHTTP Example/1.0",
- "WinkBot/0.06 (Wink.com search engine web crawler; http://www.wink.com/Wink:WinkBot; winkbot@wink.com)",
- "WIRE/0.11 (Linux; i686; Bot,Robot,Spider,Crawler,aromano@cli.di.unipi.it)",
- "WIRE/0.x (Linux; i686; Bot,Robot,Spider,Crawler)",
- "WISEbot/1.0 (WISEbot@koreawisenut.com; http://wisebot.koreawisenut.com)",
- "worio heritrix bot (+http://worio.com/)",
- "woriobot ( http://www.worio.com/)",
- "WorldLight",
- "Wotbox/alpha0.6 (bot@wotbox.com; http://www.wotbox.com)",
- "Wotbox/alpha0.x.x (bot@wotbox.com; http://www.wotbox.com) Java/1.4.1_02",
- "WSB WebCrawler V1.0 (Beta)- cl@cs.uni-dortmund.de",
- "WSB, http://websearchbench.cs.uni-dortmund.de",
- "wume_crawler/1.1 (http://wume.cse.lehigh.edu/~xiq204/crawler/)",
- "Wwlib/Linux",
- "www.arianna.it",
- "WWWeasel Robot v1.00 (http://wwweasel.de)",
- "wwwster/1.x (Beta- mailto:gue@cis.uni-muenchen.de)",
- "X-Crawler ",
- "xirq/0.1-beta (xirq; http://www.xirq.com; xirq@xirq.com)",
- "xyro_(xcrawler@cosmos.inria.fr)",
- "Y!J-BSC/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
- "Y!J-SRD/1.0",
- "Y!J/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
- "yacy (www.yacy.net; v20040602; i386 Linux 2.4.26-gentoo-r13; java 1.4.2_06; MET/en)",
- "yacybot (x86 Windows XP 5.1; java 1.5.0_06; Europe/de) yacy.net",
- "Yahoo Pipes 1.0",
- "Yahoo! Mindset",
- "Yahoo-Blogs/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )",
- "Yahoo-MMAudVid/1.0 (mms dash mmaudvidcrawler dash support at yahoo dash inc dot com)",
- "Yahoo-MMAudVid/2.0(mms dash mm aud vid crawler dash support at yahoo dash inc.com ;Mozilla 4.0 compatible; MSIE 7.0;Windows NT 5.0; .NET CLR 2.0)",
- "Yahoo-MMCrawler/3.x (mm dash crawler at trd dot overture dot com)",
- "Yahoo-Test/4.0",
- "Yahoo-VerticalCrawler-FormerWebCrawler/3.9 crawler at trd dot overture dot com; http://www.alltheweb.com/help/webmaster/crawler",
- "YahooFeedSeeker/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://publisher.yahoo.com/rssguide)",
- "YahooSeeker-Testing/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/)",
- "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)",
- "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/yahooseeker.html)",
- "YahooSeeker/1.1 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)",
- "YahooSeeker/bsv3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )",
- "YahooSeeker/CafeKelsa-dev (compatible; Konqueror/3.2; FreeBSD ;cafekelsa-dev-webmaster@yahoo-inc.com )",
- "Yandex/1.01.001 (compatible; Win16; I)",
- "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)",
- "yarienavoir.net/0.2",
- "Yeti",
- "Yeti/0.01 (nhn/1noon, yetibot@naver.com, check robots.txt daily and follows it)",
- "Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)",
- "yggdrasil/Nutch-0.9 (yggdrasil biorelated search engine; www dot biotec dot tu minus dresden do de slash schroeder; heiko dot dietze at biotec dot tu minus dresden dot de)",
- "YodaoBot/1.0 (http://www.yodao.com/help/webmaster/spider/; )",
- "yoofind/yoofind-0.1-dev (yoono webcrawler; http://www.yoono.com ; MyEmail)",
- "yoogliFetchAgent/0.1",
- "yoono/1.0 web-crawler/1.0",
- "YottaCars_Bot/4.12 (+http://www.yottacars.com) Car Search Engine ",
- "YottaShopping_Bot/4.12 (+http://www.yottashopping.com) Shopping Search Engine",
- "Zao-Crawler",
- "Zao-Crawler 0.2b",
- "Zao/0.1 (http://www.kototoi.org/zao/)",
- "ZBot/1.00 (icaulfield@zeus.com)",
- "Zearchit",
- "ZeBot_lseek.net (bot@ze.bz)",
- "ZeBot_www.ze.bz (ze.bz@hotmail.com)",
- "zedzo.digest/0.1 (http://www.zedzo.com/)",
- "zermelo Mozilla/5.0 compatible; heritrix/1.12.1 (+http://www.powerset.com) [email:crawl@powerset.com,email:paul@page-store.com]",
- "zerxbot/Version 0.6 libwww-perl/5.79",
- "Zeus ThemeSite Viewer Webster Pro V2.9 Win32",
- "Zeus xxxxx Webster Pro V2.9 Win32",
- "Zeusbot/0.07 (Ulysseek's web-crawling robot; http://www.zeusbot.com; agent@zeusbot.com)",
- "ZipppBot/0.xx (ZipppBot; http://www.zippp.net; webmaster@zippp.net)",
- "ZIPPPCVS/0.xx (ZipppBot/.xx;http://www.zippp.net; webmaster@zippp.net)",
- "Zippy v2.0 - Zippyfinder.com",
- "ZoomSpider - wrensoft.com",
- "zspider/0.9-dev http://feedback.redkolibri.com/",
- "ZyBorg/1.0 (ZyBorg@WISEnut.com; http://www.WISEnut.com)"]
+ # Load the sample bot user agents from the vendored crawler-user-agents JSON.
+ # File.read closes its handle; the bare File.open used before never did.
+ require 'json'
+ botlist_path = File.join(File.dirname(__FILE__), '../../../lib/botlist/crawler-user-agents.json')
+ # The parsed file is iterated as a list of entries, each with an 'instances'
+ # array; merge those arrays into one flat list of user-agent strings.
+ LIST = JSON.parse(File.read(botlist_path)).flat_map { |agent| agent['instances'] }
diff --git a/lib/botlist/crawler-user-agents.json b/lib/botlist/crawler-user-agents.json
new file mode 100644
index 00000000..7c675e90
--- /dev/null
+++ b/lib/botlist/crawler-user-agents.json
@@ -0,0 +1,3723 @@
+ {
+ "pattern": "Googlebot\\/",
+ "url": "http://www.google.com/bot.html",
+ "instances": [
+ "Googlebot/2.1 (+http://www.google.com/bot.html)",
+ "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36",
+ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview Analytics) Chrome/27.0.1453 Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Googlebot-Mobile",
+ "instances": [
+ "DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)",
+ "Nokia6820/2.0 (4.83) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)",
+ "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/ (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Googlebot-Image",
+ "instances": [
+ "Googlebot-Image/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "Googlebot-News",
+ "instances": [
+ "Googlebot-News"
+ ]
+ }
+ ,
+ {
+ "pattern": "Googlebot-Video",
+ "instances": [
+ "Googlebot-Video/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "AdsBot-Google([^-]|$)",
+ "url": "https://support.google.com/webmasters/answer/1061943?hl=en",
+ "instances": [
+ "AdsBot-Google (+http://www.google.com/adsbot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "AdsBot-Google-Mobile",
+ "addition_date": "2017/08/21",
+ "url": "https://support.google.com/adwords/answer/2404197",
+ "instances": [
+ "AdsBot-Google-Mobile-Apps",
+ "Mozilla/5.0 (Linux; Android 5.0; SM-G920A) AppleWebKit (KHTML, like Gecko) Chrome Mobile Safari (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Feedfetcher-Google",
+ "addition_date": "2018/06/27",
+ "url": "https://support.google.com/webmasters/answer/178852",
+ "instances": [
+ "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers; feed-id=728742641706423)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Mediapartners-Google",
+ "url": "https://support.google.com/webmasters/answer/1061943?hl=en",
+ "instances": [
+ "Mediapartners-Google",
+ "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa/4.0 (Following Mediapartners-Google)",
+ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 10_0 like Mac OS X; en-us) AppleWebKit/602.1.38 (KHTML, like Gecko) Version/10.0 Mobile/14A5297c Safari/602.1 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)",
+ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Mediapartners \\(Googlebot\\)",
+ "addition_date": "2017/08/08",
+ "url": "https://support.google.com/webmasters/answer/1061943?hl=en",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "APIs-Google",
+ "addition_date": "2017/08/08",
+ "url": "https://support.google.com/webmasters/answer/1061943?hl=en",
+ "instances": [
+ "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "bingbot",
+ "url": "http://www.bing.com/bingbot.htm",
+ "instances": [
+ "Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 530) like Gecko (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm",
+ "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) SitemapProbe",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+ "Mozilla/5.0 (seoanalyzer; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Slurp",
+ "url": "http://help.yahoo.com/help/us/ysearch/slurp",
+ "instances": [
+ "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)",
+ "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+ "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "[wW]get",
+ "instances": [
+ "WGETbot/1.0 (+http://wget.alanreed.org)",
+ "Wget/1.14 (linux-gnu)"
+ ]
+ }
+ ,
+ {
+ "pattern": "curl",
+ "instances": [
+ "eCairn-Grabber/1.0 (+http://ecairn.com/grabber) curl/7.15"
+ ]
+ }
+ ,
+ {
+ "pattern": "LinkedInBot",
+ "instances": [
+ "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)",
+ "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/4.3 +http://www.linkedin.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Python-urllib",
+ "instances": [
+ "Python-urllib/2.5",
+ "Python-urllib/2.6",
+ "Python-urllib/2.7",
+ "Python-urllib/3.1",
+ "Python-urllib/3.2",
+ "Python-urllib/3.3",
+ "Python-urllib/3.4",
+ "Python-urllib/3.5",
+ "Python-urllib/3.6"
+ ]
+ }
+ ,
+ {
+ "pattern": "python-requests",
+ "addition_date": "2018/05/27",
+ "instances": [
+ "python-requests/2.18.4"
+ ]
+ }
+ ,
+ {
+ "pattern": "libwww",
+ "instances": [
+ "2Bone_LinkChecker/1.0 libwww-perl/6.03",
+ "2Bone_LinkChkr/1.0 libwww-perl/6.03",
+ "W3C-checklink/2.90 libwww-perl/5.64",
+ "W3C-checklink/ libwww-perl/5.64",
+ "W3C-checklink/4.2 [4.20] libwww-perl/5.803",
+ "W3C-checklink/4.2.1 [4.21] libwww-perl/5.803",
+ "W3C-checklink/4.3 [4.42] libwww-perl/5.805",
+ "W3C-checklink/4.3 [4.42] libwww-perl/5.808",
+ "W3C-checklink/4.3 [4.42] libwww-perl/5.820",
+ "W3C-checklink/4.5 [4.154] libwww-perl/5.823",
+ "W3C-checklink/4.5 [4.160] libwww-perl/5.823",
+ "amibot - http://www.amidalla.de - tech@amidalla.com libwww-perl/5.831"
+ ]
+ }
+ ,
+ {
+ "pattern": "httpunit",
+ "instances": [
+ "httpunit/1.x"
+ ]
+ }
+ ,
+ {
+ "pattern": "nutch",
+ "instances": [
+ "NutchCVS/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
+ "istellabot-nutch/Nutch-1.10"
+ ]
+ }
+ ,
+ {
+ "pattern": "Go-http-client",
+ "addition_date": "2016/03/26",
+ "url": "https://golang.org/pkg/net/http/",
+ "instances": [
+ "Go-http-client/1.1"
+ ]
+ }
+ ,
+ {
+ "pattern": "phpcrawl",
+ "addition_date": "2012/09/17",
+ "url": "http://phpcrawl.cuab.de/",
+ "instances": [
+ "phpcrawl"
+ ]
+ }
+ ,
+ {
+ "pattern": "msnbot",
+ "url": "http://search.msn.com/msnbot.htm",
+ "instances": [
+ "adidxbot/1.1 (+http://search.msn.com/msnbot.htm)",
+ "adidxbot/2.0 (+http://search.msn.com/msnbot.htm)",
+ "librabot/1.0 (+http://search.msn.com/msnbot.htm)",
+ "librabot/2.0 (+http://search.msn.com/msnbot.htm)",
+ "msnbot-NewsBlogs/2.0b (+http://search.msn.com/msnbot.htm)",
+ "msnbot-UDiscovery/2.0b (+http://search.msn.com/msnbot.htm)",
+ "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)",
+ "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)",
+ "msnbot-media/2.0b (+http://search.msn.com/msnbot.htm)",
+ "msnbot/1.0 (+http://search.msn.com/msnbot.htm)",
+ "msnbot/1.1 (+http://search.msn.com/msnbot.htm)",
+ "msnbot/2.0b (+http://search.msn.com/msnbot.htm)",
+ "msnbot/2.0b (+http://search.msn.com/msnbot.htm).",
+ "msnbot/2.0b (+http://search.msn.com/msnbot.htm)._"
+ ]
+ }
+ ,
+ {
+ "pattern": "jyxobot",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "FAST-WebCrawler",
+ "instances": [
+ "FAST-WebCrawler/3.6/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.8"
+ ]
+ }
+ ,
+ {
+ "pattern": "FAST Enterprise Crawler",
+ "instances": [
+ "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/",
+ "FAST Enterprise Crawler 6 used by Schibsted (webcrawl@schibstedsok.no)"
+ ]
+ }
+ ,
+ {
+ "pattern": "BIGLOTRON",
+ "instances": [
+ "BIGLOTRON (Beta 2;GNU/Linux)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Teoma",
+ "instances": [
+ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)",
+ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)"
+ ],
+ "url": "http://about.ask.com/en/docs/about/webmasters.shtml"
+ }
+ ,
+ {
+ "pattern": "convera",
+ "instances": [
+ "ConveraCrawler/0.9e (+http://ews.converasearch.com/crawl.htm)"
+ ],
+ "url": "http://ews.converasearch.com/crawl.htm"
+ }
+ ,
+ {
+ "pattern": "seekbot",
+ "instances": [
+ "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2"
+ ],
+ "url": "http://www.seekbot.net/bot.html"
+ }
+ ,
+ {
+ "pattern": "Gigabot",
+ "instances": [
+ "Gigabot/1.0",
+ "Gigabot/2.0 (http://www.gigablast.com/spider.html)"
+ ],
+ "url": "http://www.gigablast.com/spider.html"
+ }
+ ,
+ {
+ "pattern": "Gigablast",
+ "instances": [
+ "GigablastOpenSource/1.0"
+ ],
+ "url": "https://github.com/gigablast/open-source-search-engine"
+ }
+ ,
+ {
+ "pattern": "exabot",
+ "instances": [
+ "Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)",
+ "Mozilla/5.0 (compatible; Exabot PyExalead/3.0; +http://www.exabot.com/go/robot)",
+ "Mozilla/5.0 (compatible; Exabot-Images/3.0; +http://www.exabot.com/go/robot)",
+ "Mozilla/5.0 (compatible; Exabot/3.0 (BiggerBetter); +http://www.exabot.com/go/robot)",
+ "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)",
+ "Mozilla/5.0 (compatible; Exabot/3.0; http://www.exabot.com/go/robot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "ia_archiver",
+ "instances": [
+ "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)",
+ "ia_archiver-web.archive.org"
+ ]
+ }
+ ,
+ {
+ "pattern": "GingerCrawler",
+ "instances": [
+ "GingerCrawler/1.0 (Language Assistant for Dyslexics; www.gingersoftware.com/crawler_agent.htm; support at ginger software dot com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "webmon ",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "HTTrack",
+ "instances": [
+ "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)"
+ ]
+ }
+ ,
+ {
+ "pattern": "grub.org",
+ "instances": [
+ "Mozilla/4.0 (compatible; grub-client-0.3.0; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.0.4; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.0.5; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.0.6; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.0.7; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.1.1; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.2.1; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.3.1; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.3.7; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.4.3; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-1.5.3; Crawl your own stuff with http://grub.org)"
+ ]
+ }
+ ,
+ {
+ "pattern": "UsineNouvelleCrawler",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "antibot",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "netresearchserver",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "speedy",
+ "instances": [
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider for SpeedyAds (http://www.entireweb.com/about/search_tech/speedy_spider/)",
+ "Mozilla/5.0 (compatible; Speedy Spider; http://www.entireweb.com/about/search_tech/speedy_spider/)",
+ "Speedy Spider (Entireweb; Beta/1.2; http://www.entireweb.com/about/search_tech/speedyspider/)",
+ "Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "fluffy",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "bibnum.bnf",
+ "instances": [
+ "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "findlink",
+ "instances": [
+ "findlinks/1.0 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.3-beta8 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.3-beta9 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.5-beta7 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/; YaCy 0.1; yacy.net)",
+ "findlinks/1.1.6-beta2 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.6-beta3 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.6-beta4 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.6-beta5 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/1.1.6-beta6 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.0 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.0.1 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.0.2 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.0.4 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.0.5 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.0.9 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.1 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.1.3 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.1.5 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.2 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.5 (+http://wortschatz.uni-leipzig.de/findlinks/)",
+ "findlinks/2.6 (+http://wortschatz.uni-leipzig.de/findlinks/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "msrbot",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "panscient",
+ "instances": [
+ "panscient.com"
+ ]
+ }
+ ,
+ {
+ "pattern": "yacybot",
+ "instances": [
+ "yacybot (/global; amd64 FreeBSD 10.3-RELEASE; java 1.8.0_77; GMT/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 2.6.32-042stab093.4; java 1.7.0_65; Etc/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 2.6.32-042stab094.8; java 1.7.0_79; America/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 2.6.32-042stab108.8; java 1.7.0_91; America/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 2.6.32-573.3.1.el6.x86_64; java 1.7.0_85; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.10.0-229.7.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.10.0-327.22.2.el7.x86_64; java 1.7.0_101; Etc/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.11.10-21-desktop; java 1.7.0_51; America/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.12.1; java 1.7.0_65; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/de) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-45-generic; java 1.7.0_75; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.13.0-61-generic; java 1.7.0_79; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-74-generic; java 1.7.0_91; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/de) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.13.0-88-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.14-0.bpo.1-amd64; java 1.7.0_55; Europe/de) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_111; Europe/de) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; America/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/de) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_91; Europe/de) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_95; Europe/en) http://yacy.net/bot.html",
+ "yacybot (/global; amd64 Linux 3.16-0.bpo.2-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.19.0-15-generic; java 1.8.0_45-internal; Europe/de) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_67; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Windows 8.1 6.3; java 1.7.0_55; Europe/de) http://yacy.net/bot.html",
+ "yacybot (-global; amd64 Windows 8 6.2; java 1.7.0_55; Europe/de) http://yacy.net/bot.html"
+ ]
+ }
+ ,
+ {
+ "pattern": "AISearchBot",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "ips-agent",
+ "instances": [
+ "BlackBerry9000/ Profile/MIDP-2.0 Configuration/CLDC-1.1 VendorID/102 ips-agent",
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12; ips-agent) Gecko/20050922 Fedora/1.0.7-1.1.fc4 Firefox/1.0.7",
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:; ips-agent) Gecko/20090824 Fedora/1.0.7-1.1.fc4 Firefox/3.5.3",
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:; ips-agent) Gecko/20111107 Ubuntu/10.04 (lucid) Firefox/3.6.24",
+ "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:14.0; ips-agent) Gecko/20100101 Firefox/14.0.1"
+ ]
+ }
+ ,
+ {
+ "pattern": "tagoobot",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "MJ12bot",
+ "instances": [
+ "MJ12bot/v1.2.0 (http://majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.2.1; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.2.3; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.2.4; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.2.5; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.3.0; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.3.1; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.3.2; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.3.3; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.0; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.1; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.2; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.3; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.4 (domain ownership verifier); http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.5; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.6; http://mj12bot.com/)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://www.majestic12.co.uk/bot.php?+)",
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "woriobot",
+ "instances": [
+ "Mozilla/5.0 (compatible; woriobot +http://worio.com)",
+ "Mozilla/5.0 (compatible; woriobot support [at] zite [dot] com +http://zite.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "yanga",
+ "instances": [
+ "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "buzzbot",
+ "instances": [
+ "Buzzbot/1.0 (Buzzbot; http://www.buzzstream.com; buzzbot@buzzstream.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "mlbot",
+ "instances": [
+ "MLBot (www.metadatalabs.com/mlbot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "YandexBot",
+ "url": "http://yandex.com/bots",
+ "instances": [
+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"
+ ],
+ "addition_date": "2015/04/14"
+ }
+ ,
+ {
+ "pattern": "yandex.com\\/bots",
+ "url": "https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml#robot-in-logs",
+ "instances": [
+ "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)"
+ ],
+ "addition_date": "2016/12/01"
+ }
+ ,
+ {
+ "pattern": "purebot",
+ "addition_date": "2010/01/19",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "Linguee Bot",
+ "addition_date": "2010/01/26",
+ "url": "http://www.linguee.com/bot",
+ "instances": [
+ "Linguee Bot (http://www.linguee.com/bot)",
+ "Linguee Bot (http://www.linguee.com/bot; bot@linguee.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "CyberPatrol",
+ "addition_date": "2010/02/11",
+ "url": "http://www.cyberpatrol.com/cyberpatrolcrawler.asp",
+ "instances": [
+ "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)"
+ ]
+ }
+ ,
+ {
+ "pattern": "voilabot",
+ "addition_date": "2010/05/18",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 5.1; U; Win64; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
+ "Mozilla/5.0 (compatible; OrangeBot/2.0; support.voilabot@orange.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Baiduspider",
+ "addition_date": "2010/07/15",
+ "url": "http://www.baidu.jp/spider/",
+ "instances": [
+ "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "citeseerxbot",
+ "addition_date": "2010/07/17",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "spbot",
+ "addition_date": "2010/07/31",
+ "url": "http://www.seoprofiler.com/bot",
+ "instances": [
+ "Mozilla/5.0 (compatible; spbot/1.0; +http://www.seoprofiler.com/bot/ )",
+ "Mozilla/5.0 (compatible; spbot/1.1; +http://www.seoprofiler.com/bot/ )",
+ "Mozilla/5.0 (compatible; spbot/1.2; +http://www.seoprofiler.com/bot/ )",
+ "Mozilla/5.0 (compatible; spbot/2.0.1; +http://www.seoprofiler.com/bot/ )",
+ "Mozilla/5.0 (compatible; spbot/2.0.2; +http://www.seoprofiler.com/bot/ )",
+ "Mozilla/5.0 (compatible; spbot/2.0.3; +http://www.seoprofiler.com/bot/ )",
+ "Mozilla/5.0 (compatible; spbot/2.0.4; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/2.0; +http://www.seoprofiler.com/bot/ )",
+ "Mozilla/5.0 (compatible; spbot/2.1; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/3.0; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/3.1; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.1; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.2; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.3; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.4; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.5; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.6; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.7; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.7; +https://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.8; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0.9; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0a; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.0b; +http://www.seoprofiler.com/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.1.0; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.2.0; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.3.0; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.4.0; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.4.1; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/4.4.2; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/5.0.1; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/5.0.2; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/5.0.3; +http://OpenLinkProfiler.org/bot )",
+ "Mozilla/5.0 (compatible; spbot/5.0; +http://OpenLinkProfiler.org/bot )"
+ ]
+ }
+ ,
+ {
+ "pattern": "twengabot",
+ "addition_date": "2010/08/03",
+ "url": "http://www.twenga.com/bot.html",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "postrank",
+ "addition_date": "2010/08/03",
+ "url": "http://www.postrank.com",
+ "instances": [
+ "PostRank/2.0 (postrank.com)",
+ "PostRank/2.0 (postrank.com; 1 subscribers)"
+ ]
+ }
+ ,
+ {
+ "pattern": "turnitinbot",
+ "addition_date": "2010/09/26",
+ "url": "http://www.turnitin.com",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "scribdbot",
+ "addition_date": "2010/09/28",
+ "url": "http://www.scribd.com",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "page2rss",
+ "addition_date": "2010/10/07",
+ "url": "http://www.page2rss.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; Page2RSS/0.7; +http://page2rss.com/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "sitebot",
+ "addition_date": "2010/12/15",
+ "url": "http://www.sitebot.org",
+ "instances": [
+ "Mozilla/5.0 (compatible; Whoiswebsitebot/0.1; +http://www.whoiswebsite.net)"
+ ]
+ }
+ ,
+ {
+ "pattern": "linkdex",
+ "addition_date": "2011/01/06",
+ "url": "http://www.linkdex.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)",
+ "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)",
+ "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/about/bots/)",
+ "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/bots/)",
+ "Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)",
+ "linkdex.com/v2.0",
+ "linkdexbot/Nutch-1.0-dev (http://www.linkdex.com/; crawl at linkdex dot com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Adidxbot",
+ "url": "http://onlinehelp.microsoft.com/en-us/bing/hh204496.aspx",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "blekkobot",
+ "url": "http://blekko.com/about/blekkobot",
+ "instances": [
+ "Mozilla/5.0 (compatible; Blekkobot; ScoutJet; +http://blekko.com/about/blekkobot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "ezooms",
+ "addition_date": "2011/04/27",
+ "url": "http://www.phpbb.com/community/viewtopic.php?f=64&t=935605&start=450#p12948289",
+ "instances": [
+ "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot@gmail.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "dotbot",
+ "addition_date": "2011/04/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)",
+ "dotbot"
+ ]
+ }
+ ,
+ {
+ "pattern": "Mail.RU_Bot",
+ "addition_date": "2011/04/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/",
+ "Mozilla/5.0 (compatible; Mail.RU_Bot/2.0; +http://go.mail.ru/"
+ ]
+ }
+ ,
+ {
+ "pattern": "discobot",
+ "addition_date": "2011/05/03",
+ "url": "http://discoveryengine.com/discobot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; discobot/1.0; +http://discoveryengine.com/discobot.html)",
+ "Mozilla/5.0 (compatible; discobot/2.0; +http://discoveryengine.com/discobot.html)",
+ "mozilla/5.0 (compatible; discobot/1.1; +http://discoveryengine.com/discobot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "heritrix",
+ "addition_date": "2011/06/21",
+ "url": "http://crawler.archive.org/",
+ "instances": [
+ "Mozilla/5.0 (compatible; archive.org_bot/heritrix-1.15.4 +http://www.archive.org)",
+ "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.webarchiv.cz)",
+ "Mozilla/5.0 (compatible; heritrix/1.12.1b +http://netarkivet.dk/website/info.html)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://rjpower.org)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://www.webarchiv.cz)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://archive.org)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.accelobot.com)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.webarchiv.cz)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.3.r6601 +http://www.buddybuzz.net/yptrino)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://parsijoo.ir)",
+ "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://www.exif-search.com)",
+ "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://aihit.com)",
+ "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://seekda.com)",
+ "Mozilla/5.0 (compatible; heritrix/3.0.0-SNAPSHOT-20091120.021634 +http://crawler.archive.org)",
+ "Mozilla/5.0 (compatible; heritrix/3.1.0-RC1 +http://boston.lti.cs.cmu.edu/crawler_12/)",
+ "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://places.tomtom.com/crawlerinfo)",
+ "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://www.mixdata.com)",
+ "Mozilla/5.0 (compatible; heritrix/3.1.1-SNAPSHOT-20120116.200628 +http://www.archive.org/details/archive.org_bot)",
+ "Mozilla/5.0 (compatible; heritrix/3.1.1; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)",
+ "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.crim.ca)",
+ "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.exif-search.com)",
+ "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.mixdata.com)",
+ "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20140702-2247 +http://archive.org/details/archive.org_bot)",
+ "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20160309-0050; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)",
+ "Mozilla/5.0 (compatible; sukibot_heritrix/3.1.1 +http://suki.ling.helsinki.fi/eng/webmasters.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "findthatfile",
+ "addition_date": "2011/06/21",
+ "url": "http://www.findthatfile.com/",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "europarchive.org",
+ "addition_date": "2011/06/21",
+ "url": "",
+ "instances": [
+ "Mozilla/5.0 (compatible; MSIE 7.0 +http://www.europarchive.org)"
+ ]
+ }
+ ,
+ {
+ "pattern": "NerdByNature.Bot",
+ "addition_date": "2011/07/12",
+ "url": "http://www.nerdbynature.net/bot",
+ "instances": [
+ "Mozilla/5.0 (compatible; NerdByNature.Bot; http://www.nerdbynature.net/bot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "sistrix crawler",
+ "addition_date": "2011/08/02",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "Ahrefs(Bot|SiteAudit)",
+ "addition_date": "2011/08/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; AhrefsBot/5.2; News; +http://ahrefs.com/robot/)",
+ "Mozilla/5.0 (compatible; AhrefsBot/5.2; +http://ahrefs.com/robot/)",
+ "Mozilla/5.0 (compatible; AhrefsSiteAudit/5.2; +http://ahrefs.com/robot/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "fuelbot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "fuelbot"
+ ]
+ }
+ ,
+ {
+ "pattern": "CrunchBot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "CrunchBot/1.0 (+http://www.leadcrunch.com/crunchbot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "centurybot9",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; Go-http-client/1.1; +centurybot9@gmail.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "IndeedBot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0 (IndeedBot 1.1)"
+ ]
+ }
+ ,
+ {
+ "pattern": "mappydata",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; Mappy/1.0; +http://mappydata.net/bot/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "woobot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "woobot"
+ ]
+ }
+ ,
+ {
+ "pattern": "ZoominfoBot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "ZoominfoBot (zoominfobot at zoominfo dot com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "PrivacyAwareBot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; PrivacyAwareBot/1.1; +http://www.privacyaware.org)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Multiviewbot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Multiviewbot"
+ ]
+ }
+ ,
+ {
+ "pattern": "SWIMGBot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36 SWIMGBot"
+ ]
+ }
+ ,
+ {
+ "pattern": "Grobbot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; Grobbot/2.2; +https://grob.it)"
+ ]
+ }
+ ,
+ {
+ "pattern": "eright",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; eright/1.0; +bot@eright.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Apercite",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; Apercite; +http://www.apercite.fr/robot/index.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "semanticbot",
+ "addition_date": "2018/06/28",
+ "instances": [
+ "semanticbot",
+ "semanticbot (info@semanticaudience.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Aboundex",
+ "addition_date": "2011/09/28",
+ "url": "http://www.aboundex.com/crawler/",
+ "instances": [
+ "Aboundex/0.2 (http://www.aboundex.com/crawler/)",
+ "Aboundex/0.3 (http://www.aboundex.com/crawler/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "domaincrawler",
+ "addition_date": "2011/10/21",
+ "instances": [
+ "CipaCrawler/3.0 (info@domaincrawler.com; http://www.domaincrawler.com/www.example.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "wbsearchbot",
+ "addition_date": "2011/12/21",
+ "url": "http://www.warebay.com/bot.html",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "summify",
+ "addition_date": "2012/01/04",
+ "url": "http://summify.com",
+ "instances": [
+ "Summify (Summify/1.0.1; +http://summify.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "CCBot",
+ "addition_date": "2012/02/05",
+ "url": "http://www.commoncrawl.org/bot.html",
+ "instances": [
+ "CCBot/2.0 (http://commoncrawl.org/faq/)",
+ "CCBot/2.0 (https://commoncrawl.org/faq/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "edisterbot",
+ "addition_date": "2012/02/25",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "seznambot",
+ "addition_date": "2012/03/14",
+ "instances": [
+ "Mozilla/5.0 (compatible; SeznamBot/3.2-test1-1; +http://napoveda.seznam.cz/en/seznambot-intro/)",
+ "Mozilla/5.0 (compatible; SeznamBot/3.2-test1; +http://napoveda.seznam.cz/en/seznambot-intro/)",
+ "Mozilla/5.0 (compatible; SeznamBot/3.2-test2; +http://napoveda.seznam.cz/en/seznambot-intro/)",
+ "Mozilla/5.0 (compatible; SeznamBot/3.2-test4; +http://napoveda.seznam.cz/en/seznambot-intro/)",
+ "Mozilla/5.0 (compatible; SeznamBot/3.2; +http://napoveda.seznam.cz/en/seznambot-intro/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "ec2linkfinder",
+ "addition_date": "2012/03/22",
+ "instances": [
+ "ec2linkfinder"
+ ]
+ }
+ ,
+ {
+ "pattern": "gslfbot",
+ "addition_date": "2012/04/03",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "aiHitBot",
+ "addition_date": "2012/04/16",
+ "instances": [
+ "Mozilla/5.0 (compatible; aiHitBot/2.9; +https://www.aihitdata.com/about)"
+ ]
+ }
+ ,
+ {
+ "pattern": "intelium_bot",
+ "addition_date": "2012/05/07",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "facebookexternalhit",
+ "addition_date": "2012/05/07",
+ "instances": [
+ "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)",
+ "facebookexternalhit/1.1",
+ "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Yeti",
+ "addition_date": "2012/05/07",
+ "url": "http://naver.me/bot",
+ "instances": [
+ "Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/bot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "RetrevoPageAnalyzer",
+ "addition_date": "2012/05/07",
+ "instances": [
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; RetrevoPageAnalyzer; +http://www.retrevo.com/content/about-us)"
+ ]
+ }
+ ,
+ {
+ "pattern": "lb-spider",
+ "addition_date": "2012/05/07",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "Sogou",
+ "addition_date": "2012/05/13",
+ "url": "http://www.sogou.com/docs/help/webmasters.htm#07",
+ "instances": [
+ "Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+ "Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+ "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"
+ ]
+ }
+ ,
+ {
+ "pattern": "lssbot",
+ "addition_date": "2012/05/15",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "careerbot",
+ "addition_date": "2012/05/23",
+ "url": "http://www.career-x.de/bot.html",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "wotbox",
+ "addition_date": "2012/06/12",
+ "url": "http://www.wotbox.com",
+ "instances": [
+ "Wotbox/2.0 (bot@wotbox.com; http://www.wotbox.com)",
+ "Wotbox/2.01 (+http://www.wotbox.com/bot/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "wocbot",
+ "addition_date": "2012/07/25",
+ "url": "http://www.wocodi.com/crawler",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "ichiro",
+ "addition_date": "2012/08/28",
+ "url": "http://help.goo.ne.jp/help/article/1142",
+ "instances": [
+ "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)",
+ "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)",
+ "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)",
+ "DoCoMo/2.0 P900i(c100;TB;W24H11)(compatible; ichiro/mobile goo;+http://help.goo.ne.jp/door/crawler.html)",
+ "DoCoMo/2.0 P901i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/door/crawler.html)",
+ "KDDI-CA31 UP.Browser/ (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)",
+ "KDDI-CA31 UP.Browser/ (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)",
+ "KDDI-CA31 UP.Browser/ (GUI) MMP/2.0 (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)",
+ "ichiro/2.0 (http://help.goo.ne.jp/door/crawler.html)",
+ "ichiro/2.0 (ichiro@nttr.co.jp)",
+ "ichiro/3.0 (http://help.goo.ne.jp/door/crawler.html)",
+ "ichiro/3.0 (http://help.goo.ne.jp/help/article/1142)",
+ "ichiro/3.0 (http://search.goo.ne.jp/option/use/sub4/sub4-1/)",
+ "ichiro/4.0 (http://help.goo.ne.jp/door/crawler.html)",
+ "ichiro/5.0 (http://help.goo.ne.jp/door/crawler.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "DuckDuckBot",
+ "addition_date": "2012/09/19",
+ "url": "http://duckduckgo.com/duckduckbot.html",
+ "instances": [
+ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
+ "DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "lssrocketcrawler",
+ "addition_date": "2012/09/24",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "drupact",
+ "addition_date": "2012/09/27",
+ "url": "http://www.arocom.de/drupact",
+ "instances": [
+ "drupact/0.7; http://www.arocom.de/drupact"
+ ]
+ }
+ ,
+ {
+ "pattern": "webcompanycrawler",
+ "addition_date": "2012/10/03",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "acoonbot",
+ "addition_date": "2012/10/07",
+ "url": "http://www.acoon.de/robot.asp",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "openindexspider",
+ "addition_date": "2012/10/26",
+ "url": "http://www.openindex.io/en/webmasters/spider.html",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "gnam gnam spider",
+ "addition_date": "2012/10/31",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "web-archive-net.com.bot",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "backlinkcrawler",
+ "addition_date": "2013/01/04",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "coccoc",
+ "addition_date": "2013/01/04",
+ "url": "http://help.coccoc.vn/",
+ "instances": [
+ "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/)",
+ "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/searchengine)",
+ "Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)",
+ "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)",
+ "Mozilla/5.0 (compatible; image.coccoc/1.0; +http://help.coccoc.com/)",
+ "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/)",
+ "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/searchengine)",
+ "coccoc",
+ "coccoc/1.0 ()",
+ "coccoc/1.0 (http://help.coccoc.com/)",
+ "coccoc/1.0 (http://help.coccoc.vn/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "integromedb",
+ "addition_date": "2013/01/10",
+ "url": "http://www.integromedb.org/Crawler",
+ "instances": [
+ "www.integromedb.org/Crawler"
+ ]
+ }
+ ,
+ {
+ "pattern": "content crawler spider",
+ "addition_date": "2013/01/11",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "toplistbot",
+ "addition_date": "2013/02/05",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "it2media-domain-crawler",
+ "addition_date": "2013/03/12",
+ "instances": [
+ "it2media-domain-crawler/1.0 on crawler-prod.it2media.de",
+ "it2media-domain-crawler/2.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "ip-web-crawler.com",
+ "addition_date": "2013/03/22",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "siteexplorer.info",
+ "addition_date": "2013/05/01",
+ "instances": [
+ "Mozilla/5.0 (compatible; SiteExplorer/1.0b; +http://siteexplorer.info/)",
+ "Mozilla/5.0 (compatible; SiteExplorer/1.1b; +http://siteexplorer.info/Backlink-Checker-Spider/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "elisabot",
+ "addition_date": "2013/06/27",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "proximic",
+ "addition_date": "2013/09/12",
+ "url": "http://www.proximic.com/info/spider.php",
+ "instances": [
+ "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com)",
+ "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)"
+ ]
+ }
+ ,
+ {
+ "pattern": "changedetection",
+ "addition_date": "2013/09/13",
+ "url": "http://www.changedetection.com/bot.html",
+ "instances": [
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )"
+ ]
+ }
+ ,
+ {
+ "pattern": "arabot",
+ "addition_date": "2013/10/09",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "WeSEE:Search",
+ "addition_date": "2013/11/18",
+ "instances": [
+ "WeSEE:Search",
+ "WeSEE:Search/0.1 (Alpha, http://www.wesee.com/en/support/bot/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "niki-bot",
+ "addition_date": "2014/01/01",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "CrystalSemanticsBot",
+ "addition_date": "2014/02/17",
+ "url": "http://www.crystalsemantics.com/user-agent/",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "rogerbot",
+ "addition_date": "2014/02/28",
+ "url": "http://moz.com/help/pro/what-is-rogerbot-",
+ "instances": [
+ "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)",
+ "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+partager@moz.com)",
+ "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+shiny@moz.com)",
+ "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com",
+ "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com)",
+ "rogerbot/1.0 (http://www.moz.com/dp/rogerbot, rogerbot-crawler@moz.com)",
+ "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler+shiny@seomoz.org)",
+ "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler@seomoz.org)",
+ "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-wherecat@moz.com)",
+ "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr2-crawler-05@moz.com)",
+ "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-11@moz.com)",
+ "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-15@moz.com)",
+ "rogerbot/1.2 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+phaser-testing-crawler-01@moz.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "360Spider",
+ "addition_date": "2014/03/14",
+ "url": "http://needs-be.blogspot.co.uk/2013/02/how-to-block-spider360.html",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider",
+ "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)",
+ "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 QIHU 360SE; 360Spider",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; ) Firefox/; 360Spider",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv: Firefox/; 360Spider",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv: Firefox/ 360Spider;",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv: Gecko/20070312 Firefox/; 360Spider",
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider",
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "psbot",
+ "addition_date": "2014/03/31",
+ "url": "http://www.picsearch.com/bot.html",
+ "instances": [
+ "psbot-image (+http://www.picsearch.com/bot.html)",
+ "psbot-page (+http://www.picsearch.com/bot.html)",
+ "psbot/0.1 (+http://www.picsearch.com/bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "InterfaxScanBot",
+ "addition_date": "2014/03/31",
+ "url": "http://scan-interfax.ru",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "CC Metadata Scaper",
+ "addition_date": "2014/04/01",
+ "url": "http://wiki.creativecommons.org/Metadata_Scraper",
+ "instances": [
+ "CC Metadata Scaper http://wiki.creativecommons.org/Metadata_Scraper"
+ ]
+ }
+ ,
+ {
+ "pattern": "g00g1e.net",
+ "addition_date": "2014/04/01",
+ "url": "http://www.g00g1e.net/",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "GrapeshotCrawler",
+ "addition_date": "2014/04/01",
+ "url": "http://www.grapeshot.co.uk/crawler.php",
+ "instances": [
+ "Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)"
+ ]
+ }
+ ,
+ {
+ "pattern": "urlappendbot",
+ "addition_date": "2014/05/10",
+ "url": "http://www.profound.net/urlappendbot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "brainobot",
+ "addition_date": "2014/06/24",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "fr-crawler",
+ "addition_date": "2014/07/31",
+ "instances": [
+ "Mozilla/5.0 (compatible; fr-crawler/1.1)"
+ ]
+ }
+ ,
+ {
+ "pattern": "binlar",
+ "addition_date": "2014/09/12",
+ "instances": [
+ "binlar_2.6.3 binlar2.6.3@unspecified.mail",
+ "binlar_2.6.3 binlar_2.6.3@unspecified.mail",
+ "binlar_2.6.3 larbin2.6.3@unspecified.mail",
+ "binlar_2.6.3 phanendra_kalapala@McAfee.com",
+ "binlar_2.6.3 test@mgmt.mic"
+ ]
+ }
+ ,
+ {
+ "pattern": "SimpleCrawler",
+ "addition_date": "2014/09/12",
+ "instances": [
+ "SimpleCrawler/0.1"
+ ]
+ }
+ ,
+ {
+ "pattern": "Twitterbot",
+ "addition_date": "2014/09/12",
+ "url": "https://dev.twitter.com/cards/getting-started",
+ "instances": [
+ "Twitterbot/0.1",
+ "Twitterbot/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "cXensebot",
+ "addition_date": "2014/10/05",
+ "instances": [
+ "cXensebot/1.1a"
+ ],
+ "url": "http://www.cxense.com/bot.html"
+ }
+ ,
+ {
+ "pattern": "smtbot",
+ "addition_date": "2014/10/04",
+ "instances": [
+ "Mozilla/5.0 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot)",
+ "SMTBot (similartech.com/smtbot)"
+ ],
+ "url": "http://www.similartech.com/smtbot"
+ }
+ ,
+ {
+ "pattern": "bnf.fr_bot",
+ "addition_date": "2014/11/18",
+ "url": "http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "A6-Indexer",
+ "addition_date": "2014/12/05",
+ "url": "http://www.a6corp.com/a6-web-scraping-policy/",
+ "instances": [
+ "A6-Indexer"
+ ]
+ }
+ ,
+ {
+ "pattern": "ADmantX",
+ "addition_date": "2014/12/05",
+ "url": "http://www.admantx.com",
+ "instances": [
+ "ADmantX Platform Semantic Analyzer - ADmantX Inc. - www.admantx.com - support@admantx.com"
+ ]
+ }
+ ,
+ {
+ "pattern": "Facebot",
+ "url": "https://developers.facebook.com/docs/sharing/best-practices#crawl",
+ "addition_date": "2014/12/30",
+ "instances": [
+ "Facebot/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "OrangeBot\\/",
+ "instances": [
+ "Mozilla/5.0 (compatible; OrangeBot/2.0; support.orangebot@orange.com"
+ ],
+ "addition_date": "2015/01/12"
+ }
+ ,
+ {
+ "pattern": "memorybot",
+ "url": "http://mignify.com/bot.htm",
+ "instances": [
+ "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)"
+ ],
+ "addition_date": "2015/02/01"
+ }
+ ,
+ {
+ "pattern": "AdvBot",
+ "url": "http://advbot.net/bot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; AdvBot/2.0; +http://advbot.net/bot.html)"
+ ],
+ "addition_date": "2015/02/01"
+ }
+ ,
+ {
+ "pattern": "MegaIndex",
+ "url": "https://www.megaindex.ru/?tab=linkAnalyze",
+ "instances": [
+ "Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +https://www.megaindex.ru/?tab=linkAnalyze)"
+ ],
+ "addition_date": "2015/03/28"
+ }
+ ,
+ {
+ "pattern": "SemanticScholarBot",
+ "url": "http://s2.allenai.org/bot.html",
+ "instances": [
+ "SemanticScholarBot/1.0 (+http://s2.allenai.org/bot.html)"
+ ],
+ "addition_date": "2015/03/28"
+ }
+ ,
+ {
+ "pattern": "ltx71",
+ "url": "http://ltx71.com/",
+ "instances": [
+ "ltx71 - (http://ltx71.com/)"
+ ],
+ "addition_date": "2015/04/04"
+ }
+ ,
+ {
+ "pattern": "nerdybot",
+ "url": "http://nerdybot.com/",
+ "instances": [
+ "nerdybot"
+ ],
+ "addition_date": "2015/04/05"
+ }
+ ,
+ {
+ "pattern": "xovibot",
+ "url": "http://www.xovibot.net/",
+ "instances": [
+ "Mozilla/5.0 (compatible; XoviBot/2.0; +http://www.xovibot.net/)"
+ ],
+ "addition_date": "2015/04/05"
+ }
+ ,
+ {
+ "pattern": "BUbiNG",
+ "url": "http://law.di.unimi.it/BUbiNG.html",
+ "instances": [
+ "BUbiNG (+http://law.di.unimi.it/BUbiNG.html)"
+ ],
+ "addition_date": "2015/04/06"
+ }
+ ,
+ {
+ "pattern": "Qwantify",
+ "url": "https://www.qwant.com/",
+ "instances": [
+ "Mozilla/5.0 (compatible; Qwantify/2.0n; +https://www.qwant.com/)/*",
+ "Mozilla/5.0 (compatible; Qwantify/2.4w; +https://www.qwant.com/)/2.4w"
+ ],
+ "addition_date": "2015/04/06"
+ }
+ ,
+ {
+ "pattern": "archive.org_bot",
+ "url": "http://www.archive.org/details/archive.org_bot",
+ "instances": [
+ "Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)"
+ ],
+ "addition_date": "2015/04/14"
+ }
+ ,
+ {
+ "pattern": "Applebot",
+ "url": "http://www.apple.com/go/applebot",
+ "addition_date": "2015/04/15",
+ "instances": [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1)",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)",
+ "Mozilla/5.0 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "TweetmemeBot",
+ "url": "http://datasift.com/bot.html",
+ "instances": [
+ "Mozilla/5.0 (TweetmemeBot/4.0; +http://datasift.com/bot.html) Gecko/20100101 Firefox/31.0"
+ ],
+ "addition_date": "2015/04/15"
+ }
+ ,
+ {
+ "pattern": "crawler4j",
+ "url": "https://github.com/yasserg/crawler4j",
+ "instances": [
+ "crawler4j (http://code.google.com/p/crawler4j/)"
+ ],
+ "addition_date": "2015/05/07"
+ }
+ ,
+ {
+ "pattern": "findxbot",
+ "url": "http://www.findxbot.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com)"
+ ],
+ "addition_date": "2015/05/07"
+ }
+ ,
+ {
+ "pattern": "S[eE][mM]rushBot",
+ "url": "http://www.semrush.com/bot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; SemrushBot/0.98~bl; +http://www.semrush.com/bot.html)",
+ "SEMrushBot"
+ ],
+ "addition_date": "2015/05/26"
+ }
+ ,
+ {
+ "pattern": "yoozBot",
+ "url": "http://yooz.ir",
+ "instances": [
+ "Mozilla/5.0 (compatible; yoozBot-2.2; http://yooz.ir; info@yooz.ir)"
+ ],
+ "addition_date": "2015/05/26"
+ }
+ ,
+ {
+ "pattern": "lipperhey",
+ "url": "http://www.lipperhey.com/",
+ "instances": [
+ "Mozilla/5.0 (compatible; Lipperhey Link Explorer; http://www.lipperhey.com/)",
+ "Mozilla/5.0 (compatible; Lipperhey SEO Service; http://www.lipperhey.com/)",
+ "Mozilla/5.0 (compatible; Lipperhey Site Explorer; http://www.lipperhey.com/)",
+ "Mozilla/5.0 (compatible; Lipperhey-Kaus-Australis/5.0; +https://www.lipperhey.com/en/about/)"
+ ],
+ "addition_date": "2015/08/26"
+ }
+ ,
+ {
+ "pattern": "Y!J",
+ "url": "https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/%E3%82%A6%E3%82%A7%E3%83%96%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AB%E3%82%A2%E3%82%AF%E3%82%BB%E3%82%B9%E3%81%99%E3%82%8B%E3%82%B7%E3%82%B9%E3%83%86%E3%83%A0%E3%81%AE%E3%83%A6%E3%83%BC%E3%82%B6%E3%83%BC%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88%E3%81%AB%E3%81%A4%E3%81%84%E3%81%A6",
+ "instances": [
+ "Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)",
+ "Y!J-BRJ/YATS crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
+ "Y!J-PSC/1.0 crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
+ "Y!J-BRW/1.0 crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
+ "Mozilla/5.0 (iPhone; Y!J-BRY/YATSH crawler; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
+ "Mozilla/5.0 (compatible; Y!J SearchMonkey/1.0 (Y!J-AGENT; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html))"
+ ],
+ "addition_date": "2015/05/26"
+ }
+ ,
+ {
+ "pattern": "Domain Re-Animator Bot",
+ "url": "http://domainreanimator.com",
+ "instances": [
+ "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com"
+ ],
+ "addition_date": "2015/04/14"
+ }
+ ,
+ {
+ "pattern": "AddThis",
+ "url": "https://www.addthis.com",
+ "instances": [
+ "AddThis.com robot tech.support@clearspring.com"
+ ],
+ "addition_date": "2015/06/02"
+ }
+ ,
+ {
+ "pattern": "Screaming Frog SEO Spider",
+ "url": "http://www.screamingfrog.co.uk/seo-spider",
+ "instances": [
+ "Screaming Frog SEO Spider/5.1"
+ ],
+ "addition_date": "2016/01/08"
+ }
+ ,
+ {
+ "pattern": "MetaURI",
+ "url": "http://www.useragentstring.com/MetaURI_id_17683.php",
+ "instances": [
+ "MetaURI API/2.0 +metauri.com"
+ ],
+ "addition_date": "2016/01/02"
+ }
+ ,
+ {
+ "pattern": "Scrapy",
+ "url": "http://scrapy.org/",
+ "instances": [
+ "Scrapy/1.0.3 (+http://scrapy.org)"
+ ],
+ "addition_date": "2016/01/02"
+ }
+ ,
+ {
+ "pattern": "Livelap[bB]ot",
+ "url": "http://site.livelap.com/crawler",
+ "instances": [
+ "LivelapBot/0.2 (http://site.livelap.com/crawler)",
+ "Livelapbot/0.1"
+ ],
+ "addition_date": "2016/01/02"
+ }
+ ,
+ {
+ "pattern": "OpenHoseBot",
+ "url": "http://www.openhose.org/bot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; OpenHoseBot/2.1; +http://www.openhose.org/bot.html)"
+ ],
+ "addition_date": "2016/01/02"
+ }
+ ,
+ {
+ "pattern": "CapsuleChecker",
+ "url": "http://www.capsulink.com/about",
+ "instances": [
+ "CapsuleChecker (http://www.capsulink.com/)"
+ ],
+ "addition_date": "2016/01/02"
+ }
+ ,
+ {
+ "pattern": "collection@infegy.com",
+ "url": "http://infegy.com/",
+ "instances": [
+ "Mozilla/5.0 (compatible) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36 collection@infegy.com"
+ ],
+ "addition_date": "2016/01/03"
+ }
+ ,
+ {
+ "pattern": "IstellaBot",
+ "url": "http://www.tiscali.it/",
+ "instances": [
+ "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)"
+ ],
+ "addition_date": "2016/01/09"
+ }
+ ,
+ {
+ "pattern": "DeuSu\\/",
+ "addition_date": "2016/01/23",
+ "url": "https://deusu.de/robot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; DeuSu/0.1.0; +https://deusu.org)",
+ "Mozilla/5.0 (compatible; DeuSu/5.0.2; +https://deusu.de/robot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "betaBot",
+ "addition_date": "2016/01/23",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "Cliqzbot\\/",
+ "addition_date": "2016/01/23",
+ "url": "http://cliqz.com/company/cliqzbot",
+ "instances": [
+ "Cliqzbot/0.1 (+http://cliqz.com +cliqzbot@cliqz.com)",
+ "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)",
+ "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)",
+ "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "MojeekBot\\/",
+ "addition_date": "2016/01/23",
+ "url": "https://www.mojeek.com/bot.html",
+ "instances": [
+ "MojeekBot/0.2 (archi; http://www.mojeek.com/bot.html)",
+ "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html#relaunch)",
+ "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html)",
+ "Mozilla/5.0 (compatible; MojeekBot/0.5; http://www.mojeek.com/bot.html)",
+ "Mozilla/5.0 (compatible; MojeekBot/0.6; +https://www.mojeek.com/bot.html)",
+ "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "netEstate NE Crawler",
+ "addition_date": "2016/01/23",
+ "url": "+http://www.website-datenbank.de/",
+ "instances": [
+ "netEstate NE Crawler (+http://www.sengine.info/)",
+ "netEstate NE Crawler (+http://www.website-datenbank.de/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "SafeSearch microdata crawler",
+ "addition_date": "2016/01/23",
+ "url": "https://safesearch.avira.com",
+ "instances": [
+ "SafeSearch microdata crawler (https://safesearch.avira.com, safesearch-abuse@avira.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Gluten Free Crawler\\/",
+ "addition_date": "2016/01/23",
+ "url": "http://glutenfreepleasure.com/",
+ "instances": [
+ "Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Sonic",
+ "addition_date": "2016/02/08",
+ "url": "http://www.yama.info.waseda.ac.jp/~crawler/info.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; RankSonicSiteAuditor/1.0; +https://ranksonic.com/ranksonic_sab.html)",
+ "Mozilla/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)",
+ "Mozzila/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Sysomos",
+ "addition_date": "2016/02/08",
+ "url": "http://www.sysomos.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; Sysomos/1.0; +http://www.sysomos.com/; Sysomos)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Trove",
+ "addition_date": "2016/02/08",
+ "url": "http://www.trove.com",
+ "instances": []
+ }
+ ,
+ {
+ "pattern": "deadlinkchecker",
+ "addition_date": "2016/02/08",
+ "url": "http://www.deadlinkchecker.com",
+ "instances": [
+ "www.deadlinkchecker.com Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36",
+ "www.deadlinkchecker.com XMLHTTP/1.0",
+ "www.deadlinkchecker.com XMLHTTP/1.0 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36"
+ ]
+ }
+ ,
+ {
+ "pattern": "Slack-ImgProxy",
+ "addition_date": "2016/04/25",
+ "url": "https://api.slack.com/robots",
+ "instances": [
+ "Slack-ImgProxy (+https://api.slack.com/robots)",
+ "Slack-ImgProxy 0.59 (+https://api.slack.com/robots)",
+ "Slack-ImgProxy 0.66 (+https://api.slack.com/robots)",
+ "Slack-ImgProxy 1.106 (+https://api.slack.com/robots)",
+ "Slack-ImgProxy 1.138 (+https://api.slack.com/robots)",
+ "Slack-ImgProxy 149 (+https://api.slack.com/robots)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Embedly",
+ "addition_date": "2016/04/25",
+ "url": "http://support.embed.ly",
+ "instances": [
+ "Embedly +support@embed.ly",
+ "Mozilla/5.0 (compatible; Embedly/0.2; +http://support.embed.ly/)",
+ "Mozilla/5.0 (compatible; Embedly/0.2; snap; +http://support.embed.ly/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "RankActiveLinkBot",
+ "addition_date": "2016/06/20",
+ "url": "https://rankactive.com/resources/rankactive-linkbot",
+ "instances": [
+ "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "iskanie",
+ "addition_date": "2016/09/02",
+ "url": "http://www.iskanie.com",
+ "instances": [
+ "iskanie (+http://www.iskanie.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "SafeDNSBot",
+ "addition_date": "2016/09/10",
+ "url": "https://www.safedns.com/searchbot",
+ "instances": [
+ "SafeDNSBot (https://www.safedns.com/searchbot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "SkypeUriPreview",
+ "addition_date": "2016/10/10",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5"
+ ]
+ }
+ ,
+ {
+ "pattern": "Veoozbot",
+ "addition_date": "2016/11/03",
+ "url": "http://www.veooz.com/veoozbot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; Veoozbot/1.0; +http://www.veooz.com/veoozbot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Slackbot",
+ "addition_date": "2016/11/03",
+ "url": "https://api.slack.com/robots",
+ "instances": [
+ "Slackbot-LinkExpanding (+https://api.slack.com/robots)",
+ "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)"
+ ]
+ }
+ ,
+ {
+ "pattern": "redditbot",
+ "addition_date": "2016/11/03",
+ "url": "http://www.reddit.com/feedback",
+ "instances": [
+ "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)"
+ ]
+ }
+ ,
+ {
+ "pattern": "datagnionbot",
+ "addition_date": "2016/11/03",
+ "url": "http://www.datagnion.com/bot.html",
+ "instances": [
+ "datagnionbot (+http://www.datagnion.com/bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Google-Adwords-Instant",
+ "addition_date": "2016/11/03",
+ "url": "http://www.google.com/adsbot.html",
+ "instances": [
+ "Google-Adwords-Instant (+http://www.google.com/adsbot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "adbeat_bot",
+ "addition_date": "2016/11/04",
+ "instances": [
+ "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)",
+ "adbeat_bot"
+ ]
+ }
+ ,
+ {
+ "pattern": "WhatsApp",
+ "addition_date": "2016/11/15",
+ "url": "https://www.whatsapp.com/",
+ "instances": [
+ "WhatsApp",
+ "WhatsApp/2.12.15/i",
+ "WhatsApp/2.12.16/i",
+ "WhatsApp/2.12.17/i",
+ "WhatsApp/2.12.449 A",
+ "WhatsApp/2.12.453 A",
+ "WhatsApp/2.12.510 A",
+ "WhatsApp/2.12.540 A",
+ "WhatsApp/2.12.548 A",
+ "WhatsApp/2.12.555 A",
+ "WhatsApp/2.12.556 A",
+ "WhatsApp/2.16.1/i",
+ "WhatsApp/2.16.13 A",
+ "WhatsApp/2.16.2/i",
+ "WhatsApp/2.16.42 A",
+ "WhatsApp/2.16.57 A"
+ ]
+ }
+ ,
+ {
+ "pattern": "contxbot",
+ "addition_date": "2017/02/25",
+ "instances": [
+ "Mozilla/5.0 (compatible;contxbot/1.0)"
+ ]
+ }
+ ,
+ {
+ "pattern": "pinterest.com.bot",
+ "addition_date": "2017/03/03",
+ "instances": [
+ "Pinterest/0.2 (+http://www.pinterest.com/bot.html)"
+ ],
+ "url": "http://www.pinterest.com/bot.html"
+ }
+ ,
+ {
+ "pattern": "electricmonk",
+ "addition_date": "2017/03/04",
+ "instances": [
+ "Mozilla/5.0 (compatible; electricmonk/3.2.0 +https://www.duedil.com/our-crawler/)"
+ ],
+ "url": "https://www.duedil.com/our-crawler/"
+ }
+ ,
+ {
+ "pattern": "GarlikCrawler",
+ "addition_date": "2017/03/18",
+ "instances": [
+ "GarlikCrawler/1.2 (http://garlik.com/, crawler@garlik.com)"
+ ],
+ "url": "http://garlik.com/"
+ }
+ ,
+ {
+ "pattern": "BingPreview\\/",
+ "addition_date": "2017/04/23",
+ "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b",
+ "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0; BingPreview/1.0b) like Gecko",
+ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; WOW64; Trident/6.0; BingPreview/1.0b)",
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; WOW64; Trident/5.0; BingPreview/1.0b)",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 BingPreview/1.0b"
+ ]
+ }
+ ,
+ {
+ "pattern": "vebidoobot",
+ "addition_date": "2017/05/08",
+ "instances": [
+ "Mozilla/5.0 (compatible; vebidoobot/1.0; +https://blog.vebidoo.de/vebidoobot/"
+ ],
+ "url": "https://blog.vebidoo.de/vebidoobot/"
+ }
+ ,
+ {
+ "pattern": "FemtosearchBot",
+ "addition_date": "2017/05/16",
+ "instances": [
+ "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)"
+ ],
+ "url": "http://femtosearch.com"
+ }
+ ,
+ {
+ "pattern": "Yahoo Link Preview",
+ "addition_date": "2017/06/28",
+ "instances": [
+ "Mozilla/5.0 (compatible; Yahoo Link Preview; https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html)"
+ ],
+ "url": "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html"
+ }
+ ,
+ {
+ "pattern": "MetaJobBot",
+ "addition_date": "2017/08/16",
+ "instances": [
+ "Mozilla/5.0 (compatible; MetaJobBot; http://www.metajob.de/crawler)"
+ ],
+ "url": "http://www.metajob.de/the/crawler"
+ }
+ ,
+ {
+ "pattern": "DomainStatsBot",
+ "addition_date": "2017/08/16",
+ "instances": [
+ "DomainStatsBot/1.0 (http://domainstats.io/our-bot)"
+ ],
+ "url": "http://domainstats.io/our-bot"
+ }
+ ,
+ {
+ "pattern": "mindUpBot",
+ "addition_date": "2017/08/16",
+ "instances": [
+ "mindUpBot (datenbutler.de)"
+ ],
+ "url": "http://www.datenbutler.de/"
+ }
+ ,
+ {
+ "pattern": "Daum\\/",
+ "addition_date": "2017/08/16",
+ "instances": [
+ "Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)"
+ ],
+ "url": "http://cs.daum.net/faq/15/4118.html?faqId=28966"
+ }
+ ,
+ {
+ "pattern": "Jugendschutzprogramm-Crawler",
+ "addition_date": "2017/08/16",
+ "instances": [
+ "Jugendschutzprogramm-Crawler; Info: http://www.jugendschutzprogramm.de"
+ ],
+ "url": "http://www.jugendschutzprogramm.de"
+ }
+ ,
+ {
+ "pattern": "Xenu Link Sleuth",
+ "addition_date": "2017/08/19",
+ "instances": [
+ "Xenu Link Sleuth/1.3.8"
+ ],
+ "url": "http://home.snafu.de/tilman/xenulink.html"
+ }
+ ,
+ {
+ "pattern": "Pcore-HTTP",
+ "addition_date": "2017/08/19",
+ "instances": [
+ "Pcore-HTTP/v0.40.3"
+ ],
+ "url": "https://bitbucket.org/softvisio/pcore/overview"
+ }
+ ,
+ {
+ "pattern": "moatbot",
+ "addition_date": "2017/09/16",
+ "instances": [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36 moatbot",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4 moatbot"
+ ],
+ "url": "https://moat.com"
+ }
+ ,
+ {
+ "pattern": "KosmioBot",
+ "addition_date": "2017/09/16",
+ "instances": [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36 (compatible; KosmioBot/1.0; +http://kosm.io/bot.html)"
+ ],
+ "url": "http://kosm.io/bot.html"
+ }
+ ,
+ {
+ "pattern": "pingdom",
+ "addition_date": "2017/09/16",
+ "instances": [
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/59.0.3071.109 Chrome/59.0.3071.109 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; +http://www.pingdom.com/)",
+ "Mozilla/5.0 (compatible; pingbot/2.0; +http://www.pingdom.com/)"
+ ],
+ "url": "http://www.pingdom.com"
+ }
+ ,
+ {
+ "pattern": "PhantomJS",
+ "addition_date": "2017/09/18",
+ "instances": [
+ "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1 bl.uk_lddc_renderbot/2.0.0 (+ http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)"
+ ],
+ "url": "http://phantomjs.org/"
+ }
+ ,
+ {
+ "pattern": "Gowikibot",
+ "addition_date": "2017/10/26",
+ "instances": [
+ "Mozilla/5.0 (compatible; Gowikibot/1.0; +http://www.gowikibot.com)"
+ ],
+ "url": "http://www.gowikibot.com"
+ }
+ ,
+ {
+ "pattern": "PiplBot",
+ "addition_date": "2017/10/30",
+ "instances": [
+ "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)"
+ ],
+ "url": "http://www.pipl.com/bot/"
+ }
+ ,
+ {
+ "pattern": "Discordbot",
+ "addition_date": "2017/09/22",
+ "url": "https://discordapp.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "TelegramBot",
+ "addition_date": "2017/10/01",
+ "instances": [
+ "TelegramBot (like TwitterBot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Jetslide",
+ "addition_date": "2017/09/27",
+ "url": "http://jetsli.de/crawler",
+ "instances": [
+ "Mozilla/5.0 (compatible; Jetslide; +http://jetsli.de/crawler)"
+ ]
+ }
+ ,
+ {
+ "pattern": "newsharecounts",
+ "addition_date": "2017/09/30",
+ "url": "http://newsharecounts.com/crawler",
+ "instances": [
+ "Mozilla/5.0 (compatible; NewShareCounts.com/1.0; +http://newsharecounts.com/crawler)"
+ ]
+ }
+ ,
+ {
+ "pattern": "James BOT",
+ "addition_date": "2017/10/12",
+ "url": "http://cognitiveseo.com/bot.html",
+ "instances": [
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv: Gecko/20070725 Firefox/ - James BOT - WebCrawler http://cognitiveseo.com/bot.html"
+ ]
+ }
+ ,
+ {
+ "pattern": "Barkrowler",
+ "addition_date": "2017/10/09",
+ "url": "http://www.exensa.com/crawl",
+ "instances": [
+ "Barkrowler/0.5.1 (experimenting / debugging - sorry for your logs ) http://www.exensa.com/crawl - admin@exensa.com -- based on BuBiNG",
+ "Barkrowler/0.7 (+http://www.exensa.com/crawl)"
+ ]
+ }
+ ,
+ {
+ "pattern": "TinEye",
+ "addition_date": "2017/10/14",
+ "url": "http://www.tineye.com/crawler.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)",
+ "TinEye/1.1 (http://tineye.com/crawler.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "SocialRankIOBot",
+ "addition_date": "2017/10/19",
+ "url": "http://socialrank.io/about",
+ "instances": [
+ "SocialRankIOBot; http://socialrank.io/about"
+ ]
+ }
+ ,
+ {
+ "pattern": "trendictionbot",
+ "addition_date": "2017/10/30",
+ "url": "http://www.trendiction.de/bot",
+ "instances": [
+ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/"
+ ]
+ }
+ ,
+ {
+ "pattern": "Ocarinabot",
+ "addition_date": "2017/09/27",
+ "instances": [
+ "Ocarinabot"
+ ]
+ }
+ ,
+ {
+ "pattern": "epicbot",
+ "addition_date": "2017/10/31",
+ "url": "http://www.epictions.com/epicbot",
+ "instances": [
+ "Mozilla/5.0 (compatible; epicbot; +http://www.epictions.com/epicbot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Primalbot",
+ "addition_date": "2017/09/27",
+ "url": "https://www.primal.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; Primalbot; +https://www.primal.com;)"
+ ]
+ }
+ ,
+ {
+ "pattern": "DuckDuckGo-Favicons-Bot",
+ "addition_date": "2017/10/06",
+ "url": "http://duckduckgo.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "GnowitNewsbot",
+ "addition_date": "2017/10/30",
+ "url": "http://www.gnowit.com",
+ "instances": [
+ "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0 / GnowitNewsbot / Contact information at http://www.gnowit.com"
+ ]
+ }
+ ,
+ {
+ "pattern": "Leikibot",
+ "addition_date": "2017/09/24",
+ "url": "http://www.leiki.com",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 6.3;compatible; Leikibot/1.0; +http://www.leiki.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "LinkArchiver",
+ "addition_date": "2017/09/24",
+ "instances": [
+ "@LinkArchiver twitter bot"
+ ]
+ }
+ ,
+ {
+ "pattern": "YaK\\/",
+ "addition_date": "2017/09/25",
+ "url": "http://linkfluence.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; YaK/1.0; http://linkfluence.com/; bot@linkfluence.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "PaperLiBot",
+ "addition_date": "2017/09/25",
+ "url": "http://support.paper.li/entries/20023257-what-is-paper-li",
+ "instances": [
+ "Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li)",
+ "Mozilla/5.0 (compatible; PaperLiBot/2.1; https://support.paper.li/entries/20023257-what-is-paper-li)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Digg Deeper",
+ "addition_date": "2017/09/26",
+ "url": "http://digg.com/about",
+ "instances": [
+ "Digg Deeper/v1 (http://digg.com/about)"
+ ]
+ }
+ ,
+ {
+ "pattern": "dcrawl",
+ "addition_date": "2017/09/22",
+ "instances": [
+ "dcrawl/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "Snacktory",
+ "addition_date": "2017/09/23",
+ "url": "https://github.com/karussell/snacktory",
+ "instances": [
+ "Mozilla/5.0 (compatible; Snacktory; +https://github.com/karussell/snacktory)"
+ ]
+ }
+ ,
+ {
+ "pattern": "AndersPinkBot",
+ "addition_date": "2017/09/24",
+ "url": "http://anderspink.com/bot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; AndersPinkBot/1.0; +http://anderspink.com/bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Fyrebot",
+ "addition_date": "2017/09/22",
+ "instances": [
+ "Fyrebot/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "EveryoneSocialBot",
+ "addition_date": "2017/09/22",
+ "url": "http://everyonesocial.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Mediatoolkitbot",
+ "addition_date": "2017/10/06",
+ "url": "http://mediatoolkit.com",
+ "instances": [
+ "Mediatoolkitbot (complaints@mediatoolkit.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Luminator-robots",
+ "addition_date": "2017/09/22",
+ "instances": [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.13 Luminator-robots/2.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "ExtLinksBot",
+ "addition_date": "2017/11/02",
+ "url": "https://extlinks.com/Bot.html",
+ "instances": [
+ "Mozilla/5.0 (compatible; ExtLinksBot/1.5 +https://extlinks.com/Bot.html)"
+ ]
+ }
+ ,
+ {
+ "pattern": "SurveyBot",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv: Gecko/2009073022 Firefox/3.5.2 (.NET CLR 3.5.30729) SurveyBot/2.3 (DomainTools)"
+ ]
+ }
+ ,
+ {
+ "pattern": "NING\\/",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "NING/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "okhttp",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "okhttp/2.5.0",
+ "okhttp/2.7.5",
+ "okhttp/3.2.0",
+ "okhttp/3.5.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "Nuzzel",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Nuzzel"
+ ]
+ }
+ ,
+ {
+ "pattern": "omgili",
+ "addition_date": "2017/11/02",
+ "url": "http://omgili.com",
+ "instances": [
+ "omgili/0.5 +http://omgili.com"
+ ]
+ }
+ ,
+ {
+ "pattern": "PocketParser",
+ "addition_date": "2017/11/02",
+ "url": "https://getpocket.com/pocketparser_ua",
+ "instances": [
+ "PocketParser/2.0 (+https://getpocket.com/pocketparser_ua)"
+ ]
+ }
+ ,
+ {
+ "pattern": "YisouSpider",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "YisouSpider"
+ ]
+ }
+ ,
+ {
+ "pattern": "um-LN",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "ToutiaoSpider",
+ "addition_date": "2017/11/02",
+ "url": "http://web.toutiao.com/media_cooperation/",
+ "instances": [
+ "Mozilla/5.0 (compatible; ToutiaoSpider/1.0; http://web.toutiao.com/media_cooperation/;)"
+ ]
+ }
+ ,
+ {
+ "pattern": "MuckRack",
+ "addition_date": "2017/11/02",
+ "url": "http://muckrack.com",
+ "instances": [
+ "Mozilla/5.0 (compatible; MuckRack/1.0; +http://muckrack.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Jamie's Spider",
+ "addition_date": "2017/11/02",
+ "url": "http://jamiembrown.com/",
+ "instances": [
+ "Jamie's Spider (http://jamiembrown.com/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "AHC\\/",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "AHC/2.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "NetcraftSurveyAgent",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Laserlikebot",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Laserlikebot/0.1)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Apache-HttpClient",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Apache-HttpClient/4.2.3 (java 1.5)",
+ "Apache-HttpClient/4.2.5 (java 1.5)",
+ "Apache-HttpClient/4.3.1 (java 1.5)",
+ "Apache-HttpClient/4.3.3 (java 1.5)",
+ "Apache-HttpClient/4.3.5 (java 1.5)",
+ "Apache-HttpClient/4.4.1 (Java/1.8.0_65)",
+ "Apache-HttpClient/4.5.3 (Java/1.8.0_121)"
+ ]
+ }
+ ,
+ {
+ "pattern": "AppEngine-Google",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "AppEngine-Google; (+http://code.google.com/appengine; appid: example)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Jetty",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Jetty/9.3.z-SNAPSHOT"
+ ]
+ }
+ ,
+ {
+ "pattern": "Upflow",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "Upflow/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "Thinklab",
+ "addition_date": "2017/11/02",
+ "url": "thinklab.com",
+ "instances": [
+ "Thinklab (thinklab.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Traackr.com",
+ "addition_date": "2017/11/02",
+ "url": "Traackr.com",
+ "instances": [
+ "Traackr.com"
+ ]
+ }
+ ,
+ {
+ "pattern": "Twurly",
+ "addition_date": "2017/11/02",
+ "url": "http://twurly.org",
+ "instances": [
+ "Ruby, Twurly v1.1 (http://twurly.org)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Mastodon",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "http.rb/2.2.2 (Mastodon/1.5.1; +https://example-masto-instance.org/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "http_get",
+ "addition_date": "2017/11/02",
+ "instances": [
+ "http_get"
+ ]
+ }
+ ,
+ {
+ "pattern": "DnyzBot",
+ "addition_date": "2017/11/20",
+ "instances": [
+ "Mozilla/5.0 (compatible; DnyzBot/1.0)",
+ "Mozilla/5.0 (compatible; DnyzBot/1.0) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/64.0.3282.167 Safari/537.36",
+ "Mozilla/5.0 (compatible; DnyzBot/1.0) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/64.0.3264.0 Safari/537.36"
+ ]
+ }
+ ,
+ {
+ "pattern": "botify",
+ "addition_date": "2018/02/01",
+ "instances": [
+ "Mozilla/5.0 (compatible; botify; http://botify.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "007ac9 Crawler",
+ "addition_date": "2018/02/09",
+ "instances": [
+ "Mozilla/5.0 (compatible; 007ac9 Crawler; http://crawler.007ac9.net/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "BehloolBot",
+ "addition_date": "2018/02/09",
+ "instances": [
+ "Mozilla/5.0 (compatible; BehloolBot/beta; +http://www.webeaver.com/bot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "BrandVerity",
+ "addition_date": "2018/02/27",
+ "instances": [
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/55.0 BrandVerity/1.0 (http://www.brandverity.com/why-is-brandverity-visiting-me)"
+ ]
+ }
+ ,
+ {
+ "pattern": "check_http",
+ "addition_date": "2018/02/09",
+ "instances": [
+ "check_http/v2.2.1 (nagios-plugins 2.2.1)"
+ ]
+ }
+ ,
+ {
+ "pattern": "BDCbot",
+ "addition_date": "2018/02/09",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 6.1; compatible; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36"
+ ]
+ }
+ ,
+ {
+ "pattern": "ZumBot",
+ "addition_date": "2018/02/09",
+ "instances": [
+ "Mozilla/5.0 (compatible; ZumBot/1.0; http://help.zum.com/inquiry)"
+ ]
+ }
+ ,
+ {
+ "pattern": "EZID",
+ "addition_date": "2018/02/09",
+ "instances": [
+ "EZID (EZID link checker; https://ezid.cdlib.org/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "ICC-Crawler",
+ "addition_date": "2018/02/28",
+ "instances": [
+ "ICC-Crawler/2.0 (Mozilla-compatible; ; http://ucri.nict.go.jp/en/icccrawler.html)"
+ ],
+ "url": "http://ucri.nict.go.jp/en/icccrawler.html"
+ }
+ ,
+ {
+ "pattern": "ArchiveBot",
+ "addition_date": "2018/02/28",
+ "instances": [
+ "ArchiveTeam ArchiveBot/20170106.02 (wpull 2.0.2)"
+ ],
+ "url": "https://github.com/ArchiveTeam/ArchiveBot"
+ }
+ ,
+ {
+ "pattern": "^LCC ",
+ "addition_date": "2018/02/28",
+ "instances": [
+ "LCC (+http://corpora.informatik.uni-leipzig.de/crawler_faq.html)"
+ ],
+ "url": "http://corpora.informatik.uni-leipzig.de/crawler_faq.html"
+ }
+ ,
+ {
+ "pattern": "filterdb.iss.net\\/crawler",
+ "addition_date": "2018/03/16",
+ "instances": [
+ "Mozilla/5.0 (compatible; oBot/2.3.1; +http://filterdb.iss.net/crawler/)"
+ ],
+ "url": "http://filterdb.iss.net/crawler/"
+ }
+ ,
+ {
+ "pattern": "BLP_bbot",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "BLP_bbot/0.1"
+ ]
+ }
+ ,
+ {
+ "pattern": "BomboraBot",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; BomboraBot/1.0; +http://www.bombora.com/bot)"
+ ],
+ "url": "http://www.bombora.com/bot"
+ }
+ ,
+ {
+ "pattern": "Buck\\/",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html)"
+ ],
+ "url": "https://app.hypefactors.com/media-monitoring/about.html"
+ }
+ ,
+ {
+ "pattern": "Companybook-Crawler",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Companybook-Crawler (+https://www.companybooknetworking.com/)"
+ ],
+ "url": "https://www.companybooknetworking.com/"
+ }
+ ,
+ {
+ "pattern": "Genieo",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html)"
+ ],
+ "url": "http://www.genieo.com/webfilter.html"
+ }
+ ,
+ {
+ "pattern": "magpie-crawler",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
+ ],
+ "url": "http://www.brandwatch.net"
+ }
+ ,
+ {
+ "pattern": "MeltwaterNews",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "MeltwaterNews www.meltwater.com"
+ ],
+ "url": "http://www.meltwater.com"
+ }
+ ,
+ {
+ "pattern": "Moreover",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Mozilla/5.0 Moreover/5.1 (+http://www.moreover.com)"
+ ],
+ "url": "http://www.moreover.com"
+ }
+ ,
+ {
+ "pattern": "newspaper\\/",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "newspaper/0.2.5",
+ "newspaper/0.2.6",
+ "newspaper/"
+ ]
+ }
+ ,
+ {
+ "pattern": "ScoutJet",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)"
+ ],
+ "url": "http://www.scoutjet.com/"
+ }
+ ,
+ {
+ "pattern": "(^| )sentry\\/",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "sentry/8.22.0 (https://sentry.io)"
+ ],
+ "url": "https://sentry.io"
+ }
+ ,
+ {
+ "pattern": "StorygizeBot",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; StorygizeBot; http://www.storygize.com)"
+ ],
+ "url": "http://www.storygize.com"
+ }
+ ,
+ {
+ "pattern": "UptimeRobot",
+ "addition_date": "2018/03/27",
+ "instances": [
+ "Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)"
+ ],
+ "url": "http://www.uptimerobot.com/"
+ }
+ ,
+ {
+ "pattern": "OutclicksBot",
+ "addition_date": "2018/04/21",
+ "instances": [
+ "OutclicksBot/2 +https://www.outclicks.net/agent/VjzDygCuk4ubNmg40ZMbFqT0sIh7UfOKk8s8ZMiupUR",
+ "OutclicksBot/2 +https://www.outclicks.net/agent/gIYbZ38dfAuhZkrFVl7sJBFOUhOVct6J1SvxgmBZgCe",
+ "OutclicksBot/2 +https://www.outclicks.net/agent/PryJzTl8POCRHfvEUlRN5FKtZoWDQOBEvFJ2wh6KH5J",
+ "OutclicksBot/2 +https://www.outclicks.net/agent/p2i4sNUh7eylJF1S6SGgRs5mP40ExlYvsr9GBxVQG6h"
+ ],
+ "url": "https://www.outclicks.net"
+ }
+ ,
+ {
+ "pattern": "seoscanners",
+ "addition_date": "2018/05/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)"
+ ],
+ "url": "http://www.seoscanners.net/"
+ }
+ ,
+ {
+ "pattern": "Hatena",
+ "addition_date": "2018/05/29",
+ "instances": [
+ "Hatena Antenna/0.3",
+ "Hatena::Russia::Crawler/0.01"
+ ]
+ }
+ ,
+ {
+ "pattern": "Google Web Preview",
+ "addition_date": "2018/05/31",
+ "instances": [
+ "Mozilla/5.0 (Linux; U; Android 2.3.4; generic) AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview) Version/4.0 Mobile Safari/537.36"
+ ]
+ }
+ ,
+ {
+ "pattern": "MauiBot",
+ "addition_date": "2018/06/06",
+ "instances": [
+ "MauiBot (crawler.feedback+wc@gmail.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "AlphaBot",
+ "addition_date": "2018/05/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; AlphaBot/3.2; +http://alphaseobot.com/bot.html)"
+ ],
+ "url": "http://alphaseobot.com/bot.html"
+ }
+ ,
+ {
+ "pattern": "SBL-BOT",
+ "addition_date": "2018/06/06",
+ "instances": [
+ "SBL-BOT (http://sbl.net)"
+ ],
+ "url": "http://sbl.net",
+ "description" : "Bot of SoftByte BlackWidow"
+ }
+ ,
+ {
+ "pattern": "IAS crawler",
+ "addition_date": "2018/06/06",
+ "instances": [
+ "IAS crawler (ias_crawler; http://integralads.com/site-indexing-policy/)"
+ ],
+ "url": "http://integralads.com/site-indexing-policy/",
+ "description" : "Bot of Integral Ad Science, Inc."
+ }
+ ,
+ {
+ "pattern": "adscanner",
+ "addition_date": "2018/06/24",
+ "instances": [
+ "Mozilla/5.0 (compatible; adscanner/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Netvibes",
+ "addition_date": "2018/06/24",
+ "instances": [
+ "Netvibes (crawler/bot; http://www.netvibes.com",
+ "Netvibes (crawler; http://www.netvibes.com)"
+ ],
+ "url": "http://www.netvibes.com"
+ }
+ ,
+ {
+ "pattern": "acapbot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Mozilla/5.0 (compatible;acapbot/0.1;treat like Googlebot)",
+ "Mozilla/5.0 (compatible;acapbot/0.1.;treat like Googlebot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Baidu-YunGuanCe",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Baidu-YunGuanCe-Bot(ce.baidu.com)",
+ "Baidu-YunGuanCe-SLABot(ce.baidu.com)",
+ "Baidu-YunGuanCe-ScanBot(ce.baidu.com)",
+ "Baidu-YunGuanCe-PerfBot(ce.baidu.com)",
+ "Baidu-YunGuanCe-VSBot(ce.baidu.com)"
+ ],
+ "url": "https://ce.baidu.com/topic/topic20150908",
+ "description": "Baidu Cloud Watch"
+ }
+ ,
+ {
+ "pattern": "bitlybot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "bitlybot/3.0 (+http://bit.ly/)",
+ "bitlybot/2.0",
+ "bitlybot"
+ ],
+ "url": "http://bit.ly/"
+ }
+ ,
+ {
+ "pattern": "blogmuraBot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "blogmuraBot (+http://www.blogmura.com)"
+ ],
+ "url": "http://www.blogmura.com",
+ "description": "A blog ranking site which links to blogs on just about every theme possible."
+ }
+ ,
+ {
+ "pattern": "Bot.AraTurka.com",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Bot.AraTurka.com/0.0.1"
+ ],
+ "url": "http://www.araturka.com"
+ }
+ ,
+ {
+ "pattern": "bot-pge.chlooe.com",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "bot-pge.chlooe.com/1.0.0 (+http://www.chlooe.com/)"
+ ]
+ }
+ ,
+ {
+ "pattern": "BoxcarBot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; BoxcarBot/1.1; +awesome@boxcar.io)"
+ ],
+ "url": "https://boxcar.io/"
+ }
+ ,
+ {
+ "pattern": "BTWebClient",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "BTWebClient/180B(9704)"
+ ],
+ "url": "http://www.utorrent.com/",
+ "description": "µTorrent BitTorrent Client"
+ }
+ ,
+ {
+ "pattern": "ContextAd Bot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;.NET CLR 1.0.3705; ContextAd Bot 1.0)",
+ "ContextAd Bot 1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "Digincore bot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; Digincore bot; https://www.digincore.com/crawler.html for rules and instructions.)"
+ ],
+ "url": "http://www.digincore.com/crawler.html"
+ }
+ ,
+ {
+ "pattern": "Disqus",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Disqus/1.0"
+ ],
+ "url": "https://disqus.com/",
+ "description": "validate and quality check pages."
+ }
+ ,
+ {
+ "pattern": "Feedly",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)",
+ "FeedlyBot/1.0 (http://feedly.com)"
+ ],
+ "url": "https://www.feedly.com/fetcher.html",
+ "description": "Feedly Fetcher is how Feedly grabs RSS or Atom feeds when users choose to add them to their Feedly or any of the other applications built on top of the feedly cloud."
+ }
+ ,
+ {
+ "pattern": "Fetch\\/",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Fetch/2.0a (CMS Detection/Web/SEO analysis tool, see http://guess.scritch.org)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Fever",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Fever/1.38 (Feed Parser; http://feedafever.com; Allow like Gecko)"
+ ],
+ "url": "http://feedafever.com"
+ }
+ ,
+ {
+ "pattern": "Flamingo_SearchEngine",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Flamingo_SearchEngine (+http://www.flamingosearch.com/bot)"
+ ]
+ }
+ ,
+ {
+ "pattern": "FlipboardProxy",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; FlipboardProxy/1.1; +http://flipboard.com/browserproxy)",
+ "Mozilla/5.0 (compatible; FlipboardProxy/1.2; +http://flipboard.com/browserproxy)",
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6 (FlipboardProxy/1.1; +http://flipboard.com/browserproxy)",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:28.0) Gecko/20100101 Firefox/28.0 (FlipboardProxy/1.1; +http://flipboard.com/browserproxy)"
+ ],
+ "url": "https://about.flipboard.com/browserproxy/",
+ "description": "a proxy service to fetch, validate, and prepare certain elements of websites for presentation through the Flipboard Application"
+ }
+ ,
+ {
+ "pattern": "g2reader-bot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "g2reader-bot/1.0 (+http://www.g2reader.com/)"
+ ],
+ "url": "http://www.g2reader.com/"
+ }
+ ,
+ {
+ "pattern": "imrbot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; imrbot/1.10.8 +http://www.mignify.com)"
+ ],
+ "url": "http://www.mignify.com"
+ }
+ ,
+ {
+ "pattern": "K7MLWCBot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "K7MLWCBot/1.0 (+http://www.k7computing.com)"
+ ],
+ "url": "http://www.k7computing.com",
+ "description": "Virus scanner"
+ }
+ ,
+ {
+ "pattern": "Kemvibot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Kemvibot/1.0 (http://kemvi.com, marco@kemvi.com)"
+ ],
+ "url": "http://kemvi.com"
+ }
+ ,
+ {
+ "pattern": "Landau-Media-Spider",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "Landau-Media-Spider/1.0(http://bots.landaumedia.de/bot.html)"
+ ],
+ "url": "http://bots.landaumedia.de/bot.html"
+ }
+ ,
+ {
+ "pattern": "linkapediabot",
+ "addition_date": "2018/06/27",
+ "instances": [
+ "linkapediabot (+http://www.linkapedia.com)"
+ ],
+ "url": "http://www.linkapedia.com"
+ }
+ ,
+ {
+ "pattern": "vkShare",
+ "addition_date": "2018/07/02",
+ "instances": [
+ "Mozilla/5.0 (compatible; vkShare; +http://vk.com/dev/Share)"
+ ],
+ "url": "http://vk.com/dev/Share"
+ }
+ ,
+ {
+ "pattern": "Siteimprove.com",
+ "addition_date": "2018/06/22",
+ "instances": [
+ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) LinkCheck by Siteimprove.com",
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0) Match by Siteimprove.com",
+ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) SiteCheck-sitecrawl by Siteimprove.com",
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0) LinkCheck by Siteimprove.com"
+ ]
+ }
+ ,
+ {
+ "pattern": "BLEXBot\\/",
+ "addition_date": "2018/07/07",
+ "instances": [
+ "Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)"
+ ],
+ "url": "http://webmeup-crawler.com"
+ }
+ ,
+ {
+ "pattern": "DareBoost",
+ "addition_date": "2018/07/07",
+ "instances": [
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36 DareBoost"
+ ],
+ "url": "https://www.dareboost.com/",
+ "description": "Bot to test, Analyze and Optimize website"
+ }
+ ,
+ {
+ "pattern": "ZuperlistBot\\/",
+ "addition_date": "2018/07/07",
+ "instances": [
+ "Mozilla/5.0 (compatible; ZuperlistBot/1.0)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Miniflux\\/",
+ "addition_date": "2018/07/07",
+ "instances": [
+ "Mozilla/5.0 (compatible; Miniflux/2.0.x-dev; +https://miniflux.net)",
+ "Mozilla/5.0 (compatible; Miniflux/2.0.3; +https://miniflux.net)",
+ "Mozilla/5.0 (compatible; Miniflux/2.0.7; +https://miniflux.net)",
+ "Mozilla/5.0 (compatible; Miniflux/2.0.10; +https://miniflux.net)",
+ "Mozilla/5.0 (compatibl$; Miniflux/2.0.x-dev; +https://miniflux.app)",
+ "Mozilla/5.0 (compatible; Miniflux/2.0.11; +https://miniflux.app)",
+ "Mozilla/5.0 (compatible; Miniflux/2.0.12; +https://miniflux.app)",
+ "Mozilla/5.0 (compatible; Miniflux/ae1dc1a; +https://miniflux.app)",
+ "Mozilla/5.0 (compatible; Miniflux/3b6e44c; +https://miniflux.app)"
+ ],
+ "url": "https://miniflux.net",
+ "description": "Miniflux is a minimalist and opinionated feed reader."
+ }
+ ,
+ {
+ "pattern": "Feedspot",
+ "addition_date": "2018/07/07",
+ "instances": [
+ "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)",
+ "Mozilla/5.0 (compatible; Feedspot/1.0 (+https://www.feedspot.com/fs/fetcher; like FeedFetcher-Google)"
+ ],
+ "url": "http://www.feedspot.com/fs/bot"
+ }
+ ,
+ {
+ "pattern": "Diffbot\\/",
+ "addition_date": "2018/07/07",
+ "instances": [
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv: Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)"
+ ],
+ "url": "http://www.diffbot.com"
+ }
+ ,
+ {
+ "pattern": "SEOkicks",
+ "addition_date": "2018/08/22",
+ "instances": [
+ "Mozilla/5.0 (compatible; SEOkicks; +https://www.seokicks.de/robot.html)"
+ ],
+ "url": "https://www.seokicks.de/robot.html"
+ }
+ ,
+ {
+ "pattern": "tracemyfile",
+ "addition_date": "2018/08/23",
+ "instances": [
+ "Mozilla/5.0 (compatible; tracemyfile/1.0; +bot@tracemyfile.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Nimbostratus-Bot",
+ "addition_date": "2018/08/29",
+ "instances": [
+ "Mozilla/5.0 (compatible; Nimbostratus-Bot/v1.3.2; http://cloudsystemnetworks.com)"
+ ]
+ }
+ ,
+ {
+ "pattern": "zgrab",
+ "addition_date": "2018/08/30",
+ "instances": [
+ "Mozilla/5.0 zgrab/0.x"
+ ],
+ "url": "https://zmap.io/"
+ }
+ ,
+ {
+ "pattern": "PR-CY.RU",
+ "addition_date": "2018/08/30",
+ "instances": [
+ "Mozilla/5.0 (compatible; PR-CY.RU; + https://a.pr-cy.ru)"
+ ],
+ "url": "https://a.pr-cy.ru/"
+ }
+ ,
+ {
+ "pattern": "AdsTxtCrawler",
+ "addition_date": "2018/08/30",
+ "instances": [
+ "AdsTxtCrawler/1.0"
+ ]
+ },
+ {
+ "pattern": "Datafeedwatch",
+ "addition_date": "2018/09/05",
+ "instances": [
+ "Datafeedwatch/2.1.x"
+ ],
+ "url": "https://www.datafeedwatch.com/"
+ }
+ ,
+ {
+ "pattern": "Zabbix",
+ "addition_date": "2018/09/05",
+ "instances": [
+ "Zabbix"
+ ],
+ "url": "https://www.zabbix.com/documentation/3.4/manual/web_monitoring"
+ }
+ ,
+ {
+ "pattern": "TangibleeBot",
+ "addition_date": "2018/09/05",
+ "instances": [
+ "TangibleeBot/ (http://tangiblee.com/bot)"
+ ],
+ "url": "http://tangiblee.com/bot"
+ }
+ ,
+ {
+ "pattern": "google-xrawler",
+ "addition_date": "2018/09/05",
+ "instances": [
+ "google-xrawler"
+ ],
+ "url": "https://webmasters.stackexchange.com/questions/105560/what-is-the-google-xrawler-user-agent-used-for"
+ }
+ ,
+ {
+ "pattern": "axios",
+ "addition_date": "2018/09/06",
+ "instances": [
+ "axios/0.18.0"
+ ],
+ "url": "https://github.com/axios/axios"
+ }
+ ,
+ {
+ "pattern": "Amazon CloudFront",
+ "addition_date": "2018/09/07",
+ "instances": [
+ "Amazon CloudFront"
+ ],
+ "url": "https://aws.amazon.com/cloudfront/"
+ }
+ ,
+ {
+ "pattern": "Pulsepoint",
+ "addition_date": "2018/09/24",
+ "instances": [
+ "Pulsepoint XT3 web scraper"
+ ]
+ }
+ ,
+ {
+ "pattern": "CloudFlare-AlwaysOnline",
+ "addition_date": "2018/09/27",
+ "instances": [
+ "Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +http://www.cloudflare.com/always-online) AppleWebKit/534.34",
+ "Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +https://www.cloudflare.com/always-online) AppleWebKit/534.34"
+ ],
+ "url" : "https://www.cloudflare.com/always-online/"
+ }
+ ,
+ {
+ "pattern": "Google-Structured-Data-Testing-Tool",
+ "addition_date": "2018/10/02",
+ "instances": [
+ "Mozilla/5.0 (compatible; Google-Structured-Data-Testing-Tool +https://search.google.com/structured-data/testing-tool)",
+ "Mozilla/5.0 (compatible; Google-Structured-Data-Testing-Tool +http://developers.google.com/structured-data/testing-tool/)"
+ ],
+ "url": "https://search.google.com/structured-data/testing-tool"
+ }
+ ,
+ {
+ "pattern": "WordupInfoSearch",
+ "addition_date": "2018/10/07",
+ "instances": [
+ "WordupInfoSearch/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "WebDataStats",
+ "addition_date": "2018/10/08",
+ "instances": [
+ "Mozilla/5.0 (compatible; WebDataStats/1.0 ; +https://webdatastats.com/policy.html)"
+ ],
+ "url": "https://webdatastats.com/"
+ }
+ ,
+ {
+ "pattern": "HttpUrlConnection",
+ "addition_date": "2018/10/08",
+ "instances": [
+ "Jersey/2.25.1 (HttpUrlConnection 1.8.0_141)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Seekport Crawler",
+ "addition_date": "2018/10/08",
+ "instances": [
+ "Mozilla/5.0 (compatible; Seekport Crawler; http://seekport.com/)"
+ ],
+ "url": "http://seekport.com/"
+ }
+ ,
+ {
+ "pattern": "ZoomBot",
+ "addition_date": "2018/10/10",
+ "instances": [
+ "ZoomBot (Linkbot 1.0 http://suite.seozoom.it/bot.html)"
+ ],
+ "url": "http://suite.seozoom.it/bot.html"
+ }
+ ,
+ {
+ "pattern": "VelenPublicWebCrawler",
+ "addition_date": "2018/10/09",
+ "instances": [
+ "VelenPublicWebCrawler (velen.io)"
+ ]
+ }
+ ,
+ {
+ "pattern": "MoodleBot",
+ "addition_date": "2018/10/10",
+ "instances": [
+ "MoodleBot/1.0"
+ ]
+ }
+ ,
+ {
+ "pattern": "jpg-newsbot",
+ "addition_date": "2018/10/10",
+ "instances": [
+ "jpg-newsbot/2.0; (+https://vipnytt.no/bots/)"
+ ],
+ "url": "https://vipnytt.no/bots/"
+ }
+ ,
+ {
+ "pattern": "outbrain",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "Mozilla/5.0 (Java) outbrain"
+ ],
+ "url": "https://www.outbrain.com/help/advertisers/invalid-url/"
+ }
+ ,
+ {
+ "pattern": "W3C_Validator",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "W3C_Validator/1.3"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "Validator\\.nu",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "Validator.nu/LV"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "W3C-checklink",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "W3C-checklink"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "W3C-mobileOK",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "W3C-mobileOK/DDC-1.0"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "W3C_I18n-Checker",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "W3C_I18n-Checker/1.0"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "FeedValidator",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "FeedValidator/1.3"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "W3C_CSS_Validator",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "Jigsaw/2.3.0 W3C_CSS_Validator_JFouffa/2.0"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "W3C_Unicorn",
+ "addition_date": "2018/10/14",
+ "instances": [
+ "W3C_Unicorn/1.0"
+ ],
+ "url": "https://validator.w3.org/services"
+ }
+ ,
+ {
+ "pattern": "Google-PhysicalWeb",
+ "addition_date": "2018/10/21",
+ "instances": [
+ "Mozilla/5.0 (Google-PhysicalWeb)"
+ ]
+ }
+ ,
+ {
+ "pattern": "Blackboard",
+ "addition_date": "2018/10/28",
+ "instances": [
+ "Blackboard Safeassign"
+ ],
+ "url": "https://help.blackboard.com/Learn/Administrator/Hosting/Tools_Management/SafeAssign"
+ },
+ {
+ "pattern": "ICBot\\/",
+ "addition_date": "2018/10/23",
+ "instances": [
+ "Mozilla/5.0 (compatible; ICBot/0.1; +https://ideasandcode.xyz"
+ ],
+ "url": "https://ideasandcode.xyz"
+ },
+ {
+ "pattern": "BazQux",
+ "addition_date": "2018/10/23",
+ "instances": [
+ "Mozilla/5.0 (compatible; BazQux/2.4; +https://bazqux.com/fetcher; 1 subscribers)"
+ ],
+ "url": "https://bazqux.com/fetcher"
+ },
+ {
+ "pattern": "Twingly",
+ "addition_date": "2018/10/23",
+ "instances": [
+ "Mozilla/5.0 (compatible; Twingly Recon; twingly.com)"
+ ],
+ "url": "https://twingly.com"
+ },
+ {
+ "pattern": "Rivva",
+ "addition_date": "2018/10/23",
+ "instances": [
+ "Mozilla/5.0 (compatible; Rivva; http://rivva.de)"
+ ],
+ "url": "http://rivva.de"
+ },
+ {
+ "pattern": "Experibot",
+ "addition_date": "2018/11/03",
+ "instances": [
+ "Experibot-v2 http://goo.gl/ZAr8wX",
+ "Experibot-v3 http://goo.gl/ZAr8wX"
+ ],
+ "url": "https://amirkr.wixsite.com/experibot"
+ },
+ {
+ "pattern": "awesomecrawler",
+ "addition_date": "2018/11/24",
+ "instances": [
+ "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.5 Safari/537.22 +awesomecrawler"
+ ]
+ },
+ {
+ "pattern": "Dataprovider.com",
+ "addition_date": "2018/11/24",
+ "instances": [
+ "Mozilla/5.0 (compatible; Dataprovider.com)"
+ ],
+ "url": "https://www.dataprovider.com/"
+ },
+ {
+ "pattern": "GroupHigh\\/",
+ "addition_date": "2018/11/24",
+ "instances": [
+ "Mozilla/5.0 (compatible; GroupHigh/1.0; +http://www.grouphigh.com/"
+ ],
+ "url": "http://www.grouphigh.com/"
+ },
+ {
+ "pattern": "theoldreader.com",
+ "addition_date": "2018/12/02",
+ "instances": [
+ "Mozilla/5.0 (compatible; theoldreader.com)"
+ ],
+ "url": "https://www.theoldreader.com/"
+ }
+ ,
+ {
+ "pattern": "AnyEvent",
+ "addition_date": "2018/12/07",
+ "instances": [
+ "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/2.24; +http://software.schmorp.de/pkg/AnyEvent)"
+ ],
+ "url": "http://software.schmorp.de/pkg/AnyEvent.html"
+ }
+ ,
+ {
+ "pattern": "Uptimebot",
+ "addition_date": "2019/01/17",
+ "instances": [
+ "Uptimebot.org - Free website monitoring"
+ ],
+ "url": "http://uptimebot.org/"
+ }
+ ,
+ {
+ "pattern": "Nmap Scripting Engine",
+ "addition_date": "2019/02/04",
+ "instances": [
+ "Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html)"
+ ],
+ "url": "https://nmap.org/book/nse.html"
+ }
diff --git a/lib/impressionist/bots.rb b/lib/impressionist/bots.rb
index 93c11d2d..d6da72a4 100644
--- a/lib/impressionist/bots.rb
+++ b/lib/impressionist/bots.rb
@@ -1,20 +1,19 @@
require 'timeout'
require 'net/http'
-require 'nokogiri'
+require 'json'
module Impressionist
module Bots
- LIST_URL = "http://www.user-agents.org/allagents.xml"
+ LIST_URL = "https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json"
def self.consume
Timeout.timeout(4) do
response = Net::HTTP.get(URI.parse(LIST_URL))
- doc = Nokogiri::XML(response)
+ my_hash = JSON.parse(response)
list = []
- doc.xpath('//user-agent').each do |agent|
- type = agent.xpath("Type").text
- list << agent.xpath("String").text.gsub("<","<") if ["R","S"].include?(type) #gsub hack for badly formatted data
+ my_hash.each do |agent|
+ list << agent['instances']
- list
+ pp list.flatten
diff --git a/tests/test_app/spec/models/bots_spec.rb b/tests/test_app/spec/models/bots_spec.rb
index a2983587..ee347311 100644
--- a/tests/test_app/spec/models/bots_spec.rb
+++ b/tests/test_app/spec/models/bots_spec.rb
@@ -8,7 +8,7 @@
it "is true if user_agent is on bot list" do
- Impressionist::Bots.bot?("A-Online Search").should be_true
+ Impressionist::Bots.bot?("Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com").should be_true
it "is false if user_agent is blank" do
@@ -24,4 +24,4 @@
Impressionist::Bots.bot?.should be_false
\ No newline at end of file