# Default rules: apply to every crawler that is not matched by a more specific record.
User-agent: *
Disallow: /bad.html
Disallow: /blank.html
Disallow: /getout.php
Disallow: /mt/

# --- Blocked crawlers (separate record; the blank line above terminates the default record) ---
# http://www.webmasterworld.com/robots.txt has a long list of active robots you might want to block.
# Some of these (and many others) ignore robots.txt, and are forcibly blocked in .htaccess.
# List below stolen from http://www.diveintomark.org/robots.txt . Sorry Mark.
# Crawlers denied the entire site. All names share the single rule at the end
# of this record; the list is kept in case-insensitive alphabetical order.
User-agent: almaden
User-agent: asterias
User-agent: baiduspider
User-agent: cosmos
User-agent: curl
User-agent: Generic
User-agent: k2spider
User-agent: larbin
User-agent: LinkWalker
User-agent: LNSpiderguy
User-agent: lwp-trivial
User-agent: MarcoPolo
User-agent: MIIxpc
User-agent: MSIECrawler
User-agent: NationalDirectory
User-agent: NPbot
User-agent: Nutch
User-agent: pompos
User-agent: psbot
User-agent: PubCrawl
User-agent: semanticdiscovery
User-agent: sitecheck.internetseer.com
User-agent: Teleport
User-agent: TurnitinBot
User-agent: WebSearchBench
User-agent: Wget
User-agent: Zao
Disallow: /