# robots.txt for www.smythies.com
#
# A note to wordpress users that fetch this file as
# an example via the "Multipart robots.txt editor"
# plugin.
# I deny a lot of bots; you might want to reconsider
# for your application.
# Also, this file is only one step in my access management.
# I also use:
# Apache rewrite rules, i.e. for bots that ignore this file.
# Direct iptables DROP rules for annoying crawlers without an
# otherwise uniquely identifiable user agent string.
# (Illustrative sketches of both appear further down in these comments.)
#
#
# robots.txt 2024.04.06
# disallow Friendly_Crawler
# robots.txt 2024.04.03
# disallow FriendlyCrawler
# robots.txt 2023.01.24
# disallow woorankreview
# disallow MaCoCu
# disallow AwarioBot
# disallow okhttp
# disallow Bytespider
# disallow SenutoBot
# disallow OWLer
# disallow FunnelBack
# disallow TinyTestBot
# disallow Cincraw
# disallow HubSpot
# disallow fidget-spinner-bot
# disallow ClaudeBot
#
# robots.txt 2022.04.21
# disallow Keybot Translation-Search-Machine
# disallow INETDEX-BOT
# disallow YaK
# disallow MixrankBot
# disallow SERPtimizerBot
# disallow vuhuvBot
# disallow Amazonbot
# disallow IonCrawl
# disallow SeekportBot
# disallow Jambot
# disallow Diffbot
# disallow SeekBot
# disallow Twitterbot
# disallow webprosbot
#
# robots.txt 2022.02.27
# disallow .mp4
#
# robots.txt 2021.09.29
# disallow: User-agent: clark-crawler
# requires its own directive, or doesn't work:
# disallow: User-agent: DataForSeoBot
# disallow: SurdotlyBot
# disallow: DomainStatsBot
# disallow: FlfBaldrBot
#
# robots.txt 2021.06.15
# disallow: User-agent: InfoTigerBot
# Is this just a new name for the 2021.05.17 entry?
#
# robots.txt 2021.05.17
# disallow: User-agent: infotiger
# They do not actually set their user agent, but
# they do read robots.txt, although not often.
#
# robots.txt 2021.05.14
# I suspect bingbot is ignoring its disallow
# and using the later old MSNbot stuff. Try
# setting MSNbot to disallow.
#
# robots.txt 2021.05.06
# disallow: User-agent: Neevabot
# disallow: User-agent: Linguee
#
# robots.txt 2021.03.26
# disallow: User-agent: SemanticScholarBot
#
# robots.txt 2020.12.28
# disallow: User-agent: niocBot
# disallow: User-agent: PetalBot
#
# robots.txt 2020.07.23
# disallow: User-agent: oBot
#
# robots.txt 2020.07.19
# restrict some additional sub-directories
# where I file junk for others and myself,
# under ~doug/linux/s18/hwp
#
# disallow: User-agent: JobboerseBot
# disallow: User-agent: CheckMarkNetwork
#
# robots.txt 2018.10.30
# disallow: User-agent: bbot
# disallow: User-agent: brands-bot-logo
#
# robots.txt 2018.10.30
# disallow: User-agent: oBot
#
# robots.txt 2018.10.30
# disallow: User-agent: serpstatbot
# disallow: User-agent: Datanyze
# disallow: User-agent: IndeedBot
# disallow: User-agent: Experibot
# disallow: User-agent: Seekport
# disallow: User-agent: Clarabot
# not sure if Datanyze checks robots.txt
#
# robots.txt 2018.08.06
# add .svg files to disallow lists.
#
# robots.txt 2018.05.09
# disallow: User-agent: dataprovider
# disallow: User-agent: crawler4j
# disallow: User-agent: ExtLinksBot
# disallow: User-agent: The Knowledge AI
# Add more versions of Sogou crawlers
#
# robots.txt 2018.04.11
# I often post .csv files and .data files for my
# experiments. Add to disallow list.
#
# robots.txt 2018.04.10
# disallow: User-agent: MauiBot
# disallow: User-agent: DAUM
#
# robots.txt 2017.12.02
# Change to multiple User-agent: lines per disallow, as
# specified in the original robots.txt specifications.
# disallow: User-agent: SEOkicks-Robot
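#
# Illustrative sketches for the two other layers mentioned in the
# header above. These are hedged examples with made-up names and
# placeholder addresses, not the exact rules used on this server.
#
# An Apache rewrite rule, for a bot that ignores this file, keyed on
# its user agent string (assumes mod_rewrite is enabled):
#
#   RewriteEngine On
#   RewriteCond %{HTTP_USER_AGENT} "SomeIgnoredBot" [NC]
#   RewriteRule ^ - [F]
#
# A direct iptables DROP rule for a crawler that sends no usable user
# agent string, keyed on its source address range (192.0.2.0/24 is a
# documentation range, used here only as a placeholder):
#
#   iptables -A INPUT -s 192.0.2.0/24 -j DROP
#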
#
# robots.txt 2017.07.01
# disallow: User-agent: SiteExplorer Findxbot GarlikCrawler ZoominfoBot BUbiNG
# Barkrowler rogerbot dotbot JamesBOT Contacts-Crawler CCBot IDBot DnyzBot
# PiplBot AlphaBot AlphaSeoBot AlphaSeoBot-SA
#
# robots.txt 2017.04.20
# disallow: User-agent: Qwantify
#
# robots.txt 2017.04.15
# 007ac9.net crawlers are seriously annoying.
# While they do attempt to fetch the robots.txt file,
# they do not use any user agent string when they do so,
# and therefore they hit my "no user agent" rewrite rule.
# They use a user agent string for everything else, but
# do not identify themselves as a 007ac9 crawler, making
# a user agent based rewrite rule impossible.
#
# disallow: User-agent: coccocbot-web
#
# robots.txt 2017.03.18
# disallow: User-agent: 007ac9
# although their bots do not identify themselves,
# they claim to honor robots.txt. We'll see.
#
# robots.txt 2017.02.17
# disallow: User-agent: yoozBot
# googlebot does ignore crawl delay. Take it out.
#
# robots.txt 2017.01.20
# googlebot crawls too fast.
# Try a crawl delay directive for googlebot,
# although I seem to recall it ignores it.
#
# robots.txt 2017.01.09
# disallow: User-agent: DomainCrawler
#
# robots.txt 2016.12.28
# disallow: User-agent: Cliqzbot
#
# robots.txt 2016.12.08
# disallow: User-agent: Seeker
#
# robots.txt 2016.09.04
# disallow: User-agent: Uptimebot
#
# robots.txt 2016.08.04
# disallow: User-agent: Sogou web spider
#
# robots.txt 2016.07.08
# I have been watching Qwantify.
# It gets the same stuff over and over again,
# rather often. Try a crawl delay. If that
# doesn't help, I'll just disallow it.
#
# robots.txt 2016.04.03
# disallow: User-agent: RankActiveLinkBot
#
# robots.txt 2016.02.23
# Aboundex has already been added, it turns out twice.
# Try Aboundex only, in addition to the already existing
# rule for Aboundexbot.
# Will also be adding a re-write rule.
#
# robots.txt 2016.02.09
# disallow: User-agent: plukkie
# disallow: User-agent: Applebot
# disallow: User-agent: Lipperhey
# disallow: User-agent: SafeDNSBot
#
# robots.txt 2016.01.09
# Try, does this work?
# disallow: User-agent: gocrawl
#
# robots.txt 2015.10.25
# disallow: User-agent: NextGenSearchBot
#
# robots.txt 2015.10.17
# disallow: User-agent: parsijoo-bot
#
# robots.txt 2015.10.09
# disallow: User-agent: betaBot
#
# robots.txt 2015.09.13
# disallow: User-agent: RankSonicBot
#
# robots.txt 2015.09.08
# try this instead:
# disallow: User-agent: yacybot
#
# robots.txt 2015.09.08
# disallow: User-agent: YaCy
# I do not know if it should be
# YaCy or yacybot.
#
# robots.txt 2015.08.25
# disallow: User-agent: thunderstone
#
# robots.txt 2015.08.19
# The Nutch disallow is not working.
# The Apache web site says it should work.
# Try "tbot-nutch".
# Oh my god, these bots are annoying.
#
# robots.txt 2015.07.20
# Google Search Appliance
# disallow: User-agent: gsa-crawler
# Apache Nutch-based bots.
# disallow: User-agent: Nutch
#
# robots.txt 2015.07.10
# disallow: User-agent: LSSRocketCrawler
#
# robots.txt 2015.06.20
# disallow: User-agent: YisouSpider
#
# robots.txt 2015.05.29
# Yet another demented bot.
# disallow: User-agent: SMTBot
#
# robots.txt 2015.01.25
# Add some directives for slurp (Yahoo)
# See also 2009.09.09
#
# robots.txt 2015.01.22
# disallow: User-agent: ltx71
# disallow: User-agent: AdvBot
#
# robots.txt 2015.01.10
# Make sure all bots know to avoid bot_trap.html
#
# robots.txt 2014.12.31
# disallow: User-agent: memoryBot
# Another day, another challenged bot.
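#
# Two asides on techniques mentioned in the entries above; both are
# illustrative sketches, not necessarily the exact rules used here.
#
# The "no user agent" rewrite rule (2017.04.15 entry): the usual
# mod_rewrite pattern for refusing requests that send an empty
# User-Agent header is:
#
#   RewriteCond %{HTTP_USER_AGENT} ^$
#   RewriteRule ^ - [F]
#
# The crawl delay directive (2017.01.20 and 2016.07.08 entries):
# Crawl-delay is non-standard, conventionally interpreted as seconds
# between fetches, and ignored by googlebot (see 2017.02.17). With a
# made-up bot name, the form is:
#
#   User-agent: ExampleBot
#   Crawl-delay: 10
#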
#
# robots.txt 2014.11.22
# disallow: User-agent: MojeekBot
# It doesn't need to check this file for every access.
#
# robots.txt 2014.11.13
# disallow .mp3
#
# robots.txt 2014.10.08
# There seem to be a lot more bots lately.
# disallow: User-agent: LoadTimeBot
# disallow: User-agent: oBot
#
# robots.txt 2014.10.02
# disallow: User-agent: Riddler
# disallow: User-agent: A6-Indexer
# Seems to check robots.txt often.
# disallow: User-agent: SemrushBot
# Although, it doesn't seem to check this file anyhow.
#
# robots.txt 2014.09.29
# bingbot has had a mental breakdown
# and I'm fed up with it.
# disallow: User-agent: bingbot
#
# robots.txt 2014.09.03
# disallow: User-agent: XoviBot
# Although, it doesn't seem to check this file anyhow.
#
# robots.txt 2014.08.28
# bingbot needs to slow down. Use crawl_delay
#
# robots.txt 2014.05.15
# disallow: User-agent: Aboundexbot
# No: already done. See 2013.09.02
#
# robots.txt 2014.05.15
# disallow: User-agent: BLEXBot
#
# robots.txt 2014.04.18
# disallow: User-agent: wotbox
#
# robots.txt 2015.01.22
# disallow: User-agent: EasouSpider
# disallow: User-agent: SeznamBot
#
# robots.txt 2014.02.16
# disallow: User-agent: LinkpadBot
#
# robots.txt 2014.02.15
# disallow: User-agent: archive.org_bot
# (The WayBack machine).
#
# robots.txt 2013.11.18
# disallow: User-agent: spbot
#
# robots.txt 2013.10.07
# disallow: User-agent: Mail.Ru
# disallow: User-agent: meanpathbot
#
# robots.txt 2013.09.02
# Another day, another mentally challenged crawler.
# disallow: User-agent: Aboundexbot
#
# robots.txt 2013.05.15
# Another day, another mentally challenged crawler.
# disallow: User-agent: netEstate NE Crawler
#
# robots.txt 2013.04.25
# disallow ip-web-crawler.com. It crawls way too fast and, while
# it claims to obey robots.txt directives, it does not.
# If it doesn't obey the disallow, then an iptables drop for
# 50.31.96.6 - 50.31.96.12 could be used.
#
# robots.txt 2013.04.17
# add some disallow stuff for specific file extensions.
# Somehow I missed it before.
#
# robots.txt 2013.04.04
# disallow Sosospider. Any web crawler that is too stupid to know the
# difference between upper and lower case is not worthy.
#
# robots.txt 2013.02.28
# disallow Exabot. I wonder if the resulting search engine
# database is the reason I get so many forged referrer
# hits.
#
# robots.txt 2012.10.08
# disallow WBSearchBot.
#
# robots.txt 2012.09.02
# disallow SearchmetricsBot. It is mentally challenged.
#
# robots.txt 2012.05.03
# disallow TurnitinBot. It is mentally challenged.
#
# robots.txt 2012.03.29
# disallow EC2LinkFinder. I do not know if it obeys robots.txt, but I will try.
# For sure it ignores most robots.txt directives. It copies everything, hogging
# bandwidth.
# It is time to think of a generic deny, to cover all these new bots.
#
# robots.txt 2012.03.13
# disallow SWEBot. It is not polite and disobeys the robots.txt file.
#
# robots.txt 2012.01.29
# disallow aiHitBot
# Try a user agent "InfoPath" and "InfoPath.2" disallow. (Another MS thing.)
# I am trying to get rid of what appears to be a tracking site.
# 80.40.134.103, .104, .120, seem to track 92.9.131.199 and 92.9.150.29 and ...
# 80.40.134.XXX does read the robots.txt file.
#
# robots.txt 2012.01.04
# SISTRIX crawler does not behave well. It ignores meta tags and some robots.txt directives.
# Disallow it.
#
# robots.txt 2011.12.01
# Try to get rid of the Ezooms bot, although it is not clear what its exact user agent name is.
# (Days later: "User-agent: Ezooms" seems to work, but it takes a few days.)
# It ignores meta tags, and has become generally annoying.
#
# robots.txt 2011.09.26
# Until now I have allowed Baiduspider. But it has gone mental and also ignores some meta tags.
# Disallow it.
# A new robot, AhrefsBot, does not behave or obey meta tags.
# Disallow it.
#
# robots.txt 2011.06.19
#
# robots.txt 2011.04.12
# Googlebot is so very very severely mentally challenged.
# It ignores the NOFOLLOW meta tag.
# Try to block useless content from being indexed via, yet another,
# block command.
#
# It is still looking for pages that haven't been there for over a year now.
# (see 2010.04.29)
#
# robots.txt 2010.10.14
# Eliminate crawl delay for Yahoo slurp (see 2007.03.13)
#
# robots.txt 2010.09.20
# TwengaBot is severely mentally challenged. Try a global disallow for it.
# Googlebot is still annoying and accessing pages it shouldn't.
#
# robots.txt 2010.04.29
# Googlebot is very severely mentally challenged.
# Add disallow directives for directories that are not even there,
# and haven't been for over 5 weeks now.
# This is merely to try to get around having my request to delete the
# non-existent directories from the search database being denied.
#
# robots.txt 2010.04.16
# Add specific directives for exabot, including a crawl delay.
# Reduce the slurp (Yahoo) crawl delay (which it doesn't seem to obey anyhow).
# Disallow googlebot-image.
#
# robots.txt 2010.04.13
# disallow taptubot, the mobile device crawler
#
# robots.txt 2010.04.01
# Yet another attempt to get web crawlers not to index old versions of index.html files.
# All old versions are called index_0???.html.
#
# robots.txt 2010.03.19
# Archives have been moved to a separate directory. Add a disallow directive.
#
# robots.txt 2010.02.10
# The Yandex web crawler behaves in a very strange manner. Block it.
# Ask robots not to copy PDF files.
#
# robots.txt 2009.12.07
# Fix some syntax based on feedback from http://tool.motoricerca.info/robots-checker.phtml
#
# robots.txt 2009.12.04
# There are still issues with googlebot. I don't want old versions of index.html
# type pages indexed, but I do want the Photoshop Elements generated pages indexed.
# Try some new directives.
#
# robots.txt 2009.09.09
# Googlebot is not ignoring the rebuilt directory and is obtaining .MOV videos.
# Add some more googlebot specific directives.
#
# robots.txt 2009.07.27
# Googlebot directives are case sensitive. Add .JPG to the .jpg ignore directives.
# Googlebot is not ignoring old index pages as the global directive indicates it should. Try a googlebot
# specific directive.
#
# robots.txt 2009.04.12
# Some robots, for example googlebot, obey global directives as well as googlebot specific directives.
# Other robots, for example slurp (Yahoo) and msnbot, only obey their specific directives.
# The robots.txt standard is rather weak, incomplete, and generally annoying.
# Add tons of the same specific directives to each robot area.
# Try changing the "no index Christmas pages" directive to include a wildcard.
#
# robots.txt 2008.12.03
# Block the Cuil (twiceler) robot entirely.
#
# robots.txt 2008.11.23
# The majestic robot comes in bursts at a high rate. Just block it.
# The Cuil robot comes too often. Try to slow it down.
#
# robots.txt 2008.07.03
# Now msnbot has started to grab images. Try to stop it.
# Googlebot is grabbing PNG files. Try to stop it.
#
# robots.txt 2007.11.20
# Try to disallow the panscient.com web crawler.
#
# robots.txt 2007.08.23
# Still, search engine pages do not agree with the contents of the robots.txt file.
# Add a specific disallow for ~doug/rebuilt.
# - put global user agent lines after specific ones.
# - next will be to repeat global lines in each specific agent area.
#
# robots.txt 2007.05.03
# Now Googlebot has started to grab images. Try to stop it.
# For whatever reason, google is mainly showing my re-built directory. It
# never seems to go back to the higher level page that now has meta tags
# telling it not to index those pages. Put in a global disallow.
# Add some other global disallows that I got behind on.
#
# robots.txt 2007.03.13
# Stupid yahoo slurp comes all the time now. It supports a non-standard delay command,
# so add the command. The web site doesn't state the units of measure.
#
# robots.txt 2007.02.11
# yahoo slurp seems to now obey the non-standard "ignore this type of file" wildcard usage;
# try it.
#
# robots.txt 2006.12.29
# Delete instructions for directories that don't exist anymore.
#
# robots.txt 2004.12.21
# Try to eliminate yahoo.com grabbing images.
# Can only think of a global deny.
# Cannot find the Yahoo name; try the one shown below.
#
# robots.txt 2004.11.16
# Try to eliminate alexa.com grabbing images.
# InkTomi comes too often; ban it entirely.
#
# robots.txt 2004.07.16
# Try to eliminate picsearch.com grabbing images.
#
# robots.txt 2004.07.09
# Try to eliminate altavista grabbing images.
#

User-agent: FriendlyCrawler
User-agent: ClaudeBot
User-agent: fidget-spinner-bot
User-agent: HubSpot
User-agent: Cincraw
User-agent: TinyTestBot
User-agent: FunnelBack
User-agent: OWLer
User-agent: SenutoBot
User-agent: okhttp
User-agent: AwarioBot
User-agent: MaCoCu
User-agent: woorankreview
User-agent: webprosbot
User-agent: Twitterbot
User-agent: Diffbot
User-agent: Jambot
User-agent: SeekBot
User-agent: SeekportBot
User-agent: Amazonbot
User-agent: vuhuvBot
User-agent: SERPtimizerBot
User-agent: MixrankBot
User-agent: YaK
User-agent: INETDEX-BOT
User-agent: Keybot Translation-Search-Machine
User-agent: FlfBaldrBot
User-agent: DomainStatsBot
User-agent: SurdotlyBot
User-agent: clark-crawler
User-agent: InfoTigerBot
User-agent: infotiger
User-agent: Neevabot
User-agent: Linguee
User-agent: SemanticScholarBot
User-agent: PetalBot
User-agent: niocBot
User-agent: Adsbot
User-agent: CheckMarkNetwork
User-agent: JobboerseBot
User-agent: oBot
User-agent: bbot
User-agent: brands-bot-logo
User-agent: Clarabot
User-agent: serpstatbot
User-agent: Seekport
User-agent: Datanyze
User-agent: Experibot
User-agent: IndeedBot
User-agent: ExtLinksBot
User-agent: crawler4j
User-agent: dataprovider
User-agent: DAUM
User-agent: MauiBot
User-agent: panscient.com
User-agent: vscooter
User-agent: psbot
User-agent: ia_archiver
User-agent: MJ12bot
User-agent: twiceler
User-agent: Yandex
User-agent: taptubot
User-agent: Googlebot-Image
User-agent: TwengaBot
User-agent: sitebot
User-agent: Baiduspider
User-agent: AhrefsBot
User-agent: Ezooms
User-agent: sistrix
User-agent: aiHitBot
User-agent: InfoPath
User-agent: InfoPath.2
User-agent: swebot
User-agent: EC2LinkFinder
User-agent: TurnitinBot
User-agent: The Knowledge AI
User-agent: Mappy
Disallow: /

# Some bots are stupid and
# need their own personal Disallow
# test:
User-agent: Friendly_Crawler
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: IonCrawl
Disallow: /

User-agent: SearchmetricsBot
User-agent: WBSearchBot
User-agent: Exabot
User-agent: Sosospider
User-agent: ip-web-crawler.com
User-agent: netEstate NE Crawler
User-agent: Aboundexbot
User-agent: Aboundex
User-agent: meanpathbot
User-agent: Mail.Ru
User-agent: spbot
User-agent: archive.org_bot
User-agent: LinkpadBot
User-agent: EasouSpider
User-agent: SeznamBot
User-agent: wotbox
User-agent: BLEXBot
User-agent: XoviBot
User-agent: SemrushBot
User-agent: A6-Indexer
User-agent: Riddler
User-agent: LoadTimeBot
User-agent: oBot
User-agent: MojeekBot
User-agent: memoryBot
User-agent: ltx71
Disallow: /

User-agent: AdvBot
User-agent: SMTBot
User-agent: YisouSpider
User-agent: LSSRocketCrawler
User-agent: gsa-crawler
User-agent: Nutch
User-agent: tbot-nutch
User-agent: thunderstone
User-agent: yacybot
User-agent: RankSonicBot
User-agent: betaBot
User-agent: parsijoo-bot
User-agent: NextGenSearchBot
User-agent: gocrawl
User-agent: plukkie
User-agent: Applebot
User-agent: Lipperhey
User-agent: SafeDNSBot
User-agent: RankActiveLinkBot
User-agent: Sogou blog
User-agent: Sogou inst spider
User-agent: Sogou News Spider
User-agent: Sogou Orion spider
User-agent: Sogou spider2
User-agent: Sogou web spider
User-agent: Uptimebot
User-agent: Seeker
User-agent: Cliqzbot
User-agent: DomainCrawler
User-agent: yoozBot
User-agent: 007ac9
User-agent: coccocbot-web
User-agent: Qwantify
User-agent: SiteExplorer
User-agent: Findxbot
User-agent: GarlikCrawler
User-agent: ZoominfoBot
User-agent: BUbiNG
User-agent: Barkrowler
User-agent: rogerbot
User-agent: dotbot
User-agent: JamesBOT
User-agent: Contacts-Crawler
User-agent: CCBot
User-agent: IDBot
User-agent: DnyzBot
User-agent: PiplBot
User-agent: AlphaBot
User-agent: AlphaSeoBot
User-agent: AlphaSeoBot-SA
User-agent: SEOkicks-Robot
Disallow: /

User-agent: msnbot
User-agent: bingbot
Disallow: /

User-agent: Slurp
Disallow: /*.jpg
Disallow: /*.JPG
Disallow: /*.png
Disallow: /*.PDF
Disallow: /*.pdf
Disallow: /*.mp3
Disallow: /*.mp4
Disallow: /*.MOV
Disallow: /*.mov
Disallow: /*.AVI
Disallow: /*.avi
Disallow: /*.svg
Disallow: /*.csv
Disallow: /*.data
Disallow: /disclaimer.html
Disallow: /security.html
Disallow: /poweredby.html
Disallow: /about_smythies.html
Disallow: /unused_link.html
Disallow: /old_pages.html
Disallow: /index_0*
Disallow: /*index_0*$
Disallow: /digital_camera/
Disallow: /lab/
Disallow: /xmas_*
Disallow: /~doug/archives/
Disallow: /~doug/linux/ubuntu-docs/help.ubuntu.com/
Disallow: /~doug/linux/s18/misc/
Disallow: /~doug/linux/s18/hwp/doug/
Disallow: /~doug/linux/s18/hwp/srinivas/
Disallow: /~doug/linux/s18/hwp/k510-rc6/
Disallow: /bot_trap.html

User-agent: Googlebot
Disallow: /*.jpg$
Disallow: /*.JPG$
Disallow: /*.png$
Disallow: /*.PDF$
Disallow: /*.pdf$
Disallow: /*.mp3$
Disallow: /*.mp4$
Disallow: /*.MOV$
Disallow: /*.mov$
Disallow: /*.AVI$
Disallow: /*.avi$
Disallow: /*.csv$
Disallow: /*.svg$
Disallow: /*.data$
Disallow: /index_0*$
Disallow: /*index_0*$
Disallow: /xmas_*
Disallow: /~doug/archives/
Disallow: /~doug/linux/ubuntu-docs/help.ubuntu.com/
Disallow: /~doug/linux/s18/misc/
Disallow: /~doug/linux/s18/hwp/doug/
Disallow: /~doug/linux/s18/hwp/srinivas/
Disallow: /~doug/linux/s18/hwp/k510-rc6/
Disallow: /~doug/2010.01.23/
Disallow: /~doug/2007.11.20/
Disallow: /~doug/2004.06.26/
Disallow: /digital_camera/
Disallow: /old_pages.html
Disallow: /unused_link.html
Disallow: /disclaimer.html
Disallow: /security.html
Disallow: /about_smythies.html
Disallow: /poweredby.html
Disallow: /DSCN*.htm
Disallow: /bot_trap.html

User-agent: *
Disallow: /*.jpg
Disallow: /*.JPG
Disallow: /*.png
Disallow: /*.PDF
Disallow: /*.pdf
Disallow: /*.mp3
Disallow: /*.mp4
Disallow: /*.MOV
Disallow: /*.mov
Disallow: /*.AVI
Disallow: /*.avi
Disallow: /*.svg
Disallow: /*.csv
Disallow: /*.data
Disallow: /disclaimer.html
Disallow: /security.html
Disallow: /poweredby.html
Disallow: /about_smythies.html
Disallow: /unused_link.html
Disallow: /old_pages.html
Disallow: /index_0*
Disallow: /*index_0*$
Disallow: /digital_camera/
Disallow: /lab/
Disallow: /xmas_*
Disallow: /~doug/archives/
Disallow: /~doug/linux/ubuntu-docs/help.ubuntu.com/
Disallow: /~doug/linux/s18/misc/
Disallow: /~doug/linux/s18/hwp/doug/
Disallow: /~doug/linux/s18/hwp/srinivas/
Disallow: /~doug/linux/s18/hwp/k510-rc6/
Disallow: /bot_trap.html
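
# Note on the wildcard patterns above: "*" (match any sequence of
# characters) and "$" (anchor the end of the URL) are extensions to the
# original robots.txt standard, which only did prefix matching. Major
# crawlers such as Googlebot and bingbot support them; older or simpler
# bots may not. For example (illustrative only):
#
#   Disallow: /*.jpg$    matches any URL ending in .jpg
#   Disallow: /index_0*  matches any URL starting with /index_0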