Forum Moderators: phranque
WARNING: USE AT YOUR OWN RISK!
Begin .htaccess:
RewriteEngine On
# Hotlink block for jpg/jpeg/gif/png/bmp
# Both conditions are negated and ANDed: the rule fires only when the Referer
# is neither the bare domain nor the www. host. Replace YOUR_DOMAIN.TDL with
# your own host name before use.
# NOTE(review): a blank Referer also fails both patterns, so direct requests
# and referer-stripping proxies are redirected too. If that is not wanted,
# add "RewriteCond %{HTTP_REFERER} !^$" above the two conditions.
RewriteCond %{HTTP_REFERER} !^http://YOUR_DOMAIN.TDL/.*$ [NC]
RewriteCond %{HTTP_REFERER} !^http://www.YOUR_DOMAIN.TDL/.*$ [NC]
# Image requests that reach this point get an external redirect.
RewriteRule .*\.(jpg|jpeg|gif|png|bmp)$ http://bandwidth.theft.denied [R,NC]
# Forbid requests for exploits & annoyances
# Every condition except the last carries [OR], so a request matching any one
# of them is answered with 403 Forbidden by the RewriteRule at the end.
# Bad requests: any method other than GET, HEAD or POST
RewriteCond %{REQUEST_METHOD} !^(GET|HEAD|POST) [NC,OR]
# CodeRed
RewriteCond %{REQUEST_URI} ^/default\.(ida|idq) [NC,OR]
RewriteCond %{REQUEST_URI} ^/.*\.printer$ [NC,OR]
# Email
# RewriteCond %{REQUEST_URI} (mail.?form|form|form.?mail|mail|mailto)\.(cgi|exe|pl)$ [NC,OR]
# MSOffice
RewriteCond %{REQUEST_URI} ^/(MSOffice|_vti) [NC,OR]
# Nimda
RewriteCond %{REQUEST_URI} ^/(admin|cmd|httpodbc|nsiislog|root|shell)\.(dll|exe) [NC,OR]
# Unknown/mixed
RewriteCond %{REQUEST_URI} ^/(cltreq\.asp|owssrv\.dll) [NC,OR]
RewriteCond %{REQUEST_URI} ^/missing\.html [NC,OR]
# FormMail probes at the document root or under cgi-bin/ or cgi-local/.
# This is a prefix match, so it also covers FormMail.cgi, FormMail.php and
# FormMail.pl; separate conditions for those extensions are not needed.
RewriteCond %{REQUEST_URI} ^/(cgi\-bin/|cgi\-local/)?FormMail [NC,OR]
RewriteCond %{REQUEST_URI} ^/sumthin [NC,OR]
# Last condition of the chain: no OR flag here.
RewriteCond %{REQUEST_URI} ^/default\.htm [NC]
RewriteRule .* - [F]
# Various
# RewriteCond %{REQUEST_URI} ^/(bin/|cgi/|cgi\-local/|sumthin) [NC,OR]
# RewriteCond %{THE_REQUEST} ^GET\ http [NC,OR]
# RewriteCond %{REQUEST_URI} /sensepost\.exe [NC]
# RewriteRule .* - [F]
# Reject requests that identify neither a browser nor a referring page.
# A header counts as blank when it is empty or a lone "-". The two
# conditions are ANDed (no OR flag), so both must be blank for the 403.
RewriteCond %{HTTP_USER_AGENT} ^-?$
RewriteCond %{HTTP_REFERER} ^-?$
RewriteRule .* - [F]
# Banning bots below
# All conditions from here to the final RewriteRule form ONE OR-chain:
# every condition except the very last carries the OR flag, so a request
# matching any single User-Agent, Referer or IP pattern gets 403 Forbidden.
# When adding or removing lines, keep OR on all but the last condition.
# Address harvesters
RewriteCond %{HTTP_USER_AGENT} ^(autoemailspider|ExtractorPro) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^E?Mail.?(Collect|Harvest|Magnet|Reaper|Siphon|Sweeper|Wolf) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (DTS.?Agent|Email.?Extrac) [NC,OR]
RewriteCond %{HTTP_REFERER} guestbook [NC,OR]
RewriteCond %{HTTP_REFERER} iaea\.org [NC,OR]
# Download managers
RewriteCond %{HTTP_USER_AGENT} ^(Alligator|DA.?[0-9]|DC\-Sakura|Download.?(Demon|Express|Master|Wonder)|FileHound) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Flash|Leech)Get [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Fresh|Lightning|Mass|Real|Smart|Speed|Star).?Download(er)? [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Gamespy|Go!Zilla|iGetter|JetCar|Net(Ants|Mechanic|Pumper|Spider)|SiteSnagger|Teleport.?Pro|WebReaper) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(My)?GetRight [NC,OR]
# Image-grabbers
RewriteCond %{HTTP_USER_AGENT} ^(AcoiRobot|FlickBot|webcollage) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Express|Mister|Web).?(Web|Pix|Image).?(Pictures|Collector)? [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Image.?(fetch|Stripper|Sucker) [NC,OR]
# "Gray-hats"
RewriteCond %{HTTP_USER_AGENT} ^(Atomz|BlackWidow|BlogBot|EasyDL|Marketwave|Sqworm|SurveyBot|Webclipping\.com) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (girafa\.com|gossamer\-threads\.com|grub\-client|Netcraft|Nutch) [NC,OR]
# Site-grabbers
RewriteCond %{HTTP_USER_AGENT} ^(eCatch|(Get|Super)Bot|Kapere|HTTrack|JOC|Offline|UtilMind|Xaldon) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Web.?(Auto|Cop|dup|Fetch|Filter|Gather|Go|Leach|Mine|Mirror|Pix|QL|RACE|Sauger) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Web.?(site.?(eXtractor|Quester)|Snake|ster|Strip|Suck|vac|walk|Whacker|ZIP) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} WebCapture [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^DISCo\ Pump [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^EirGrabber [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Net\ Vampire [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^NetZIP [NC,OR]
# Tools
RewriteCond %{HTTP_USER_AGENT} ^(curl|Dart.?Communications|Enfish|htdig|Java|larbin) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (FrontPage|Indy.?Library|RPT\-HTTPClient) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(libwww|lwp|PHP|Python|www\.thatrobotsite\.com|webbandit|Wget|Zeus) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Microsoft|MFC).(Data|Internet|URL|WebDAV|Foundation).(Access|Explorer|Control|MiniRedir|Class) [NC,OR]
# Unknown (any edits to this would be appreciated!)
RewriteCond %{HTTP_USER_AGENT} Cherry.?Picker [NC,OR]
RewriteCond %{HTTP_USER_AGENT} Educate.?Search|Full.?Web.?Bot|IUFW.?Web [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Miss.*g.*.?Locat.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(ChinaClaw|Crescent|Custo|EyeNetIE|Go-Ahead-Got-It|GornKer|GrabNet|Grafula) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(HMView|InterGET|Irvine|LeechFTP|MIDown|Navroad|NearSite|NetSpider|NICErsPRO) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(NPBot|Octopus|PageGrabber|pavuk|pcBrowser|PersonaPilot|puf|ReGet|SearchExpress) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(SuperHTTP|Surfbot|tAkeOut|TurnitinBot|VoidEYE|WebBandit|WebReaper|ZyBorg) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Crawl_Application|Lachesis|Nutscrape) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^[CDEFPRS](Browse|Eval|Surf) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Demo|Full.?Web|Lite|Production|Franklin|Missauga|Missigua).?(Bot|Locat) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} (efp@gmx\.net|hhjhj@yahoo\.com|lerly\.net|mapfeatures\.net|metacarta\.com) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Industry|Internet|IUFW|Lincoln|Missouri|Program).?(Program|Explore|Web|State|College|Shareware) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Mac|Ram|Educate|WEP).?(Finder|Search) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(Moz+illa|MSIE).?[0-9]?.?[0-9]?[0-9]?$ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla/[0-9]\.[0-9][0-9]?.\(compatible[\)\ ] [NC,OR]
# OR is required here so the IP conditions below are alternatives to the
# User-Agent/Referer conditions above rather than an additional requirement.
RewriteCond %{HTTP_USER_AGENT} NaverRobot [NC,OR]
# The next lines block NPBot by IP
# RewriteCond %{REMOTE_ADDR} ^12\.148\.196\.(12[8-9]|1[3-9][0-9]|2[0-4][0-9]|25[0-5])$ [OR]
# RewriteCond %{REMOTE_ADDR} ^12\.148\.209\.(19[2-9]|2[0-4][0-9]|25[0-5])$ [OR]
# RewriteCond %{REMOTE_ADDR} ^12\.175\.0\.(3[2-9]|4[0-7])$ [OR]
# RewriteCond %{REMOTE_ADDR} ^(203\.186\.145\.225|218\.6\.10\.113|68\.59\.94\.40|66\.75\.128\.202)$ [OR]
# RewriteCond %{REMOTE_ADDR} ^210\.192\.(9[6-9]|1[0-1][0-9]|12[0-7])\. [OR]
# RewriteCond %{REMOTE_ADDR} ^211\.(1[0-1][4-9])\. [OR]
# RewriteCond %{REMOTE_ADDR} ^218\.([0-2][0-9]|[3][0-1])\. [OR]
# RewriteCond %{REMOTE_ADDR} ^218\.(5[6-9]|[6-9][0-9])\. [OR]
# Start Cyveillance blocks
RewriteCond %{REMOTE_ADDR} ^63\.148\.99\.2(2[4-9]|[3-4][0-9]|5[0-5])$ [OR]
# Final condition of the chain: deliberately no OR flag.
RewriteCond %{REMOTE_ADDR} ^65\.118\.41\.(19[2-9]|2[0-1][0-9]|22[0-3])$
# End Cyveillance blocks
# RewriteRule !^(includes/403\.html|cgi-bin/various_filenames\.pl|various_filenames\.html) - [F]
# Alternate RewriteRule without allowing access to custom 403 or trap pages, or cgi scripts:
RewriteRule .* - [F]
# Answer any request whose URL path ends in /favicon.ico with a permanent
# (301) redirect to http://no.favicon.here.
# NOTE(review): that host looks like a deliberately unresolvable placeholder,
# presumably to keep favicon 404s out of the error log; confirm intent.
RedirectMatch permanent .*/favicon\.ico$ http://no.favicon.here
http://joseluis.pellicer.org/ua/configure.html
Plus it contains a looooong list of bots/spiders for reviewing! :)
Cheers!
[edited by: jdMorgan at 9:07 pm (utc) on Jan. 20, 2004]
[edit reason] De-linked [/edit]
Welcome to WebmasterWorld [webmasterworld.com]!
Mozilla/3.01 (compatible;) is an extremely common caching-proxy user agent. From your post, I'm not sure what you intended to do with it, but I'd advise you not to block it.
However, the usual way to specify that user agent in a regular-expression pattern would be:
Mozilla/3\.01\ \(compatible;\)
Jim