# Courtel Communications Ltd
# John Randall
# Created: 10/07/2002
# Last Updated: 15/08/2006
# robots.txt file for the www.courtnews1.co.uk FTP site
# Location: www.courtnews2.co.uk/robots.txt
# NOTE(review): the two lines above name different hosts (courtnews1 vs
# courtnews2) - confirm which host this file is actually served from.
#
# Disallow all external robots from any part of this site
# except from the root directory & michelle
# but allow the selected local Search Engine to
# index the courtlists subdirectory structure.
#
# Search engine User-agent values:
#   All:              *
#   Default ht://Dig: htdig
#   Netcom ht://Dig:  netcom-search
#   AGL ht://Dig:     AGL-htdig
#   PicoSearch:       PicoSearch/1.0
#
# Formatting rules for this file:
#   - One directive per line; a directive that shares a line with a
#     leading '#' is treated as a comment and ignored by robots.
#   - Records are separated by a blank line.

# Disallow all search engines from all of the web site apart
# from the root & michelle directories.
# Note 1: By default, everything is included.
# Note 2: No blank lines allowed within a User-agent record structure.
# Note 3: The url values are partial paths (prefix matches), so
#         /courtlists below also covers /courtliststest.
User-agent: *
Disallow: /.
Disallow: /_
Disallow: /bin
Disallow: /cgi-bin
Disallow: /common
Disallow: /courtlists
Disallow: /eng
Disallow: /htdig
Disallow: /icons
Disallow: /images
Disallow: /rss
Disallow: /scripts
Disallow: /setups
Disallow: /stats

# Disallow the local search engine from all
# except the root and courtlists directories.
# Note that spiders such as htDig only index
# pages on the site that can be found by
# following hypertext links.
# /courtlists is intentionally NOT listed here so the local engine can index
# it; /courtliststest is listed explicitly to keep the test area out.
# NOTE(review): the User-agent value below includes a version ("/1.0").
# RFC 9309 matches on the product token only - confirm the PicoSearch
# crawler still selects this record before changing the value.
User-agent: PicoSearch/1.0
Disallow: /.
Disallow: /_
Disallow: /courtliststest
Disallow: /bin
Disallow: /cgi-bin
Disallow: /common
Disallow: /eng
Disallow: /htdig
Disallow: /icons
Disallow: /images
Disallow: /michelle
Disallow: /rss
Disallow: /scripts
Disallow: /setups
Disallow: /stats