# /robots.txt file for http://www.joke.co.uk/
#
# robots.txt is line-oriented: each directive must be on its own line, and
# every rule path must begin with "/". Groups are separated by a blank line.

# Rules for all crawlers that are not matched by a more specific group below.
User-agent: *
Disallow: /starwars/halloween1/          # out of date
Disallow: /halloween1/                   # out of date
Disallow: /facemask/                     # out of date
Disallow: /christmas/                    # out of date
Disallow: /acatalog/                     # out of date
Disallow: /epages/                       # out of date
Disallow: /powerrangers/                 # out of date
Disallow: /ftp/                          # out of date
Disallow: /products/                     # out of date
Disallow: /management/                   # purpose unknown
Disallow: /military/                     # purpose unknown
Disallow: /black_dog/                    # purpose unknown
# NOTE(review): the entries below originally had no leading "/"; they are
# anchored at the site root here - confirm these pages live at the root.
Disallow: /farea.htm                     # purpose unknown
Disallow: /fetch.shtml                   # old joke site
Disallow: /login.asp                     # old joke site
Disallow: /default.asp                   # old joke site
Disallow: /showPart_large.asp            # old joke site
Disallow: /showPart_largeDD.asp          # old joke site
Disallow: /prodpage.asp                  # old joke site
Disallow: /prodpagesearch.asp            # old joke site
Disallow: /links/                        # no index, no follow
Disallow: /assets/                       # no index, no follow
Disallow: /storefront/keywordsearch/     # stop bots indexing search results
Disallow: /storefront/viewprofile/       # stop bots indexing profile pages
Disallow: /storefront/checkout/          # stop bots indexing checkout
Disallow: /storefront/main/              # stop bots indexing checkout
Disallow: /*.pdf$                        # do not crawl PDF files
Disallow: /*jsessionid=                  # do not index URLs carrying session ids

# Block this crawler from the whole site.
# NOTE(review): the user-agent value is a URL rather than a plain product
# token - verify it matches what the crawler actually sends.
User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

# Block this crawler from the whole site.
User-agent: e-SocietyRobot
Disallow: /