pages/robots.txt

108 lines
1.3 KiB
Text
Raw Normal View History

# Crawler exclusion rules.
# Every active group below pairs one User-agent with "Disallow: /", i.e. the
# named crawler is asked not to fetch anything on the site.
# Groups that are commented out are inactive and block nothing.
# NOTE(review): no "User-agent: *" group is visible in this listing, so
# crawlers not named here appear to be unrestricted - confirm against the
# deployed file.
# Google AI
User-agent: Google-Extended
Disallow: /
# Google
# The Googlebot group is commented out, so Googlebot is not blocked here;
# the other Google agents listed below are.
#User-agent: Googlebot
#Disallow: /
User-agent: GoogleOther
Disallow: /
User-agent: Googlebot-Image
Disallow: /
User-agent: AdsBot-Google
Disallow: /
# Bing
# Commented out, so bingbot is not blocked here.
#User-agent: bingbot
#Disallow: /
# Common Crawl
User-agent: CCBot
Disallow: /
# OpenAI
User-agent: ChatGPT-User
Disallow: /
User-agent: ChatGPT
Disallow: /
User-agent: GPTBot
Disallow: /
# Omgili
User-agent: Omgilibot
Disallow: /
User-agent: Omgili
Disallow: /
# Facebook AI
User-agent: FacebookBot
Disallow: /
# Anthropic
User-agent: Claude-Web
Disallow: /
User-agent: ClaudeBot
Disallow: /
# Other AI training known or suspected
User-agent: anthropic-ai
Disallow: /
User-agent: PerplexityBot
Disallow: /
User-agent: Bytespider
Disallow: /
# NOTE(review): this name contains spaces; RFC 9309 product tokens allow only
# letters, "-" and "_", so confirm the crawler really matches on this string.
User-agent: The Knowledge AI
Disallow: /
User-agent: aiHitBot
Disallow: /
User-agent: cohere-ai
Disallow: /
User-agent: Diffbot
Disallow: /
User-agent: YouBot
Disallow: /
User-agent: ImagesiftBot
Disallow: /
# Siri and Alexa yuck
User-agent: Amazonbot
Disallow: /
User-agent: Applebot
Disallow: /
# Miscellaneous
User-agent: TurnitinBot
Disallow: /
User-agent: NPBot
Disallow: /
User-agent: SlySearch
Disallow: /
User-agent: 360Spider
Disallow: /
# NOTE(review): name contains spaces (see RFC 9309 product-token grammar);
# confirm the crawler matches on this exact string.
User-agent: netEstate NE Crawler
Disallow: /
User-agent: Pinterestbot
Disallow: /
# Sitemap location; this line is independent of the User-agent groups above.
Sitemap: https://www.thepaperpilot.org/sitemap.xml