# https://www.robotstxt.org/robotstxt.html
#
# Policy:
# - ALLOW AI-search / answer / user-triggered fetch crawlers so Jarvis is
# citable in ChatGPT search, Claude, Perplexity, Gemini, Meta AI, DuckAssist.
# - BLOCK pure training crawlers (the ones whose only purpose is scraping the
# public web to retrain base models).
# Robots.txt is not a security boundary — it only deters well-behaved bots.
# Traditional search engines (Googlebot, Bingbot, DuckDuckBot) are unaffected
# and continue to crawl normally via the `User-agent: *` block at the bottom.
# ──────────────────────────────────────────────────────────────────────
# ALLOWED: AI-search / answer / user-triggered fetch agents
# ──────────────────────────────────────────────────────────────────────
# NOTE: a crawler that matches one of the named groups below obeys ONLY that
# group and ignores the `User-agent: *` group, so the /r/ referral-redirect
# block has to be repeated in every named group. `Disallow: /r/` is listed
# before `Allow: /` so that parsers which stop at the first matching rule
# (instead of using longest-match) still honor it.

# OpenAI — ChatGPT search index + user-triggered fetches
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
Disallow: /r/
Allow: /

# Anthropic — Claude search index + user-triggered fetches
User-agent: Claude-SearchBot
User-agent: Claude-User
Disallow: /r/
Allow: /

# Perplexity — search index + user-triggered fetches
User-agent: PerplexityBot
User-agent: Perplexity-User
Disallow: /r/
Allow: /

# Google — Google-Extended is a control token (not a separate crawler) that
# governs use of content for Gemini grounding AND Gemini model training.
# It does not affect Google Search features such as AI Overviews; those follow
# the Googlebot rules (covered by the `*` group).
# NOTE(review): allowing this token also permits Gemini training use, which
# conflicts with the "block training" policy — confirm the tradeoff is intended.
User-agent: Google-Extended
Disallow: /r/
Allow: /

# Meta AI — user-triggered fetches
User-agent: Meta-ExternalFetcher
Disallow: /r/
Allow: /

# DuckDuckGo — DuckAssist answers
User-agent: DuckAssistBot
Disallow: /r/
Allow: /
# Apple — Applebot (search) is allowed via `*` below; Applebot-Extended (training) blocked below.
# ──────────────────────────────────────────────────────────────────────
# BLOCKED: training-only crawlers
# ──────────────────────────────────────────────────────────────────────
# Each group below denies the whole site to the listed agent(s). Agents from
# the same vendor share one group (several User-agent lines, one rule set).

# OpenAI — training crawler
User-agent: GPTBot
Disallow: /

# Anthropic — training crawler tokens
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Claude-Web
Disallow: /

# Common Crawl — its corpus feeds many LLM training sets
User-agent: CCBot
Disallow: /

# ByteDance — training crawler
User-agent: Bytespider
Disallow: /

# Amazon — training crawler
User-agent: Amazonbot
Disallow: /

# Apple — AI training opt-out (distinct from the Applebot search crawl)
User-agent: Applebot-Extended
Disallow: /

# Meta — training crawlers
User-agent: meta-externalagent
User-agent: FacebookBot
Disallow: /

# Diffbot
User-agent: Diffbot
Disallow: /

# Cohere — training crawler
User-agent: cohere-ai
Disallow: /
# ──────────────────────────────────────────────────────────────────────
# Default: allow all other crawlers (Googlebot, Bingbot, DuckDuckBot, Applebot, …)
# ──────────────────────────────────────────────────────────────────────
User-agent: *
# Block dynamic referral redirect targets (handled by Vercel rewrite, not real pages).
# Listed BEFORE `Allow: /` on purpose: longest-match parsers (RFC 9309) ignore
# rule order, but first-match parsers would otherwise hit `Allow: /` first and
# treat /r/ as crawlable.
Disallow: /r/
Allow: /

Sitemap: https://www.getjarvis.eu/sitemap.xml