{
  "_meta": {
    "current_as_of": "2026-06-15",
    "sourcing_invariant": "Citations of record must be PRIMARY vendor/standards docs. Aggregators, blogs, and social posts are never valid. Every published entry needs vendor_stated.doc_url + current_as_of; an entry missing either is a stub, marked TODO(verify-primary).",
    "entry_schema": {
      "token": "string",
      "operator": "string",
      "purpose": "training | search | user_fetch | opt_out_token",
      "robotstxt_control_token": "string",
      "honors_robotstxt": "boolean | null (vendor-stated)",
      "disputed": "boolean",
      "vendor_stated": { "behavior": "string", "doc_url": "primary url | TODO(verify-primary)", "current_as_of": "YYYY-MM-DD" },
      "observed_disputed": "{ claim, source_url, current_as_of } | null"
    }
  },
  "bots": [
    {
      "token": "GPTBot",
      "operator": "OpenAI",
      "purpose": "training",
      "robotstxt_control_token": "GPTBot",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "honors robots.txt; if a site allows both GPTBot and OAI-SearchBot, OpenAI may use one crawl for both purposes",
        "doc_url": "https://developers.openai.com/api/docs/bots",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "OAI-SearchBot",
      "operator": "OpenAI",
      "purpose": "search",
      "robotstxt_control_token": "OAI-SearchBot",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "honors robots.txt; independently controlled from GPTBot",
        "doc_url": "https://developers.openai.com/api/docs/bots",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "ChatGPT-User",
      "operator": "OpenAI",
      "purpose": "user_fetch",
      "robotstxt_control_token": "ChatGPT-User",
      "honors_robotstxt": false,
      "disputed": false,
      "vendor_stated": {
        "behavior": "user-initiated fetch; robots.txt may not apply (OpenAI Dec 2025 crawler-docs revision)",
        "doc_url": "https://developers.openai.com/api/docs/bots",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "ClaudeBot",
      "operator": "Anthropic",
      "purpose": "training",
      "robotstxt_control_token": "ClaudeBot",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "stated to honor robots.txt; independently controllable",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Claude-SearchBot",
      "operator": "Anthropic",
      "purpose": "search",
      "robotstxt_control_token": "Claude-SearchBot",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "stated to honor robots.txt; independently controllable",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Claude-User",
      "operator": "Anthropic",
      "purpose": "user_fetch",
      "robotstxt_control_token": "Claude-User",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "stated to honor robots.txt; independently controllable",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "PerplexityBot",
      "operator": "Perplexity",
      "purpose": "search",
      "robotstxt_control_token": "PerplexityBot",
      "honors_robotstxt": true,
      "disputed": true,
      "vendor_stated": {
        "behavior": "respects robots.txt",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": {
        "claim": "undeclared Perplexity crawlers observed circumventing robots.txt",
        "source_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      }
    },
    {
      "token": "Perplexity-User",
      "operator": "Perplexity",
      "purpose": "user_fetch",
      "robotstxt_control_token": "Perplexity-User",
      "honors_robotstxt": null,
      "disputed": false,
      "vendor_stated": {
        "behavior": "TODO(verify-primary)",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Google-Extended",
      "operator": "Google",
      "purpose": "opt_out_token",
      "robotstxt_control_token": "Google-Extended",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "robots.txt-only opt-out token; not a user-agent in logs",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Applebot",
      "operator": "Apple",
      "purpose": "search",
      "robotstxt_control_token": "Applebot",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "TODO(verify-primary)",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Applebot-Extended",
      "operator": "Apple",
      "purpose": "opt_out_token",
      "robotstxt_control_token": "Applebot-Extended",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "robots.txt-only training opt-out token; does not appear in logs",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "CCBot",
      "operator": "Common Crawl",
      "purpose": "training",
      "robotstxt_control_token": "CCBot",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "TODO(verify-primary)",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Amazonbot",
      "operator": "Amazon",
      "purpose": "search",
      "robotstxt_control_token": "Amazonbot",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "TODO(verify-primary)",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Meta-ExternalAgent",
      "operator": "Meta",
      "purpose": "training",
      "robotstxt_control_token": "Meta-ExternalAgent",
      "honors_robotstxt": true,
      "disputed": false,
      "vendor_stated": {
        "behavior": "vendor-stated to honor robots.txt (vendor-stated-only)",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": null
    },
    {
      "token": "Bytespider",
      "operator": "ByteDance",
      "purpose": "training",
      "robotstxt_control_token": "Bytespider",
      "honors_robotstxt": null,
      "disputed": true,
      "vendor_stated": {
        "behavior": "no official documentation",
        "doc_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      },
      "observed_disputed": {
        "claim": "reported to ignore robots.txt",
        "source_url": "TODO(verify-primary)",
        "current_as_of": "2026-06-15"
      }
    }
  ]
}
