# robots.txt for thorneverse.com
# Author: Lindzay Browne (Yaznil.cross) — Thorneverse Publishing
# Last updated: May 8, 2026
#
# This file expresses the Rights Holder's reservation of rights pursuant to
# Article 4(3) of Directive (EU) 2019/790 (CDSM Directive) and constitutes
# a machine-readable opt-out that providers of general-purpose AI models must
# identify and comply with under Article 53(1)(c) of Regulation (EU) 2024/1689
# (EU AI Act). The full Declaration of Rights is available at:
# https://thorneverse.com/rights

# =====================================================================
# AI TRAINING CRAWLERS — DISALLOWED
# =====================================================================
# These crawlers fetch content for the purpose of training large language
# models or other AI systems. Rights Holder has reserved her rights and
# does not consent to such use of any content on this domain.

# Each group below lists one operator's crawler tokens on consecutive
# User-agent lines; the single Disallow rule that follows applies to every
# token in that group. No blank lines appear inside a group.

# OpenAI (ChatGPT, GPT models)
User-agent: GPTBot
Disallow: /

# Anthropic (Claude)
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Claude-Web
Disallow: /

# Google (Bard/Gemini training)
User-agent: Google-Extended
Disallow: /

# Apple (Apple Intelligence training)
User-agent: Applebot-Extended
Disallow: /

# Common Crawl (feeds many training datasets)
User-agent: CCBot
Disallow: /

# Meta (Llama training)
User-agent: Meta-ExternalAgent
User-agent: FacebookBot
Disallow: /

# ByteDance (TikTok parent — Doubao/aggressive training crawler)
User-agent: Bytespider
Disallow: /

# Perplexity
User-agent: PerplexityBot
User-agent: Perplexity-User
Disallow: /

# Cohere
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
Disallow: /

# Amazon
User-agent: Amazonbot
Disallow: /

# Other known AI training/scraping crawlers
User-agent: Diffbot
User-agent: ImagesiftBot
User-agent: Omgilibot
User-agent: omgili
User-agent: YouBot
User-agent: AI2Bot
User-agent: PetalBot
User-agent: Timpibot
User-agent: Scrapy
User-agent: magpie-crawler
User-agent: img2dataset
User-agent: DataForSeoBot
Disallow: /

# =====================================================================
# AI SEARCH AND RETRIEVAL CRAWLERS — ALLOWED
# =====================================================================
# These crawlers fetch content to provide attribution and traffic-driving
# search/retrieval functionality. Rights Holder permits these uses
# because they serve the same function as traditional search engines.

# One shared group: every token listed here is granted full access.
User-agent: OAI-SearchBot
User-agent: Claude-SearchBot
User-agent: ChatGPT-User
User-agent: Google-CloudVertexBot
Allow: /

# =====================================================================
# TRADITIONAL SEARCH ENGINES — ALLOWED
# =====================================================================

# One shared group: every search engine token listed here is granted full
# access to the site.
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: Applebot
Allow: /

# =====================================================================
# DEFAULT RULE
# =====================================================================
# Unidentified crawlers are permitted general access. However, by virtue
# of the specific reservations above and the published Declaration of
# Rights, any use of content for AI training, model fine-tuning, or
# generative inference purposes (other than the search and retrieval
# access expressly allowed above) is expressly NOT permitted, regardless
# of whether the operator's user-agent appears above. Operators bear the
# burden of identifying themselves and respecting the Rights Holder's
# reservation.

# Wildcard group: general access for any crawler not named in a group above.
User-agent: *
Allow: /

# =====================================================================
# SITEMAP
# =====================================================================
# Absolute URL of the XML sitemap for this domain.
Sitemap: https://thorneverse.com/sitemap.xml