# Robots.txt for halostatue.ca
#
# Format notes: robots.txt is line-oriented. Each directive must be on its
# own line, and a "#" starts a comment that runs to the end of that line.
# Groups (one or more User-agent lines followed by their rules) are
# separated by blank lines for readability.

# Standard search engines - welcome
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Applebot
Allow: /

# AI Training Crawlers - explicitly blocked

# OpenAI
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

# Anthropic
User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: ClaudeBot
Disallow: /

# Google AI (Gemini training, not search)
User-agent: Google-Extended
Disallow: /

# Meta
User-agent: FacebookBot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

# Lowercase spelling of the token above. User-agent matching is
# case-insensitive under RFC 9309, so this is redundant for compliant
# crawlers; it is kept for parsers that compare case-sensitively.
User-agent: meta-externalagent
Disallow: /

# Apple Intelligence
User-agent: Applebot-Extended
Disallow: /

# Common Crawl (used for AI training datasets)
User-agent: CCBot
Disallow: /

# Perplexity
User-agent: PerplexityBot
Disallow: /

# Cohere
User-agent: cohere-ai
Disallow: /

# Amazon/AWS
User-agent: Amazonbot
Disallow: /

# ByteDance
User-agent: Bytespider
Disallow: /

# Musk-owned garbage - explicit blocks

# X/Twitter
User-agent: Twitterbot
Disallow: /

# xAI/Grok
User-agent: Grok
Disallow: /

User-agent: xAI
Disallow: /

User-agent: xai-ai
Disallow: /

# Generic AI scrapers
User-agent: AI2Bot
Disallow: /

User-agent: Ai2Bot-Dolma
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: YouBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: Webzio-Extended
Disallow: /

User-agent: sentibot
Disallow: /

User-agent: iaskspider
Disallow: /

# Default - allow legitimate crawlers
# Applies to any crawler not matched by a more specific group above.
User-agent: *
Allow: /

# Sitemap
Sitemap: https://halostatue.ca/sitemap.xml