2025-11-17 13:40:05 +00:00
|
|
|
# robots.txt for juan.andres.morenorub.io
|
2025-11-30 13:23:22 +00:00
|
|
|
# Last Updated: 2025-11-30
|
2025-10-31 11:06:38 +00:00
|
|
|
|
2025-11-30 13:23:22 +00:00
|
|
|
# =============================================================================
|
|
|
|
|
# DEFAULT RULES - Allow all search engines
|
|
|
|
|
# =============================================================================
|
2025-10-31 11:06:38 +00:00
|
|
|
# Fallback group: used only by crawlers that are not named in a more
# specific User-agent group elsewhere in this file.
User-agent: *
# Disallow admin/internal paths. These rules are listed before the catch-all
# Allow so that parsers applying the first matching rule (rather than the
# longest match) still honor them.
Disallow: /admin/
Disallow: /api/internal/
Disallow: /.git/
Disallow: /.env
# Everything else is crawlable.
Allow: /
|
|
|
|
|
|
2026-04-09 12:24:41 +01:00
|
|
|
# Plain text version — accessible but not indexed (X-Robots-Tag: noindex)
|
|
|
|
|
# Canonical HTML version at / is preferred for search results
|
|
|
|
|
# Allow: /text (crawlable for LLMs and text browsers)
|
2026-04-09 12:24:06 +01:00
|
|
|
|
2025-11-30 13:23:22 +00:00
|
|
|
# =============================================================================
|
|
|
|
|
# SITEMAPS & AI CONTENT
|
|
|
|
|
# =============================================================================
|
2025-11-17 13:40:05 +00:00
|
|
|
# Absolute URL of the XML sitemap. The Sitemap directive is not tied to any
# User-agent group and applies to every crawler that reads this file.
Sitemap: https://juan.andres.morenorub.io/static/sitemap.xml
|
2025-10-31 11:06:38 +00:00
|
|
|
|
2025-11-30 13:23:22 +00:00
|
|
|
# LLMs.txt for AI crawlers (standard: https://llmstxt.org/)
|
|
|
|
|
# Location: https://juan.andres.morenorub.io/static/llms.txt
|
2025-10-31 11:06:38 +00:00
|
|
|
|
2025-11-30 13:23:22 +00:00
|
|
|
# =============================================================================
|
|
|
|
|
# TRADITIONAL SEARCH ENGINES
|
|
|
|
|
# =============================================================================
|
2025-10-31 11:06:38 +00:00
|
|
|
# A crawler obeys only the most specific group that names it and ignores the
# "User-agent: *" group, so the admin/internal exclusions must be repeated
# here. One shared group keeps the rules identical for all listed crawlers.
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
# Specific exclusions first, catch-all Allow last (safe for first-match parsers).
Disallow: /admin/
Disallow: /api/internal/
Disallow: /.git/
Disallow: /.env
Allow: /
|
2025-11-30 13:23:22 +00:00
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# AI CRAWLERS & LLM BOTS - Explicitly allowed
|
|
|
|
|
# =============================================================================
|
|
|
|
|
|
|
|
|
|
# A crawler obeys only the most specific group that names it and ignores the
# "User-agent: *" group, so the admin/internal exclusions must be repeated
# here. One shared group keeps the rules identical for all listed crawlers.

# OpenAI - ChatGPT, GPT-4
User-agent: GPTBot
User-agent: ChatGPT-User
# Anthropic - Claude
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
# Google AI - Gemini (formerly Bard)
User-agent: Google-Extended
# Meta AI. Product tokens are matched case-insensitively, so the two
# Meta-ExternalAgent spellings are equivalent; both are kept in case a
# parser compares case-sensitively.
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
User-agent: meta-externalagent
# Perplexity AI
User-agent: PerplexityBot
# Cohere AI
User-agent: cohere-ai
# Common Crawl (used by many AI models)
User-agent: CCBot
# Amazon/Alexa
User-agent: Amazonbot
# Apple - Applebot (for Siri, Spotlight)
User-agent: Applebot
# Microsoft Copilot
User-agent: Copilot
# You.com AI
User-agent: YouBot
# Brave Search
User-agent: BraveBot
# Specific exclusions first, catch-all Allow last (safe for first-match parsers).
Disallow: /admin/
Disallow: /api/internal/
Disallow: /.git/
Disallow: /.env
Allow: /
|
|
|
|
|
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# CRAWL RATE LIMITS (Optional)
|
|
|
|
|
# =============================================================================
|
|
|
|
|
# Uncomment if needed to prevent server overload
|
|
|
|
|
# Crawl-delay: 1
|