From 19951b6f425d4817688aa385b916797d4a22008a Mon Sep 17 00:00:00 2001 From: juanatsap Date: Sun, 30 Nov 2025 14:28:51 +0000 Subject: [PATCH] feat: Auto-detect text browsers and serve plain text CV - Detect curl, wget, httpie, lynx, w3m, links, elinks, browsh, carbonyl, netrik, retawq, surfraw - Check User-Agent and Accept: text/plain header - Serve the /text plain text output directly from the home route (no redirect) - Document in SEO guide and modern techniques --- doc/15-SEO.md | 38 ++++++++++++++++++++++++++ doc/2-MODERN-WEB-TECHNIQUES.md | 50 ++++++++++++++++++++++++++++++++++ internal/handlers/cv_pages.go | 6 ++++ internal/handlers/cv_text.go | 36 ++++++++++++++++++++++++ 4 files changed, 130 insertions(+) diff --git a/doc/15-SEO.md b/doc/15-SEO.md index 8ba47cb..18cca06 100644 --- a/doc/15-SEO.md +++ b/doc/15-SEO.md @@ -163,6 +163,42 @@ description: Interactive curriculum vitae... **Purpose:** Provides AI systems (ChatGPT, Claude, Perplexity, etc.) with structured, human-readable information about the site content. +#### Plain Text Auto-Detection (`/text` endpoint) + +The site automatically detects text-based browsers and CLI tools, serving a clean plain text version wrapped at 80 characters: + +**Auto-detected clients:** +| Client | Type | +|--------|------| +| curl | CLI tool | +| wget | CLI tool | +| HTTPie | CLI tool | +| Lynx | Text browser | +| w3m | Text browser | +| Links/ELinks | Text browser | +| Browsh | Terminal browser | +| Carbonyl | Terminal browser | + +**Usage:** +```bash +# Auto-detected (serves plain text): +curl https://juan.andres.morenorub.io/ + +# Explicit endpoint: +curl https://juan.andres.morenorub.io/text?lang=en + +# With Accept header: +curl -H "Accept: text/plain" https://juan.andres.morenorub.io/ +``` + +**Output features:** +- 80-character line wrapping +- ASCII art section headers +- Clean, structured text +- All CV content preserved + +--- + #### robots.txt AI Bot Rules (`static/robots.txt`) Explicit permissions for AI crawlers: @@ -223,6 +259,8 @@ The implementation supports Google's E-E-A-T 
(Experience, Expertise, Authority, | `static/sitemap.xml` | XML sitemap for search engines | | `data/cv-en.json` | SEO fields (pageTitle, metaTitle, etc.) | | `data/cv-es.json` | Spanish SEO fields | +| `/text` endpoint | Plain text CV for CLI/TUI browsers | +| `templates/cv-text.txt` | Plain text template | --- diff --git a/doc/2-MODERN-WEB-TECHNIQUES.md b/doc/2-MODERN-WEB-TECHNIQUES.md index da72dd6..74bf98f 100644 --- a/doc/2-MODERN-WEB-TECHNIQUES.md +++ b/doc/2-MODERN-WEB-TECHNIQUES.md @@ -3532,6 +3532,55 @@ Allow: / | **Authority** | Social links (LinkedIn, GitHub), company associations | | **Trust** | HTTPS, canonical URLs, clear contact info, privacy-respecting analytics | +#### 14. Plain Text Version for CLI/TUI Browsers + +**Implementation:** Auto-detect text-based browsers and serve clean plain text. + +```go +// Text-based browsers that get plain text automatically +var textBrowsers = []string{ + "curl", "wget", "httpie", + "lynx", "w3m", "links", "elinks", + "browsh", "carbonyl", +} + +func isTextBrowser(r *http.Request) bool { + ua := strings.ToLower(r.Header.Get("User-Agent")) + for _, browser := range textBrowsers { + if strings.Contains(ua, browser) { + return true + } + } + // Also check Accept: text/plain header + return strings.HasPrefix(r.Header.Get("Accept"), "text/plain") +} +``` + +**Usage:** +```bash +# These automatically get plain text: +curl https://example.com/ # Detects curl User-Agent +wget -qO- https://example.com/ # Detects wget User-Agent +lynx https://example.com/ # Text browser gets text version + +# Explicit plain text endpoint: +curl https://example.com/text?lang=en +``` + +**Features:** +- 80-character line wrapping for terminal readability +- Centered section titles with ASCII art separators +- Clean, structured output (no HTML/CSS/JS) +- Preserves all CV content: experience, skills, projects, etc. 
+ +**Benefits:** +- ✅ **CLI-friendly:** `curl example.com` just works +- ✅ **AI-accessible:** Easy parsing for LLMs and crawlers +- ✅ **Accessibility:** Works in any terminal environment +- ✅ **No dependencies:** Pure text, no rendering required + +--- + ### SEO Files Overview | File | Purpose | @@ -3541,6 +3590,7 @@ Allow: / | `static/llms.txt` | AI crawler information file | | `static/sitemap.xml` | XML sitemap | | `data/cv-{lang}.json` | SEO fields per language | +| `/text` endpoint | Plain text CV for CLI/TUI browsers | ### Validation diff --git a/internal/handlers/cv_pages.go b/internal/handlers/cv_pages.go index f5a6104..d3e5ae5 100644 --- a/internal/handlers/cv_pages.go +++ b/internal/handlers/cv_pages.go @@ -25,6 +25,12 @@ func (h *CVHandler) Home(w http.ResponseWriter, r *http.Request) { return } + // Detect text-based browsers and serve plain text version + if isTextBrowser(r) { + h.PlainText(w, r) + return + } + // Get language from query parameter, default to English lang := r.URL.Query().Get("lang") if lang == "" { diff --git a/internal/handlers/cv_text.go b/internal/handlers/cv_text.go index 2fff843..9771f8d 100644 --- a/internal/handlers/cv_text.go +++ b/internal/handlers/cv_text.go @@ -10,6 +10,42 @@ import ( "text/template" ) +// Text-based browsers and CLI tools that should get plain text +var textBrowsers = []string{ + "curl", + "wget", + "httpie", + "lynx", + "w3m", + "links", + "elinks", + "browsh", + "carbonyl", + "netrik", + "retawq", + "surfraw", +} + +// isTextBrowser detects if the request comes from a text-based browser or CLI tool +func isTextBrowser(r *http.Request) bool { + ua := strings.ToLower(r.Header.Get("User-Agent")) + + // Check for known text browsers + for _, browser := range textBrowsers { + if strings.Contains(ua, browser) { + return true + } + } + + // Check Accept header - if client prefers text/plain + accept := r.Header.Get("Accept") + if strings.HasPrefix(accept, "text/plain") { + return true + } + + return false +} + 
// ============================================================================== // PLAIN TEXT HANDLER // Renders CV as clean plain text for terminal/AI consumption