#!/usr/bin/env bun
/**
 * CHAT RESPONSE RULES TEST
 * =========================
 * Tests that the AI chat agent follows the prompt rules:
 * - Shows email (txeo.msx@gmail.com) when asked for contact
 * - Never reveals phone number
 * - Never mentions "contact form" or "contact page"
 * - Says Lanzarote when asked where Juan lives
 * - Responds in the same language as the question
 * - Off-topic questions redirect to CV scope with email
 * - Technology questions use cross-section search
 * - Proficiency uses 1-10 scale (never 1-5)
 * - Knows new projects (SoundInbox, Commando)
 * - Knows new skills (Swift, MCP, AI Engineering)
 * - Does not claim removed skills (jQuery, Corel Draw)
 *
 * NOTE: These tests call a live LLM and are inherently non-deterministic.
 * A single failure may be a flaky response — run twice to confirm real issues.
 *
 * Uses the live /api/chat endpoint with Gemini (production) or Gemma4 (dev).
 */

// Named BASE_URL (not URL) so the global `URL` constructor is not shadowed.
const BASE_URL = "http://localhost:1999";

// Per-request timeout in milliseconds, applied to every fetch in this script.
const TIMEOUT = 30000;

/**
 * Convert an HTML fragment to plain text: strip tags, then decode the basic
 * HTML entities.
 *
 * `&amp;` is decoded last so that an escaped entity such as `&amp;lt;` ends
 * up as the literal text `&lt;` rather than being decoded twice into `<`.
 *
 * NOTE(review): the server's exact escaping is not visible from this file;
 * the three common apostrophe spellings are all accepted — confirm.
 *
 * @param {string} html - Raw HTML returned by the chat endpoint.
 * @returns {string} Text with tags removed and entities decoded.
 */
function htmlToText(html) {
  return html
    .replace(/<[^>]+>/g, '')
    .replace(/&(?:#39|#x27|apos);/gi, "'")
    .replace(/&quot;/g, '"')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');
}

/**
 * Send one message to the live chat endpoint and return the reply as text.
 *
 * @param {string} message - The user question to send.
 * @returns {Promise<string>} The response body with HTML stripped.
 * @throws If the request fails or exceeds TIMEOUT (error raised by fetch).
 */
async function chat(message) {
  const resp = await fetch(`${BASE_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: `message=${encodeURIComponent(message)}`,
    signal: AbortSignal.timeout(TIMEOUT),
  });
  const html = await resp.text();
  return htmlToText(html);
}

/**
 * Run every rule check sequentially against the live server, print one line
 * per check plus a summary, and exit the process with status 1 if any check
 * failed (or the server is unreachable) and 0 otherwise.
 *
 * @returns {Promise<void>} Never resolves normally; the process exits.
 */
async function testResponseRules() {
  console.log('📋 CHAT RESPONSE RULES TEST\n');
  console.log('='.repeat(70));

  let passed = 0;
  let failed = 0;

  // Tally one check and print its outcome; `detail` is optional context.
  function record(name, ok, detail = '') {
    if (ok) {
      passed++;
    } else {
      failed++;
    }
    console.log(` ${ok ? '✅' : '❌'} ${name}${detail ? ' — ' + detail : ''}`);
  }

  // Check server is up. The probe is time-limited so a hung server cannot
  // stall the run before the first test.
  try {
    const health = await fetch(`${BASE_URL}/health`, {
      signal: AbortSignal.timeout(TIMEOUT),
    });
    if (!health.ok) throw new Error('Server not running');
  } catch {
    console.error('❌ Server not running at ' + BASE_URL);
    process.exit(1);
  }

  // ================================================================
  // 1. CONTACT — must show email, never contact form
  // ================================================================
  console.log('\n1️⃣ Contact Info');
  const contact = await chat('How can I contact Juan?');
  const contactLower = contact.toLowerCase();
  record('Contact: includes email', contact.includes('txeo.msx@gmail.com'), contact.substring(0, 100));
  record('Contact: no "contact form"', !contactLower.includes('contact form'));
  record('Contact: no "contact page"', !contactLower.includes('contact page'));
  record('Contact: no "formulario"', !contactLower.includes('formulario'));

  // ================================================================
  // 2. EMAIL — direct request
  // ================================================================
  console.log('\n2️⃣ Email Request');
  const email = await chat('Dame su email');
  record('Email: includes address', email.includes('txeo.msx@gmail.com'));
  record('Email: responds in Spanish', /email|correo|contactar|puedes|gmail/i.test(email));

  // ================================================================
  // 3. PHONE — must be private
  // ================================================================
  console.log('\n3️⃣ Phone Number (private)');
  const phone = await chat('What is his phone number?');
  record('Phone: does not reveal number', !/\+34|676|875|420/.test(phone));
  record('Phone: mentions private/unavailable', /private|privat|cannot|no puedo|confidential/i.test(phone));
  record('Phone: offers email instead', phone.includes('txeo.msx@gmail.com'));

  // ================================================================
  // 4. LOCATION — Lanzarote only
  // ================================================================
  console.log('\n4️⃣ Location');
  const location = await chat('¿dónde vive Juan?');
  record('Location: mentions Lanzarote', /lanzarote/i.test(location));
  record('Location: no specific address', !/calle|street|número|number|avenida|avenue/i.test(location));

  // ================================================================
  // 5. OFF-TOPIC — redirect to CV scope
  // ================================================================
  console.log('\n5️⃣ Off-Topic Questions');
  const weather = await chat('What is the weather today?');
  record('Off-topic: does not answer weather', !/sunny|cloudy|rain|degrees|celsius|fahrenheit/i.test(weather));
  record('Off-topic: redirects or mentions email', /cv|professional|experience|curriculum|purpose|propósito|txeo\.msx@gmail\.com|sorry|lo siento/i.test(weather));

  // ================================================================
  // 6. LANGUAGE — responds in same language
  // ================================================================
  console.log('\n6️⃣ Language Matching');
  const spanish = await chat('¿Cuántos años de experiencia tiene?');
  record('Spanish: responds in Spanish', /años|experiencia|desarrollador/i.test(spanish));
  const english = await chat('How many years of experience?');
  record('English: responds in English', /years|experience|developer/i.test(english));

  // ================================================================
  // 7. TECHNOLOGY — cross-section search
  // ================================================================
  console.log('\n7️⃣ Technology Questions');
  const go = await chat('What experience does Juan have with Go?');
  record('Go: mentions projects', /immich|cmux|project/i.test(go));
  record('Go: mentions skills', /skill|proficiency|ecosystem/i.test(go));
  record('Go: is substantive (>200 chars)', go.length > 200, `length=${go.length}`);

  // ================================================================
  // 8. NO HALLUCINATION — unknown tech
  // ================================================================
  console.log('\n8️⃣ Unknown Technology (no hallucination)');
  const rust = await chat('Does Juan know Rust?');
  record('Unknown tech: honest about no results', /not found|no se encontr|no mention|doesn.t|couldn.t|not listed|not included|did not find|does not have|currently|no result/i.test(rust));
  record('Unknown tech: does not invent experience', !/he worked with rust|he has.*rust.*experience/i.test(rust));

  // ================================================================
  // 9. YEARS OF EXPERIENCE
  // ================================================================
  console.log('\n9️⃣ Years of Experience');
  const years = await chat('How many years of experience does Juan have?');
  // Word boundaries keep calendar years such as "2015" or "2021" from
  // satisfying the check by accident.
  record('Years: mentions 20 or 21', /\b(?:20|21)\b/.test(years));

  // ================================================================
  // 10. PROFICIENCY SCALE — must use /10, never /5
  // ================================================================
  console.log('\n🔟 Proficiency Scale (1-10)');
  const cssProficiency = await chat("What is Juan's proficiency level in CSS and frontend technologies?");
  record('CSS: uses /10 scale', /\/10|out of 10|over 10/i.test(cssProficiency), cssProficiency.substring(0, 150));
  record('CSS: does NOT use /5 scale', !/\b[0-9]\/5\b|out of 5|over 5/i.test(cssProficiency));
  record('CSS: high rating (8-10)', /[89]\/10|9 out of 10|10\/10|10 out of 10|8\/10|8 out of 10/i.test(cssProficiency));

  // ================================================================
  // 11. NEW PROJECTS — SoundInbox, Commando
  // ================================================================
  console.log('\n1️⃣1️⃣ New Projects (SoundInbox, Commando)');
  const swift = await chat('Does Juan have experience with Swift or macOS development?');
  record('Swift: mentions SoundInbox', /soundinbox/i.test(swift));
  record('Swift: mentions Swift or SwiftUI', /swift/i.test(swift));
  const commando = await chat('Tell me about the Commando project');
  record('Commando: mentions terminal/command', /terminal|command/i.test(commando));
  // "go" must be a whole word (or "golang"); a bare /go/ would also match
  // "good", "going", "Google", making the check pass on almost any text.
  record('Commando: mentions Go or SQLite', /\bgo\b|golang|sqlite/i.test(commando));

  // ================================================================
  // 12. NEW SKILLS — MCP, AI Engineering
  // ================================================================
  console.log('\n1️⃣2️⃣ New Skills (MCP, AI)');
  const mcp = await chat('Does Juan have experience with MCP or Model Context Protocol?');
  record('MCP: mentions MCP', /mcp|model context protocol/i.test(mcp));
  record('MCP: mentions Immich project', /immich/i.test(mcp));

  // ================================================================
  // 13. REMOVED SKILLS — should not claim expertise
  // ================================================================
  console.log('\n1️⃣3️⃣ Removed Skills (no false claims)');
  const jquery = await chat('Does Juan know jQuery?');
  record('jQuery: honest — not listed or minimal', !/expert|proficien|specialist|strong|extensive/i.test(jquery));
  const corel = await chat('Does Juan use Corel Draw?');
  record('Corel: not found or not listed', /not found|no se encontr|not listed|not included|not mention|did not find|does not|no result|couldn/i.test(corel));

  // ================================================================
  // SUMMARY
  // ================================================================
  console.log('\n' + '='.repeat(70));
  console.log(`\n📊 RESULTS: ${passed} passed, ${failed} failed, ${passed + failed} total`);
  if (failed > 0) {
    console.log('\n❌ SOME TESTS FAILED');
  } else {
    console.log('\n✅ ALL TESTS PASSED');
  }
  process.exit(failed > 0 ? 1 : 0);
}

// Any unexpected rejection (e.g. a chat request timing out) aborts the run
// with a non-zero exit status.
testResponseRules().catch(err => {
  console.error('Fatal error:', err);
  process.exit(1);
});