Files
cv-site/tests/mjs/86-chat-response-rules.test.mjs
juanatsap 2fbd88f28e feat: CV overhaul — modernize skills, add projects, fix proficiency scale
- Title: "Senior Technical Consultant & Full-Stack Developer"
- Add Swift & macOS Development skill category (SoundInbox, Commando)
- Rename "AI-Assisted Development" → "AI Engineering & Integration" with MCP, ADK, Gemini, CLIP
- Remove "Design Tools" (Corel Draw, GIMP) and "Legacy Enterprise" (Struts, Yii, Zend)
- Remove jQuery, Assembler, Groovy; add Swift to programming languages
- Rewrite Team Management with professional language
- Proficiency scale: 1-5 → 1-10 (validation, tests, chat agent prompt)
- Add SoundInbox (Swift) and Commando (Go+SwiftUI) to projects
- Remove personal details: dateOfBirth, placeOfBirth, domestika, driverLicense
- Trim weak LinkedIn Learning courses (speed reading, persuasive UX)
- Fix Spanish soft_skills duplicates
- Chat agent: 11 new assertions (proficiency scale, new projects, removed skills)
- Fix hardcoded year 2025 in TestDefaultCVShortcut → time.Now().Year()
2026-04-13 00:07:51 +01:00

208 lines
9.6 KiB
JavaScript

#!/usr/bin/env bun
/**
* CHAT RESPONSE RULES TEST
* =========================
* Tests that the AI chat agent follows the prompt rules:
* - Shows email (txeo.msx@gmail.com) when asked for contact
* - Never reveals phone number
* - Never mentions "contact form" or "contact page"
* - Says Lanzarote when asked where Juan lives
* - Responds in the same language as the question
* - Off-topic questions redirect to CV scope with email
* - Technology questions use cross-section search
* - Proficiency uses 1-10 scale (never 1-5)
* - Knows new projects (SoundInbox, Commando)
* - Knows new skills (Swift, MCP, AI Engineering)
* - Does not claim removed skills (jQuery, Corel Draw)
*
* NOTE: These tests call a live LLM and are inherently non-deterministic.
* A single failure may be a flaky response — run twice to confirm real issues.
*
* Uses the live /api/chat endpoint with Gemini (production) or Gemma4 (dev).
*/
// Base URL of the server under test; the /health and /api/chat paths are appended to it.
// NOTE(review): at module scope this binding shadows the built-in global `URL` constructor — confirm nothing in this file needs `new URL(...)`.
const URL = "http://localhost:1999";
// Request timeout in milliseconds, handed to AbortSignal.timeout() to abort slow requests.
const TIMEOUT = 30000;
/**
 * Convert an HTML fragment to plain text: remove every tag, then decode
 * character references.
 *
 * Decoding is done in ONE pass so that already-decoded output is never
 * rescanned: `&amp;lt;` becomes the literal text `&lt;`, not `<`. Besides the
 * named references amp/lt/gt/quot/apos, decimal (`&#39;`) and hexadecimal
 * (`&#x27;`) numeric references are decoded, so escaped characters such as
 * `&#43;` (+) or `&#34;` (") cannot hide from the text assertions.
 *
 * @param {string} html - raw HTML markup
 * @returns {string} text content with tags removed and references decoded
 */
function htmlToPlainText(html) {
  const namedEntities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  return html
    .replace(/<[^>]+>/g, '')
    .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g, (match, dec, hex, name) => {
      if (name !== undefined) return namedEntities[name];
      const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
      // Out-of-range references are left untouched instead of throwing a RangeError.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    });
}

/**
 * Send one message to the chat endpoint and return the reply as plain text.
 *
 * The message is POSTed form-encoded to `${URL}/api/chat`; the request is
 * aborted after TIMEOUT milliseconds.
 *
 * @param {string} message - the question to ask the chat agent
 * @returns {Promise<string>} the response body with HTML tags stripped and
 *   character references decoded
 */
async function chat(message) {
  const resp = await fetch(`${URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: `message=${encodeURIComponent(message)}`,
    signal: AbortSignal.timeout(TIMEOUT),
  });
  return htmlToPlainText(await resp.text());
}
/**
 * Run the chat response-rule checks against the live server and exit.
 *
 * Each section sends one or more questions through `chat()` and records
 * pass/fail results by matching the plain-text reply against expectations.
 * The process exits with status 1 when the health check fails or when at
 * least one check failed, and with status 0 otherwise.
 *
 * @returns {Promise<void>} never resolves normally: the function ends by
 *   calling process.exit()
 */
async function testResponseRules() {
  console.log('📋 CHAT RESPONSE RULES TEST\n');
  console.log('='.repeat(70));
  let passed = 0;
  let failed = 0;

  // Tally one check and print its outcome (optionally with a detail snippet).
  function record(name, ok, detail = '') {
    if (ok) {
      passed++;
    } else {
      failed++;
    }
    console.log(` ${ok ? '✅' : '❌'} ${name}${detail ? ' — ' + detail : ''}`);
  }

  // Check server is up. The probe carries the same timeout as chat requests
  // so a hung server cannot stall the run indefinitely.
  try {
    const health = await fetch(`${URL}/health`, { signal: AbortSignal.timeout(TIMEOUT) });
    if (!health.ok) throw new Error('Server not running');
  } catch {
    console.error('❌ Server not running at ' + URL);
    process.exit(1);
  }

  // ================================================================
  // 1. CONTACT — must show email, never contact form
  // ================================================================
  console.log('\n1️⃣ Contact Info');
  const contact = await chat('How can I contact Juan?');
  record('Contact: includes email', contact.includes('txeo.msx@gmail.com'), contact.substring(0, 100));
  record('Contact: no "contact form"', !contact.toLowerCase().includes('contact form'));
  record('Contact: no "contact page"', !contact.toLowerCase().includes('contact page'));
  record('Contact: no "formulario"', !contact.toLowerCase().includes('formulario'));

  // ================================================================
  // 2. EMAIL — direct request
  // ================================================================
  console.log('\n2️⃣ Email Request');
  const email = await chat('Dame su email');
  record('Email: includes address', email.includes('txeo.msx@gmail.com'));
  record('Email: responds in Spanish', /email|correo|contactar|puedes|gmail/i.test(email));

  // ================================================================
  // 3. PHONE — must be private
  // ================================================================
  console.log('\n3️⃣ Phone Number (private)');
  const phone = await chat('What is his phone number?');
  record('Phone: does not reveal number', !/\+34|676|875|420/.test(phone));
  record('Phone: mentions private/unavailable', /private|privat|cannot|no puedo|confidential/i.test(phone));
  record('Phone: offers email instead', phone.includes('txeo.msx@gmail.com'));

  // ================================================================
  // 4. LOCATION — Lanzarote only
  // ================================================================
  console.log('\n4️⃣ Location');
  const location = await chat('¿dónde vive Juan?');
  record('Location: mentions Lanzarote', /lanzarote/i.test(location));
  record('Location: no specific address', !/calle|street|número|number|avenida|avenue/i.test(location));

  // ================================================================
  // 5. OFF-TOPIC — redirect to CV scope
  // ================================================================
  console.log('\n5️⃣ Off-Topic Questions');
  const weather = await chat('What is the weather today?');
  record('Off-topic: does not answer weather', !/sunny|cloudy|rain|degrees|celsius|fahrenheit/i.test(weather));
  record('Off-topic: redirects or mentions email', /cv|professional|experience|curriculum|purpose|propósito|txeo\.msx@gmail\.com|sorry|lo siento/i.test(weather));

  // ================================================================
  // 6. LANGUAGE — responds in same language
  // ================================================================
  console.log('\n6️⃣ Language Matching');
  const spanish = await chat('¿Cuántos años de experiencia tiene?');
  record('Spanish: responds in Spanish', /años|experiencia|desarrollador/i.test(spanish));
  const english = await chat('How many years of experience?');
  record('English: responds in English', /years|experience|developer/i.test(english));

  // ================================================================
  // 7. TECHNOLOGY — cross-section search
  // ================================================================
  console.log('\n7️⃣ Technology Questions');
  const go = await chat('What experience does Juan have with Go?');
  record('Go: mentions projects', /immich|cmux|project/i.test(go));
  record('Go: mentions skills', /skill|proficiency|ecosystem/i.test(go));
  record('Go: is substantive (>200 chars)', go.length > 200, `length=${go.length}`);

  // ================================================================
  // 8. NO HALLUCINATION — unknown tech
  // ================================================================
  console.log('\n8️⃣ Unknown Technology (no hallucination)');
  const rust = await chat('Does Juan know Rust?');
  record('Unknown tech: honest about no results', /not found|no se encontr|no mention|doesn.t|couldn.t|not listed|not included|did not find|does not have|currently|no result/i.test(rust));
  record('Unknown tech: does not invent experience', !/he worked with rust|he has.*rust.*experience/i.test(rust));

  // ================================================================
  // 9. YEARS OF EXPERIENCE
  // ================================================================
  console.log('\n9️⃣ Years of Experience');
  const years = await chat('How many years of experience does Juan have?');
  // Word boundaries keep calendar years such as "2005" or "2021" from
  // satisfying the check; "20+", "20 years" and "21" still match.
  record('Years: mentions 20 or 21', /\b(?:20|21)\b/.test(years));

  // ================================================================
  // 10. PROFICIENCY SCALE — must use /10, never /5
  // ================================================================
  console.log('\n🔟 Proficiency Scale (1-10)');
  const cssProficiency = await chat('What is Juan\'s proficiency level in CSS and frontend technologies?');
  record('CSS: uses /10 scale', /\/10|out of 10|over 10/i.test(cssProficiency), cssProficiency.substring(0, 150));
  record('CSS: does NOT use /5 scale', !/\b[0-9]\/5\b|out of 5|over 5/i.test(cssProficiency));
  record('CSS: high rating (8-10)', /[89]\/10|9 out of 10|10\/10|10 out of 10|8\/10|8 out of 10/i.test(cssProficiency));

  // ================================================================
  // 11. NEW PROJECTS — SoundInbox, Commando
  // ================================================================
  console.log('\n1️⃣1️⃣ New Projects (SoundInbox, Commando)');
  const swift = await chat('Does Juan have experience with Swift or macOS development?');
  record('Swift: mentions SoundInbox', /soundinbox/i.test(swift));
  record('Swift: mentions Swift or SwiftUI', /swift/i.test(swift));
  const commando = await chat('Tell me about the Commando project');
  // "command" must be a whole word: the project name "Commando" alone would
  // otherwise satisfy this check on every reply.
  record('Commando: mentions terminal/command', /terminal|\bcommands?\b/i.test(commando));
  // "go" must be a whole word: as a bare substring it matches ordinary words
  // like "category" or "good".
  record('Commando: mentions Go or SQLite', /\b(?:go|golang)\b|sqlite/i.test(commando));

  // ================================================================
  // 12. NEW SKILLS — MCP, AI Engineering
  // ================================================================
  console.log('\n1️⃣2️⃣ New Skills (MCP, AI)');
  const mcp = await chat('Does Juan have experience with MCP or Model Context Protocol?');
  record('MCP: mentions MCP', /mcp|model context protocol/i.test(mcp));
  record('MCP: mentions Immich project', /immich/i.test(mcp));

  // ================================================================
  // 13. REMOVED SKILLS — should not claim expertise
  // ================================================================
  console.log('\n1️⃣3️⃣ Removed Skills (no false claims)');
  const jquery = await chat('Does Juan know jQuery?');
  record('jQuery: honest — not listed or minimal', !/expert|proficien|specialist|strong|extensive/i.test(jquery));
  const corel = await chat('Does Juan use Corel Draw?');
  record('Corel: not found or not listed', /not found|no se encontr|not listed|not included|not mention|did not find|does not|no result|couldn/i.test(corel));

  // ================================================================
  // SUMMARY
  // ================================================================
  console.log('\n' + '='.repeat(70));
  console.log(`\n📊 RESULTS: ${passed} passed, ${failed} failed, ${passed + failed} total`);
  if (failed > 0) {
    console.log('\n❌ SOME TESTS FAILED');
  } else {
    console.log('\n✅ ALL TESTS PASSED');
  }
  process.exit(failed > 0 ? 1 : 0);
}
// Script entry point: start the suite; any rejection that escapes it is
// logged and turned into a non-zero exit status.
const onFatalError = (err) => {
  console.error('Fatal error:', err);
  process.exit(1);
};

testResponseRules().then(undefined, onFatalError);