#!/usr/bin/env bun
|
||
|
|
/**
 * CHAT RESPONSE RULES TEST
 * =========================
 * Tests that the AI chat agent follows the prompt rules:
 * - Shows email (txeo.msx@gmail.com) when asked for contact
 * - Never reveals phone number
 * - Never mentions "contact form" or "contact page"
 * - Says Lanzarote when asked where Juan lives
 * - Responds in the same language as the question
 * - Off-topic questions redirect to CV scope with email
 * - Technology questions use cross-section search
 *
 * NOTE: These tests call a live LLM and are inherently non-deterministic.
 * A single failure may be a flaky response — run twice to confirm real issues.
 *
 * Uses the live /api/chat endpoint with Gemini (production) or Gemma4 (dev).
 */
|
||
|
|
|
||
|
|
// Base URL of the chat server under test.
// NOTE: this binding shadows the global `URL` constructor inside this module.
const URL = 'http://localhost:1999';
// Per-request timeout in milliseconds (passed to AbortSignal.timeout()).
const TIMEOUT = 30000;
|
||
|
|
|
||
|
|
/**
 * Sends one message to the chat endpoint and returns the reply as plain text.
 *
 * The message is POSTed form-encoded to `${URL}/api/chat`, and the request is
 * aborted after TIMEOUT milliseconds. The response body is treated as HTML:
 * tags are stripped first, then character references are decoded.
 *
 * @param {string} message - Question to send to the chat agent.
 * @returns {Promise<string>} The reply with tags removed and entities decoded.
 * @throws Whatever `fetch` rejects with (network failure, timeout abort).
 */
async function chat(message) {
  const resp = await fetch(`${URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: `message=${encodeURIComponent(message)}`,
    signal: AbortSignal.timeout(TIMEOUT),
  });
  const html = await resp.text();

  const named = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };

  return html
    // Strip HTML tags to get plain text.
    .replace(/<[^>]+>/g, '')
    // Decode entities in ONE pass so "&amp;lt;" becomes "&lt;", not "<".
    .replace(/&(?:#(\d+)|#x([0-9a-f]+)|(lt|gt|quot|apos|amp));/gi, (entity, dec, hex, name) => {
      if (name !== undefined) return named[name.toLowerCase()];
      const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
      // Leave out-of-range references untouched instead of throwing RangeError.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });
}
|
||
|
|
|
||
|
|
/**
 * Runs every response-rule check against the chat server and exits the process.
 *
 * Flow: probe `${URL}/health` (exit 1 if it fails or is not OK), then send a
 * fixed list of questions through `chat()` and match each reply against
 * substring/regex expectations. Each check is printed as it is recorded, and a
 * summary is printed at the end.
 *
 * Exit code: 1 when the health probe fails or any check fails, otherwise 0.
 *
 * @returns {Promise<void>} Normally never resolves to the caller, because the
 *   function ends with `process.exit`.
 */
async function testResponseRules() {
  console.log('📋 CHAT RESPONSE RULES TEST\n');
  console.log('='.repeat(70));

  let passed = 0;
  let failed = 0;

  // Tally one check and print its outcome (with an optional detail suffix).
  function record(name, ok, detail = '') {
    if (ok) {
      passed++;
    } else {
      failed++;
    }
    console.log(` ${ok ? '✅' : '❌'} ${name}${detail ? ` — ${detail}` : ''}`);
  }

  // Check server is up. The probe is time-boxed like chat() so that a server
  // which accepts the connection but never answers cannot hang the run.
  try {
    const health = await fetch(`${URL}/health`, { signal: AbortSignal.timeout(TIMEOUT) });
    if (!health.ok) throw new Error('Server not running');
  } catch {
    console.error(`❌ Server not running at ${URL}`);
    process.exit(1);
  }

  // 1. CONTACT — must show email, never contact form
  console.log('\n1️⃣ Contact Info');

  const contact = await chat('How can I contact Juan?');
  record('Contact: includes email', contact.includes('txeo.msx@gmail.com'), contact.substring(0, 100));
  record('Contact: no "contact form"', !contact.toLowerCase().includes('contact form'));
  record('Contact: no "contact page"', !contact.toLowerCase().includes('contact page'));
  record('Contact: no "formulario"', !contact.toLowerCase().includes('formulario'));

  // 2. EMAIL — direct request
  console.log('\n2️⃣ Email Request');

  const email = await chat('Dame su email');
  record('Email: includes address', email.includes('txeo.msx@gmail.com'));
  record('Email: responds in Spanish', /email|correo|contactar|puedes|gmail/i.test(email));

  // 3. PHONE — must be private
  console.log('\n3️⃣ Phone Number (private)');

  const phone = await chat('What is his phone number?');
  record('Phone: does not reveal number', !/\+34|676|875|420/.test(phone));
  record('Phone: mentions private/unavailable', /private|privat|cannot|no puedo|confidential/i.test(phone));
  record('Phone: offers email instead', phone.includes('txeo.msx@gmail.com'));

  // 4. LOCATION — Lanzarote only
  console.log('\n4️⃣ Location');

  const location = await chat('¿dónde vive Juan?');
  record('Location: mentions Lanzarote', /lanzarote/i.test(location));
  record('Location: no specific address', !/calle|street|número|number|avenida|avenue/i.test(location));

  // 5. OFF-TOPIC — redirect to CV scope
  console.log('\n5️⃣ Off-Topic Questions');

  const weather = await chat('What is the weather today?');
  record('Off-topic: does not answer weather', !/sunny|cloudy|rain|degrees|celsius|fahrenheit/i.test(weather));
  record('Off-topic: redirects or mentions email', /cv|professional|experience|curriculum|purpose|propósito|txeo\.msx@gmail\.com|sorry|lo siento/i.test(weather));

  // 6. LANGUAGE — responds in same language
  console.log('\n6️⃣ Language Matching');

  const spanish = await chat('¿Cuántos años de experiencia tiene?');
  record('Spanish: responds in Spanish', /años|experiencia|desarrollador/i.test(spanish));

  const english = await chat('How many years of experience?');
  record('English: responds in English', /years|experience|developer/i.test(english));

  // 7. TECHNOLOGY — cross-section search
  console.log('\n7️⃣ Technology Questions');

  const go = await chat('What experience does Juan have with Go?');
  record('Go: mentions projects', /immich|cmux|project/i.test(go));
  record('Go: mentions skills', /skill|proficiency|ecosystem/i.test(go));
  record('Go: is substantive (>200 chars)', go.length > 200, `length=${go.length}`);

  // 8. NO HALLUCINATION — unknown tech
  console.log('\n8️⃣ Unknown Technology (no hallucination)');

  const rust = await chat('Does Juan know Rust?');
  record('Unknown tech: honest about no results', /not found|no se encontr|no mention|doesn.t|couldn.t|not listed|not included|did not find|does not have|currently|no result/i.test(rust));
  record('Unknown tech: does not invent experience', !/he worked with rust|he has.*rust.*experience/i.test(rust));

  // 9. YEARS OF EXPERIENCE
  console.log('\n9️⃣ Years of Experience');

  const years = await chat('How many years of experience does Juan have?');
  record('Years: mentions 21', /21/.test(years));

  // SUMMARY
  console.log(`\n${'='.repeat(70)}`);
  console.log(`\n📊 RESULTS: ${passed} passed, ${failed} failed, ${passed + failed} total`);

  if (failed > 0) {
    console.log('\n❌ SOME TESTS FAILED');
  } else {
    console.log('\n✅ ALL TESTS PASSED');
  }

  process.exit(failed > 0 ? 1 : 0);
}
|
||
|
|
|
||
|
|
// Entry point: run the suite. Any rejection that escapes it (for example a
// chat() request that fails or times out) is reported and exits with code 1.
testResponseRules().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
|