feat: auto-fallback Gemini→Ollama + model warmup on chat open

Dual-provider architecture:
- Both Gemini and Ollama initialize at startup (if configured)
- Primary (Gemini) tried first for every request
- On any error (429, 503, timeout), automatically falls back to Ollama
- No manual switching needed — completely transparent to the user
- Log shows: "Primary failed (gemini: ...), falling back to ollama: ..."

Warmup:
- POST /api/chat/warmup called silently when chat panel opens
- Pre-loads the Ollama model in the background (10-15s) while the user reads the welcome message
- By the time user types, model is ready for instant response
- Warms up the fallback provider when one is configured, otherwise the primary (Gemini itself doesn't need it)

Timeout:
- Agent context timeout increased from 30s to 60s (Ollama first response can be slow)
- Each request creates a fresh session (stateless for fallback compat)
This commit is contained in:
juanatsap
2026-04-08 14:57:38 +01:00
parent 8205a22972
commit 160be31b31
3 changed files with 142 additions and 92 deletions
+134 -91
View File
@@ -20,86 +20,106 @@ import (
"google.golang.org/genai"
)
// Handler serves the chat API endpoint.
type Handler struct {
// chatRunner bundles a runner with its session service and label.
type chatRunner struct {
runner *runner.Runner
sessionService session.Service
session session.Service
label string
}
// Handler serves the chat API endpoint with automatic fallback.
// Primary runner (Gemini) is tried first; if it fails, fallback (Ollama) is used.
// When Gemini cannot be set up, Ollama takes the primary slot and there is
// no fallback.
type Handler struct {
	// primary is tried first for every chat request. It is nil only on a
	// disabled handler.
	primary *chatRunner
	// fallback is optional: it is consulted when primary returns an error
	// and is nil when only one provider is available.
	fallback *chatRunner
	// enabled is true once a primary provider has been set up.
	enabled bool
}
// NewHandler creates a chat handler. Returns a disabled handler if no model provider is configured.
// NewHandler creates a chat handler with primary + optional fallback provider.
// - If GOOGLE_API_KEY is set → Gemini is primary
// - If OLLAMA_HOST or Ollama is available → Ollama is fallback
// - If only one is available, it becomes the sole provider
// - If neither is available, chat is disabled
func NewHandler(dataCache *cache.DataCache) *Handler {
provider := os.Getenv("MODEL_PROVIDER")
if provider == "" {
provider = "gemini"
}
h := &Handler{}
var llm model.LLM
var providerLabel string
switch provider {
case "ollama":
llm, providerLabel = initOllamaProvider()
default:
var err error
llm, providerLabel, err = initGeminiProvider()
if err != nil {
return &Handler{enabled: false}
// Try Gemini as primary
geminiLLM, geminiLabel, geminiErr := initGeminiProvider()
if geminiErr == nil && geminiLLM != nil {
r, err := buildRunner(geminiLLM, dataCache, "cv-chat-gemini")
if err == nil {
h.primary = &chatRunner{runner: r.runner, session: r.session, label: geminiLabel}
}
}
if llm == nil {
// Try Ollama as fallback (or primary if Gemini unavailable)
ollamaLLM, ollamaLabel := initOllamaProvider()
if ollamaLLM != nil {
r, err := buildRunner(ollamaLLM, dataCache, "cv-chat-ollama")
if err == nil {
if h.primary != nil {
h.fallback = &chatRunner{runner: r.runner, session: r.session, label: ollamaLabel}
} else {
h.primary = &chatRunner{runner: r.runner, session: r.session, label: ollamaLabel}
}
}
}
if h.primary == nil {
log.Println("⚠️ No chat provider available — chat disabled")
return &Handler{enabled: false}
}
h.enabled = true
if h.fallback != nil {
log.Printf("💬 Chat enabled (primary: %s, fallback: %s)", h.primary.label, h.fallback.label)
} else {
log.Printf("💬 Chat enabled (%s)", h.primary.label)
}
return h
}
// buildRunner creates an ADK runner for a given LLM provider.
func buildRunner(llm model.LLM, dataCache *cache.DataCache, appName string) (*chatRunner, error) {
cvAgent, err := NewAgent(llm, dataCache)
if err != nil {
log.Printf("⚠️ Failed to create CV agent: %v — chat disabled", err)
return &Handler{enabled: false}
return nil, err
}
sessionSvc := session.InMemoryService()
r, err := runner.New(runner.Config{
AppName: "cv-chat",
AppName: appName,
Agent: cvAgent,
SessionService: sessionSvc,
AutoCreateSession: true,
})
if err != nil {
log.Printf("⚠️ Failed to create runner: %v — chat disabled", err)
return &Handler{enabled: false}
return nil, err
}
log.Printf("💬 Chat agent enabled (%s)", providerLabel)
return &Handler{
runner: r,
sessionService: sessionSvc,
enabled: true,
}
return &chatRunner{runner: r, session: sessionSvc}, nil
}
// initGeminiProvider initializes the Gemini LLM provider.
func initGeminiProvider() (model.LLM, string, error) {
apiKey := os.Getenv("GOOGLE_API_KEY")
if apiKey == "" {
log.Println("⚠️ GOOGLE_API_KEY not set — chat feature disabled")
return nil, "", fmt.Errorf("no API key")
}
ctx := context.Background()
modelName := os.Getenv("MODEL_NAME")
if modelName == "" {
modelName = "gemini-2.5-flash"
}
llm, err := gemini.NewModel(ctx, modelName, &genai.ClientConfig{
llm, err := gemini.NewModel(context.Background(), modelName, &genai.ClientConfig{
APIKey: apiKey,
})
if err != nil {
log.Printf("⚠️ Failed to initialize Gemini model: %v — chat disabled", err)
log.Printf("⚠️ Gemini init failed: %v", err)
return nil, "", err
}
@@ -127,9 +147,42 @@ func (h *Handler) Enabled() bool {
return h.enabled
}
// HandleWarmup pre-loads the LLM model so the first real question is fast.
//
// It always replies 204 No Content right away. When chat is enabled and the
// request is a POST, a background goroutine sends a throwaway "hi" prompt to
// the fallback provider (or to the primary when no fallback is configured),
// bounded by a 60-second timeout. Warmup failures are logged and never
// surfaced to the client.
func (h *Handler) HandleWarmup(w http.ResponseWriter, r *http.Request) {
	if !h.enabled || r.Method != http.MethodPost {
		w.WriteHeader(http.StatusNoContent)
		return
	}

	// Warm up fallback (Ollama) in background — Gemini doesn't need warmup.
	// With no fallback configured the primary is warmed instead, whichever
	// provider that is.
	target := h.fallback
	if target == nil {
		target = h.primary
	}

	go func() {
		// Detached from the request context on purpose: the response is sent
		// immediately and the warmup has to keep running after it.
		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
		defer cancel()

		// NOTE(review): this session is created under "cv-chat-warmup", which
		// is not the runner's own AppName; presumably the runner's
		// AutoCreateSession setting covers the lookup — confirm.
		sess, err := target.session.Create(ctx, &session.CreateRequest{
			AppName: "cv-chat-warmup",
			UserID:  "warmup",
		})
		if err != nil {
			log.Printf("⚠️ Model warmup failed (%s): session create: %v", target.label, err)
			return
		}

		msg := genai.NewContentFromText("hi", genai.RoleUser)
		// Drain the event stream; only the error value matters here. Stop at
		// the first error so a failed warmup is never reported as a success.
		for _, err := range target.runner.Run(ctx, "warmup", sess.Session.ID(), msg, agent.RunConfig{}) {
			if err != nil {
				log.Printf("⚠️ Model warmup failed (%s): %v", target.label, err)
				return
			}
		}
		log.Printf("💬 Model warmed up (%s)", target.label)
	}()

	w.WriteHeader(http.StatusNoContent)
}
// HandleChat processes POST /api/chat requests.
// Expects form field "message" and optional "session_id".
// Returns an HTML fragment for HTMX to swap into the chat panel.
// Tries the primary provider first; falls back to the secondary on error.
func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
if !h.enabled {
w.Header().Set("Content-Type", "text/html; charset=utf-8")
@@ -150,45 +203,16 @@ func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
return
}
sessionID := r.FormValue("session_id")
if sessionID == "" {
sessionID = "default"
// Try primary, fall back if it fails
response, sessionID, err := h.runAgent(h.primary, message)
if err != nil && h.fallback != nil {
log.Printf("💬 Primary failed (%s: %v), falling back to %s", h.primary.label, err, h.fallback.label)
response, sessionID, err = h.runAgent(h.fallback, message)
}
// Ensure session exists
ctx := r.Context()
_, err := h.sessionService.Get(ctx, &session.GetRequest{
AppName: "cv-chat",
UserID: "visitor",
SessionID: sessionID,
})
if err != nil {
// Create new session
created, createErr := h.sessionService.Create(ctx, &session.CreateRequest{
AppName: "cv-chat",
UserID: "visitor",
})
if createErr != nil {
log.Printf("Chat session create error: %v", createErr)
w.Header().Set("Content-Type", "text/html; charset=utf-8")
w.WriteHeader(http.StatusInternalServerError)
_, _ = fmt.Fprint(w, `<div class="chat-message chat-error">Failed to start chat session.</div>`)
return
}
sessionID = created.Session.ID()
}
// Run the agent with a dedicated context (not tied to HTTP request lifecycle)
agentCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
userMsg := genai.NewContentFromText(message, genai.RoleUser)
var response strings.Builder
for event, err := range h.runner.Run(agentCtx, "visitor", sessionID, userMsg, agent.RunConfig{}) {
if err != nil {
log.Printf("Chat agent error: %v", err)
w.Header().Set("Content-Type", "text/html; charset=utf-8")
errMsg := "Something went wrong. Please try again in a moment."
if strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "RESOURCE_EXHAUSTED") {
errMsg = "The AI service is temporarily busy. Please try again in a few seconds."
@@ -196,6 +220,42 @@ func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-error">%s</div>`, errMsg)
return
}
// User message bubble
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-user">%s</div>`, html.EscapeString(message))
// Agent response bubble
if response == "" {
response = "I couldn't find an answer to that. Try asking about experience, projects, skills, or education."
}
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-agent">%s</div>`, formatResponse(response))
// Session ID via OOB swap
_, _ = fmt.Fprintf(w, `<input type="hidden" id="chat-session-id" name="session_id" value="%s" form="chat-form" hx-swap-oob="true"/>`, sessionID)
}
// runAgent executes the agent on the given runner and returns the response text.
func (h *Handler) runAgent(cr *chatRunner, message string) (string, string, error) {
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
// Create a new session for each request (stateless for fallback compatibility)
sess, err := cr.session.Create(ctx, &session.CreateRequest{
AppName: "cv-chat",
UserID: "visitor",
})
if err != nil {
return "", "", fmt.Errorf("session create: %w", err)
}
sessionID := sess.Session.ID()
userMsg := genai.NewContentFromText(message, genai.RoleUser)
var response strings.Builder
for event, err := range cr.runner.Run(ctx, "visitor", sessionID, userMsg, agent.RunConfig{}) {
if err != nil {
return "", "", err
}
if event.IsFinalResponse() {
if event.Content != nil {
for _, part := range event.Content.Parts {
@@ -207,35 +267,18 @@ func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
}
}
// Render the response as HTML
w.Header().Set("Content-Type", "text/html; charset=utf-8")
// User message bubble
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-user">%s</div>`, html.EscapeString(message))
// Agent response bubble
agentText := response.String()
if agentText == "" {
agentText = "I couldn't find an answer to that. Try asking about experience, projects, skills, or education."
}
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-agent">%s</div>`, formatResponse(agentText))
// Update session ID via OOB swap (replaces existing input, avoids duplicates)
_, _ = fmt.Fprintf(w, `<input type="hidden" id="chat-session-id" name="session_id" value="%s" form="chat-form" hx-swap-oob="true"/>`, sessionID)
return response.String(), sessionID, nil
}
// formatResponse converts basic markdown to HTML for the chat bubble.
func formatResponse(text string) string {
// Escape HTML first
text = html.EscapeString(text)
// Bold: **text** → <strong>text</strong>
for strings.Contains(text, "**") {
text = strings.Replace(text, "**", "<strong>", 1)
text = strings.Replace(text, "**", "</strong>", 1)
}
// Bullet points: lines starting with "- " → <li>
lines := strings.Split(text, "\n")
var result []string
inList := false
+1
View File
@@ -23,6 +23,7 @@ func Setup(cvHandler *handlers.CVHandler, healthHandler *handlers.HealthHandler,
// Chat endpoint with rate limiting (30 requests/hour per IP)
chatRateLimiter := middleware.NewRateLimiter(c.RateLimitChatRequests, c.RateLimitChatWindow)
mux.Handle("/api/chat", chatRateLimiter.Middleware(http.HandlerFunc(chatHandler.HandleChat)))
mux.HandleFunc("/api/chat/warmup", chatHandler.HandleWarmup) // Pre-load model on chat open
// Public routes
mux.HandleFunc("/", cvHandler.Home)
@@ -76,6 +76,7 @@
<!-- Chat JavaScript — all interactions in plain JS, no Hyperscript -->
<script>
// Toggle chat panel open/close
var chatWarmedUp = false;
function toggleChatPanel() {
var panel = document.getElementById('chat-panel');
var btn = document.getElementById('chat-toggle-btn');
@@ -83,6 +84,11 @@ function toggleChatPanel() {
btn.classList.toggle('mascot-active');
if (panel.classList.contains('chat-open')) {
document.getElementById('chat-input').focus();
// Warm up the model on first open (silent background ping)
if (!chatWarmedUp) {
chatWarmedUp = true;
fetch('/api/chat/warmup', { method: 'POST' }).catch(function() {});
}
}
}