feat: auto-fallback Gemini→Ollama + model warmup on chat open
Dual-provider architecture:
- Both Gemini and Ollama initialize at startup (if configured)
- Primary (Gemini) is tried first for every request
- On any error (429, 503, timeout), the handler automatically falls back to Ollama
- No manual switching needed — completely transparent to the user
- Log shows: "Primary failed (gemini: ...), falling back to ollama: ..."

Warmup:
- POST /api/chat/warmup is called silently when the chat panel opens
- Pre-loads the Ollama model in the background (10-15s) while the user reads the welcome message
- By the time the user types, the model is ready for an instant response
- Warms up the fallback provider specifically (Gemini doesn't need it)

Timeout:
- Agent context increased to 60s (Ollama's first response can be slow)
- Each request creates a fresh session (stateless for fallback compatibility)
This commit is contained in:
+134
-91
@@ -20,86 +20,106 @@ import (
|
|||||||
"google.golang.org/genai"
|
"google.golang.org/genai"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Handler serves the chat API endpoint.
|
// chatRunner bundles a runner with its session service and label.
|
||||||
type Handler struct {
|
type chatRunner struct {
|
||||||
runner *runner.Runner
|
runner *runner.Runner
|
||||||
sessionService session.Service
|
session session.Service
|
||||||
|
label string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handler serves the chat API endpoint with automatic fallback.
|
||||||
|
// Primary runner (Gemini) is tried first; if it fails, fallback (Ollama) is used.
|
||||||
|
type Handler struct {
|
||||||
|
primary *chatRunner
|
||||||
|
fallback *chatRunner
|
||||||
enabled bool
|
enabled bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewHandler creates a chat handler. Returns a disabled handler if no model provider is configured.
|
// NewHandler creates a chat handler with primary + optional fallback provider.
|
||||||
|
// - If GOOGLE_API_KEY is set → Gemini is primary
|
||||||
|
// - If OLLAMA_HOST or Ollama is available → Ollama is fallback
|
||||||
|
// - If only one is available, it becomes the sole provider
|
||||||
|
// - If neither is available, chat is disabled
|
||||||
func NewHandler(dataCache *cache.DataCache) *Handler {
|
func NewHandler(dataCache *cache.DataCache) *Handler {
|
||||||
provider := os.Getenv("MODEL_PROVIDER")
|
h := &Handler{}
|
||||||
if provider == "" {
|
|
||||||
provider = "gemini"
|
|
||||||
}
|
|
||||||
|
|
||||||
var llm model.LLM
|
// Try Gemini as primary
|
||||||
var providerLabel string
|
geminiLLM, geminiLabel, geminiErr := initGeminiProvider()
|
||||||
|
if geminiErr == nil && geminiLLM != nil {
|
||||||
switch provider {
|
r, err := buildRunner(geminiLLM, dataCache, "cv-chat-gemini")
|
||||||
case "ollama":
|
if err == nil {
|
||||||
llm, providerLabel = initOllamaProvider()
|
h.primary = &chatRunner{runner: r.runner, session: r.session, label: geminiLabel}
|
||||||
default:
|
|
||||||
var err error
|
|
||||||
llm, providerLabel, err = initGeminiProvider()
|
|
||||||
if err != nil {
|
|
||||||
return &Handler{enabled: false}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if llm == nil {
|
// Try Ollama as fallback (or primary if Gemini unavailable)
|
||||||
|
ollamaLLM, ollamaLabel := initOllamaProvider()
|
||||||
|
if ollamaLLM != nil {
|
||||||
|
r, err := buildRunner(ollamaLLM, dataCache, "cv-chat-ollama")
|
||||||
|
if err == nil {
|
||||||
|
if h.primary != nil {
|
||||||
|
h.fallback = &chatRunner{runner: r.runner, session: r.session, label: ollamaLabel}
|
||||||
|
} else {
|
||||||
|
h.primary = &chatRunner{runner: r.runner, session: r.session, label: ollamaLabel}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if h.primary == nil {
|
||||||
|
log.Println("⚠️ No chat provider available — chat disabled")
|
||||||
return &Handler{enabled: false}
|
return &Handler{enabled: false}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
h.enabled = true
|
||||||
|
|
||||||
|
if h.fallback != nil {
|
||||||
|
log.Printf("💬 Chat enabled (primary: %s, fallback: %s)", h.primary.label, h.fallback.label)
|
||||||
|
} else {
|
||||||
|
log.Printf("💬 Chat enabled (%s)", h.primary.label)
|
||||||
|
}
|
||||||
|
|
||||||
|
return h
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildRunner creates an ADK runner for a given LLM provider.
|
||||||
|
func buildRunner(llm model.LLM, dataCache *cache.DataCache, appName string) (*chatRunner, error) {
|
||||||
cvAgent, err := NewAgent(llm, dataCache)
|
cvAgent, err := NewAgent(llm, dataCache)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("⚠️ Failed to create CV agent: %v — chat disabled", err)
|
return nil, err
|
||||||
return &Handler{enabled: false}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sessionSvc := session.InMemoryService()
|
sessionSvc := session.InMemoryService()
|
||||||
|
|
||||||
r, err := runner.New(runner.Config{
|
r, err := runner.New(runner.Config{
|
||||||
AppName: "cv-chat",
|
AppName: appName,
|
||||||
Agent: cvAgent,
|
Agent: cvAgent,
|
||||||
SessionService: sessionSvc,
|
SessionService: sessionSvc,
|
||||||
AutoCreateSession: true,
|
AutoCreateSession: true,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("⚠️ Failed to create runner: %v — chat disabled", err)
|
return nil, err
|
||||||
return &Handler{enabled: false}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("💬 Chat agent enabled (%s)", providerLabel)
|
return &chatRunner{runner: r, session: sessionSvc}, nil
|
||||||
|
|
||||||
return &Handler{
|
|
||||||
runner: r,
|
|
||||||
sessionService: sessionSvc,
|
|
||||||
enabled: true,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// initGeminiProvider initializes the Gemini LLM provider.
|
// initGeminiProvider initializes the Gemini LLM provider.
|
||||||
func initGeminiProvider() (model.LLM, string, error) {
|
func initGeminiProvider() (model.LLM, string, error) {
|
||||||
apiKey := os.Getenv("GOOGLE_API_KEY")
|
apiKey := os.Getenv("GOOGLE_API_KEY")
|
||||||
if apiKey == "" {
|
if apiKey == "" {
|
||||||
log.Println("⚠️ GOOGLE_API_KEY not set — chat feature disabled")
|
|
||||||
return nil, "", fmt.Errorf("no API key")
|
return nil, "", fmt.Errorf("no API key")
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
modelName := os.Getenv("MODEL_NAME")
|
modelName := os.Getenv("MODEL_NAME")
|
||||||
if modelName == "" {
|
if modelName == "" {
|
||||||
modelName = "gemini-2.5-flash"
|
modelName = "gemini-2.5-flash"
|
||||||
}
|
}
|
||||||
|
|
||||||
llm, err := gemini.NewModel(ctx, modelName, &genai.ClientConfig{
|
llm, err := gemini.NewModel(context.Background(), modelName, &genai.ClientConfig{
|
||||||
APIKey: apiKey,
|
APIKey: apiKey,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("⚠️ Failed to initialize Gemini model: %v — chat disabled", err)
|
log.Printf("⚠️ Gemini init failed: %v", err)
|
||||||
return nil, "", err
|
return nil, "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -127,9 +147,42 @@ func (h *Handler) Enabled() bool {
|
|||||||
return h.enabled
|
return h.enabled
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HandleWarmup pre-loads the LLM model so the first real question is fast.
|
||||||
|
func (h *Handler) HandleWarmup(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if !h.enabled || r.Method != http.MethodPost {
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Warm up fallback (Ollama) in background — Gemini doesn't need warmup
|
||||||
|
target := h.fallback
|
||||||
|
if target == nil {
|
||||||
|
target = h.primary
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
sess, err := target.session.Create(ctx, &session.CreateRequest{
|
||||||
|
AppName: "cv-chat-warmup",
|
||||||
|
UserID: "warmup",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
msg := genai.NewContentFromText("hi", genai.RoleUser)
|
||||||
|
for range target.runner.Run(ctx, "warmup", sess.Session.ID(), msg, agent.RunConfig{}) {
|
||||||
|
}
|
||||||
|
log.Printf("💬 Model warmed up (%s)", target.label)
|
||||||
|
}()
|
||||||
|
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
}
|
||||||
|
|
||||||
// HandleChat processes POST /api/chat requests.
|
// HandleChat processes POST /api/chat requests.
|
||||||
// Expects form field "message" and optional "session_id".
|
// Tries the primary provider first; falls back to the secondary on error.
|
||||||
// Returns an HTML fragment for HTMX to swap into the chat panel.
|
|
||||||
func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
|
func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
|
||||||
if !h.enabled {
|
if !h.enabled {
|
||||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||||
@@ -150,45 +203,16 @@ func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
sessionID := r.FormValue("session_id")
|
// Try primary, fall back if it fails
|
||||||
if sessionID == "" {
|
response, sessionID, err := h.runAgent(h.primary, message)
|
||||||
sessionID = "default"
|
if err != nil && h.fallback != nil {
|
||||||
|
log.Printf("💬 Primary failed (%s: %v), falling back to %s", h.primary.label, err, h.fallback.label)
|
||||||
|
response, sessionID, err = h.runAgent(h.fallback, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure session exists
|
|
||||||
ctx := r.Context()
|
|
||||||
_, err := h.sessionService.Get(ctx, &session.GetRequest{
|
|
||||||
AppName: "cv-chat",
|
|
||||||
UserID: "visitor",
|
|
||||||
SessionID: sessionID,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
// Create new session
|
|
||||||
created, createErr := h.sessionService.Create(ctx, &session.CreateRequest{
|
|
||||||
AppName: "cv-chat",
|
|
||||||
UserID: "visitor",
|
|
||||||
})
|
|
||||||
if createErr != nil {
|
|
||||||
log.Printf("Chat session create error: %v", createErr)
|
|
||||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||||
w.WriteHeader(http.StatusInternalServerError)
|
|
||||||
_, _ = fmt.Fprint(w, `<div class="chat-message chat-error">Failed to start chat session.</div>`)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
sessionID = created.Session.ID()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run the agent with a dedicated context (not tied to HTTP request lifecycle)
|
|
||||||
agentCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
userMsg := genai.NewContentFromText(message, genai.RoleUser)
|
|
||||||
|
|
||||||
var response strings.Builder
|
|
||||||
for event, err := range h.runner.Run(agentCtx, "visitor", sessionID, userMsg, agent.RunConfig{}) {
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Chat agent error: %v", err)
|
|
||||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
|
||||||
errMsg := "Something went wrong. Please try again in a moment."
|
errMsg := "Something went wrong. Please try again in a moment."
|
||||||
if strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "RESOURCE_EXHAUSTED") {
|
if strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "RESOURCE_EXHAUSTED") {
|
||||||
errMsg = "The AI service is temporarily busy. Please try again in a few seconds."
|
errMsg = "The AI service is temporarily busy. Please try again in a few seconds."
|
||||||
@@ -196,6 +220,42 @@ func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
|
|||||||
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-error">%s</div>`, errMsg)
|
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-error">%s</div>`, errMsg)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// User message bubble
|
||||||
|
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-user">%s</div>`, html.EscapeString(message))
|
||||||
|
|
||||||
|
// Agent response bubble
|
||||||
|
if response == "" {
|
||||||
|
response = "I couldn't find an answer to that. Try asking about experience, projects, skills, or education."
|
||||||
|
}
|
||||||
|
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-agent">%s</div>`, formatResponse(response))
|
||||||
|
|
||||||
|
// Session ID via OOB swap
|
||||||
|
_, _ = fmt.Fprintf(w, `<input type="hidden" id="chat-session-id" name="session_id" value="%s" form="chat-form" hx-swap-oob="true"/>`, sessionID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// runAgent executes the agent on the given runner and returns the response text.
|
||||||
|
func (h *Handler) runAgent(cr *chatRunner, message string) (string, string, error) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Create a new session for each request (stateless for fallback compatibility)
|
||||||
|
sess, err := cr.session.Create(ctx, &session.CreateRequest{
|
||||||
|
AppName: "cv-chat",
|
||||||
|
UserID: "visitor",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", "", fmt.Errorf("session create: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sessionID := sess.Session.ID()
|
||||||
|
userMsg := genai.NewContentFromText(message, genai.RoleUser)
|
||||||
|
|
||||||
|
var response strings.Builder
|
||||||
|
for event, err := range cr.runner.Run(ctx, "visitor", sessionID, userMsg, agent.RunConfig{}) {
|
||||||
|
if err != nil {
|
||||||
|
return "", "", err
|
||||||
|
}
|
||||||
if event.IsFinalResponse() {
|
if event.IsFinalResponse() {
|
||||||
if event.Content != nil {
|
if event.Content != nil {
|
||||||
for _, part := range event.Content.Parts {
|
for _, part := range event.Content.Parts {
|
||||||
@@ -207,35 +267,18 @@ func (h *Handler) HandleChat(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Render the response as HTML
|
return response.String(), sessionID, nil
|
||||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
|
||||||
|
|
||||||
// User message bubble
|
|
||||||
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-user">%s</div>`, html.EscapeString(message))
|
|
||||||
|
|
||||||
// Agent response bubble
|
|
||||||
agentText := response.String()
|
|
||||||
if agentText == "" {
|
|
||||||
agentText = "I couldn't find an answer to that. Try asking about experience, projects, skills, or education."
|
|
||||||
}
|
|
||||||
_, _ = fmt.Fprintf(w, `<div class="chat-message chat-agent">%s</div>`, formatResponse(agentText))
|
|
||||||
|
|
||||||
// Update session ID via OOB swap (replaces existing input, avoids duplicates)
|
|
||||||
_, _ = fmt.Fprintf(w, `<input type="hidden" id="chat-session-id" name="session_id" value="%s" form="chat-form" hx-swap-oob="true"/>`, sessionID)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// formatResponse converts basic markdown to HTML for the chat bubble.
|
// formatResponse converts basic markdown to HTML for the chat bubble.
|
||||||
func formatResponse(text string) string {
|
func formatResponse(text string) string {
|
||||||
// Escape HTML first
|
|
||||||
text = html.EscapeString(text)
|
text = html.EscapeString(text)
|
||||||
|
|
||||||
// Bold: **text** → <strong>text</strong>
|
|
||||||
for strings.Contains(text, "**") {
|
for strings.Contains(text, "**") {
|
||||||
text = strings.Replace(text, "**", "<strong>", 1)
|
text = strings.Replace(text, "**", "<strong>", 1)
|
||||||
text = strings.Replace(text, "**", "</strong>", 1)
|
text = strings.Replace(text, "**", "</strong>", 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bullet points: lines starting with "- " → <li>
|
|
||||||
lines := strings.Split(text, "\n")
|
lines := strings.Split(text, "\n")
|
||||||
var result []string
|
var result []string
|
||||||
inList := false
|
inList := false
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ func Setup(cvHandler *handlers.CVHandler, healthHandler *handlers.HealthHandler,
|
|||||||
// Chat endpoint with rate limiting (30 requests/hour per IP)
|
// Chat endpoint with rate limiting (30 requests/hour per IP)
|
||||||
chatRateLimiter := middleware.NewRateLimiter(c.RateLimitChatRequests, c.RateLimitChatWindow)
|
chatRateLimiter := middleware.NewRateLimiter(c.RateLimitChatRequests, c.RateLimitChatWindow)
|
||||||
mux.Handle("/api/chat", chatRateLimiter.Middleware(http.HandlerFunc(chatHandler.HandleChat)))
|
mux.Handle("/api/chat", chatRateLimiter.Middleware(http.HandlerFunc(chatHandler.HandleChat)))
|
||||||
|
mux.HandleFunc("/api/chat/warmup", chatHandler.HandleWarmup) // Pre-load model on chat open
|
||||||
|
|
||||||
// Public routes
|
// Public routes
|
||||||
mux.HandleFunc("/", cvHandler.Home)
|
mux.HandleFunc("/", cvHandler.Home)
|
||||||
|
|||||||
@@ -76,6 +76,7 @@
|
|||||||
<!-- Chat JavaScript — all interactions in plain JS, no Hyperscript -->
|
<!-- Chat JavaScript — all interactions in plain JS, no Hyperscript -->
|
||||||
<script>
|
<script>
|
||||||
// Toggle chat panel open/close
|
// Toggle chat panel open/close
|
||||||
|
var chatWarmedUp = false;
|
||||||
function toggleChatPanel() {
|
function toggleChatPanel() {
|
||||||
var panel = document.getElementById('chat-panel');
|
var panel = document.getElementById('chat-panel');
|
||||||
var btn = document.getElementById('chat-toggle-btn');
|
var btn = document.getElementById('chat-toggle-btn');
|
||||||
@@ -83,6 +84,11 @@ function toggleChatPanel() {
|
|||||||
btn.classList.toggle('mascot-active');
|
btn.classList.toggle('mascot-active');
|
||||||
if (panel.classList.contains('chat-open')) {
|
if (panel.classList.contains('chat-open')) {
|
||||||
document.getElementById('chat-input').focus();
|
document.getElementById('chat-input').focus();
|
||||||
|
// Warm up the model on first open (silent background ping)
|
||||||
|
if (!chatWarmedUp) {
|
||||||
|
chatWarmedUp = true;
|
||||||
|
fetch('/api/chat/warmup', { method: 'POST' }).catch(function() {});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user