feat: Ollama adapter + chat rate limiter (30 req/hour)

Ollama adapter (internal/chat/ollama.go): - Implements model.LLM interface for ADK Go - Talks to Ollama's OpenAI-compatible API (/v1/chat/completions) - Full tool/function calling support (tested with Mistral Small 3.2) - Converts ADK types to OpenAI format (messages, tools, tool_calls) - Configurable via OLLAMA_HOST and OLLAMA_MODEL env vars Multi-provider handler: - MODEL_PROVIDER env: "gemini" (default) or "ollama" - Gemini: requires GOOGLE_API_KEY (pay-as-you-go recommended) - Ollama: connects to local or Tailscale-remote instance Rate limiter: - 30 requests/hour per IP on /api/chat endpoint - Uses existing middleware.NewRateLimiter pattern Tested: Ollama + Mistral Small 3.2 on M4 Pro 64GB — correct answers
2026-04-08 14:47:14 +01:00
parent 4f558ac842
commit 8205a22972
5 changed files with 510 additions and 17 deletions
@@ -80,6 +80,19 @@ SMTP_PASSWORD=your-password
 SMTP_FROM_EMAIL=your-email@yourdomain.com
 CONTACT_EMAIL=recipient@example.com

+# Chat AI Configuration
+#
+# MODEL_PROVIDER: "gemini" (default) or "ollama"
+# MODEL_PROVIDER=gemini
+#
+# Gemini settings (when MODEL_PROVIDER=gemini):
+# GOOGLE_API_KEY=your-google-api-key
+# MODEL_NAME=gemini-2.5-flash
+#
+# Ollama settings (when MODEL_PROVIDER=ollama):
+# OLLAMA_HOST=http://localhost:11434
+# OLLAMA_MODEL=mistral-small3.2
+
 # Production Settings
 # Uncomment for production:
 # GO_ENV=production
@@ -13,6 +13,7 @@ import (
 	"github.com/juanatsap/cv-site/internal/cache"

 	"google.golang.org/adk/agent"
+	"google.golang.org/adk/model"
 	"google.golang.org/adk/model/gemini"
 	"google.golang.org/adk/runner"
 	"google.golang.org/adk/session"
@@ -26,26 +27,28 @@ type Handler struct {
 	enabled        bool
 }

-// NewHandler creates a chat handler. Returns a disabled handler if GOOGLE_API_KEY is not set.
+// NewHandler creates a chat handler. Returns a disabled handler if no model provider is configured.
 func NewHandler(dataCache *cache.DataCache) *Handler {
-	apiKey := os.Getenv("GOOGLE_API_KEY")
-	if apiKey == "" {
-		log.Println("⚠️  GOOGLE_API_KEY not set — chat feature disabled")
+	provider := os.Getenv("MODEL_PROVIDER")
+	if provider == "" {
+		provider = "gemini"
+	}
+
+	var llm model.LLM
+	var providerLabel string
+
+	switch provider {
+	case "ollama":
+		llm, providerLabel = initOllamaProvider()
+	default:
+		var err error
+		llm, providerLabel, err = initGeminiProvider()
+		if err != nil {
 			return &Handler{enabled: false}
 		}
-
-	ctx := context.Background()
-
-	modelName := os.Getenv("MODEL_NAME")
-	if modelName == "" {
-		modelName = "gemini-2.5-flash"
 	}

-	llm, err := gemini.NewModel(ctx, modelName, &genai.ClientConfig{
-		APIKey: apiKey,
-	})
-	if err != nil {
-		log.Printf("⚠️  Failed to initialize Gemini model: %v — chat disabled", err)
+	if llm == nil {
 		return &Handler{enabled: false}
 	}

@@ -68,7 +71,7 @@ func NewHandler(dataCache *cache.DataCache) *Handler {
 		return &Handler{enabled: false}
 	}

-	log.Printf("💬 Chat agent enabled (model: %s)", modelName)
+	log.Printf("💬 Chat agent enabled (%s)", providerLabel)

 	return &Handler{
 		runner:         r,
@@ -77,6 +80,48 @@ func NewHandler(dataCache *cache.DataCache) *Handler {
 	}
 }

+// initGeminiProvider initializes the Gemini LLM provider.
+func initGeminiProvider() (model.LLM, string, error) {
+	apiKey := os.Getenv("GOOGLE_API_KEY")
+	if apiKey == "" {
+		log.Println("⚠️  GOOGLE_API_KEY not set — chat feature disabled")
+		return nil, "", fmt.Errorf("no API key")
+	}
+
+	ctx := context.Background()
+
+	modelName := os.Getenv("MODEL_NAME")
+	if modelName == "" {
+		modelName = "gemini-2.5-flash"
+	}
+
+	llm, err := gemini.NewModel(ctx, modelName, &genai.ClientConfig{
+		APIKey: apiKey,
+	})
+	if err != nil {
+		log.Printf("⚠️  Failed to initialize Gemini model: %v — chat disabled", err)
+		return nil, "", err
+	}
+
+	return llm, fmt.Sprintf("gemini: %s", modelName), nil
+}
+
+// initOllamaProvider initializes the Ollama LLM provider.
+func initOllamaProvider() (model.LLM, string) {
+	host := os.Getenv("OLLAMA_HOST")
+	if host == "" {
+		host = "http://localhost:11434"
+	}
+
+	modelName := os.Getenv("OLLAMA_MODEL")
+	if modelName == "" {
+		modelName = "mistral-small3.2"
+	}
+
+	llm := NewOllamaModel(host, modelName)
+	return llm, fmt.Sprintf("ollama: %s @ %s", modelName, host)
+}
+
 // Enabled returns whether the chat feature is available.
 func (h *Handler) Enabled() bool {
 	return h.enabled
@@ -0,0 +1,430 @@
+// Package chat provides an ADK Go agent that answers questions about CV data.
+package chat
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"iter"
+	"net/http"
+
+	"google.golang.org/adk/model"
+	"google.golang.org/genai"
+)
+
+// OllamaModel implements model.LLM using Ollama's OpenAI-compatible API.
+type OllamaModel struct {
+	host      string // e.g. "http://localhost:11434"
+	modelName string // e.g. "mistral-small3.2"
+	client    *http.Client
+}
+
+// NewOllamaModel creates a new Ollama-backed LLM.
+func NewOllamaModel(host, modelName string) *OllamaModel {
+	return &OllamaModel{
+		host:      host,
+		modelName: modelName,
+		client:    &http.Client{},
+	}
+}
+
+// Name returns the model name.
+func (m *OllamaModel) Name() string {
+	return m.modelName
+}
+
+// Verify OllamaModel implements model.LLM at compile time.
+var _ model.LLM = (*OllamaModel)(nil)
+
+// GenerateContent sends a request to Ollama and returns ADK-compatible responses.
+func (m *OllamaModel) GenerateContent(ctx context.Context, req *model.LLMRequest, stream bool) iter.Seq2[*model.LLMResponse, error] {
+	return func(yield func(*model.LLMResponse, error) bool) {
+		resp, err := m.generate(ctx, req)
+		yield(resp, err)
+	}
+}
+
+// --- OpenAI-compatible request/response types ---
+
+type oaiMessage struct {
+	Role       string        `json:"role"`
+	Content    string        `json:"content,omitempty"`
+	ToolCalls  []oaiToolCall `json:"tool_calls,omitempty"`
+	ToolCallID string        `json:"tool_call_id,omitempty"`
+}
+
+type oaiToolCall struct {
+	ID       string          `json:"id"`
+	Type     string          `json:"type"`
+	Function oaiToolFunction `json:"function"`
+}
+
+type oaiToolFunction struct {
+	Name      string `json:"name"`
+	Arguments string `json:"arguments"` // JSON string
+}
+
+type oaiTool struct {
+	Type     string          `json:"type"`
+	Function oaiToolFuncDecl `json:"function"`
+}
+
+type oaiToolFuncDecl struct {
+	Name        string `json:"name"`
+	Description string `json:"description,omitempty"`
+	Parameters  any    `json:"parameters,omitempty"`
+}
+
+type oaiRequest struct {
+	Model       string       `json:"model"`
+	Messages    []oaiMessage `json:"messages"`
+	Tools       []oaiTool    `json:"tools,omitempty"`
+	Stream      bool         `json:"stream"`
+	Temperature *float32     `json:"temperature,omitempty"`
+}
+
+type oaiResponse struct {
+	Choices []oaiChoice `json:"choices"`
+	Usage   *oaiUsage   `json:"usage,omitempty"`
+	Model   string      `json:"model,omitempty"`
+}
+
+type oaiChoice struct {
+	Message      oaiMessage `json:"message"`
+	FinishReason string     `json:"finish_reason"`
+}
+
+type oaiUsage struct {
+	PromptTokens     int32 `json:"prompt_tokens"`
+	CompletionTokens int32 `json:"completion_tokens"`
+	TotalTokens      int32 `json:"total_tokens"`
+}
+
+// generate performs a synchronous (non-streaming) call to Ollama.
+func (m *OllamaModel) generate(ctx context.Context, req *model.LLMRequest) (*model.LLMResponse, error) {
+	oaiReq := m.buildRequest(req)
+
+	body, err := json.Marshal(oaiReq)
+	if err != nil {
+		return nil, fmt.Errorf("ollama: marshal request: %w", err)
+	}
+
+	url := fmt.Sprintf("%s/v1/chat/completions", m.host)
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("ollama: create request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+
+	httpResp, err := m.client.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("ollama: send request: %w", err)
+	}
+	defer func() { _ = httpResp.Body.Close() }()
+
+	respBody, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("ollama: read response: %w", err)
+	}
+
+	if httpResp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("ollama: HTTP %d: %s", httpResp.StatusCode, string(respBody))
+	}
+
+	var oaiResp oaiResponse
+	if err := json.Unmarshal(respBody, &oaiResp); err != nil {
+		return nil, fmt.Errorf("ollama: unmarshal response: %w", err)
+	}
+
+	return m.convertResponse(&oaiResp)
+}
+
+// buildRequest converts an ADK LLMRequest into an OpenAI-compatible request.
+func (m *OllamaModel) buildRequest(req *model.LLMRequest) *oaiRequest {
+	oaiReq := &oaiRequest{
+		Model:  m.modelName,
+		Stream: false,
+	}
+
+	// Convert system instruction
+	if req.Config != nil && req.Config.SystemInstruction != nil {
+		text := extractText(req.Config.SystemInstruction)
+		if text != "" {
+			oaiReq.Messages = append(oaiReq.Messages, oaiMessage{
+				Role:    "system",
+				Content: text,
+			})
+		}
+	}
+
+	// Set temperature if provided
+	if req.Config != nil && req.Config.Temperature != nil {
+		oaiReq.Temperature = req.Config.Temperature
+	}
+
+	// Convert conversation messages
+	for _, content := range req.Contents {
+		msgs := convertContent(content)
+		oaiReq.Messages = append(oaiReq.Messages, msgs...)
+	}
+
+	// Convert tools (function declarations)
+	if req.Config != nil && req.Config.Tools != nil {
+		for _, t := range req.Config.Tools {
+			if t.FunctionDeclarations != nil {
+				for _, fd := range t.FunctionDeclarations {
+					oaiReq.Tools = append(oaiReq.Tools, convertFunctionDecl(fd))
+				}
+			}
+		}
+	}
+
+	return oaiReq
+}
+
+// convertContent converts a genai.Content into one or more OpenAI messages.
+func convertContent(content *genai.Content) []oaiMessage {
+	if content == nil {
+		return nil
+	}
+
+	role := mapRole(content.Role)
+
+	// Check if this content has function calls (assistant with tool_calls)
+	var toolCalls []oaiToolCall
+	var textParts []string
+	var funcResponses []oaiMessage
+
+	for _, part := range content.Parts {
+		if part.Text != "" {
+			textParts = append(textParts, part.Text)
+		}
+		if part.FunctionCall != nil {
+			argsJSON, _ := json.Marshal(part.FunctionCall.Args)
+			toolCalls = append(toolCalls, oaiToolCall{
+				ID:   part.FunctionCall.ID,
+				Type: "function",
+				Function: oaiToolFunction{
+					Name:      part.FunctionCall.Name,
+					Arguments: string(argsJSON),
+				},
+			})
+		}
+		if part.FunctionResponse != nil {
+			respJSON, _ := json.Marshal(part.FunctionResponse.Response)
+			funcResponses = append(funcResponses, oaiMessage{
+				Role:       "tool",
+				Content:    string(respJSON),
+				ToolCallID: part.FunctionResponse.ID,
+			})
+		}
+	}
+
+	var msgs []oaiMessage
+
+	// Build the primary message
+	if len(toolCalls) > 0 {
+		// Assistant message with tool calls
+		msg := oaiMessage{
+			Role:      "assistant",
+			ToolCalls: toolCalls,
+		}
+		if len(textParts) > 0 {
+			combined := ""
+			for _, t := range textParts {
+				combined += t
+			}
+			msg.Content = combined
+		}
+		msgs = append(msgs, msg)
+	} else if len(textParts) > 0 {
+		combined := ""
+		for _, t := range textParts {
+			combined += t
+		}
+		msgs = append(msgs, oaiMessage{
+			Role:    role,
+			Content: combined,
+		})
+	}
+
+	// Append function response messages separately
+	msgs = append(msgs, funcResponses...)
+
+	return msgs
+}
+
+// convertFunctionDecl converts a genai FunctionDeclaration to an OpenAI tool.
+func convertFunctionDecl(fd *genai.FunctionDeclaration) oaiTool {
+	var params any
+	if fd.Parameters != nil {
+		params = convertSchema(fd.Parameters)
+	} else if fd.ParametersJsonSchema != nil {
+		params = fd.ParametersJsonSchema
+	}
+
+	return oaiTool{
+		Type: "function",
+		Function: oaiToolFuncDecl{
+			Name:        fd.Name,
+			Description: fd.Description,
+			Parameters:  params,
+		},
+	}
+}
+
+// convertSchema converts a genai.Schema to a JSON-Schema-compatible map.
+func convertSchema(s *genai.Schema) map[string]any {
+	if s == nil {
+		return nil
+	}
+
+	m := make(map[string]any)
+
+	if s.Type != "" {
+		m["type"] = schemaTypeToJSON(s.Type)
+	}
+	if s.Description != "" {
+		m["description"] = s.Description
+	}
+	if len(s.Enum) > 0 {
+		m["enum"] = s.Enum
+	}
+	if s.Items != nil {
+		m["items"] = convertSchema(s.Items)
+	}
+	if len(s.Properties) > 0 {
+		props := make(map[string]any)
+		for k, v := range s.Properties {
+			props[k] = convertSchema(v)
+		}
+		m["properties"] = props
+	}
+	if len(s.Required) > 0 {
+		m["required"] = s.Required
+	}
+
+	return m
+}
+
+// schemaTypeToJSON maps genai.Type to JSON Schema type strings.
+func schemaTypeToJSON(t genai.Type) string {
+	switch t {
+	case genai.TypeString:
+		return "string"
+	case genai.TypeNumber:
+		return "number"
+	case genai.TypeInteger:
+		return "integer"
+	case genai.TypeBoolean:
+		return "boolean"
+	case genai.TypeArray:
+		return "array"
+	case genai.TypeObject:
+		return "object"
+	default:
+		return "string"
+	}
+}
+
+// convertResponse converts an OpenAI response back to an ADK LLMResponse.
+func (m *OllamaModel) convertResponse(resp *oaiResponse) (*model.LLMResponse, error) {
+	if len(resp.Choices) == 0 {
+		return nil, fmt.Errorf("ollama: empty response (no choices)")
+	}
+
+	choice := resp.Choices[0]
+	var parts []*genai.Part
+
+	// Handle tool calls
+	if len(choice.Message.ToolCalls) > 0 {
+		for _, tc := range choice.Message.ToolCalls {
+			var args map[string]any
+			if tc.Function.Arguments != "" {
+				if err := json.Unmarshal([]byte(tc.Function.Arguments), &args); err != nil {
+					// If args aren't valid JSON, wrap them
+					args = map[string]any{"raw": tc.Function.Arguments}
+				}
+			}
+			parts = append(parts, &genai.Part{
+				FunctionCall: &genai.FunctionCall{
+					ID:   tc.ID,
+					Name: tc.Function.Name,
+					Args: args,
+				},
+			})
+		}
+	}
+
+	// Handle text content
+	if choice.Message.Content != "" {
+		parts = append(parts, &genai.Part{
+			Text: choice.Message.Content,
+		})
+	}
+
+	content := &genai.Content{
+		Parts: parts,
+		Role:  genai.RoleModel,
+	}
+
+	llmResp := &model.LLMResponse{
+		Content:      content,
+		FinishReason: mapFinishReason(choice.FinishReason),
+		TurnComplete: true,
+		ModelVersion: resp.Model,
+	}
+
+	// Map usage metadata
+	if resp.Usage != nil {
+		llmResp.UsageMetadata = &genai.GenerateContentResponseUsageMetadata{
+			PromptTokenCount:     resp.Usage.PromptTokens,
+			CandidatesTokenCount: resp.Usage.CompletionTokens,
+			TotalTokenCount:      resp.Usage.TotalTokens,
+		}
+	}
+
+	return llmResp, nil
+}
+
+// mapRole converts genai roles to OpenAI roles.
+func mapRole(role string) string {
+	switch role {
+	case "user":
+		return "user"
+	case "model":
+		return "assistant"
+	default:
+		return "user"
+	}
+}
+
+// mapFinishReason converts OpenAI finish reasons to genai finish reasons.
+func mapFinishReason(reason string) genai.FinishReason {
+	switch reason {
+	case "stop":
+		return genai.FinishReasonStop
+	case "length":
+		return genai.FinishReasonMaxTokens
+	case "tool_calls":
+		return genai.FinishReasonStop // Tool calls are a normal stop
+	default:
+		return genai.FinishReasonStop
+	}
+}
+
+// extractText extracts all text from a genai.Content.
+func extractText(content *genai.Content) string {
+	if content == nil {
+		return ""
+	}
+	var result string
+	for _, part := range content.Parts {
+		if part.Text != "" {
+			result += part.Text
+		}
+	}
+	return result
+}
@@ -135,6 +135,8 @@ const (
 	RateLimitGeneralWindow   = 1 * time.Minute
 	RateLimitContactRequests = 5
 	RateLimitContactWindow   = 1 * time.Hour
+	RateLimitChatRequests    = 30
+	RateLimitChatWindow      = 1 * time.Hour
 )

 // ==============================================================================
@@ -19,7 +19,10 @@ func Setup(cvHandler *handlers.CVHandler, healthHandler *handlers.HealthHandler,

 	// API routes (must be before "/" to avoid catch-all)
 	mux.HandleFunc("/api/cmd-k", cvHandler.CmdKData) // CMD+K command palette data
-	mux.HandleFunc("/api/chat", chatHandler.HandleChat) // AI chat endpoint
+
+	// Chat endpoint with rate limiting (30 requests/hour per IP)
+	chatRateLimiter := middleware.NewRateLimiter(c.RateLimitChatRequests, c.RateLimitChatWindow)
+	mux.Handle("/api/chat", chatRateLimiter.Middleware(http.HandlerFunc(chatHandler.HandleChat)))

 	// Public routes
 	mux.HandleFunc("/", cvHandler.Home)