package api

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/muyue/muyue/internal/agent"
	"github.com/muyue/muyue/internal/orchestrator"
	"github.com/muyue/muyue/internal/platform"
)

// thinkingTagRegex matches a complete <think ...>...</think> block so model
// reasoning traces can be stripped from output.
// BUG FIX: the original pattern ended with a lazy `.*?` and had no closing
// tag, so the lazy quantifier matched the empty string and only the opening
// tag was ever removed — the "thinking" text itself was left in place.
var thinkingTagRegex = regexp.MustCompile(`(?s)<[Tt]hink[^>]*>.*?</[Tt]hink[^>]*>`)

// fileMentionRegex matches "@path/to/file.ext" mentions inside a chat message.
var fileMentionRegex = regexp.MustCompile(`@(\S+\.[a-zA-Z0-9]+)`)

// ImageAttachment is a user-supplied image sent alongside a chat message.
type ImageAttachment struct {
	Data     string `json:"data"` // image payload (data URL / base64 — format not enforced here)
	Filename string `json:"filename"`
	MimeType string `json:"mime_type"`
}

// resolveFileMentions expands every "@file.ext" mention in text by inlining
// the file's content (truncated to ~50KB). "~/" and bare relative paths are
// resolved against the user's home directory. When a file cannot be read the
// mention is kept and an error note is appended instead.
func resolveFileMentions(text string) string {
	return fileMentionRegex.ReplaceAllStringFunc(text, func(match string) string {
		filePath := match[1:] // drop the leading '@'
		if strings.HasPrefix(filePath, "~/") {
			if home, err := os.UserHomeDir(); err == nil {
				filePath = filepath.Join(home, filePath[2:])
			}
		}
		if !filepath.IsAbs(filePath) {
			// Relative mentions are interpreted relative to $HOME, not the CWD.
			if home, err := os.UserHomeDir(); err == nil {
				filePath = filepath.Join(home, filePath)
			}
		}
		data, err := os.ReadFile(filePath)
		if err != nil {
			return match + " (erreur: fichier non trouve)"
		}
		content := string(data)
		const maxFileBytes = 50000
		if len(content) > maxFileBytes {
			// BUG FIX: back up to a rune boundary so the cut never splits a
			// multi-byte UTF-8 sequence and injects mojibake into the prompt.
			cut := maxFileBytes
			for cut > 0 && !utf8.RuneStart(content[cut]) {
				cut--
			}
			content = content[:cut] + "\n... (tronque a 50Ko)"
		}
		return fmt.Sprintf("[Fichier: %s]\n%s\n[Fin du fichier: %s]", filepath.Base(filePath), content, filepath.Base(filePath))
	})
}

// vlmClient is shared by all VLM calls; the timeout bounds the whole request.
var vlmClient = &http.Client{Timeout: 60 * time.Second}

// describeImages returns one textual description per image, produced by the
// active provider's VLM endpoint. A failed call yields a placeholder entry so
// indexes stay aligned with the input slice. Returns nil when no provider is
// active (callers then simply skip image context).
func (s *Server) describeImages(images []ImageAttachment) []string {
	var apiKey string
	for i := range s.config.AI.Providers {
		if s.config.AI.Providers[i].Active {
			apiKey = s.config.AI.Providers[i].APIKey
			break
		}
	}
	if apiKey == "" {
		return nil
	}
	descriptions := make([]string, 0, len(images))
	for _, img := range images {
		desc, err := s.callVLM(apiKey, img)
		if err != nil {
			descriptions = append(descriptions, fmt.Sprintf("(description unavailable: %v)", err))
			continue
		}
		descriptions = append(descriptions, desc)
	}
	return descriptions
}

// callVLM sends one image to the provider's vision endpoint and returns the
// textual description. The per-call context timeout (55s) is kept just under
// the client timeout (60s) so the context fires first with a cleaner error.
func (s *Server) callVLM(apiKey string, img ImageAttachment) (string, error) {
	payload := map[string]string{
		"prompt":    "Describe this image in detail. Include all text, UI elements, code, diagrams, or data visible. Be thorough and specific.",
		"image_url": img.Data,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return "", fmt.Errorf("marshal vlm request: %w", err)
	}
	ctx, cancel := context.WithTimeout(context.Background(), 55*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "POST", "https://api.minimax.io/v1/coding_plan/vlm", bytes.NewReader(body))
	if err != nil {
		return "", fmt.Errorf("create vlm request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+apiKey)
	resp, err := vlmClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("vlm request: %w", err)
	}
	defer resp.Body.Close()
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read vlm response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("vlm API error (%d): %s", resp.StatusCode, string(respBody))
	}
	var result struct {
		Content string `json:"content"`
	}
	if err := json.Unmarshal(respBody, &result); err != nil {
		return "", fmt.Errorf("parse vlm response: %w", err)
	}
	if result.Content == "" {
		return "(empty description)", nil
	}
	return result.Content, nil
}

// handleChat is the main chat endpoint (POST). It enriches the user message
// with @file contents and image descriptions, persists it, optionally runs a
// second-model "reflection" report, then dispatches to the streaming or
// non-streaming engine.
func (s *Server) handleChat(w http.ResponseWriter, r *http.Request) {
	if r.Method != "POST" {
		writeError(w, "POST only", http.StatusMethodNotAllowed)
		return
	}
	// Cap the request body (images are inlined) at 50 MiB.
	r.Body = http.MaxBytesReader(w, r.Body, 50*1024*1024)
	var body struct {
		Message            string            `json:"message"`
		Stream             bool              `json:"stream"`
		Images             []ImageAttachment `json:"images"`
		AdvancedReflection bool              `json:"advanced_reflection"`
	}
	if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
		writeError(w, err.Error(), http.StatusBadRequest)
		return
	}
	if body.Message == "" {
		// BUG FIX: an empty message is a client error (400), not a method
		// mismatch — the original returned 405 here.
		writeError(w, "no message", http.StatusBadRequest)
		return
	}
	if len(body.Images) > 3 {
		writeError(w, "max 3 images", http.StatusBadRequest)
		return
	}

	enrichedMessage := resolveFileMentions(body.Message)

	// Describe and persist attached images; the descriptions are prepended to
	// the model-facing message, never shown verbatim to the user.
	var imageIDs []string
	if len(body.Images) > 0 {
		descriptions := s.describeImages(body.Images)
		var imgContext strings.Builder
		for i, desc := range descriptions {
			imgContext.WriteString(fmt.Sprintf("\n[Image %d (%s): %s]\n", i+1, body.Images[i].Filename, desc))
			id, err := saveImage(body.Images[i].Data, body.Images[i].Filename, body.Images[i].MimeType)
			if err != nil {
				// Best effort: a failed save only means the image is not
				// persisted; its description is still part of the prompt.
				continue
			}
			imageIDs = append(imageIDs, id)
		}
		enrichedMessage = imgContext.String() + enrichedMessage
	}

	// The stored/displayed user message carries only the filenames, not the
	// enriched content.
	displayMsg := body.Message
	if len(body.Images) > 0 {
		imgNames := make([]string, len(body.Images))
		for i, img := range body.Images {
			imgNames[i] = img.Filename
		}
		displayMsg += " [" + strings.Join(imgNames, ", ") + "]"
	}
	if len(imageIDs) > 0 {
		s.convStore.AddWithImages("user", displayMsg, imageIDs)
	} else {
		s.convStore.Add("user", displayMsg)
	}
	if s.convStore.NeedsSummarization() {
		s.autoSummarize()
	}

	orb, err := orchestrator.New(s.config)
	if err != nil {
		writeError(w, err.Error(), http.StatusServiceUnavailable)
		return
	}

	// Build the system prompt: studio prompt + date/time/OS + sudo situation.
	var studioPrompt strings.Builder
	studioPrompt.WriteString(agent.StudioSystemPrompt())
	sysInfo := platform.Detect()
	osName := sysInfo.OSName
	if osName == "" {
		osName = string(sysInfo.OS)
	}
	now := time.Now() // capture once so date and time can't straddle midnight
	studioPrompt.WriteString(fmt.Sprintf("\nDate: %s\nHeure: %s\nSystème: %s\n", now.Format("02/01/2006"), now.Format("15:04:05"), osName))
	canSudo := !agent.NeedsSudoPassword()
	// NOTE(review): "Root" is reported as !canSudo, i.e. true exactly when
	// sudo DOES require a password. That looks inverted — confirm the
	// intended meaning before changing (the French warnings below read
	// consistently with canSudo, not with this flag).
	studioPrompt.WriteString(fmt.Sprintf("Root: %t\n", !canSudo))
	if !canSudo {
		studioPrompt.WriteString("⚠️ Session sans sudo sans mot de passe — les commandes sudo/doas nécessitent une autorisation. N'utilise PAS sudo ou doas sans demander.\n")
	} else {
		studioPrompt.WriteString("⚠️ Session avec privilèges sudo sans mot de passe — les commandes sudo s'exécuteront directement.\n")
	}
	orb.SetSystemPrompt(studioPrompt.String())
	orb.SetTools(s.agentToolsJSON)

	if memBlock := s.buildMemoryContext(enrichedMessage); memBlock != "" {
		orb.AppendHistory(orchestrator.Message{
			Role:    "system",
			Content: orchestrator.TextContent(memBlock),
		})
	}

	// Auto-force advanced reflection while a browser-test session is active:
	// the user is doing AI-driven UI testing, where having a second model
	// produce a preliminary report (when one is configured) materially
	// improves which clicks the active model decides to perform. The toggle
	// remains user-controllable for non-test conversations.
	wantReflection := body.AdvancedReflection
	if !wantReflection && s.browserTestStore != nil && len(s.browserTestStore.List()) > 0 {
		wantReflection = true
	}
	if wantReflection {
		if report, ok := s.runReflectionReport(enrichedMessage); ok {
			enrichedMessage = enrichedMessage + "\n\n[RAPPORT PRÉALABLE — produit par un autre modèle, à valider]\n" + report + "\n[/RAPPORT PRÉALABLE]"
		}
	}

	if body.Stream {
		s.handleStreamChat(w, orb, enrichedMessage)
	} else {
		s.handleNonStreamChat(w, orb, enrichedMessage)
	}
}

// handleStreamChat runs the chat engine and streams chunks to the client over
// SSE. Tool calls/results, when present, are folded into the stored assistant
// message as a JSON envelope.
func (s *Server) handleStreamChat(w http.ResponseWriter, orb *orchestrator.Orchestrator, userMessage string) {
	SetupSSEHeaders(w)
	flusher, canFlush := w.(http.Flusher)
	sseWriter := NewSSEWriter(w)
	ctx := context.Background()
	messages := s.buildContextMessages(userMessage)
	engine := NewChatEngine(orb, s.agentRegistry, s.agentToolsJSON)
	engine.SetLimiter(s.AcquireAgentSlot)
	engine.OnChunk(func(data map[string]interface{}) {
		if data == nil {
			return
		}
		sseWriter.Write(data)
		if canFlush {
			flusher.Flush()
		}
	})
	finalContent, allToolCalls, allToolResults, err := engine.RunWithTools(ctx, messages)
	if err != nil {
		sseWriter.Write(map[string]interface{}{"error": err.Error()})
		return
	}
	storeContent := finalContent
	if len(allToolCalls) > 0 {
		// Persist content + tool activity together so history replay can show
		// what the assistant actually did.
		storeObj := map[string]interface{}{
			"content":      storeContent,
			"tool_calls":   allToolCalls,
			"tool_results": allToolResults,
		}
		storeJSON, _ := json.Marshal(storeObj)
		storeContent = string(storeJSON)
	}
	s.convStore.Add("assistant", storeContent)
	s.consumption.Record(engine.ProviderName(), engine.TotalTokens)
	sseWriter.Write(map[string]interface{}{"done": "true"})
}

// handleNonStreamChat runs the chat engine to completion and returns the full
// assistant reply as a single JSON response.
func (s *Server) handleNonStreamChat(w http.ResponseWriter, orb *orchestrator.Orchestrator, userMessage string) {
	ctx := context.Background()
	messages := s.buildContextMessages(userMessage)
	engine := NewChatEngine(orb, s.agentRegistry, s.agentToolsJSON)
	engine.SetLimiter(s.AcquireAgentSlot)
	finalContent, err := engine.RunNonStream(ctx, messages)
	if err != nil {
		writeError(w, err.Error(), http.StatusInternalServerError)
		return
	}
	s.convStore.Add("assistant", finalContent)
	s.consumption.Record(engine.ProviderName(), engine.TotalTokens)
	writeJSON(w, map[string]string{"content": finalContent})
}

// cleanThinkingTags removes <think>...</think> reasoning blocks from model
// output and trims surrounding whitespace.
func cleanThinkingTags(content string) string {
	return strings.TrimSpace(thinkingTagRegex.ReplaceAllString(content, ""))
}

// runReflectionReport runs the inactive AI provider on the user message to
// produce a preliminary analysis report that the active provider will then
// use as additional context. Returns ("", false) if no inactive provider is
// configured or on error — the caller falls back to a normal chat flow.
func (s *Server) runReflectionReport(userMessage string) (string, bool) {
	orb, err := orchestrator.NewForInactiveProvider(s.config)
	if err != nil {
		return "", false
	}
	orb.SetSystemPrompt("Tu es un analyste. Pour la question ci-dessous, produis un rapport bref (max 15 lignes) qui : (1) reformule l'objectif de l'utilisateur, (2) liste les points à clarifier ou les risques, (3) suggère une approche structurée. Pas de code, pas d'action — uniquement de l'analyse.")
	resp, err := orb.SendNoTools(userMessage)
	if err != nil {
		return "", false
	}
	return strings.TrimSpace(resp), true
}

// buildContextMessages assembles the message list for the model: an optional
// summary system message, as many recent (non-summarized) history entries as
// fit in the token budget, then the current user message. Token counts are
// approximated as runes/charsPerToken.
func (s *Server) buildContextMessages(userMessage string) []orchestrator.Message {
	history := s.convStore.Get()

	// Fixed overhead: system prompt, tools JSON, reply margin, user message.
	sysPromptTokens := utf8.RuneCountInString(agent.StudioSystemPrompt())/charsPerToken + 50
	toolsTokens := utf8.RuneCountInString(string(s.agentToolsJSON)) / charsPerToken
	responseMargin := 4000
	userMsgTokens := utf8.RuneCountInString(userMessage) / charsPerToken
	overhead := sysPromptTokens + toolsTokens + responseMargin + userMsgTokens
	available := contextWindowTokens - overhead
	if available < 1000 {
		available = 1000
	}

	// Walk history backwards, newest first, until the budget is spent or a
	// summarized entry is reached.
	included := 0
	tokensUsed := 0
	for i := len(history) - 1; i >= 0; i-- {
		if history[i].Summarized {
			break
		}
		displayContent := extractDisplayContent(history[i].Role, history[i].Content)
		msgTokens := utf8.RuneCountInString(displayContent) / charsPerToken
		if msgTokens == 0 {
			msgTokens = 1
		}
		if tokensUsed+msgTokens > available {
			break
		}
		tokensUsed += msgTokens
		included++
	}
	start := len(history) - included
	if start < 0 {
		start = 0
	}
	hasSummarized := false
	for i := 0; i < start; i++ {
		if history[i].Summarized {
			hasSummarized = true
			break
		}
	}

	messages := make([]orchestrator.Message, 0, included+2)
	summary := s.convStore.GetSummary()
	if summary != "" && (start > 0 || hasSummarized) {
		messages = append(messages, orchestrator.Message{
			Role:    "system",
			Content: orchestrator.TextContent("Résumé de la conversation précédente:\n" + summary),
		})
	}
	for _, m := range history[start:] {
		if m.Role == "system" {
			continue
		}
		displayContent := extractDisplayContent(m.Role, m.Content)
		messages = append(messages, orchestrator.Message{
			Role:    m.Role,
			Content: orchestrator.TextContent(displayContent),
		})
	}
	messages = append(messages, orchestrator.Message{
		Role:    "user",
		Content: orchestrator.TextContent(userMessage),
	})
	return messages
}

// autoSummarize condenses the older half of the conversation into a summary
// (folding in any previous summary) and marks those messages as summarized.
// Errors are silent: summarization is opportunistic and must never break chat.
func (s *Server) autoSummarize() {
	messages := s.convStore.Get()
	if len(messages) < 10 {
		return
	}
	half := len(messages) / 2
	// strings.Builder avoids the quadratic cost of += concatenation in a loop.
	var b strings.Builder
	for _, m := range messages[:half] {
		b.WriteString(m.Role)
		b.WriteString(": ")
		b.WriteString(m.Content)
		b.WriteString("\n\n")
	}
	oldText := b.String()
	if summary := s.convStore.GetSummary(); summary != "" {
		oldText = "Résumé précédent:\n" + summary + "\n\nNouveaux échanges:\n" + oldText
	}
	orb, err := orchestrator.New(s.config)
	if err != nil {
		return
	}
	orb.SetSystemPrompt(summarizePrompt)
	result, err := orb.Send(oldText)
	if err != nil {
		return
	}
	s.convStore.SetSummary(result)
	s.convStore.MarkSummarized(half)
}

// handleChatHistory (GET) returns the stored conversation plus token-budget
// bookkeeping used by the UI to display the context gauge.
func (s *Server) handleChatHistory(w http.ResponseWriter, r *http.Request) {
	if r.Method != "GET" {
		writeError(w, "GET only", http.StatusMethodNotAllowed)
		return
	}
	messages := s.convStore.Get()
	writeJSON(w, map[string]interface{}{
		"messages":     messages,
		"tokens":       s.convStore.ApproxTokenCount(),
		"max_tokens":   contextWindowTokens,
		"summarize_at": int(float64(contextWindowTokens) * summarizeRatio),
		"summary":      s.convStore.GetSummary(),
	})
}

// handleChatClear (POST) wipes the stored conversation.
func (s *Server) handleChatClear(w http.ResponseWriter, r *http.Request) {
	if r.Method != "POST" {
		writeError(w, "POST only", http.StatusMethodNotAllowed)
		return
	}
	s.convStore.Clear()
	writeJSON(w, map[string]string{"status": "ok"})
}

// handleChatSummarize (POST) forces a summarization pass and reports the
// resulting token count and summary.
func (s *Server) handleChatSummarize(w http.ResponseWriter, r *http.Request) {
	if r.Method != "POST" {
		writeError(w, "POST only", http.StatusMethodNotAllowed)
		return
	}
	s.autoSummarize()
	writeJSON(w, map[string]interface{}{
		"status":  "ok",
		"tokens":  s.convStore.ApproxTokenCount(),
		"summary": s.convStore.GetSummary(),
	})
}