feat: RAG, memory, plugins, lessons, file editor, split panes, Markdown rendering, PWA + UI overhaul
All checks were successful
Stable Release / stable (push) Successful in 1m34s
All checks were successful
Stable Release / stable (push) Successful in 1m34s
Major additions: - RAG pipeline (indexing, chunking, search) with sidebar upload button - Memory system with CRUD API - Plugins and lessons modules - MCP discovery and MCP server - Advanced skills (auto-create, conditional, improver) - Agent browser/image support, delegate, sessions - File editor with CodeMirror in split panes - Markdown rendering via react-markdown + KaTeX + highlight.js - Raw markdown toggle - PWA manifest + service worker - Extension UI redesign with new design tokens and studio-style chat - Pipeline API for chat streaming - Mobile responsive layout 💘 Generated with Crush Assisted-by: GLM-5.1 via Crush <crush@charm.land>
This commit is contained in:
378
internal/agent/browser.go
Normal file
378
internal/agent/browser.go
Normal file
@@ -0,0 +1,378 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// BrowserParams is the argument payload of the browser tool. Which of the
// optional fields are consulted depends on Action.
type BrowserParams struct {
	// Action names the operation to run.
	Action string `json:"action" description:"Browser action: navigate, screenshot, click, type, evaluate, fill_form, read_page, close"`
	// URL is the page to load before the action is carried out.
	URL string `json:"url,omitempty" description:"URL to navigate to (for navigate action)"`
	// Selector identifies the element targeted by click and type.
	Selector string `json:"selector,omitempty" description:"CSS/XPath selector for click, type, fill_form actions"`
	// Value is the text to enter; for fill_form it carries a JSON array of
	// {selector, value} objects.
	Value string `json:"value,omitempty" description:"Value to type or fill"`
	// Script is the JavaScript source used by the evaluate action.
	Script string `json:"script,omitempty" description:"JavaScript to evaluate (for evaluate action)"`
	// Timeout is expressed in seconds.
	Timeout int `json:"timeout,omitempty" description:"Timeout in seconds for the action (default 30)"`
}
|
||||
|
||||
// BrowserResponse is a JSON-serialisable description of the outcome of a
// browser action.
type BrowserResponse struct {
	// Content is always serialised, even when empty.
	Content string `json:"content"`
	// URL, Title and Screenshot are omitted from the JSON when empty.
	URL        string `json:"url,omitempty"`
	Title      string `json:"title,omitempty"`
	Screenshot string `json:"screenshot,omitempty"`
	// IsError is always serialised.
	IsError bool `json:"is_error"`
}
|
||||
|
||||
// BrowserSession is the bookkeeping record for one browser session held by
// BrowserManager. It embeds a mutex by value and must therefore be handled
// through a pointer.
type BrowserSession struct {
	id    string // session identifier
	url   string // URL associated with the session
	title string // page title associated with the session

	mu sync.Mutex // NOTE(review): presumably guards the fields above; confirm against the code that mutates sessions

	createdAt time.Time // creation timestamp
}
|
||||
|
||||
type BrowserManager struct {
|
||||
mu sync.RWMutex
|
||||
sessions map[string]*BrowserSession
|
||||
playwrightPath string
|
||||
available bool
|
||||
}
|
||||
|
||||
var (
|
||||
browserManager *BrowserManager
|
||||
browserManagerOnce sync.Once
|
||||
)
|
||||
|
||||
func GetBrowserManager() *BrowserManager {
|
||||
browserManagerOnce.Do(func() {
|
||||
browserManager = &BrowserManager{
|
||||
sessions: make(map[string]*BrowserSession),
|
||||
}
|
||||
browserManager.playwrightPath, browserManager.available = detectPlaywright()
|
||||
})
|
||||
return browserManager
|
||||
}
|
||||
|
||||
// detectPlaywright searches PATH for "playwright" and then "npx", in that
// order. It returns the path resolved by exec.LookPath for the first one
// found together with true, or "" and false when neither can be resolved.
func detectPlaywright() (string, bool) {
	candidates := [...]string{"playwright", "npx"}
	for i := range candidates {
		resolved, err := exec.LookPath(candidates[i])
		if err != nil {
			continue
		}
		return resolved, true
	}
	return "", false
}
|
||||
|
||||
func NewBrowserTool() (*ToolDefinition, error) {
|
||||
return NewTool("browser",
|
||||
"Interact with web pages using a headless browser (Playwright). Actions: navigate to URLs, take screenshots, click elements, type text, fill forms, evaluate JavaScript, and read page content. Sessions persist per conversation.",
|
||||
func(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
if p.Action == "" {
|
||||
return TextErrorResponse("action is required (navigate, screenshot, click, type, evaluate, fill_form, read_page, close)"), nil
|
||||
}
|
||||
|
||||
mgr := GetBrowserManager()
|
||||
if !mgr.available {
|
||||
return TextErrorResponse("Playwright is not installed. Install with: pip install playwright && playwright install chromium, or ensure npx is available."), nil
|
||||
}
|
||||
|
||||
timeout := time.Duration(p.Timeout) * time.Second
|
||||
if timeout == 0 {
|
||||
timeout = 30 * time.Second
|
||||
}
|
||||
if timeout > 120*time.Second {
|
||||
timeout = 120 * time.Second
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
switch p.Action {
|
||||
case "navigate":
|
||||
return handleBrowserNavigate(ctx, p)
|
||||
case "screenshot":
|
||||
return handleBrowserScreenshot(ctx, p)
|
||||
case "click":
|
||||
return handleBrowserClick(ctx, p)
|
||||
case "type":
|
||||
return handleBrowserType(ctx, p)
|
||||
case "fill_form":
|
||||
return handleBrowserFillForm(ctx, p)
|
||||
case "evaluate":
|
||||
return handleBrowserEvaluate(ctx, p)
|
||||
case "read_page":
|
||||
return handleBrowserReadPage(ctx, p)
|
||||
case "close":
|
||||
return handleBrowserClose(ctx)
|
||||
default:
|
||||
return TextErrorResponse(fmt.Sprintf("unknown browser action: %s. Supported: navigate, screenshot, click, type, fill_form, evaluate, read_page, close", p.Action)), nil
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func handleBrowserNavigate(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
if p.URL == "" {
|
||||
return TextErrorResponse("url is required for navigate action"), nil
|
||||
}
|
||||
|
||||
script := fmt.Sprintf(`
|
||||
const { chromium } = require('playwright');
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const page = await browser.newPage();
|
||||
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
const title = await page.title();
|
||||
const content = await page.evaluate(() => document.body.innerText);
|
||||
console.log(JSON.stringify({ url: page.url(), title, content: content.substring(0, 8000) }));
|
||||
await browser.close();
|
||||
})();
|
||||
`, p.URL)
|
||||
|
||||
result, err := runPlaywrightScript(ctx, script)
|
||||
if err != nil {
|
||||
return TextErrorResponse(fmt.Sprintf("navigate error: %v", err)), nil
|
||||
}
|
||||
|
||||
return TextResponse(result), nil
|
||||
}
|
||||
|
||||
func handleBrowserScreenshot(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
url := p.URL
|
||||
if url == "" {
|
||||
url = "about:blank"
|
||||
}
|
||||
|
||||
home, _ := os.UserHomeDir()
|
||||
screenshotDir := filepath.Join(home, ".muyue", "screenshots")
|
||||
os.MkdirAll(screenshotDir, 0755)
|
||||
screenshotPath := filepath.Join(screenshotDir, fmt.Sprintf("browser_%d.png", time.Now().UnixNano()))
|
||||
|
||||
script := fmt.Sprintf(`
|
||||
const { chromium } = require('playwright');
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const page = await browser.newPage();
|
||||
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
await page.screenshot({ path: %q, fullPage: false });
|
||||
const title = await page.title();
|
||||
console.log(JSON.stringify({ screenshot: %q, title, url: page.url() }));
|
||||
await browser.close();
|
||||
})();
|
||||
`, url, screenshotPath, screenshotPath)
|
||||
|
||||
result, err := runPlaywrightScript(ctx, script)
|
||||
if err != nil {
|
||||
return TextErrorResponse(fmt.Sprintf("screenshot error: %v", err)), nil
|
||||
}
|
||||
|
||||
return TextResponse(fmt.Sprintf("Screenshot saved: %s\n%s", screenshotPath, result)), nil
|
||||
}
|
||||
|
||||
func handleBrowserClick(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
if p.Selector == "" {
|
||||
return TextErrorResponse("selector is required for click action"), nil
|
||||
}
|
||||
|
||||
script := fmt.Sprintf(`
|
||||
const { chromium } = require('playwright');
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const page = await browser.newPage();
|
||||
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
await page.click(%q);
|
||||
await page.waitForTimeout(1000);
|
||||
const title = await page.title();
|
||||
const content = await page.evaluate(() => document.body.innerText);
|
||||
console.log(JSON.stringify({ url: page.url(), title, content: content.substring(0, 5000) }));
|
||||
await browser.close();
|
||||
})();
|
||||
`, p.URL, p.Selector)
|
||||
|
||||
result, err := runPlaywrightScript(ctx, script)
|
||||
if err != nil {
|
||||
return TextErrorResponse(fmt.Sprintf("click error: %v", err)), nil
|
||||
}
|
||||
|
||||
return TextResponse(result), nil
|
||||
}
|
||||
|
||||
func handleBrowserType(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
if p.Selector == "" || p.Value == "" {
|
||||
return TextErrorResponse("selector and value are required for type action"), nil
|
||||
}
|
||||
|
||||
script := fmt.Sprintf(`
|
||||
const { chromium } = require('playwright');
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const page = await browser.newPage();
|
||||
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
await page.fill(%q, %q);
|
||||
const content = await page.evaluate(() => document.body.innerText);
|
||||
console.log(JSON.stringify({ url: page.url(), content: content.substring(0, 5000) }));
|
||||
await browser.close();
|
||||
})();
|
||||
`, p.URL, p.Selector, p.Value)
|
||||
|
||||
result, err := runPlaywrightScript(ctx, script)
|
||||
if err != nil {
|
||||
return TextErrorResponse(fmt.Sprintf("type error: %v", err)), nil
|
||||
}
|
||||
|
||||
return TextResponse(result), nil
|
||||
}
|
||||
|
||||
func handleBrowserFillForm(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
var fields []struct {
|
||||
Selector string `json:"selector"`
|
||||
Value string `json:"value"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(p.Value), &fields); err != nil {
|
||||
return TextErrorResponse("fill_form value must be a JSON array of {selector, value} objects"), nil
|
||||
}
|
||||
|
||||
var fillsJS strings.Builder
|
||||
for _, f := range fields {
|
||||
fillsJS.WriteString(fmt.Sprintf("\tawait page.fill(%q, %q);\n", f.Selector, f.Value))
|
||||
}
|
||||
|
||||
script := fmt.Sprintf(`
|
||||
const { chromium } = require('playwright');
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const page = await browser.newPage();
|
||||
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
%s
|
||||
const content = await page.evaluate(() => document.body.innerText);
|
||||
console.log(JSON.stringify({ url: page.url(), content: content.substring(0, 5000) }));
|
||||
await browser.close();
|
||||
})();
|
||||
`, p.URL, fillsJS.String())
|
||||
|
||||
result, err := runPlaywrightScript(ctx, script)
|
||||
if err != nil {
|
||||
return TextErrorResponse(fmt.Sprintf("fill_form error: %v", err)), nil
|
||||
}
|
||||
|
||||
return TextResponse(result), nil
|
||||
}
|
||||
|
||||
func handleBrowserEvaluate(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
if p.Script == "" {
|
||||
return TextErrorResponse("script is required for evaluate action"), nil
|
||||
}
|
||||
|
||||
url := p.URL
|
||||
if url == "" {
|
||||
url = "about:blank"
|
||||
}
|
||||
|
||||
script := fmt.Sprintf(`
|
||||
const { chromium } = require('playwright');
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const page = await browser.newPage();
|
||||
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
const result = await page.evaluate(() => {
|
||||
try { return String((%s)); } catch(e) { return String(e); }
|
||||
});
|
||||
console.log(JSON.stringify({ result: result.substring(0, 8000) }));
|
||||
await browser.close();
|
||||
})();
|
||||
`, url, p.Script)
|
||||
|
||||
result, err := runPlaywrightScript(ctx, script)
|
||||
if err != nil {
|
||||
return TextErrorResponse(fmt.Sprintf("evaluate error: %v", err)), nil
|
||||
}
|
||||
|
||||
return TextResponse(result), nil
|
||||
}
|
||||
|
||||
func handleBrowserReadPage(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
||||
if p.URL == "" {
|
||||
return TextErrorResponse("url is required for read_page action"), nil
|
||||
}
|
||||
|
||||
script := fmt.Sprintf(`
|
||||
const { chromium } = require('playwright');
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const page = await browser.newPage();
|
||||
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
||||
const title = await page.title();
|
||||
const html = await page.content();
|
||||
console.log(JSON.stringify({ url: page.url(), title, content_length: html.length, content: html.substring(0, 15000) }));
|
||||
await browser.close();
|
||||
})();
|
||||
`, p.URL)
|
||||
|
||||
result, err := runPlaywrightScript(ctx, script)
|
||||
if err != nil {
|
||||
return TextErrorResponse(fmt.Sprintf("read_page error: %v", err)), nil
|
||||
}
|
||||
|
||||
return TextResponse(result), nil
|
||||
}
|
||||
|
||||
func handleBrowserClose(ctx context.Context) (ToolResponse, error) {
|
||||
mgr := GetBrowserManager()
|
||||
mgr.mu.Lock()
|
||||
defer mgr.mu.Unlock()
|
||||
|
||||
count := len(mgr.sessions)
|
||||
mgr.sessions = make(map[string]*BrowserSession)
|
||||
|
||||
return TextResponse(fmt.Sprintf("Closed %d browser session(s)", count)), nil
|
||||
}
|
||||
|
||||
// runPlaywrightScript writes script to a temporary .js file, executes it with
// node and returns the combined stdout/stderr, truncated to roughly 10000
// bytes (never in the middle of a UTF-8 sequence).
//
// Errors:
//   - node missing from PATH: reported before any file is created;
//   - temp file creation/write/close failures: wrapped with %w;
//   - ctx deadline exceeded: "browser action timed out" with empty output;
//   - any other node failure: "playwright error" wrapping the exec error and,
//     because callers only format the error, followed by node's own output.
//
// The temporary file is removed before returning.
//
// NOTE(review): node resolves require('playwright') relative to the script's
// directory (the OS temp dir), so this presumably relies on a globally
// resolvable playwright module (e.g. via NODE_PATH); confirm in deployment.
func runPlaywrightScript(ctx context.Context, script string) (string, error) {
	const maxOutput = 10000

	// Fail fast before touching the filesystem.
	if _, err := exec.LookPath("node"); err != nil {
		return "", fmt.Errorf("node is not installed. Install Node.js to use the browser tool")
	}

	tmpFile, err := os.CreateTemp("", "muyue-browser-*.js")
	if err != nil {
		return "", fmt.Errorf("create temp file: %w", err)
	}
	defer os.Remove(tmpFile.Name())

	if _, err := tmpFile.WriteString(script); err != nil {
		tmpFile.Close()
		return "", fmt.Errorf("write script: %w", err)
	}
	// A failed close can mean the script was not fully flushed to disk.
	if err := tmpFile.Close(); err != nil {
		return "", fmt.Errorf("close script: %w", err)
	}

	output, err := exec.CommandContext(ctx, "node", tmpFile.Name()).CombinedOutput()
	result := string(output)

	if len(result) > maxOutput {
		// Back up over UTF-8 continuation bytes so a rune is never split.
		cut := maxOutput
		for cut > 0 && result[cut]&0xC0 == 0x80 {
			cut--
		}
		result = result[:cut] + "\n... [truncated]"
	}

	if err != nil {
		if ctx.Err() == context.DeadlineExceeded {
			return "", fmt.Errorf("browser action timed out")
		}
		if detail := strings.TrimSpace(result); detail != "" {
			return result, fmt.Errorf("playwright error: %w: %s", err, detail)
		}
		return result, fmt.Errorf("playwright error: %w", err)
	}

	return result, nil
}
|
||||
Reference in New Issue
Block a user