All checks were successful
Stable Release / stable (push) Successful in 1m34s
Major additions: - RAG pipeline (indexing, chunking, search) with sidebar upload button - Memory system with CRUD API - Plugins and lessons modules - MCP discovery and MCP server - Advanced skills (auto-create, conditional, improver) - Agent browser/image support, delegate, sessions - File editor with CodeMirror in split panes - Markdown rendering via react-markdown + KaTeX + highlight.js - Raw markdown toggle - PWA manifest + service worker - Extension UI redesign with new design tokens and studio-style chat - Pipeline API for chat streaming - Mobile responsive layout 💘 Generated with Crush Assisted-by: GLM-5.1 via Crush <crush@charm.land>
379 lines
12 KiB
Go
379 lines
12 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// BrowserParams is the JSON argument payload accepted by the browser tool.
// Action picks the operation; the remaining fields are optional on the wire
// and are only consulted by the actions named in their descriptions.
type BrowserParams struct {
	// Action is the name of the browser operation to run.
	Action string `json:"action" description:"Browser action: navigate, screenshot, click, type, evaluate, fill_form, read_page, close"`
	// URL is the page address to open.
	URL string `json:"url,omitempty" description:"URL to navigate to (for navigate action)"`
	// Selector identifies the element targeted by click/type.
	Selector string `json:"selector,omitempty" description:"CSS/XPath selector for click, type, fill_form actions"`
	// Value is the text to enter; fill_form parses it as a JSON array.
	Value string `json:"value,omitempty" description:"Value to type or fill"`
	// Script is the JavaScript source used by the evaluate action.
	Script string `json:"script,omitempty" description:"JavaScript to evaluate (for evaluate action)"`
	// Timeout is the per-action time limit, expressed in seconds.
	Timeout int `json:"timeout,omitempty" description:"Timeout in seconds for the action (default 30)"`
}
|
|
|
|
// BrowserResponse is a JSON-serialisable result shape for a browser action.
// Content and IsError are always emitted; the other fields are dropped from
// the encoding when empty.
type BrowserResponse struct {
	// Content carries the textual payload of the result.
	Content string `json:"content"`
	// URL is the page address associated with the result.
	URL string `json:"url,omitempty"`
	// Title is the page title associated with the result.
	Title string `json:"title,omitempty"`
	// Screenshot refers to a captured screenshot, when there is one.
	Screenshot string `json:"screenshot,omitempty"`
	// IsError marks the response as a failure.
	IsError bool `json:"is_error"`
}
|
|
|
|
// BrowserSession is the bookkeeping record kept per browser session in
// BrowserManager.sessions. It embeds a mutex, so it must be handled through a
// pointer and never copied.
type BrowserSession struct {
	// id is the session identifier.
	id string
	// url is the page address recorded for the session.
	url string
	// title is the page title recorded for the session.
	title string
	// mu is a per-session lock.
	mu sync.Mutex
	// createdAt is the session's creation timestamp.
	createdAt time.Time
}
|
|
|
|
type BrowserManager struct {
|
|
mu sync.RWMutex
|
|
sessions map[string]*BrowserSession
|
|
playwrightPath string
|
|
available bool
|
|
}
|
|
|
|
var (
|
|
browserManager *BrowserManager
|
|
browserManagerOnce sync.Once
|
|
)
|
|
|
|
func GetBrowserManager() *BrowserManager {
|
|
browserManagerOnce.Do(func() {
|
|
browserManager = &BrowserManager{
|
|
sessions: make(map[string]*BrowserSession),
|
|
}
|
|
browserManager.playwrightPath, browserManager.available = detectPlaywright()
|
|
})
|
|
return browserManager
|
|
}
|
|
|
|
// detectPlaywright searches PATH for a "playwright" executable and, failing
// that, for "npx". It returns the resolved path of the first one found
// together with true, or an empty string and false when neither exists.
func detectPlaywright() (string, bool) {
	candidates := [...]string{"playwright", "npx"}
	for i := 0; i < len(candidates); i++ {
		resolved, err := exec.LookPath(candidates[i])
		if err != nil {
			continue
		}
		return resolved, true
	}
	return "", false
}
|
|
|
|
func NewBrowserTool() (*ToolDefinition, error) {
|
|
return NewTool("browser",
|
|
"Interact with web pages using a headless browser (Playwright). Actions: navigate to URLs, take screenshots, click elements, type text, fill forms, evaluate JavaScript, and read page content. Sessions persist per conversation.",
|
|
func(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
if p.Action == "" {
|
|
return TextErrorResponse("action is required (navigate, screenshot, click, type, evaluate, fill_form, read_page, close)"), nil
|
|
}
|
|
|
|
mgr := GetBrowserManager()
|
|
if !mgr.available {
|
|
return TextErrorResponse("Playwright is not installed. Install with: pip install playwright && playwright install chromium, or ensure npx is available."), nil
|
|
}
|
|
|
|
timeout := time.Duration(p.Timeout) * time.Second
|
|
if timeout == 0 {
|
|
timeout = 30 * time.Second
|
|
}
|
|
if timeout > 120*time.Second {
|
|
timeout = 120 * time.Second
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(ctx, timeout)
|
|
defer cancel()
|
|
|
|
switch p.Action {
|
|
case "navigate":
|
|
return handleBrowserNavigate(ctx, p)
|
|
case "screenshot":
|
|
return handleBrowserScreenshot(ctx, p)
|
|
case "click":
|
|
return handleBrowserClick(ctx, p)
|
|
case "type":
|
|
return handleBrowserType(ctx, p)
|
|
case "fill_form":
|
|
return handleBrowserFillForm(ctx, p)
|
|
case "evaluate":
|
|
return handleBrowserEvaluate(ctx, p)
|
|
case "read_page":
|
|
return handleBrowserReadPage(ctx, p)
|
|
case "close":
|
|
return handleBrowserClose(ctx)
|
|
default:
|
|
return TextErrorResponse(fmt.Sprintf("unknown browser action: %s. Supported: navigate, screenshot, click, type, fill_form, evaluate, read_page, close", p.Action)), nil
|
|
}
|
|
})
|
|
}
|
|
|
|
func handleBrowserNavigate(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
if p.URL == "" {
|
|
return TextErrorResponse("url is required for navigate action"), nil
|
|
}
|
|
|
|
script := fmt.Sprintf(`
|
|
const { chromium } = require('playwright');
|
|
(async () => {
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage();
|
|
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
const title = await page.title();
|
|
const content = await page.evaluate(() => document.body.innerText);
|
|
console.log(JSON.stringify({ url: page.url(), title, content: content.substring(0, 8000) }));
|
|
await browser.close();
|
|
})();
|
|
`, p.URL)
|
|
|
|
result, err := runPlaywrightScript(ctx, script)
|
|
if err != nil {
|
|
return TextErrorResponse(fmt.Sprintf("navigate error: %v", err)), nil
|
|
}
|
|
|
|
return TextResponse(result), nil
|
|
}
|
|
|
|
func handleBrowserScreenshot(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
url := p.URL
|
|
if url == "" {
|
|
url = "about:blank"
|
|
}
|
|
|
|
home, _ := os.UserHomeDir()
|
|
screenshotDir := filepath.Join(home, ".muyue", "screenshots")
|
|
os.MkdirAll(screenshotDir, 0755)
|
|
screenshotPath := filepath.Join(screenshotDir, fmt.Sprintf("browser_%d.png", time.Now().UnixNano()))
|
|
|
|
script := fmt.Sprintf(`
|
|
const { chromium } = require('playwright');
|
|
(async () => {
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage();
|
|
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
await page.screenshot({ path: %q, fullPage: false });
|
|
const title = await page.title();
|
|
console.log(JSON.stringify({ screenshot: %q, title, url: page.url() }));
|
|
await browser.close();
|
|
})();
|
|
`, url, screenshotPath, screenshotPath)
|
|
|
|
result, err := runPlaywrightScript(ctx, script)
|
|
if err != nil {
|
|
return TextErrorResponse(fmt.Sprintf("screenshot error: %v", err)), nil
|
|
}
|
|
|
|
return TextResponse(fmt.Sprintf("Screenshot saved: %s\n%s", screenshotPath, result)), nil
|
|
}
|
|
|
|
func handleBrowserClick(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
if p.Selector == "" {
|
|
return TextErrorResponse("selector is required for click action"), nil
|
|
}
|
|
|
|
script := fmt.Sprintf(`
|
|
const { chromium } = require('playwright');
|
|
(async () => {
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage();
|
|
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
await page.click(%q);
|
|
await page.waitForTimeout(1000);
|
|
const title = await page.title();
|
|
const content = await page.evaluate(() => document.body.innerText);
|
|
console.log(JSON.stringify({ url: page.url(), title, content: content.substring(0, 5000) }));
|
|
await browser.close();
|
|
})();
|
|
`, p.URL, p.Selector)
|
|
|
|
result, err := runPlaywrightScript(ctx, script)
|
|
if err != nil {
|
|
return TextErrorResponse(fmt.Sprintf("click error: %v", err)), nil
|
|
}
|
|
|
|
return TextResponse(result), nil
|
|
}
|
|
|
|
func handleBrowserType(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
if p.Selector == "" || p.Value == "" {
|
|
return TextErrorResponse("selector and value are required for type action"), nil
|
|
}
|
|
|
|
script := fmt.Sprintf(`
|
|
const { chromium } = require('playwright');
|
|
(async () => {
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage();
|
|
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
await page.fill(%q, %q);
|
|
const content = await page.evaluate(() => document.body.innerText);
|
|
console.log(JSON.stringify({ url: page.url(), content: content.substring(0, 5000) }));
|
|
await browser.close();
|
|
})();
|
|
`, p.URL, p.Selector, p.Value)
|
|
|
|
result, err := runPlaywrightScript(ctx, script)
|
|
if err != nil {
|
|
return TextErrorResponse(fmt.Sprintf("type error: %v", err)), nil
|
|
}
|
|
|
|
return TextResponse(result), nil
|
|
}
|
|
|
|
func handleBrowserFillForm(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
var fields []struct {
|
|
Selector string `json:"selector"`
|
|
Value string `json:"value"`
|
|
}
|
|
if err := json.Unmarshal([]byte(p.Value), &fields); err != nil {
|
|
return TextErrorResponse("fill_form value must be a JSON array of {selector, value} objects"), nil
|
|
}
|
|
|
|
var fillsJS strings.Builder
|
|
for _, f := range fields {
|
|
fillsJS.WriteString(fmt.Sprintf("\tawait page.fill(%q, %q);\n", f.Selector, f.Value))
|
|
}
|
|
|
|
script := fmt.Sprintf(`
|
|
const { chromium } = require('playwright');
|
|
(async () => {
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage();
|
|
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
%s
|
|
const content = await page.evaluate(() => document.body.innerText);
|
|
console.log(JSON.stringify({ url: page.url(), content: content.substring(0, 5000) }));
|
|
await browser.close();
|
|
})();
|
|
`, p.URL, fillsJS.String())
|
|
|
|
result, err := runPlaywrightScript(ctx, script)
|
|
if err != nil {
|
|
return TextErrorResponse(fmt.Sprintf("fill_form error: %v", err)), nil
|
|
}
|
|
|
|
return TextResponse(result), nil
|
|
}
|
|
|
|
func handleBrowserEvaluate(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
if p.Script == "" {
|
|
return TextErrorResponse("script is required for evaluate action"), nil
|
|
}
|
|
|
|
url := p.URL
|
|
if url == "" {
|
|
url = "about:blank"
|
|
}
|
|
|
|
script := fmt.Sprintf(`
|
|
const { chromium } = require('playwright');
|
|
(async () => {
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage();
|
|
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
const result = await page.evaluate(() => {
|
|
try { return String((%s)); } catch(e) { return String(e); }
|
|
});
|
|
console.log(JSON.stringify({ result: result.substring(0, 8000) }));
|
|
await browser.close();
|
|
})();
|
|
`, url, p.Script)
|
|
|
|
result, err := runPlaywrightScript(ctx, script)
|
|
if err != nil {
|
|
return TextErrorResponse(fmt.Sprintf("evaluate error: %v", err)), nil
|
|
}
|
|
|
|
return TextResponse(result), nil
|
|
}
|
|
|
|
func handleBrowserReadPage(ctx context.Context, p BrowserParams) (ToolResponse, error) {
|
|
if p.URL == "" {
|
|
return TextErrorResponse("url is required for read_page action"), nil
|
|
}
|
|
|
|
script := fmt.Sprintf(`
|
|
const { chromium } = require('playwright');
|
|
(async () => {
|
|
const browser = await chromium.launch({ headless: true });
|
|
const page = await browser.newPage();
|
|
await page.goto(%q, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
const title = await page.title();
|
|
const html = await page.content();
|
|
console.log(JSON.stringify({ url: page.url(), title, content_length: html.length, content: html.substring(0, 15000) }));
|
|
await browser.close();
|
|
})();
|
|
`, p.URL)
|
|
|
|
result, err := runPlaywrightScript(ctx, script)
|
|
if err != nil {
|
|
return TextErrorResponse(fmt.Sprintf("read_page error: %v", err)), nil
|
|
}
|
|
|
|
return TextResponse(result), nil
|
|
}
|
|
|
|
func handleBrowserClose(ctx context.Context) (ToolResponse, error) {
|
|
mgr := GetBrowserManager()
|
|
mgr.mu.Lock()
|
|
defer mgr.mu.Unlock()
|
|
|
|
count := len(mgr.sessions)
|
|
mgr.sessions = make(map[string]*BrowserSession)
|
|
|
|
return TextResponse(fmt.Sprintf("Closed %d browser session(s)", count)), nil
|
|
}
|
|
|
|
// runPlaywrightScript writes script to a temporary .js file, runs it with
// `node` under ctx, and returns the combined stdout/stderr, truncated to
// about 10000 bytes (never splitting a UTF-8 sequence).
//
// Errors:
//   - node missing from PATH (checked before any file is created);
//   - failure to create, write or close the temporary file;
//   - "browser action timed out" when ctx's deadline expired;
//   - "playwright error: ..." for any other process failure. The captured
//     output is appended to that message because callers surface only the
//     error, and the output is where Node prints the actual cause.
//
// The temporary file is removed before returning.
func runPlaywrightScript(ctx context.Context, script string) (string, error) {
	// Fail fast: no point creating the script file when it cannot be run.
	if _, err := exec.LookPath("node"); err != nil {
		return "", fmt.Errorf("node is not installed. Install Node.js to use the browser tool")
	}

	tmpFile, err := os.CreateTemp("", "muyue-browser-*.js")
	if err != nil {
		return "", fmt.Errorf("create temp file: %w", err)
	}
	defer os.Remove(tmpFile.Name())

	if _, err := tmpFile.WriteString(script); err != nil {
		tmpFile.Close()
		return "", fmt.Errorf("write script: %w", err)
	}
	// A failed close can mean the script never fully reached disk.
	if err := tmpFile.Close(); err != nil {
		return "", fmt.Errorf("close script: %w", err)
	}

	output, err := exec.CommandContext(ctx, "node", tmpFile.Name()).CombinedOutput()
	result := string(output)

	const maxOutput = 10000
	if len(result) > maxOutput {
		// Step back over UTF-8 continuation bytes so the cut lands on a rune
		// boundary.
		cut := maxOutput
		for cut > 0 && result[cut]&0xC0 == 0x80 {
			cut--
		}
		result = result[:cut] + "\n... [truncated]"
	}

	if err != nil {
		if ctx.Err() == context.DeadlineExceeded {
			return "", fmt.Errorf("browser action timed out")
		}
		if detail := strings.TrimSpace(result); detail != "" {
			return result, fmt.Errorf("playwright error: %w: %s", err, detail)
		}
		return result, fmt.Errorf("playwright error: %w", err)
	}

	return result, nil
}
|