package agent

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"time"
	"unicode/utf8"
)

const (
	// defaultBrowserTimeout applies when the caller passes no (or a
	// non-positive) timeout.
	defaultBrowserTimeout = 30 * time.Second
	// maxBrowserTimeout is the upper bound for a single browser action.
	maxBrowserTimeout = 120 * time.Second
	// maxPlaywrightOutput caps the number of bytes of Node output returned
	// to the caller.
	maxPlaywrightOutput = 10000
	// blankBrowserURL is loaded when an action does not strictly need a URL.
	blankBrowserURL = "about:blank"
)

// BrowserParams is the argument object accepted by the browser tool.
type BrowserParams struct {
	Action   string `json:"action" description:"Browser action: navigate, screenshot, click, type, evaluate, fill_form, read_page, close"`
	URL      string `json:"url,omitempty" description:"URL to load before performing the action (required for navigate, read_page, click, type, fill_form; defaults to about:blank for screenshot and evaluate)"`
	Selector string `json:"selector,omitempty" description:"CSS/XPath selector for click, type, fill_form actions"`
	Value    string `json:"value,omitempty" description:"Value to type or fill"`
	Script   string `json:"script,omitempty" description:"JavaScript to evaluate (for evaluate action)"`
	Timeout  int    `json:"timeout,omitempty" description:"Timeout in seconds for the action (default 30)"`
}

// BrowserResponse describes a structured browser result. Nothing in this file
// constructs it; it is kept for other code in the package.
type BrowserResponse struct {
	Content    string `json:"content"`
	URL        string `json:"url,omitempty"`
	Title      string `json:"title,omitempty"`
	Screenshot string `json:"screenshot,omitempty"`
	IsError    bool   `json:"is_error"`
}

// BrowserSession holds bookkeeping for one browser session. No code in this
// file creates sessions; the type only backs the BrowserManager registry.
type BrowserSession struct {
	id        string
	url       string
	title     string
	mu        sync.Mutex
	createdAt time.Time
}

// BrowserManager is the process-wide registry of browser sessions together
// with the result of the one-time Playwright detection.
type BrowserManager struct {
	mu             sync.RWMutex
	sessions       map[string]*BrowserSession
	playwrightPath string
	available      bool
}

var (
	browserManager     *BrowserManager
	browserManagerOnce sync.Once
)

// GetBrowserManager returns the shared BrowserManager, creating it and probing
// for Playwright on first use.
func GetBrowserManager() *BrowserManager {
	browserManagerOnce.Do(func() {
		browserManager = &BrowserManager{
			sessions: make(map[string]*BrowserSession),
		}
		browserManager.playwrightPath, browserManager.available = detectPlaywright()
	})
	return browserManager
}

// detectPlaywright reports the resolved path of the first of "playwright" or
// "npx" found on PATH, and whether either was found at all.
func detectPlaywright() (string, bool) {
	for _, name := range []string{"playwright", "npx"} {
		if path, err := exec.LookPath(name); err == nil {
			return path, true
		}
	}
	return "", false
}

// NewBrowserTool builds the "browser" tool definition. The handler validates
// the action, bounds the timeout and dispatches to the per-action handler.
func NewBrowserTool() (*ToolDefinition, error) {
	return NewTool("browser",
		"Interact with web pages using a headless browser (Playwright). Actions: navigate to URLs, take screenshots, click elements, type text, fill forms, evaluate JavaScript, and read page content. Each action launches a fresh headless browser and loads url first; page state does not persist between calls.",
		func(ctx context.Context, p BrowserParams) (ToolResponse, error) {
			if p.Action == "" {
				return TextErrorResponse("action is required (navigate, screenshot, click, type, evaluate, fill_form, read_page, close)"), nil
			}

			mgr := GetBrowserManager()
			if !mgr.available {
				return TextErrorResponse("Playwright is not installed. Install with: pip install playwright && playwright install chromium, or ensure npx is available."), nil
			}

			ctx, cancel := context.WithTimeout(ctx, browserTimeout(p.Timeout))
			defer cancel()

			switch p.Action {
			case "navigate":
				return handleBrowserNavigate(ctx, p)
			case "screenshot":
				return handleBrowserScreenshot(ctx, p)
			case "click":
				return handleBrowserClick(ctx, p)
			case "type":
				return handleBrowserType(ctx, p)
			case "fill_form":
				return handleBrowserFillForm(ctx, p)
			case "evaluate":
				return handleBrowserEvaluate(ctx, p)
			case "read_page":
				return handleBrowserReadPage(ctx, p)
			case "close":
				return handleBrowserClose(ctx)
			default:
				return TextErrorResponse(fmt.Sprintf("unknown browser action: %s. Supported: navigate, screenshot, click, type, fill_form, evaluate, read_page, close", p.Action)), nil
			}
		})
}

// browserTimeout converts a caller-supplied number of seconds into a bounded
// duration. Non-positive values select the default (a negative value would
// otherwise yield an already-expired context), and the comparison is done on
// the integer so a huge value cannot overflow the Duration multiplication.
func browserTimeout(seconds int) time.Duration {
	if seconds <= 0 {
		return defaultBrowserTimeout
	}
	if seconds > int(maxBrowserTimeout/time.Second) {
		return maxBrowserTimeout
	}
	return time.Duration(seconds) * time.Second
}

// jsString renders s as a JavaScript string literal. JSON encoding is used
// instead of fmt's %q because %q emits Go-only escapes (for example \a or
// \U0001F600) that JavaScript interprets differently.
func jsString(s string) string {
	b, err := json.Marshal(s)
	if err != nil {
		// Marshalling a plain string is not expected to fail; fall back to
		// Go quoting rather than emitting an unquoted value.
		return fmt.Sprintf("%q", s)
	}
	return string(b)
}

// playwrightScript wraps steps (JavaScript statements that may use `page`) in
// the shared driver: launch headless Chromium, open a page, load url, run the
// steps, and always close the browser. A rejected promise is printed to
// stderr and turned into a non-zero exit code so the Go side sees a failure.
func playwrightScript(url, steps string) string {
	return fmt.Sprintf(`const { chromium } = require('playwright');
(async () => {
	const browser = await chromium.launch({ headless: true });
	try {
		const page = await browser.newPage();
		await page.goto(%s, { waitUntil: 'domcontentloaded', timeout: 30000 });
%s
	} finally {
		await browser.close();
	}
})().catch((err) => {
	console.error(String((err && err.stack) || err));
	process.exitCode = 1;
});
`, jsString(url), steps)
}

// handleBrowserNavigate loads p.URL and returns a JSON object with the final
// URL, the title and up to 8000 characters of the page's visible text.
func handleBrowserNavigate(ctx context.Context, p BrowserParams) (ToolResponse, error) {
	if p.URL == "" {
		return TextErrorResponse("url is required for navigate action"), nil
	}

	steps := `		const title = await page.title();
		const content = await page.evaluate(() => document.body.innerText);
		console.log(JSON.stringify({ url: page.url(), title, content: content.substring(0, 8000) }));`

	result, err := runPlaywrightScript(ctx, playwrightScript(p.URL, steps))
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("navigate error: %v", err)), nil
	}
	return TextResponse(result), nil
}

// handleBrowserScreenshot loads p.URL (about:blank when empty), writes a
// viewport PNG under ~/.muyue/screenshots and returns the saved path followed
// by the script's JSON output.
func handleBrowserScreenshot(ctx context.Context, p BrowserParams) (ToolResponse, error) {
	url := p.URL
	if url == "" {
		url = blankBrowserURL
	}

	home, err := os.UserHomeDir()
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("screenshot error: resolve home directory: %v", err)), nil
	}
	screenshotDir := filepath.Join(home, ".muyue", "screenshots")
	if err := os.MkdirAll(screenshotDir, 0o755); err != nil {
		return TextErrorResponse(fmt.Sprintf("screenshot error: create screenshot directory: %v", err)), nil
	}
	screenshotPath := filepath.Join(screenshotDir, fmt.Sprintf("browser_%d.png", time.Now().UnixNano()))

	quotedPath := jsString(screenshotPath)
	steps := fmt.Sprintf(`		await page.screenshot({ path: %s, fullPage: false });
		const title = await page.title();
		console.log(JSON.stringify({ screenshot: %s, title, url: page.url() }));`, quotedPath, quotedPath)

	result, err := runPlaywrightScript(ctx, playwrightScript(url, steps))
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("screenshot error: %v", err)), nil
	}
	return TextResponse(fmt.Sprintf("Screenshot saved: %s\n%s", screenshotPath, result)), nil
}

// handleBrowserClick loads p.URL, clicks p.Selector, waits one second and
// returns the resulting URL, title and up to 5000 characters of visible text.
func handleBrowserClick(ctx context.Context, p BrowserParams) (ToolResponse, error) {
	if p.Selector == "" {
		return TextErrorResponse("selector is required for click action"), nil
	}
	// Every action starts from a fresh page, so there is nothing to click
	// unless a URL is supplied.
	if p.URL == "" {
		return TextErrorResponse("url is required for click action"), nil
	}

	steps := fmt.Sprintf(`		await page.click(%s);
		await page.waitForTimeout(1000);
		const title = await page.title();
		const content = await page.evaluate(() => document.body.innerText);
		console.log(JSON.stringify({ url: page.url(), title, content: content.substring(0, 5000) }));`, jsString(p.Selector))

	result, err := runPlaywrightScript(ctx, playwrightScript(p.URL, steps))
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("click error: %v", err)), nil
	}
	return TextResponse(result), nil
}

// handleBrowserType loads p.URL, fills p.Selector with p.Value and returns the
// URL and up to 5000 characters of visible text.
func handleBrowserType(ctx context.Context, p BrowserParams) (ToolResponse, error) {
	if p.Selector == "" || p.Value == "" {
		return TextErrorResponse("selector and value are required for type action"), nil
	}
	if p.URL == "" {
		return TextErrorResponse("url is required for type action"), nil
	}

	steps := fmt.Sprintf(`		await page.fill(%s, %s);
		const content = await page.evaluate(() => document.body.innerText);
		console.log(JSON.stringify({ url: page.url(), content: content.substring(0, 5000) }));`, jsString(p.Selector), jsString(p.Value))

	result, err := runPlaywrightScript(ctx, playwrightScript(p.URL, steps))
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("type error: %v", err)), nil
	}
	return TextResponse(result), nil
}

// handleBrowserFillForm loads p.URL and fills every field listed in p.Value,
// which must be a JSON array of {selector, value} objects. It returns the URL
// and up to 5000 characters of visible text.
func handleBrowserFillForm(ctx context.Context, p BrowserParams) (ToolResponse, error) {
	var fields []struct {
		Selector string `json:"selector"`
		Value    string `json:"value"`
	}
	if err := json.Unmarshal([]byte(p.Value), &fields); err != nil {
		return TextErrorResponse("fill_form value must be a JSON array of {selector, value} objects"), nil
	}
	if p.URL == "" {
		return TextErrorResponse("url is required for fill_form action"), nil
	}

	var steps strings.Builder
	for _, f := range fields {
		fmt.Fprintf(&steps, "\t\tawait page.fill(%s, %s);\n", jsString(f.Selector), jsString(f.Value))
	}
	steps.WriteString(`		const content = await page.evaluate(() => document.body.innerText);
		console.log(JSON.stringify({ url: page.url(), content: content.substring(0, 5000) }));`)

	result, err := runPlaywrightScript(ctx, playwrightScript(p.URL, steps.String()))
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("fill_form error: %v", err)), nil
	}
	return TextResponse(result), nil
}

// handleBrowserEvaluate loads p.URL (about:blank when empty), evaluates
// p.Script as an expression inside the page and returns up to 8000 characters
// of its string form. An exception thrown by the expression is returned as
// its string form rather than failing the action.
func handleBrowserEvaluate(ctx context.Context, p BrowserParams) (ToolResponse, error) {
	if p.Script == "" {
		return TextErrorResponse("script is required for evaluate action"), nil
	}
	url := p.URL
	if url == "" {
		url = blankBrowserURL
	}

	// The script is handed to page.evaluate as a *string expression* instead
	// of being spliced into the Node-side source, so it can only ever run in
	// the page and cannot alter or escape into the Node driver script. The
	// newline before the closing parenthesis tolerates a trailing // comment.
	pageExpr := "(() => { try { return String((" + p.Script + "\n)); } catch(e) { return String(e); } })()"
	steps := fmt.Sprintf(`		const result = await page.evaluate(%s);
		console.log(JSON.stringify({ result: result.substring(0, 8000) }));`, jsString(pageExpr))

	result, err := runPlaywrightScript(ctx, playwrightScript(url, steps))
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("evaluate error: %v", err)), nil
	}
	return TextResponse(result), nil
}

// handleBrowserReadPage loads p.URL and returns a JSON object with the URL,
// title, total HTML length and the first 15000 characters of the HTML.
func handleBrowserReadPage(ctx context.Context, p BrowserParams) (ToolResponse, error) {
	if p.URL == "" {
		return TextErrorResponse("url is required for read_page action"), nil
	}

	steps := `		const title = await page.title();
		const html = await page.content();
		console.log(JSON.stringify({ url: page.url(), title, content_length: html.length, content: html.substring(0, 15000) }));`

	result, err := runPlaywrightScript(ctx, playwrightScript(p.URL, steps))
	if err != nil {
		return TextErrorResponse(fmt.Sprintf("read_page error: %v", err)), nil
	}
	return TextResponse(result), nil
}

// handleBrowserClose empties the manager's session registry and reports how
// many entries were dropped. ctx is accepted for signature symmetry with the
// other handlers and is not used.
func handleBrowserClose(ctx context.Context) (ToolResponse, error) {
	mgr := GetBrowserManager()
	mgr.mu.Lock()
	defer mgr.mu.Unlock()

	count := len(mgr.sessions)
	mgr.sessions = make(map[string]*BrowserSession)
	return TextResponse(fmt.Sprintf("Closed %d browser session(s)", count)), nil
}

// truncateOutput limits s to at most limit bytes plus a truncation marker,
// backing up to a rune boundary so a multi-byte character is never split.
func truncateOutput(s string, limit int) string {
	if len(s) <= limit {
		return s
	}
	cut := limit
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "\n... [truncated]"
}

// runPlaywrightScript writes script to a temporary .js file and runs it with
// node, returning the combined stdout/stderr truncated to maxPlaywrightOutput
// bytes.
//
// It returns an error when node is missing, the temp file cannot be written,
// ctx's deadline expires, or node exits unsuccessfully. In the last case the
// captured output is included in the error so callers can see the cause
// instead of a bare "exit status 1".
func runPlaywrightScript(ctx context.Context, script string) (string, error) {
	if _, err := exec.LookPath("node"); err != nil {
		return "", fmt.Errorf("node is not installed. Install Node.js to use the browser tool")
	}

	tmpFile, err := os.CreateTemp("", "muyue-browser-*.js")
	if err != nil {
		return "", fmt.Errorf("create temp file: %w", err)
	}
	defer os.Remove(tmpFile.Name())

	if _, err := tmpFile.WriteString(script); err != nil {
		tmpFile.Close()
		return "", fmt.Errorf("write script: %w", err)
	}
	if err := tmpFile.Close(); err != nil {
		return "", fmt.Errorf("close script: %w", err)
	}

	output, err := exec.CommandContext(ctx, "node", tmpFile.Name()).CombinedOutput()
	result := truncateOutput(string(output), maxPlaywrightOutput)
	if err != nil {
		if ctx.Err() == context.DeadlineExceeded {
			return "", fmt.Errorf("browser action timed out")
		}
		if detail := strings.TrimSpace(result); detail != "" {
			return result, fmt.Errorf("playwright error: %w: %s", err, detail)
		}
		return result, fmt.Errorf("playwright error: %w", err)
	}
	return result, nil
}