package api // Browser-test feature: an out-of-process page (the user's target tab) // connects to Muyue via WebSocket using a short-lived token, and exposes a // thin RPC: Studio's AI can list clickable elements, click them, evaluate JS, // read the recent console buffer, and observe what changes after each action. // // Threat model: an injected snippet runs in the user's chosen page only, with // the same origin as that page; the WS endpoint is bound to localhost and // gated by a 5-minute token issued by the local Muyue server. import ( "context" "crypto/rand" "encoding/base64" "encoding/hex" "encoding/json" "fmt" "net/http" "os" "strings" "sync" "time" "github.com/gorilla/websocket" "github.com/muyue/muyue/internal/agent" ) // thin os wrappers (kept here so saveScreenshot stays independent of any // existing helper file's evolution) func osUserHomeDir() (string, error) { return os.UserHomeDir() } func mkdirAll(p string, m os.FileMode) error { return os.MkdirAll(p, m) } func writeFile(p string, b []byte, m os.FileMode) error { return os.WriteFile(p, b, m) } func base64StdDecode(s string) ([]byte, error) { return base64.StdEncoding.DecodeString(s) } const ( // browserTestTokenTTL is a sliding window: every successful WS connect // using the token resets it. So the user re-pasting the snippet after a // page reload / navigation seamlessly resumes (same token, same session // continuation in the AI's view), as long as no more than this gap of // inactivity occurs. browserTestTokenTTL = 60 * time.Minute browserTestCommandTTL = 30 * time.Second browserTestConsoleMax = 200 browserTestSessionsMax = 16 ) // BrowserTestSession represents one connected browser tab. type BrowserTestSession struct { ID string URL string Title string conn *websocket.Conn mu sync.Mutex console []ConsoleEntry pending map[string]chan json.RawMessage pendingMu sync.Mutex connectedAt time.Time writeMu sync.Mutex } // ConsoleEntry is a captured console message from the connected page. 
type ConsoleEntry struct { Level string `json:"level"` // log, info, warn, error, debug Message string `json:"message"` Time string `json:"time"` } // BrowserTestStore manages active sessions + pending one-shot connect tokens. type BrowserTestStore struct { mu sync.RWMutex sessions map[string]*BrowserTestSession tokens map[string]time.Time tokensMu sync.Mutex } func NewBrowserTestStore() *BrowserTestStore { return &BrowserTestStore{ sessions: map[string]*BrowserTestSession{}, tokens: map[string]time.Time{}, } } // IssueToken creates a single-use token used by the snippet to authenticate. func (s *BrowserTestStore) IssueToken() string { buf := make([]byte, 16) if _, err := rand.Read(buf); err != nil { return fmt.Sprintf("fallback-%d", time.Now().UnixNano()) } tok := hex.EncodeToString(buf) s.tokensMu.Lock() now := time.Now() for k, v := range s.tokens { if now.Sub(v) > browserTestTokenTTL { delete(s.tokens, k) } } s.tokens[tok] = now s.tokensMu.Unlock() return tok } // ConsumeToken validates a token. Tokens are no longer single-use: // the test snippet re-establishes the WS after every page reload / // navigation, so the same token must work multiple times. We slide the // expiration on each successful use so a long active test session keeps // the token alive. func (s *BrowserTestStore) ConsumeToken(tok string) bool { s.tokensMu.Lock() defer s.tokensMu.Unlock() t, ok := s.tokens[tok] if !ok { return false } if time.Since(t) > browserTestTokenTTL { delete(s.tokens, tok) return false } s.tokens[tok] = time.Now() // sliding refresh return true } // Register inserts a new session, evicting the oldest if at capacity. 
func (s *BrowserTestStore) Register(session *BrowserTestSession) { s.mu.Lock() defer s.mu.Unlock() if len(s.sessions) >= browserTestSessionsMax { var oldestID string var oldest time.Time for id, sess := range s.sessions { if oldestID == "" || sess.connectedAt.Before(oldest) { oldestID = id oldest = sess.connectedAt } } if old, ok := s.sessions[oldestID]; ok { old.conn.Close() delete(s.sessions, oldestID) } } s.sessions[session.ID] = session } func (s *BrowserTestStore) Remove(id string) { s.mu.Lock() defer s.mu.Unlock() if sess, ok := s.sessions[id]; ok { sess.conn.Close() delete(s.sessions, id) } } func (s *BrowserTestStore) Get(id string) *BrowserTestSession { s.mu.RLock() defer s.mu.RUnlock() return s.sessions[id] } // Pick returns the requested session by ID, or the most-recently-connected // session if id is empty. Returns nil if no session matches. func (s *BrowserTestStore) Pick(id string) *BrowserTestSession { s.mu.RLock() defer s.mu.RUnlock() if id != "" { return s.sessions[id] } var picked *BrowserTestSession for _, sess := range s.sessions { if picked == nil || sess.connectedAt.After(picked.connectedAt) { picked = sess } } return picked } func (s *BrowserTestStore) List() []map[string]interface{} { s.mu.RLock() defer s.mu.RUnlock() out := make([]map[string]interface{}, 0, len(s.sessions)) for _, sess := range s.sessions { out = append(out, map[string]interface{}{ "id": sess.ID, "url": sess.URL, "title": sess.Title, "connected_at": sess.connectedAt.Format(time.RFC3339), }) } return out } // Send issues an RPC command to the browser session and waits up to TTL for // the matching reply. Returns the raw payload or an error. 
func (sess *BrowserTestSession) Send(action string, params map[string]interface{}) (json.RawMessage, error) { cid := newCorrelationID() ch := make(chan json.RawMessage, 1) sess.pendingMu.Lock() sess.pending[cid] = ch sess.pendingMu.Unlock() defer func() { sess.pendingMu.Lock() delete(sess.pending, cid) sess.pendingMu.Unlock() }() cmd := map[string]interface{}{ "id": cid, "action": action, "params": params, } sess.writeMu.Lock() err := sess.conn.WriteJSON(cmd) sess.writeMu.Unlock() if err != nil { return nil, fmt.Errorf("write: %w", err) } select { case payload := <-ch: return payload, nil case <-time.After(browserTestCommandTTL): return nil, fmt.Errorf("browser session did not reply within %s", browserTestCommandTTL) } } // AppendConsole records a console line, trimming to the buffer cap. func (sess *BrowserTestSession) AppendConsole(level, message string) { sess.mu.Lock() defer sess.mu.Unlock() sess.console = append(sess.console, ConsoleEntry{ Level: level, Message: message, Time: time.Now().Format(time.RFC3339), }) if len(sess.console) > browserTestConsoleMax { sess.console = sess.console[len(sess.console)-browserTestConsoleMax:] } } // SnapshotConsole returns a copy of the current console buffer. 
func (sess *BrowserTestSession) SnapshotConsole() []ConsoleEntry { sess.mu.Lock() defer sess.mu.Unlock() out := make([]ConsoleEntry, len(sess.console)) copy(out, sess.console) return out } func newCorrelationID() string { buf := make([]byte, 8) rand.Read(buf) return hex.EncodeToString(buf) } // HTTP handlers -------------------------------------------------------------- func (s *Server) handleBrowserTestSnippet(w http.ResponseWriter, r *http.Request) { if r.Method != "GET" { writeError(w, "GET only", http.StatusMethodNotAllowed) return } tok := s.browserTestStore.IssueToken() host := r.Host if host == "" { host = "127.0.0.1" } scheme := "ws" if r.TLS != nil { scheme = "wss" } wsURL := fmt.Sprintf("%s://%s/api/ws/browser-test?token=%s", scheme, host, tok) snippet := buildBrowserTestSnippet(wsURL) writeJSON(w, map[string]interface{}{ "token": tok, "ws_url": wsURL, "snippet": snippet, "expires_in": int(browserTestTokenTTL / time.Second), }) } func (s *Server) handleBrowserTestSessions(w http.ResponseWriter, r *http.Request) { if r.Method != "GET" { writeError(w, "GET only", http.StatusMethodNotAllowed) return } writeJSON(w, map[string]interface{}{ "sessions": s.browserTestStore.List(), }) } func (s *Server) handleBrowserTestConsole(w http.ResponseWriter, r *http.Request) { if r.Method != "GET" { writeError(w, "GET only", http.StatusMethodNotAllowed) return } id := strings.TrimPrefix(r.URL.Path, "/api/test/console/") sess := s.browserTestStore.Pick(id) if sess == nil { writeError(w, "no active browser test session", http.StatusNotFound) return } writeJSON(w, map[string]interface{}{ "session_id": sess.ID, "url": sess.URL, "console": sess.SnapshotConsole(), }) } // browserTestUpgrader accepts any origin: the connection is gated by a // short-lived token issued to the local UI, not by Origin checking. 
var browserTestUpgrader = websocket.Upgrader{ CheckOrigin: func(r *http.Request) bool { return true }, } func (s *Server) handleBrowserTestWS(w http.ResponseWriter, r *http.Request) { tok := r.URL.Query().Get("token") if tok == "" || !s.browserTestStore.ConsumeToken(tok) { writeError(w, "invalid or expired token", http.StatusUnauthorized) return } conn, err := browserTestUpgrader.Upgrade(w, r, nil) if err != nil { return } conn.SetReadLimit(2 << 20) // Read the hello message: page sends {"type":"hello","url":"...","title":"..."}. conn.SetReadDeadline(time.Now().Add(10 * time.Second)) var hello struct { Type string `json:"type"` URL string `json:"url"` Title string `json:"title"` } if err := conn.ReadJSON(&hello); err != nil || hello.Type != "hello" { conn.WriteJSON(map[string]string{"type": "error", "message": "expected hello"}) conn.Close() return } conn.SetReadDeadline(time.Time{}) id := newCorrelationID() sess := &BrowserTestSession{ ID: id, URL: hello.URL, Title: hello.Title, conn: conn, pending: map[string]chan json.RawMessage{}, connectedAt: time.Now(), } s.browserTestStore.Register(sess) defer s.browserTestStore.Remove(id) // Acknowledge with the assigned session ID. 
sess.writeMu.Lock() conn.WriteJSON(map[string]string{"type": "registered", "session_id": id}) sess.writeMu.Unlock() for { _, raw, err := conn.ReadMessage() if err != nil { return } var msg struct { Type string `json:"type"` ID string `json:"id,omitempty"` Level string `json:"level,omitempty"` Text string `json:"text,omitempty"` URL string `json:"url,omitempty"` Data json.RawMessage `json:"data,omitempty"` } if err := json.Unmarshal(raw, &msg); err != nil { continue } switch msg.Type { case "console": sess.AppendConsole(msg.Level, msg.Text) case "url_change": sess.mu.Lock() sess.URL = msg.URL sess.mu.Unlock() case "reply": sess.pendingMu.Lock() ch, ok := sess.pending[msg.ID] sess.pendingMu.Unlock() if ok { select { case ch <- msg.Data: default: } } case "ping": sess.writeMu.Lock() conn.WriteJSON(map[string]string{"type": "pong"}) sess.writeMu.Unlock() } } } // Agent tool ----------------------------------------------------------------- // BrowserTestParams is the schema exposed to the AI for the browser_test tool. 
type BrowserTestParams struct { Action string `json:"action" description:"One of: list_clickables, click, eval, console, current_url, wait, type, summary, screenshot"` SessionID string `json:"session_id,omitempty" description:"Browser session id (optional, defaults to most recent)"` Selector string `json:"selector,omitempty" description:"CSS selector for click/type/screenshot actions (screenshot defaults to whole viewport when omitted)"` Index int `json:"index,omitempty" description:"Alternative to selector: index into the last list_clickables result (0-based)"` Expr string `json:"expr,omitempty" description:"JS expression to evaluate (eval action only)"` Text string `json:"text,omitempty" description:"Text to type (type action only)"` WaitMs int `json:"wait_ms,omitempty" description:"Milliseconds to wait (wait action only, max 5000)"` Tail int `json:"tail,omitempty" description:"Console action: how many recent lines to return (default 50, max 200)"` Filename string `json:"filename,omitempty" description:"Screenshot action: optional file name (no path, no extension); defaults to a timestamp"` } // RegisterBrowserTestTool wires the agent tool against a session store. func RegisterBrowserTestTool(reg *agent.Registry, store *BrowserTestStore) error { tool, err := agent.NewTool("browser_test", "Drive the user's connected browser tab for end-to-end testing. Available actions: list_clickables (returns indexed clickable elements), click (by selector or index), eval (run a JS expression and return result), console (read recent console output, ideal to spot errors after a click), current_url, wait (sleep ms before next check), type (set value on an input), summary (URL+title+last console entries). 
Always start with list_clickables; click; then console to verify no errors.", func(ctx context.Context, p BrowserTestParams) (agent.ToolResponse, error) { sess := store.Pick(p.SessionID) if sess == nil { return agent.TextErrorResponse("no active browser session — ask the user to paste the snippet from the Tests tab in their target page"), nil } action := strings.ToLower(strings.TrimSpace(p.Action)) switch action { case "": return agent.TextErrorResponse("action is required"), nil case "list_clickables", "click", "eval", "current_url", "type", "screenshot": case "console", "summary", "wait": default: return agent.TextErrorResponse("unknown action: " + p.Action), nil } if action == "console" { tail := p.Tail if tail <= 0 { tail = 50 } if tail > browserTestConsoleMax { tail = browserTestConsoleMax } entries := sess.SnapshotConsole() if len(entries) > tail { entries = entries[len(entries)-tail:] } out, _ := json.MarshalIndent(map[string]interface{}{ "session_id": sess.ID, "console": entries, }, "", " ") return agent.TextResponse(string(out)), nil } if action == "summary" { entries := sess.SnapshotConsole() if len(entries) > 20 { entries = entries[len(entries)-20:] } out, _ := json.MarshalIndent(map[string]interface{}{ "session_id": sess.ID, "url": sess.URL, "title": sess.Title, "recent_console": entries, }, "", " ") return agent.TextResponse(string(out)), nil } if action == "wait" { ms := p.WaitMs if ms <= 0 { ms = 200 } if ms > 5000 { ms = 5000 } select { case <-ctx.Done(): return agent.TextErrorResponse("cancelled"), nil case <-time.After(time.Duration(ms) * time.Millisecond): } return agent.TextResponse(fmt.Sprintf("waited %dms", ms)), nil } // Capture console snapshot length before so we can return only the delta // after the action — useful so the AI can spot errors caused by the click. 
pre := len(sess.SnapshotConsole()) params := map[string]interface{}{} if p.Selector != "" { params["selector"] = p.Selector } if p.Index > 0 || (action == "click" && p.Selector == "") { params["index"] = p.Index } if p.Expr != "" { params["expr"] = p.Expr } if p.Text != "" { params["text"] = p.Text } payload, err := sess.Send(action, params) if err != nil { return agent.TextErrorResponse(err.Error()), nil } // Screenshot post-processing: snippet returns a base64 data URL; // decode and write to ~/.muyue/screenshots/.png so the // AI can reference an on-disk path rather than streaming megabytes // of base64 back through its context. if action == "screenshot" { saved, perr := saveScreenshot(payload, p.Filename) if perr != nil { return agent.TextErrorResponse("screenshot save: " + perr.Error()), nil } out, _ := json.MarshalIndent(map[string]interface{}{ "action": "screenshot", "saved_to": saved, "current_url": sess.URL, }, "", " ") return agent.TextResponse(string(out)), nil } // Console delta: messages logged during this command. post := sess.SnapshotConsole() var delta []ConsoleEntry if len(post) > pre { delta = post[pre:] } result := map[string]interface{}{ "action": action, "reply": json.RawMessage(payload), "console_delta": delta, "current_url": sess.URL, } out, _ := json.MarshalIndent(result, "", " ") return agent.TextResponse(string(out)), nil }) if err != nil { return err } return reg.Register(tool) } // Snippet generator ---------------------------------------------------------- func buildBrowserTestSnippet(wsURL string) string { // Inline JS injected into the user's target page. Responsibilities: // - open the WS, with auto-reconnect (exponential backoff capped at 5s) // - hook console.log/info/warn/error/debug + window.onerror + unhandledrejection // - dispatch RPC commands: list_clickables, click, type, eval, current_url, screenshot // - re-establish WS on transient close (network blip, server restart, etc.) 
// // Across full page navigation / reload the JS context is destroyed — // no JS-only mechanism can survive that. The token is reusable (sliding // 60-min TTL server-side), so the user just re-pastes the same snippet // from the Tests tab to resume. return `(function(){ if (window.__muyueTestRunner) { console.log('[Muyue] runner already attached'); return; } var WS_URL = ` + jsString(wsURL) + `; var ws = null, lastList = [], retry = 0; function send(obj){ try{ if (ws && ws.readyState === 1) ws.send(JSON.stringify(obj)); }catch(e){} } function reply(id, data){ send({type:'reply', id:id, data:data}); } function safeText(el){ var t = (el.innerText || el.textContent || '').trim(); if (t.length > 80) t = t.slice(0,80)+'…'; return t; } function describe(el){ var sel = el.id ? '#'+el.id : el.tagName.toLowerCase(); if (!el.id && el.className && typeof el.className === 'string') { sel += '.' + el.className.trim().split(/\s+/).slice(0,2).join('.'); } var label = el.getAttribute('aria-label') || el.getAttribute('title') || el.getAttribute('name') || ''; return { tag: el.tagName.toLowerCase(), selector: sel, text: safeText(el), label: label, type: el.getAttribute('type')||'', disabled: !!el.disabled }; } function list(){ var els = Array.from(document.querySelectorAll('button, a[href], input[type=submit], input[type=button], [role=button], [onclick]')); lastList = els.filter(function(e){ var r=e.getBoundingClientRect(); return r.width>0 && r.height>0; }); return lastList.map(describe).map(function(d,i){ d.index = i; return d; }); } function clickEl(el){ if (!el) return { ok:false, error:'element not found' }; if (el.disabled) return { ok:false, error:'element is disabled' }; try { el.scrollIntoView({block:'center'}); el.click(); return { ok:true }; } catch(e){ return { ok:false, error:String(e) }; } } // Best-effort viewport screenshot via SVG foreignObject — works on most // pages, but external CSS / images / iframes won't be inlined. 
Returns a // base64 PNG data URL the server will save to disk. function screenshot(p){ return new Promise(function(resolve){ try { var w = Math.max(document.documentElement.clientWidth, 1024); var h = Math.max(window.innerHeight, 768); var node = (p && p.selector) ? document.querySelector(p.selector) : document.documentElement; if (!node) { resolve({ ok:false, error:'selector not found' }); return; } var rect = node.getBoundingClientRect(); if (node === document.documentElement) { rect = { width:w, height:h }; } var clone = node.cloneNode(true); var ser = new XMLSerializer().serializeToString(clone); var svg = '' + '
' + ser + '
'; var img = new Image(); img.onload = function(){ try { var c = document.createElement('canvas'); c.width = Math.round(rect.width); c.height = Math.round(rect.height); c.getContext('2d').drawImage(img, 0, 0); resolve({ ok:true, data_url: c.toDataURL('image/png'), width: c.width, height: c.height }); } catch(e){ resolve({ ok:false, error:'canvas: '+String(e) }); } }; img.onerror = function(){ resolve({ ok:false, error:'image load failed (CSP or invalid SVG)' }); }; img.src = 'data:image/svg+xml;charset=utf-8,' + encodeURIComponent(svg); } catch(e){ resolve({ ok:false, error:String(e) }); } }); } function dispatch(msg){ var p = msg.params || {}; switch(msg.action){ case 'list_clickables': return list(); case 'click': { var el; if (p.selector) el = document.querySelector(p.selector); else if (typeof p.index === 'number') el = lastList[p.index]; return clickEl(el); } case 'eval': { try { var r = (0,eval)(p.expr); return { ok:true, value: serialize(r) }; } catch(e){ return { ok:false, error:String(e) }; } } case 'current_url': return { url: location.href, title: document.title }; case 'type': { var el = p.selector ? document.querySelector(p.selector) : (lastList[p.index]); if (!el) return { ok:false, error:'element not found' }; var proto = Object.getPrototypeOf(el); var setter = Object.getOwnPropertyDescriptor(proto, 'value'); try { setter && setter.set ? 
setter.set.call(el, p.text||'') : (el.value = p.text||''); } catch(e){ el.value = p.text||''; } el.dispatchEvent(new Event('input', {bubbles:true})); el.dispatchEvent(new Event('change', {bubbles:true})); return { ok:true }; } case 'screenshot': return screenshot(p); } return { ok:false, error:'unknown action' }; } function serialize(v){ if (v === undefined) return 'undefined'; try { return JSON.parse(JSON.stringify(v)); } catch(e){ return String(v); } } ['log','info','warn','error','debug'].forEach(function(lvl){ var orig = console[lvl]; console[lvl] = function(){ try { var parts = Array.from(arguments).map(function(a){ if (typeof a === 'string') return a; try { return JSON.stringify(a); } catch(e){ return String(a); } }); send({type:'console', level: lvl, text: parts.join(' ')}); } catch(e){} return orig.apply(console, arguments); }; }); window.addEventListener('error', function(e){ send({type:'console', level:'error', text:'window.onerror: '+(e.message||e.error||'unknown')}); }); window.addEventListener('unhandledrejection', function(e){ send({type:'console', level:'error', text:'unhandledrejection: '+String(e.reason)}); }); var lastUrl = location.href; setInterval(function(){ if (location.href !== lastUrl){ lastUrl = location.href; send({type:'url_change', url: lastUrl}); } }, 500); function connect(){ ws = new WebSocket(WS_URL); ws.onopen = function(){ retry = 0; send({type:'hello', url: location.href, title: document.title}); }; ws.onmessage = function(ev){ try { var msg = JSON.parse(ev.data); } catch(e){ return; } if (msg.type === 'registered') { console.log('[Muyue] connected — session', msg.session_id); return; } if (msg.action) { var out = dispatch(msg); if (out && typeof out.then === 'function') { out.then(function(r){ reply(msg.id, r); }); } else { reply(msg.id, out); } } }; ws.onclose = function(){ // Same-page transient disconnect → reconnect with backoff up to ~5s. 
// Full navigation kills the JS context entirely — this never runs in // that case; the user re-pastes the snippet (same token works). retry = Math.min(retry + 1, 5); setTimeout(connect, 500 * retry); }; ws.onerror = function(){ /* onclose will fire next */ }; } connect(); window.__muyueTestRunner = { reconnect: connect, list: list }; })();` } func jsString(s string) string { b, _ := json.Marshal(s) return string(b) } // saveScreenshot decodes the base64 PNG returned by the snippet's // screenshot action and writes it to ~/.muyue/screenshots/.png. // Returns the absolute path saved, or an error. func saveScreenshot(replyPayload json.RawMessage, requestedName string) (string, error) { var reply struct { OK bool `json:"ok"` Error string `json:"error,omitempty"` DataURL string `json:"data_url,omitempty"` } if err := json.Unmarshal(replyPayload, &reply); err != nil { return "", fmt.Errorf("invalid reply: %w", err) } if !reply.OK { if reply.Error != "" { return "", fmt.Errorf("snippet: %s", reply.Error) } return "", fmt.Errorf("snippet returned ok=false") } const prefix = "data:image/png;base64," if !strings.HasPrefix(reply.DataURL, prefix) { return "", fmt.Errorf("unexpected data URL prefix") } raw, err := base64StdDecode(reply.DataURL[len(prefix):]) if err != nil { return "", fmt.Errorf("base64: %w", err) } dir, err := screenshotDir() if err != nil { return "", err } name := sanitizeFilename(requestedName) if name == "" { name = time.Now().Format("20060102-150405") } path := dir + "/" + name + ".png" if err := writeFile(path, raw, 0644); err != nil { return "", err } return path, nil } func screenshotDir() (string, error) { home, err := osUserHomeDir() if err != nil { return "", err } dir := home + "/.muyue/screenshots" if err := mkdirAll(dir, 0755); err != nil { return "", err } return dir, nil } // sanitizeFilename keeps a safe subset (letters / digits / _ / - / .) so // the user-supplied name cannot escape the screenshots directory. 
func sanitizeFilename(s string) string {
	// strings.Map drops every rune for which the mapping returns a negative
	// value, which leaves exactly the allow-listed characters in order.
	allowed := func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return r
		case r == '_' || r == '-' || r == '.':
			return r
		}
		return -1
	}
	return strings.Map(allowed, s)
}