package api // Browser-test feature: an out-of-process page (the user's target tab) // connects to Muyue via WebSocket using a short-lived token, and exposes a // thin RPC: Studio's AI can list clickable elements, click them, evaluate JS, // read the recent console buffer, and observe what changes after each action. // // Threat model: an injected snippet runs in the user's chosen page only, with // the same origin as that page; the WS endpoint is bound to localhost and // gated by a 5-minute token issued by the local Muyue server. import ( "context" "crypto/rand" "encoding/hex" "encoding/json" "fmt" "net/http" "strings" "sync" "time" "github.com/gorilla/websocket" "github.com/muyue/muyue/internal/agent" ) const ( browserTestTokenTTL = 5 * time.Minute browserTestCommandTTL = 30 * time.Second browserTestConsoleMax = 200 browserTestSessionsMax = 16 ) // BrowserTestSession represents one connected browser tab. type BrowserTestSession struct { ID string URL string Title string conn *websocket.Conn mu sync.Mutex console []ConsoleEntry pending map[string]chan json.RawMessage pendingMu sync.Mutex connectedAt time.Time writeMu sync.Mutex } // ConsoleEntry is a captured console message from the connected page. type ConsoleEntry struct { Level string `json:"level"` // log, info, warn, error, debug Message string `json:"message"` Time string `json:"time"` } // BrowserTestStore manages active sessions + pending one-shot connect tokens. type BrowserTestStore struct { mu sync.RWMutex sessions map[string]*BrowserTestSession tokens map[string]time.Time tokensMu sync.Mutex } func NewBrowserTestStore() *BrowserTestStore { return &BrowserTestStore{ sessions: map[string]*BrowserTestSession{}, tokens: map[string]time.Time{}, } } // IssueToken creates a single-use token used by the snippet to authenticate. func (s *BrowserTestStore) IssueToken() string { buf := make([]byte, 16) if _, err := rand.Read(buf); err != nil { return fmt.Sprintf("fallback-%d", time.Now().UnixNano()) } tok := hex.EncodeToString(buf) s.tokensMu.Lock() now := time.Now() for k, v := range s.tokens { if now.Sub(v) > browserTestTokenTTL { delete(s.tokens, k) } } s.tokens[tok] = now s.tokensMu.Unlock() return tok } // ConsumeToken validates and removes a token in one step. func (s *BrowserTestStore) ConsumeToken(tok string) bool { s.tokensMu.Lock() defer s.tokensMu.Unlock() t, ok := s.tokens[tok] if !ok { return false } delete(s.tokens, tok) return time.Since(t) <= browserTestTokenTTL } // Register inserts a new session, evicting the oldest if at capacity. func (s *BrowserTestStore) Register(session *BrowserTestSession) { s.mu.Lock() defer s.mu.Unlock() if len(s.sessions) >= browserTestSessionsMax { var oldestID string var oldest time.Time for id, sess := range s.sessions { if oldestID == "" || sess.connectedAt.Before(oldest) { oldestID = id oldest = sess.connectedAt } } if old, ok := s.sessions[oldestID]; ok { old.conn.Close() delete(s.sessions, oldestID) } } s.sessions[session.ID] = session } func (s *BrowserTestStore) Remove(id string) { s.mu.Lock() defer s.mu.Unlock() if sess, ok := s.sessions[id]; ok { sess.conn.Close() delete(s.sessions, id) } } func (s *BrowserTestStore) Get(id string) *BrowserTestSession { s.mu.RLock() defer s.mu.RUnlock() return s.sessions[id] } // Pick returns the requested session by ID, or the most-recently-connected // session if id is empty. Returns nil if no session matches. func (s *BrowserTestStore) Pick(id string) *BrowserTestSession { s.mu.RLock() defer s.mu.RUnlock() if id != "" { return s.sessions[id] } var picked *BrowserTestSession for _, sess := range s.sessions { if picked == nil || sess.connectedAt.After(picked.connectedAt) { picked = sess } } return picked } func (s *BrowserTestStore) List() []map[string]interface{} { s.mu.RLock() defer s.mu.RUnlock() out := make([]map[string]interface{}, 0, len(s.sessions)) for _, sess := range s.sessions { out = append(out, map[string]interface{}{ "id": sess.ID, "url": sess.URL, "title": sess.Title, "connected_at": sess.connectedAt.Format(time.RFC3339), }) } return out } // Send issues an RPC command to the browser session and waits up to TTL for // the matching reply. Returns the raw payload or an error. func (sess *BrowserTestSession) Send(action string, params map[string]interface{}) (json.RawMessage, error) { cid := newCorrelationID() ch := make(chan json.RawMessage, 1) sess.pendingMu.Lock() sess.pending[cid] = ch sess.pendingMu.Unlock() defer func() { sess.pendingMu.Lock() delete(sess.pending, cid) sess.pendingMu.Unlock() }() cmd := map[string]interface{}{ "id": cid, "action": action, "params": params, } sess.writeMu.Lock() err := sess.conn.WriteJSON(cmd) sess.writeMu.Unlock() if err != nil { return nil, fmt.Errorf("write: %w", err) } select { case payload := <-ch: return payload, nil case <-time.After(browserTestCommandTTL): return nil, fmt.Errorf("browser session did not reply within %s", browserTestCommandTTL) } } // AppendConsole records a console line, trimming to the buffer cap. func (sess *BrowserTestSession) AppendConsole(level, message string) { sess.mu.Lock() defer sess.mu.Unlock() sess.console = append(sess.console, ConsoleEntry{ Level: level, Message: message, Time: time.Now().Format(time.RFC3339), }) if len(sess.console) > browserTestConsoleMax { sess.console = sess.console[len(sess.console)-browserTestConsoleMax:] } } // SnapshotConsole returns a copy of the current console buffer. func (sess *BrowserTestSession) SnapshotConsole() []ConsoleEntry { sess.mu.Lock() defer sess.mu.Unlock() out := make([]ConsoleEntry, len(sess.console)) copy(out, sess.console) return out } func newCorrelationID() string { buf := make([]byte, 8) rand.Read(buf) return hex.EncodeToString(buf) } // HTTP handlers -------------------------------------------------------------- func (s *Server) handleBrowserTestSnippet(w http.ResponseWriter, r *http.Request) { if r.Method != "GET" { writeError(w, "GET only", http.StatusMethodNotAllowed) return } tok := s.browserTestStore.IssueToken() host := r.Host if host == "" { host = "127.0.0.1" } scheme := "ws" if r.TLS != nil { scheme = "wss" } wsURL := fmt.Sprintf("%s://%s/api/ws/browser-test?token=%s", scheme, host, tok) snippet := buildBrowserTestSnippet(wsURL) writeJSON(w, map[string]interface{}{ "token": tok, "ws_url": wsURL, "snippet": snippet, "expires_in": int(browserTestTokenTTL / time.Second), }) } func (s *Server) handleBrowserTestSessions(w http.ResponseWriter, r *http.Request) { if r.Method != "GET" { writeError(w, "GET only", http.StatusMethodNotAllowed) return } writeJSON(w, map[string]interface{}{ "sessions": s.browserTestStore.List(), }) } func (s *Server) handleBrowserTestConsole(w http.ResponseWriter, r *http.Request) { if r.Method != "GET" { writeError(w, "GET only", http.StatusMethodNotAllowed) return } id := strings.TrimPrefix(r.URL.Path, "/api/test/console/") sess := s.browserTestStore.Pick(id) if sess == nil { writeError(w, "no active browser test session", http.StatusNotFound) return } writeJSON(w, map[string]interface{}{ "session_id": sess.ID, "url": sess.URL, "console": sess.SnapshotConsole(), }) } // browserTestUpgrader accepts any origin: the connection is gated by a // short-lived token issued to the local UI, not by Origin checking. var browserTestUpgrader = websocket.Upgrader{ CheckOrigin: func(r *http.Request) bool { return true }, } func (s *Server) handleBrowserTestWS(w http.ResponseWriter, r *http.Request) { tok := r.URL.Query().Get("token") if tok == "" || !s.browserTestStore.ConsumeToken(tok) { writeError(w, "invalid or expired token", http.StatusUnauthorized) return } conn, err := browserTestUpgrader.Upgrade(w, r, nil) if err != nil { return } conn.SetReadLimit(2 << 20) // Read the hello message: page sends {"type":"hello","url":"...","title":"..."}. conn.SetReadDeadline(time.Now().Add(10 * time.Second)) var hello struct { Type string `json:"type"` URL string `json:"url"` Title string `json:"title"` } if err := conn.ReadJSON(&hello); err != nil || hello.Type != "hello" { conn.WriteJSON(map[string]string{"type": "error", "message": "expected hello"}) conn.Close() return } conn.SetReadDeadline(time.Time{}) id := newCorrelationID() sess := &BrowserTestSession{ ID: id, URL: hello.URL, Title: hello.Title, conn: conn, pending: map[string]chan json.RawMessage{}, connectedAt: time.Now(), } s.browserTestStore.Register(sess) defer s.browserTestStore.Remove(id) // Acknowledge with the assigned session ID. sess.writeMu.Lock() conn.WriteJSON(map[string]string{"type": "registered", "session_id": id}) sess.writeMu.Unlock() for { _, raw, err := conn.ReadMessage() if err != nil { return } var msg struct { Type string `json:"type"` ID string `json:"id,omitempty"` Level string `json:"level,omitempty"` Text string `json:"text,omitempty"` URL string `json:"url,omitempty"` Data json.RawMessage `json:"data,omitempty"` } if err := json.Unmarshal(raw, &msg); err != nil { continue } switch msg.Type { case "console": sess.AppendConsole(msg.Level, msg.Text) case "url_change": sess.mu.Lock() sess.URL = msg.URL sess.mu.Unlock() case "reply": sess.pendingMu.Lock() ch, ok := sess.pending[msg.ID] sess.pendingMu.Unlock() if ok { select { case ch <- msg.Data: default: } } case "ping": sess.writeMu.Lock() conn.WriteJSON(map[string]string{"type": "pong"}) sess.writeMu.Unlock() } } } // Agent tool ----------------------------------------------------------------- // BrowserTestParams is the schema exposed to the AI for the browser_test tool. type BrowserTestParams struct { Action string `json:"action" description:"One of: list_clickables, click, eval, console, current_url, wait, type, summary"` SessionID string `json:"session_id,omitempty" description:"Browser session id (optional, defaults to most recent)"` Selector string `json:"selector,omitempty" description:"CSS selector for click/type actions"` Index int `json:"index,omitempty" description:"Alternative to selector: index into the last list_clickables result (0-based)"` Expr string `json:"expr,omitempty" description:"JS expression to evaluate (eval action only)"` Text string `json:"text,omitempty" description:"Text to type (type action only)"` WaitMs int `json:"wait_ms,omitempty" description:"Milliseconds to wait (wait action only, max 5000)"` Tail int `json:"tail,omitempty" description:"Console action: how many recent lines to return (default 50, max 200)"` } // RegisterBrowserTestTool wires the agent tool against a session store. func RegisterBrowserTestTool(reg *agent.Registry, store *BrowserTestStore) error { tool, err := agent.NewTool("browser_test", "Drive the user's connected browser tab for end-to-end testing. Available actions: list_clickables (returns indexed clickable elements), click (by selector or index), eval (run a JS expression and return result), console (read recent console output, ideal to spot errors after a click), current_url, wait (sleep ms before next check), type (set value on an input), summary (URL+title+last console entries). Always start with list_clickables; click; then console to verify no errors.", func(ctx context.Context, p BrowserTestParams) (agent.ToolResponse, error) { sess := store.Pick(p.SessionID) if sess == nil { return agent.TextErrorResponse("no active browser session — ask the user to paste the snippet from the Tests tab in their target page"), nil } action := strings.ToLower(strings.TrimSpace(p.Action)) switch action { case "": return agent.TextErrorResponse("action is required"), nil case "list_clickables", "click", "eval", "current_url", "type": case "console", "summary", "wait": default: return agent.TextErrorResponse("unknown action: " + p.Action), nil } if action == "console" { tail := p.Tail if tail <= 0 { tail = 50 } if tail > browserTestConsoleMax { tail = browserTestConsoleMax } entries := sess.SnapshotConsole() if len(entries) > tail { entries = entries[len(entries)-tail:] } out, _ := json.MarshalIndent(map[string]interface{}{ "session_id": sess.ID, "console": entries, }, "", " ") return agent.TextResponse(string(out)), nil } if action == "summary" { entries := sess.SnapshotConsole() if len(entries) > 20 { entries = entries[len(entries)-20:] } out, _ := json.MarshalIndent(map[string]interface{}{ "session_id": sess.ID, "url": sess.URL, "title": sess.Title, "recent_console": entries, }, "", " ") return agent.TextResponse(string(out)), nil } if action == "wait" { ms := p.WaitMs if ms <= 0 { ms = 200 } if ms > 5000 { ms = 5000 } select { case <-ctx.Done(): return agent.TextErrorResponse("cancelled"), nil case <-time.After(time.Duration(ms) * time.Millisecond): } return agent.TextResponse(fmt.Sprintf("waited %dms", ms)), nil } // Capture console snapshot length before so we can return only the delta // after the action — useful so the AI can spot errors caused by the click. pre := len(sess.SnapshotConsole()) params := map[string]interface{}{} if p.Selector != "" { params["selector"] = p.Selector } if p.Index > 0 || (action == "click" && p.Selector == "") { params["index"] = p.Index } if p.Expr != "" { params["expr"] = p.Expr } if p.Text != "" { params["text"] = p.Text } payload, err := sess.Send(action, params) if err != nil { return agent.TextErrorResponse(err.Error()), nil } // Console delta: messages logged during this command. post := sess.SnapshotConsole() var delta []ConsoleEntry if len(post) > pre { delta = post[pre:] } result := map[string]interface{}{ "action": action, "reply": json.RawMessage(payload), "console_delta": delta, "current_url": sess.URL, } out, _ := json.MarshalIndent(result, "", " ") return agent.TextResponse(string(out)), nil }) if err != nil { return err } return reg.Register(tool) } // Snippet generator ---------------------------------------------------------- func buildBrowserTestSnippet(wsURL string) string { // Note: this is the JS injected into the user's target page. It opens the // WS, hooks console, and dispatches commands. Kept terse on purpose. return `(function(){ if (window.__muyueTestRunner) { console.log('[Muyue] runner already attached'); return; } var WS_URL = ` + jsString(wsURL) + `; var ws = new WebSocket(WS_URL); var lastList = []; function send(obj){ try{ ws.send(JSON.stringify(obj)); }catch(e){} } function reply(id, data){ send({type:'reply', id:id, data:data}); } function safeText(el){ var t = (el.innerText || el.textContent || '').trim(); if (t.length > 80) t = t.slice(0,80)+'…'; return t; } function describe(el){ var sel = el.id ? '#'+el.id : el.tagName.toLowerCase(); if (!el.id && el.className && typeof el.className === 'string') { sel += '.' + el.className.trim().split(/\s+/).slice(0,2).join('.'); } var label = el.getAttribute('aria-label') || el.getAttribute('title') || el.getAttribute('name') || ''; return { tag: el.tagName.toLowerCase(), selector: sel, text: safeText(el), label: label, type: el.getAttribute('type')||'', disabled: !!el.disabled }; } function list(){ var els = Array.from(document.querySelectorAll('button, a[href], input[type=submit], input[type=button], [role=button], [onclick]')); lastList = els.filter(function(e){ var r=e.getBoundingClientRect(); return r.width>0 && r.height>0; }); return lastList.map(describe).map(function(d,i){ d.index = i; return d; }); } function clickEl(el){ if (!el) return { ok:false, error:'element not found' }; if (el.disabled) return { ok:false, error:'element is disabled' }; try { el.scrollIntoView({block:'center'}); el.click(); return { ok:true }; } catch(e){ return { ok:false, error:String(e) }; } } function dispatch(msg){ var p = msg.params || {}; switch(msg.action){ case 'list_clickables': return list(); case 'click': { var el; if (p.selector) el = document.querySelector(p.selector); else if (typeof p.index === 'number') el = lastList[p.index]; return clickEl(el); } case 'eval': { try { var r = (0,eval)(p.expr); return { ok:true, value: serialize(r) }; } catch(e){ return { ok:false, error:String(e) }; } } case 'current_url': return { url: location.href, title: document.title }; case 'type': { var el = p.selector ? document.querySelector(p.selector) : (lastList[p.index]); if (!el) return { ok:false, error:'element not found' }; var proto = Object.getPrototypeOf(el); var setter = Object.getOwnPropertyDescriptor(proto, 'value'); try { setter && setter.set ? setter.set.call(el, p.text||'') : (el.value = p.text||''); } catch(e){ el.value = p.text||''; } el.dispatchEvent(new Event('input', {bubbles:true})); el.dispatchEvent(new Event('change', {bubbles:true})); return { ok:true }; } } return { ok:false, error:'unknown action' }; } function serialize(v){ if (v === undefined) return 'undefined'; try { return JSON.parse(JSON.stringify(v)); } catch(e){ return String(v); } } ['log','info','warn','error','debug'].forEach(function(lvl){ var orig = console[lvl]; console[lvl] = function(){ try { var parts = Array.from(arguments).map(function(a){ if (typeof a === 'string') return a; try { return JSON.stringify(a); } catch(e){ return String(a); } }); send({type:'console', level: lvl, text: parts.join(' ')}); } catch(e){} return orig.apply(console, arguments); }; }); window.addEventListener('error', function(e){ send({type:'console', level:'error', text:'window.onerror: '+(e.message||e.error||'unknown')}); }); window.addEventListener('unhandledrejection', function(e){ send({type:'console', level:'error', text:'unhandledrejection: '+String(e.reason)}); }); var lastUrl = location.href; setInterval(function(){ if (location.href !== lastUrl){ lastUrl = location.href; send({type:'url_change', url: lastUrl}); } }, 500); ws.onopen = function(){ send({type:'hello', url: location.href, title: document.title}); }; ws.onmessage = function(ev){ try { var msg = JSON.parse(ev.data); } catch(e){ return; } if (msg.type === 'registered') { console.log('[Muyue] connected — session', msg.session_id); return; } if (msg.action) reply(msg.id, dispatch(msg)); }; ws.onclose = function(){ console.log('[Muyue] runner disconnected'); window.__muyueTestRunner = null; }; window.__muyueTestRunner = { ws: ws, list: list }; })();` } func jsString(s string) string { b, _ := json.Marshal(s) return string(b) }