All checks were successful
Stable Release / stable (push) Successful in 1m34s
Major additions: - RAG pipeline (indexing, chunking, search) with sidebar upload button - Memory system with CRUD API - Plugins and lessons modules - MCP discovery and MCP server - Advanced skills (auto-create, conditional, improver) - Agent browser/image support, delegate, sessions - File editor with CodeMirror in split panes - Markdown rendering via react-markdown + KaTeX + highlight.js - Raw markdown toggle - PWA manifest + service worker - Extension UI redesign with new design tokens and studio-style chat - Pipeline API for chat streaming - Mobile responsive layout 💘 Generated with Crush Assisted-by: GLM-5.1 via Crush <crush@charm.land>
344 lines
7.9 KiB
Go
344 lines
7.9 KiB
Go
package rag
|
|
|
|
import (
	"database/sql"
	"encoding/json"
	"fmt"
	"math"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	_ "modernc.org/sqlite"
)
|
|
|
|
// Document is the metadata row for one indexed document, as stored in the
// documents table and returned by ListDocuments.
type Document struct {
	ID        string    `json:"id"`         // primary key
	Name      string    `json:"name"`       // document name (NOT NULL in schema)
	Path      string    `json:"path"`       // source path; may be empty (schema default '')
	Type      string    `json:"type"`       // document type; schema default is 'text'
	Chunks    int       `json:"chunks"`     // number of chunks produced when indexed
	IndexedAt time.Time `json:"indexed_at"` // time the document was (re)indexed
	Size      int64     `json:"size"`       // size in bytes; schema default 0
}
|
|
|
|
// ChunkRecord is one content chunk of a document, optionally carrying its
// embedding vector. Rows live in the chunks table; embeddings are stored
// as JSON-encoded []float64 in the embedding BLOB column.
type ChunkRecord struct {
	ID         int64     `json:"id"`                  // AUTOINCREMENT row id
	DocumentID string    `json:"document_id"`         // owning Document.ID
	Content    string    `json:"content"`             // chunk text
	Embedding  []float64 `json:"embedding,omitempty"` // empty when the chunk has not been embedded
	StartPos   int       `json:"start_pos"`           // chunk start position (presumably an offset into the source — confirm with the chunker)
	EndPos     int       `json:"end_pos"`             // chunk end position (same caveat as StartPos)
	Metadata   string    `json:"metadata,omitempty"`  // free-form metadata string; schema default ''
}
|
|
|
|
// Store is the SQLite-backed persistence layer for the RAG pipeline.
// Exported methods take mu, so a single Store may be shared across
// goroutines.
type Store struct {
	mu  sync.RWMutex // guards access to db
	db  *sql.DB
	dir string // on-disk directory containing rag.db
}
|
|
|
|
func NewStore(configDir string) (*Store, error) {
|
|
ragDir := filepath.Join(configDir, "rag")
|
|
if err := os.MkdirAll(ragDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("creating rag dir: %w", err)
|
|
}
|
|
|
|
dbPath := filepath.Join(ragDir, "rag.db")
|
|
db, err := sql.Open("sqlite", dbPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opening rag db: %w", err)
|
|
}
|
|
|
|
s := &Store{db: db, dir: ragDir}
|
|
if err := s.migrate(); err != nil {
|
|
db.Close()
|
|
return nil, fmt.Errorf("migrating rag db: %w", err)
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
// migrate creates the documents and chunks tables plus the chunk lookup
// index if they do not already exist. It is idempotent and safe to run on
// every startup.
//
// NOTE(review): the ON DELETE CASCADE on chunks.document_id only takes
// effect when SQLite's foreign_keys pragma is enabled on the connection —
// SQLite leaves it off by default.
func (s *Store) migrate() error {
	_, err := s.db.Exec(`
	CREATE TABLE IF NOT EXISTS documents (
		id TEXT PRIMARY KEY,
		name TEXT NOT NULL,
		path TEXT NOT NULL DEFAULT '',
		type TEXT NOT NULL DEFAULT 'text',
		chunks INTEGER NOT NULL DEFAULT 0,
		indexed_at DATETIME NOT NULL,
		size INTEGER NOT NULL DEFAULT 0
	);
	CREATE TABLE IF NOT EXISTS chunks (
		id INTEGER PRIMARY KEY AUTOINCREMENT,
		document_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
		content TEXT NOT NULL,
		embedding BLOB,
		start_pos INTEGER NOT NULL DEFAULT 0,
		end_pos INTEGER NOT NULL DEFAULT 0,
		metadata TEXT NOT NULL DEFAULT ''
	);
	CREATE INDEX IF NOT EXISTS idx_chunks_document ON chunks(document_id);
	`)
	return err
}
|
|
|
|
func (s *Store) StoreDocument(doc Document, chunks []ChunkRecord) error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
tx, err := s.db.Begin()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer tx.Rollback()
|
|
|
|
_, err = tx.Exec(`INSERT OR REPLACE INTO documents (id, name, path, type, chunks, indexed_at, size) VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
|
doc.ID, doc.Name, doc.Path, doc.Type, doc.Chunks, doc.IndexedAt, doc.Size)
|
|
if err != nil {
|
|
return fmt.Errorf("insert document: %w", err)
|
|
}
|
|
|
|
stmt, err := tx.Prepare(`INSERT INTO chunks (document_id, content, embedding, start_pos, end_pos, metadata) VALUES (?, ?, ?, ?, ?, ?)`)
|
|
if err != nil {
|
|
return fmt.Errorf("prepare chunk insert: %w", err)
|
|
}
|
|
defer stmt.Close()
|
|
|
|
for _, chunk := range chunks {
|
|
var embBytes []byte
|
|
if len(chunk.Embedding) > 0 {
|
|
embBytes, err = json.Marshal(chunk.Embedding)
|
|
if err != nil {
|
|
return fmt.Errorf("marshal embedding: %w", err)
|
|
}
|
|
}
|
|
_, err = stmt.Exec(chunk.DocumentID, chunk.Content, embBytes, chunk.StartPos, chunk.EndPos, chunk.Metadata)
|
|
if err != nil {
|
|
return fmt.Errorf("insert chunk: %w", err)
|
|
}
|
|
}
|
|
|
|
return tx.Commit()
|
|
}
|
|
|
|
func (s *Store) ListDocuments() ([]Document, error) {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
rows, err := s.db.Query(`SELECT id, name, path, type, chunks, indexed_at, size FROM documents ORDER BY indexed_at DESC`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var docs []Document
|
|
for rows.Next() {
|
|
var doc Document
|
|
if err := rows.Scan(&doc.ID, &doc.Name, &doc.Path, &doc.Type, &doc.Chunks, &doc.IndexedAt, &doc.Size); err != nil {
|
|
return nil, err
|
|
}
|
|
docs = append(docs, doc)
|
|
}
|
|
return docs, nil
|
|
}
|
|
|
|
func (s *Store) DeleteDocument(id string) error {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
_, err := s.db.Exec(`DELETE FROM documents WHERE id = ?`, id)
|
|
return err
|
|
}
|
|
|
|
// SearchResult is a single ranked chunk returned by Search or
// SearchKeyword.
type SearchResult struct {
	ChunkID      int64   `json:"chunk_id"`
	DocumentID   string  `json:"document_id"`
	DocumentName string  `json:"document_name"`
	Content      string  `json:"content"`
	Score        float64 `json:"score"` // cosine similarity (Search) or summed term frequency (SearchKeyword)
	Metadata     string  `json:"metadata,omitempty"`
}
|
|
|
|
func (s *Store) Search(queryEmbedding []float64, limit int) ([]SearchResult, error) {
|
|
if limit <= 0 {
|
|
limit = 5
|
|
}
|
|
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
rows, err := s.db.Query(`SELECT c.id, c.document_id, c.content, c.embedding, c.metadata, d.name FROM chunks c JOIN documents d ON c.document_id = d.id WHERE c.embedding IS NOT NULL`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
type scored struct {
|
|
result SearchResult
|
|
score float64
|
|
}
|
|
var results []scored
|
|
|
|
for rows.Next() {
|
|
var id int64
|
|
var docID, content, metadata, docName string
|
|
var embBytes []byte
|
|
if err := rows.Scan(&id, &docID, &content, &embBytes, &metadata, &docName); err != nil {
|
|
continue
|
|
}
|
|
|
|
var embedding []float64
|
|
if err := json.Unmarshal(embBytes, &embedding); err != nil {
|
|
continue
|
|
}
|
|
|
|
score := cosineSimilarity(queryEmbedding, embedding)
|
|
results = append(results, scored{
|
|
result: SearchResult{
|
|
ChunkID: id,
|
|
DocumentID: docID,
|
|
DocumentName: docName,
|
|
Content: content,
|
|
Metadata: metadata,
|
|
},
|
|
score: score,
|
|
})
|
|
}
|
|
|
|
for i := 0; i < len(results); i++ {
|
|
for j := i + 1; j < len(results); j++ {
|
|
if results[j].score > results[i].score {
|
|
results[i], results[j] = results[j], results[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(results) > limit {
|
|
results = results[:limit]
|
|
}
|
|
|
|
out := make([]SearchResult, len(results))
|
|
for i, r := range results {
|
|
r.result.Score = r.score
|
|
out[i] = r.result
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func (s *Store) SearchKeyword(query string, limit int) ([]SearchResult, error) {
|
|
if limit <= 0 {
|
|
limit = 5
|
|
}
|
|
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
words := strings.Fields(strings.ToLower(query))
|
|
if len(words) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
rows, err := s.db.Query(`SELECT c.id, c.document_id, c.content, c.metadata, d.name FROM chunks c JOIN documents d ON c.document_id = d.id`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
type scored struct {
|
|
result SearchResult
|
|
score float64
|
|
}
|
|
var results []scored
|
|
|
|
for rows.Next() {
|
|
var id int64
|
|
var docID, content, metadata, docName string
|
|
if err := rows.Scan(&id, &docID, &content, &metadata, &docName); err != nil {
|
|
continue
|
|
}
|
|
|
|
lower := strings.ToLower(content)
|
|
var score float64
|
|
for _, word := range words {
|
|
count := strings.Count(lower, word)
|
|
if count > 0 {
|
|
score += float64(count) / float64(len(strings.Fields(lower)))
|
|
}
|
|
}
|
|
|
|
if score > 0 {
|
|
results = append(results, scored{
|
|
result: SearchResult{
|
|
ChunkID: id,
|
|
DocumentID: docID,
|
|
DocumentName: docName,
|
|
Content: content,
|
|
Metadata: metadata,
|
|
},
|
|
score: score,
|
|
})
|
|
}
|
|
}
|
|
|
|
for i := 0; i < len(results); i++ {
|
|
for j := i + 1; j < len(results); j++ {
|
|
if results[j].score > results[i].score {
|
|
results[i], results[j] = results[j], results[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(results) > limit {
|
|
results = results[:limit]
|
|
}
|
|
|
|
out := make([]SearchResult, len(results))
|
|
for i, r := range results {
|
|
r.result.Score = r.score
|
|
out[i] = r.result
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func (s *Store) Status() (map[string]interface{}, error) {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
|
|
var docCount, chunkCount int
|
|
s.db.QueryRow(`SELECT COUNT(*) FROM documents`).Scan(&docCount)
|
|
s.db.QueryRow(`SELECT COUNT(*) FROM chunks`).Scan(&chunkCount)
|
|
var withEmb int
|
|
s.db.QueryRow(`SELECT COUNT(*) FROM chunks WHERE embedding IS NOT NULL`).Scan(&withEmb)
|
|
|
|
return map[string]interface{}{
|
|
"documents": docCount,
|
|
"chunks": chunkCount,
|
|
"chunks_embedded": withEmb,
|
|
"storage_path": s.dir,
|
|
}, nil
|
|
}
|
|
|
|
// Close releases the underlying database handle. The Store must not be
// used after Close returns.
func (s *Store) Close() error {
	return s.db.Close()
}
|
|
|
|
// cosineSimilarity returns the cosine of the angle between vectors a and
// b. It returns 0 when the vectors have different lengths or when either
// has zero magnitude, so callers never see NaN or a panic.
func cosineSimilarity(a, b []float64) float64 {
	if len(a) != len(b) {
		return 0
	}

	var dot, sqSumA, sqSumB float64
	for i, av := range a {
		bv := b[i]
		dot += av * bv
		sqSumA += av * av
		sqSumB += bv * bv
	}

	if sqSumA == 0 || sqSumB == 0 {
		return 0
	}
	return dot / (math.Sqrt(sqSumA) * math.Sqrt(sqSumB))
}
|