All checks were successful
Stable Release / stable (push) Successful in 1m34s
Major additions: - RAG pipeline (indexing, chunking, search) with sidebar upload button - Memory system with CRUD API - Plugins and lessons modules - MCP discovery and MCP server - Advanced skills (auto-create, conditional, improver) - Agent browser/image support, delegate, sessions - File editor with CodeMirror in split panes - Markdown rendering via react-markdown + KaTeX + highlight.js - Raw markdown toggle - PWA manifest + service worker - Extension UI redesign with new design tokens and studio-style chat - Pipeline API for chat streaming - Mobile responsive layout 💘 Generated with Crush Assisted-by: GLM-5.1 via Crush <crush@charm.land>
175 lines
3.6 KiB
Go
175 lines
3.6 KiB
Go
package rag
|
|
|
|
import (
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Chunk is one contiguous piece of a document produced by the chunking
// functions in this package.
type Chunk struct {
	// ID is the zero-based index of the chunk within the returned slice.
	ID int `json:"id"`
	// Content is the chunk text with leading and trailing whitespace removed.
	Content string `json:"content"`
	// StartPos is the offset, counted in runes, at which the chunk begins.
	StartPos int `json:"start_pos"`
	// EndPos is the offset, counted in runes, at which the chunk ends.
	EndPos int `json:"end_pos"`
	// Metadata is an optional annotation; ChunkCode stores the language name
	// here. It is left out of the JSON encoding when empty.
	Metadata string `json:"metadata,omitempty"`
}
|
|
|
|
func ChunkText(text string, maxTokens int) []Chunk {
|
|
if maxTokens <= 0 {
|
|
maxTokens = 500
|
|
}
|
|
maxChars := maxTokens * 4
|
|
if maxChars < 200 {
|
|
maxChars = 200
|
|
}
|
|
|
|
lines := strings.Split(text, "\n")
|
|
var chunks []Chunk
|
|
var current strings.Builder
|
|
chunkID := 0
|
|
startPos := 0
|
|
currentPos := 0
|
|
|
|
for _, line := range lines {
|
|
lineLen := utf8.RuneCountInString(line) + 1
|
|
|
|
if current.Len() > 0 && utf8.RuneCountInString(current.String())+lineLen > maxChars {
|
|
chunks = append(chunks, Chunk{
|
|
ID: chunkID,
|
|
Content: strings.TrimSpace(current.String()),
|
|
StartPos: startPos,
|
|
EndPos: currentPos,
|
|
})
|
|
chunkID++
|
|
startPos = currentPos
|
|
current.Reset()
|
|
}
|
|
|
|
current.WriteString(line)
|
|
current.WriteString("\n")
|
|
currentPos += lineLen
|
|
}
|
|
|
|
if current.Len() > 0 {
|
|
chunks = append(chunks, Chunk{
|
|
ID: chunkID,
|
|
Content: strings.TrimSpace(current.String()),
|
|
StartPos: startPos,
|
|
EndPos: currentPos,
|
|
})
|
|
}
|
|
|
|
return chunks
|
|
}
|
|
|
|
func ChunkMarkdown(text string, maxTokens int) []Chunk {
|
|
if maxTokens <= 0 {
|
|
maxTokens = 500
|
|
}
|
|
maxChars := maxTokens * 4
|
|
|
|
sections := splitMarkdownSections(text)
|
|
var chunks []Chunk
|
|
chunkID := 0
|
|
pos := 0
|
|
|
|
for _, section := range sections {
|
|
if utf8.RuneCountInString(section) > maxChars {
|
|
subChunks := ChunkText(section, maxTokens)
|
|
for i := range subChunks {
|
|
subChunks[i].ID = chunkID
|
|
subChunks[i].StartPos += pos
|
|
subChunks[i].EndPos += pos
|
|
chunkID++
|
|
}
|
|
chunks = append(chunks, subChunks...)
|
|
} else {
|
|
chunks = append(chunks, Chunk{
|
|
ID: chunkID,
|
|
Content: strings.TrimSpace(section),
|
|
StartPos: pos,
|
|
EndPos: pos + utf8.RuneCountInString(section),
|
|
})
|
|
chunkID++
|
|
}
|
|
pos += utf8.RuneCountInString(section)
|
|
}
|
|
|
|
return chunks
|
|
}
|
|
|
|
// splitMarkdownSections divides Markdown text into sections, starting a new
// section at every ATX heading line.
//
// The returned sections partition text exactly: concatenating them yields the
// input unchanged, with every line keeping its original line ending. Empty
// input yields no sections. Text before the first heading forms a section of
// its own.
//
// Lines inside fenced code blocks (``` or ~~~) are never treated as headings,
// so shell or Python comments in a code sample do not split a section. An
// unclosed fence extends to the end of the text.
func splitMarkdownSections(text string) []string {
	var (
		sections  []string
		current   strings.Builder
		fenceChar byte // marker of the open fence, meaningful while fenceLen > 0
		fenceLen  int  // run length of the open fence; 0 when outside a fence
	)

	for _, line := range strings.SplitAfter(text, "\n") {
		if line == "" {
			// SplitAfter yields a final empty piece when text ends in "\n".
			continue
		}
		bare := strings.TrimRight(line, "\r\n")
		marker, run, rest := markdownFence(bare)

		switch {
		case fenceLen > 0:
			// Only a matching fence that is at least as long and carries no
			// trailing text closes the block.
			if marker == fenceChar && run >= fenceLen && strings.TrimSpace(rest) == "" {
				fenceLen = 0
			}
		case run > 0 && (marker != '`' || !strings.Contains(rest, "`")):
			// A backtick fence may not have backticks in its info string;
			// such a line is inline code, not a fence opener.
			fenceChar, fenceLen = marker, run
		case isMarkdownHeading(bare) && current.Len() > 0:
			sections = append(sections, current.String())
			current.Reset()
		}

		current.WriteString(line)
	}

	if current.Len() > 0 {
		sections = append(sections, current.String())
	}

	return sections
}

// markdownFence reports whether line, given without its line ending, begins
// with a code-fence run: at most three spaces of indentation followed by
// three or more backticks or tildes. It returns the fence character, the
// length of the run and the text after the run; run is 0 when the line is not
// a fence.
func markdownFence(line string) (marker byte, run int, rest string) {
	s := strings.TrimLeft(line, " ")
	if len(line)-len(s) > 3 || s == "" {
		return 0, 0, ""
	}
	marker = s[0]
	if marker != '`' && marker != '~' {
		return 0, 0, ""
	}
	for run < len(s) && s[run] == marker {
		run++
	}
	if run < 3 {
		return 0, 0, ""
	}
	return marker, run, s[run:]
}

// isMarkdownHeading reports whether line, given without its line ending, is
// an ATX heading: at most three spaces of indentation, one to six '#'
// characters, then a space, a tab or the end of the line.
func isMarkdownHeading(line string) bool {
	s := strings.TrimLeft(line, " ")
	if len(line)-len(s) > 3 {
		return false
	}
	level := 0
	for level < len(s) && s[level] == '#' {
		level++
	}
	if level < 1 || level > 6 {
		return false
	}
	return level == len(s) || s[level] == ' ' || s[level] == '\t'
}
|
|
|
|
func ChunkCode(code string, lang string, maxTokens int) []Chunk {
|
|
if maxTokens <= 0 {
|
|
maxTokens = 300
|
|
}
|
|
maxChars := maxTokens * 4
|
|
|
|
var chunks []Chunk
|
|
chunkID := 0
|
|
pos := 0
|
|
|
|
lines := strings.Split(code, "\n")
|
|
var current strings.Builder
|
|
currentLines := 0
|
|
|
|
for _, line := range lines {
|
|
lineLen := utf8.RuneCountInString(line) + 1
|
|
|
|
if current.Len() > 0 && (utf8.RuneCountInString(current.String())+lineLen > maxChars || currentLines > 50) {
|
|
chunks = append(chunks, Chunk{
|
|
ID: chunkID,
|
|
Content: strings.TrimSpace(current.String()),
|
|
StartPos: pos,
|
|
EndPos: pos + utf8.RuneCountInString(current.String()),
|
|
Metadata: lang,
|
|
})
|
|
chunkID++
|
|
pos += utf8.RuneCountInString(current.String())
|
|
current.Reset()
|
|
currentLines = 0
|
|
}
|
|
|
|
current.WriteString(line)
|
|
current.WriteString("\n")
|
|
currentLines++
|
|
}
|
|
|
|
if current.Len() > 0 {
|
|
chunks = append(chunks, Chunk{
|
|
ID: chunkID,
|
|
Content: strings.TrimSpace(current.String()),
|
|
StartPos: pos,
|
|
EndPos: pos + utf8.RuneCountInString(current.String()),
|
|
Metadata: lang,
|
|
})
|
|
}
|
|
|
|
return chunks
|
|
}
|