137 lines
3.5 KiB
Go
137 lines
3.5 KiB
Go
package vector
|
|
|
|
import (
	"context"
	"fmt"
	"log"
	"sort"
	"strings"
)
|
|
|
|
// chromaDBService ChromaDB 向量服务实现(简化版本)
|
|
type chromaDBService struct {
|
|
config *Config
|
|
// 暂时使用内存存储,后续可以集成真正的 ChromaDB
|
|
documents map[string]Document
|
|
}
|
|
|
|
// NewChromaDBService 创建 ChromaDB 服务实例
|
|
func NewChromaDBService(config *Config) (VectorService, error) {
|
|
log.Printf("Creating ChromaDB service with endpoint: %s", config.Endpoint)
|
|
|
|
return &chromaDBService{
|
|
config: config,
|
|
documents: make(map[string]Document),
|
|
}, nil
|
|
}
|
|
|
|
// AddDocument 添加单个文档到 ChromaDB
|
|
func (c *chromaDBService) AddDocument(ctx context.Context, doc *Document) error {
|
|
log.Printf("Adding document to ChromaDB: ID=%s, Title=%s", doc.ID, doc.Title)
|
|
|
|
// 确保 metadata 包含 title
|
|
if doc.Metadata == nil {
|
|
doc.Metadata = make(map[string]interface{})
|
|
}
|
|
doc.Metadata["title"] = doc.Title
|
|
|
|
// 暂时存储到内存中
|
|
c.documents[doc.ID] = *doc
|
|
|
|
log.Printf("Successfully added document to ChromaDB: ID=%s", doc.ID)
|
|
return nil
|
|
}
|
|
|
|
// AddDocuments 批量添加文档到 ChromaDB
|
|
func (c *chromaDBService) AddDocuments(ctx context.Context, docs []*Document) error {
|
|
log.Printf("Adding %d documents to ChromaDB", len(docs))
|
|
|
|
for _, doc := range docs {
|
|
if err := c.AddDocument(ctx, doc); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
log.Printf("Successfully added %d documents to ChromaDB", len(docs))
|
|
return nil
|
|
}
|
|
|
|
// SearchSimilar 在 ChromaDB 中搜索相似文档
|
|
func (c *chromaDBService) SearchSimilar(ctx context.Context, query string, limit int) ([]*SearchResult, error) {
|
|
log.Printf("Searching similar documents in ChromaDB: query=%s, limit=%d", query, limit)
|
|
|
|
// 简单的文本匹配搜索
|
|
var results []*SearchResult
|
|
count := 0
|
|
|
|
for _, doc := range c.documents {
|
|
if count >= limit {
|
|
break
|
|
}
|
|
|
|
// 简单的关键词匹配
|
|
score := calculateSimpleScore(query, doc.Content, doc.Title)
|
|
if score > 0.1 { // 阈值过滤
|
|
results = append(results, &SearchResult{
|
|
Document: doc,
|
|
Score: score,
|
|
})
|
|
count++
|
|
}
|
|
}
|
|
|
|
log.Printf("Found %d similar documents in ChromaDB", len(results))
|
|
return results, nil
|
|
}
|
|
|
|
// DeleteDocument 从 ChromaDB 删除文档
|
|
func (c *chromaDBService) DeleteDocument(ctx context.Context, id string) error {
|
|
log.Printf("Deleting document from ChromaDB: ID=%s", id)
|
|
|
|
delete(c.documents, id)
|
|
|
|
log.Printf("Successfully deleted document from ChromaDB: ID=%s", id)
|
|
return nil
|
|
}
|
|
|
|
// GetDocument 从 ChromaDB 获取文档
|
|
func (c *chromaDBService) GetDocument(ctx context.Context, id string) (*Document, error) {
|
|
log.Printf("Getting document from ChromaDB: ID=%s", id)
|
|
|
|
doc, exists := c.documents[id]
|
|
if !exists {
|
|
return nil, fmt.Errorf("document not found: %s", id)
|
|
}
|
|
|
|
log.Printf("Successfully got document from ChromaDB: ID=%s", id)
|
|
return &doc, nil
|
|
}
|
|
|
|
// HealthCheck 检查 ChromaDB 服务健康状态
|
|
func (c *chromaDBService) HealthCheck(ctx context.Context) error {
|
|
// 简单的健康检查
|
|
log.Printf("ChromaDB health check passed")
|
|
return nil
|
|
}
|
|
|
|
// calculateSimpleScore 计算简单的文本相似度分数
|
|
func calculateSimpleScore(query, content, title string) float64 {
|
|
// 非常简单的匹配算法
|
|
score := 0.0
|
|
|
|
// 标题匹配权重更高
|
|
if containsIgnoreCase(title, query) {
|
|
score += 0.8
|
|
}
|
|
|
|
// 内容匹配
|
|
if containsIgnoreCase(content, query) {
|
|
score += 0.5
|
|
}
|
|
|
|
return score
|
|
}
|
|
|
|
// containsIgnoreCase reports whether substr occurs within text, ignoring
// letter case.
//
// It returns false when either argument is empty, so an empty substr never
// counts as a match.
func containsIgnoreCase(text, substr string) bool {
	if text == "" || substr == "" {
		return false
	}
	// Lower-case both sides so the comparison is case-insensitive.
	return strings.Contains(strings.ToLower(text), strings.ToLower(substr))
}