ai-courseware/eino-project/internal/domain/vector/chromadb.go

137 lines
3.5 KiB
Go

package vector
import (
	"context"
	"fmt"
	"log"
	"sort"
	"strings"
)
// chromaDBService is a simplified vector service named after ChromaDB.
// It does not contact a ChromaDB server: documents are kept in an in-memory
// map. The type carries no mutex or other synchronization primitive.
type chromaDBService struct {
	// config is the configuration the service was created with.
	config *Config
	// documents is the in-memory store, keyed by document ID. It is a
	// placeholder until a real ChromaDB backend is integrated.
	documents map[string]Document
}
// NewChromaDBService creates a ChromaDB-flavoured VectorService backed by an
// empty in-memory document store.
//
// config is kept on the service and its Endpoint is logged; no connection is
// opened. It returns an error if config is nil.
func NewChromaDBService(config *Config) (VectorService, error) {
	// Guard first: the log line below dereferences config.
	if config == nil {
		return nil, fmt.Errorf("create chromadb service: config is nil")
	}

	log.Printf("Creating ChromaDB service with endpoint: %s", config.Endpoint)

	return &chromaDBService{
		config:    config,
		documents: make(map[string]Document),
	}, nil
}
// AddDocument stores a single document in the in-memory store under doc.ID.
// A document already stored under the same ID is overwritten.
//
// Side effect: doc.Metadata is allocated when nil and its "title" entry is set
// to doc.Title, so the caller's document is modified. The stored copy shares
// that metadata map with the caller's document.
//
// ctx is currently unused. It returns an error if doc is nil.
func (c *chromaDBService) AddDocument(ctx context.Context, doc *Document) error {
	// Guard first: every statement below dereferences doc.
	if doc == nil {
		return fmt.Errorf("add document: document is nil")
	}

	log.Printf("Adding document to ChromaDB: ID=%s, Title=%s", doc.ID, doc.Title)

	// Make sure the metadata always carries the title.
	if doc.Metadata == nil {
		doc.Metadata = make(map[string]interface{})
	}
	doc.Metadata["title"] = doc.Title

	// Kept in memory for now; the struct is copied by value into the map.
	c.documents[doc.ID] = *doc

	log.Printf("Successfully added document to ChromaDB: ID=%s", doc.ID)
	return nil
}
// AddDocuments stores a batch of documents by calling AddDocument on each
// element in slice order.
//
// It stops at the first error and returns it unchanged; documents added before
// the failure remain stored. It returns nil when every document was added.
func (c *chromaDBService) AddDocuments(ctx context.Context, docs []*Document) error {
	total := len(docs)
	log.Printf("Adding %d documents to ChromaDB", total)

	for i := 0; i < total; i++ {
		err := c.AddDocument(ctx, docs[i])
		if err != nil {
			return err
		}
	}

	log.Printf("Successfully added %d documents to ChromaDB", total)
	return nil
}
// SearchSimilar returns up to limit stored documents that match query.
//
// Each document is scored with calculateSimpleScore against its content and
// title; documents scoring 0.1 or less are discarded. The remaining matches
// are ordered by score, highest first, with ties kept in ascending document
// ID order, and only then truncated to limit. This makes the result
// deterministic and ensures the best matches are the ones kept.
//
// A limit of zero or less yields a nil result. ctx is currently unused and
// the returned error is always nil.
func (c *chromaDBService) SearchSimilar(ctx context.Context, query string, limit int) ([]*SearchResult, error) {
	log.Printf("Searching similar documents in ChromaDB: query=%s, limit=%d", query, limit)

	// Map iteration order is random, so walk the IDs in sorted order to get a
	// stable tie-break for equally scored documents.
	ids := make([]string, 0, len(c.documents))
	for id := range c.documents {
		ids = append(ids, id)
	}
	sort.Strings(ids)

	// Simple keyword matching with a score threshold.
	var results []*SearchResult
	for _, id := range ids {
		doc := c.documents[id]
		score := calculateSimpleScore(query, doc.Content, doc.Title)
		if score > 0.1 {
			results = append(results, &SearchResult{
				Document: doc,
				Score:    score,
			})
		}
	}

	// Rank before truncating; the stable sort preserves the ID order for ties.
	sort.SliceStable(results, func(i, j int) bool {
		return results[i].Score > results[j].Score
	})

	switch {
	case limit <= 0:
		results = nil
	case len(results) > limit:
		results = results[:limit]
	}

	log.Printf("Found %d similar documents in ChromaDB", len(results))
	return results, nil
}
// DeleteDocument removes the document stored under id from the in-memory
// store. Deleting an ID that is not present is a no-op. ctx is currently
// unused and the returned error is always nil.
func (c *chromaDBService) DeleteDocument(ctx context.Context, id string) error {
	const (
		startMsg = "Deleting document from ChromaDB: ID=%s"
		doneMsg  = "Successfully deleted document from ChromaDB: ID=%s"
	)

	log.Printf(startMsg, id)
	delete(c.documents, id)
	log.Printf(doneMsg, id)

	return nil
}
// GetDocument looks up the document stored under id.
//
// It returns a pointer to a copy of the stored Document struct, or a
// "document not found" error when no document has that ID. ctx is currently
// unused.
func (c *chromaDBService) GetDocument(ctx context.Context, id string) (*Document, error) {
	log.Printf("Getting document from ChromaDB: ID=%s", id)

	stored, ok := c.documents[id]
	if !ok {
		return nil, fmt.Errorf("document not found: %s", id)
	}

	log.Printf("Successfully got document from ChromaDB: ID=%s", id)
	// stored is a local copy of the map value, so taking its address does not
	// expose the map entry itself.
	return &stored, nil
}
// HealthCheck reports the health of the service. This simplified
// implementation performs no actual check: it logs a success message and
// always returns nil. ctx is currently unused.
func (c *chromaDBService) HealthCheck(ctx context.Context) error {
	log.Print("ChromaDB health check passed")
	return nil
}
// calculateSimpleScore returns a crude relevance score for a document with
// the given content and title against query.
//
// A title match, as decided by containsIgnoreCase, contributes 0.8 and a
// content match contributes 0.5; the two are summed, so the result is one of
// 0, 0.5, 0.8 or 1.3.
func calculateSimpleScore(query, content, title string) float64 {
	const (
		// Title matches are weighted higher than content matches.
		titleWeight   = 0.8
		contentWeight = 0.5
	)

	var total float64
	if containsIgnoreCase(title, query) {
		total += titleWeight
	}
	if containsIgnoreCase(content, query) {
		total += contentWeight
	}
	return total
}
// containsIgnoreCase reports whether substr occurs within text, ignoring
// letter case.
//
// It returns false when either argument is empty, so an empty query never
// matches anything.
func containsIgnoreCase(text, substr string) bool {
	if text == "" || substr == "" {
		return false
	}
	// Lower-case both sides so the comparison is case-insensitive.
	return strings.Contains(strings.ToLower(text), strings.ToLower(substr))
}