l_ai_knowledge/internal/application/repository/chunk.go

132 lines
4.5 KiB
Go

package repository
import (
"context"
"errors"
"gorm.io/gorm"
"knowlege-lsxd/internal/common"
"knowlege-lsxd/internal/types"
"knowlege-lsxd/internal/types/interfaces"
)
// chunkRepository implements the ChunkRepository interface
type chunkRepository struct {
db *gorm.DB
}
// NewChunkRepository creates a new chunk repository
func NewChunkRepository(db *gorm.DB) interfaces.ChunkRepository {
return &chunkRepository{db: db}
}
// CreateChunks creates multiple chunks in batches
func (r *chunkRepository) CreateChunks(ctx context.Context, chunks []*types.Chunk) error {
for _, chunk := range chunks {
chunk.Content = common.CleanInvalidUTF8(chunk.Content)
}
return r.db.WithContext(ctx).CreateInBatches(chunks, 100).Error
}
// GetChunkByID retrieves a chunk by its ID and tenant ID
func (r *chunkRepository) GetChunkByID(ctx context.Context, tenantID uint, id string) (*types.Chunk, error) {
var chunk types.Chunk
if err := r.db.WithContext(ctx).Where("tenant_id = ? AND id = ?", tenantID, id).First(&chunk).Error; err != nil {
if errors.Is(err, gorm.ErrRecordNotFound) {
return nil, errors.New("chunk not found")
}
return nil, err
}
return &chunk, nil
}
// ListChunksByID retrieves multiple chunks by their IDs
func (r *chunkRepository) ListChunksByID(
ctx context.Context, tenantID uint, ids []string,
) ([]*types.Chunk, error) {
var chunks []*types.Chunk
if err := r.db.WithContext(ctx).
Where("tenant_id = ? AND id IN ?", tenantID, ids).
Find(&chunks).Error; err != nil {
return nil, err
}
return chunks, nil
}
// ListChunksByKnowledgeID lists all chunks for a knowledge ID
func (r *chunkRepository) ListChunksByKnowledgeID(
ctx context.Context, tenantID uint, knowledgeID string,
) ([]*types.Chunk, error) {
var chunks []*types.Chunk
if err := r.db.WithContext(ctx).
Select("id, content, knowledge_id, knowledge_base_id, start_at, end_at, chunk_index, is_enabled, chunk_type, parent_chunk_id, image_info").
Where("tenant_id = ? AND knowledge_id = ? and chunk_type = ?", tenantID, knowledgeID, "text").
Order("chunk_index ASC").
Find(&chunks).Error; err != nil {
return nil, err
}
return chunks, nil
}
// ListPagedChunksByKnowledgeID lists chunks for a knowledge ID with pagination
func (r *chunkRepository) ListPagedChunksByKnowledgeID(
ctx context.Context, tenantID uint, knowledgeID string, page *types.Pagination, chunk_type []types.ChunkType,
) ([]*types.Chunk, int64, error) {
var chunks []*types.Chunk
var total int64
// First query the total count
if err := r.db.WithContext(ctx).Model(&types.Chunk{}).
Where("tenant_id = ? AND knowledge_id = ?", tenantID, knowledgeID).
Count(&total).Error; err != nil {
return nil, 0, err
}
// Then query the paginated data
if err := r.db.WithContext(ctx).
Select("id, content, knowledge_id, knowledge_base_id, start_at, end_at, chunk_index, is_enabled, chunk_type, parent_chunk_id, image_info").
Where("tenant_id = ? AND knowledge_id = ? and chunk_type in (?)", tenantID, knowledgeID, chunk_type).
Order("chunk_index ASC").
Offset(page.Offset()).
Limit(page.Limit()).
Find(&chunks).Error; err != nil {
return nil, 0, err
}
return chunks, total, nil
}
func (r *chunkRepository) ListChunkByParentID(ctx context.Context, tenantID uint, parentID string) ([]*types.Chunk, error) {
var chunks []*types.Chunk
if err := r.db.WithContext(ctx).
Where("tenant_id = ? AND parent_chunk_id = ?", tenantID, parentID).
Find(&chunks).Error; err != nil {
return nil, err
}
return chunks, nil
}
// UpdateChunk updates a chunk
func (r *chunkRepository) UpdateChunk(ctx context.Context, chunk *types.Chunk) error {
return r.db.WithContext(ctx).Save(chunk).Error
}
// DeleteChunk deletes a chunk by its ID
func (r *chunkRepository) DeleteChunk(ctx context.Context, tenantID uint, id string) error {
return r.db.WithContext(ctx).Where("tenant_id = ? AND id = ?", tenantID, id).Delete(&types.Chunk{}).Error
}
// DeleteChunksByKnowledgeID deletes all chunks for a knowledge ID
func (r *chunkRepository) DeleteChunksByKnowledgeID(ctx context.Context, tenantID uint, knowledgeID string) error {
return r.db.WithContext(ctx).Where(
"tenant_id = ? AND knowledge_id = ?", tenantID, knowledgeID,
).Delete(&types.Chunk{}).Error
}
// DeleteByKnowledgeList deletes all chunks for a knowledge list
func (r *chunkRepository) DeleteByKnowledgeList(ctx context.Context, tenantID uint, knowledgeIDs []string) error {
return r.db.WithContext(ctx).Where(
"tenant_id = ? AND knowledge_id in ?", tenantID, knowledgeIDs,
).Delete(&types.Chunk{}).Error
}