l_ai_knowledge/internal/common/tools.go

101 lines
2.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package common
import (
"encoding/json"
"maps"
"regexp"
"slices"
"strings"
"unicode/utf8"
)
// ToInterfaceSlice boxes every element of slice into an empty interface and
// returns the results in the same order. It accepts a slice of any element
// type. The returned slice is always non-nil and has the same length as the
// input, so a nil or empty input yields an empty (non-nil) slice.
func ToInterfaceSlice[T any](slice []T) []interface{} {
	boxed := make([]interface{}, 0, len(slice))
	for idx := range slice {
		boxed = append(boxed, slice[idx])
	}
	return boxed
}
// StringSliceJoin wraps each element of slice in double quotes and joins the
// quoted elements with a single space, e.g. ["a", "b"] becomes `"a" "b"`.
// Elements are inserted verbatim: quotes already present inside an element
// are not escaped. An empty or nil slice produces the empty string.
func StringSliceJoin(slice []string) string {
	var sb strings.Builder
	for idx, elem := range slice {
		if idx > 0 {
			sb.WriteString(" ")
		}
		sb.WriteString(`"`)
		sb.WriteString(elem)
		sb.WriteString(`"`)
	}
	return sb.String()
}
// GetAttrs applies extract to each of attrs and returns the extracted values
// in the same order as the arguments. The returned slice is always non-nil;
// calling GetAttrs with no attrs yields an empty slice.
func GetAttrs[A, B any](extract func(A) B, attrs ...A) []B {
	extracted := make([]B, 0, len(attrs))
	for idx := range attrs {
		extracted = append(extracted, extract(attrs[idx]))
	}
	return extracted
}
// Deduplicate removes duplicates from a slice based on a key function
// T: the type of elements in the slice
// K: the type of key used for deduplication
func Deduplicate[T any, K comparable](keyFunc func(T) K, items ...T) []T {
seen := make(map[K]T)
for _, item := range items {
key := keyFunc(item)
if _, exists := seen[key]; !exists {
seen[key] = item
}
}
return slices.Collect(maps.Values(seen))
}
// llmCodeFenceRe matches a Markdown code fence, optionally tagged "json" in
// any letter case, and captures the fenced payload. It is compiled once at
// package scope instead of on every call.
var llmCodeFenceRe = regexp.MustCompile("```(?i:json)?\\s*([\\s\\S]*?)```")

// ParseLLMJsonResponse parses a JSON response from an LLM into target,
// handling cases where the JSON is wrapped in a Markdown code block.
// This is useful when LLMs return responses like:
//
//	```json
//	{"key": "value"}
//	```
//
// or regular JSON responses directly.
//
// content is first decoded as-is. If that fails, the payload of the first
// code fence found in content (with or without a "json" tag, matched
// case-insensitively) is trimmed of surrounding whitespace and decoded
// instead. target must be a value accepted by json.Unmarshal, typically a
// non-nil pointer.
//
// It returns nil on success. When no code fence is present, the error from
// the direct decode is returned; when a fence is present but its payload is
// not valid for target, the error from decoding that payload is returned.
func ParseLLMJsonResponse(content string, target interface{}) error {
	// First, try to parse the whole response directly as JSON.
	err := json.Unmarshal([]byte(content), target)
	if err == nil {
		return nil
	}

	// Direct parsing failed: look for a fenced code block.
	matches := llmCodeFenceRe.FindStringSubmatch(content)
	if len(matches) < 2 {
		// No code block found, so report the original error.
		return err
	}

	// matches[1] is the text between the opening and closing fences.
	payload := strings.TrimSpace(matches[1])
	return json.Unmarshal([]byte(payload), target)
}
// CleanInvalidUTF8 returns s with every invalid UTF-8 byte and every NUL
// character (\x00) removed. All other runes are copied through unchanged,
// including a correctly encoded U+FFFD replacement character.
func CleanInvalidUTF8(s string) string {
	var cleaned strings.Builder
	cleaned.Grow(len(s))
	for pos := 0; pos < len(s); {
		r, width := utf8.DecodeRuneInString(s[pos:])
		pos += width
		switch {
		case r == utf8.RuneError && width == 1:
			// Invalid UTF-8 byte: the decoder reports RuneError with width 1
			// only for malformed input, so drop this single byte.
		case r == 0:
			// NUL character \x00: drop it.
		default:
			cleaned.WriteRune(r)
		}
	}
	return cleaned.String()
}