l_ai_knowledge/internal/application/service/metric/common.go

135 lines
2.6 KiB
Go

package metric
import (
"regexp"
"strings"
"knowlege-lsxd/internal/types"
)
func sum(m map[string]int) int {
s := 0
for _, v := range m {
s += v
}
return s
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
func abs(a int) int {
if a < 0 {
return -a
}
return a
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
func splitSentences(text string) []string {
// 编译正则表达式(匹配中文句号或英文句号)
re := regexp.MustCompile(`([。.])`)
// 分割文本并保留分隔符用于定位
split := re.Split(text, -1)
var sentences []string
current := strings.Builder{}
for i, s := range split {
// 交替获取文本段和分隔符(奇数为分隔符)
if i%2 == 0 {
current.WriteString(s)
} else {
// 当遇到分隔符时,完成当前句子
if current.Len() > 0 {
sentence := strings.TrimSpace(current.String())
if sentence != "" {
sentences = append(sentences, sentence)
}
current.Reset()
}
}
}
// 处理最后一个无分隔符的文本段
if remaining := strings.TrimSpace(current.String()); remaining != "" {
sentences = append(sentences, remaining)
}
return sentences
}
func splitIntoWords(sentences []string) []string {
// 正则匹配中英文段落(中文块、英文块、其他字符)
re := regexp.MustCompile(`([\p{Han}]+)|([a-zA-Z0-9_.,!?]+)|(\p{P})`)
var tokens []string
for _, text := range sentences {
matches := re.FindAllStringSubmatch(text, -1)
for _, groups := range matches {
chineseBlock := groups[1]
englishBlock := groups[2]
punctuation := groups[3]
switch {
case chineseBlock != "": // 处理中文部分
words := types.Jieba.Cut(chineseBlock, true)
tokens = append(tokens, words...)
case englishBlock != "": // 处理英文部分
engTokens := strings.Fields(englishBlock)
tokens = append(tokens, engTokens...)
case punctuation != "": // 保留标点符号
tokens = append(tokens, punctuation)
}
}
}
return tokens
}
func ToSet[T comparable](li []T) map[T]struct{} {
res := make(map[T]struct{}, len(li))
for _, v := range li {
res[v] = struct{}{}
}
return res
}
func SliceMap[T any, Y any](li []T, fn func(T) Y) []Y {
res := make([]Y, len(li))
for i, v := range li {
res[i] = fn(v)
}
return res
}
func Hit[T comparable](li []T, set map[T]struct{}) int {
count := 0
for _, v := range li {
if _, exist := set[v]; exist {
count++
}
}
return count
}
func Fold[T any, Y any](slice []T, initial Y, f func(Y, T) Y) Y {
accumulator := initial
for _, item := range slice {
accumulator = f(accumulator, item)
}
return accumulator
}