l_ai_knowledge/internal/application/service/metric/rouge.go

package metric

import "knowlege-lsxd/internal/types"

// Reference: https://github.com/dd-Rebecca/rouge

// RougeMetric implements ROUGE (Recall-Oriented Understudy for Gisting Evaluation)
// metrics, which evaluate text summarization quality by comparing generated text
// against reference text.
type RougeMetric struct {
	exclusive bool   // Whether to use exclusive matching mode
	metric    string // ROUGE metric type (e.g. "rouge-1", "rouge-l")
	stats     string // Statistic to return: "f" (F1), "p" (precision), or "r" (recall)
}

// AvailableMetrics defines all supported ROUGE variants and their calculation functions.
var AvailableMetrics = map[string]func([]string, []string, bool) map[string]float64{
	"rouge-1": func(hyp, ref []string, exclusive bool) map[string]float64 {
		return rougeN(hyp, ref, 1, false, exclusive) // Unigram-based ROUGE
	},
	"rouge-2": func(hyp, ref []string, exclusive bool) map[string]float64 {
		return rougeN(hyp, ref, 2, false, exclusive) // Bigram-based ROUGE
	},
	"rouge-3": func(hyp, ref []string, exclusive bool) map[string]float64 {
		return rougeN(hyp, ref, 3, false, exclusive) // Trigram-based ROUGE
	},
	"rouge-4": func(hyp, ref []string, exclusive bool) map[string]float64 {
		return rougeN(hyp, ref, 4, false, exclusive) // 4-gram-based ROUGE
	},
	"rouge-5": func(hyp, ref []string, exclusive bool) map[string]float64 {
		return rougeN(hyp, ref, 5, false, exclusive) // 5-gram-based ROUGE
	},
	"rouge-l": func(hyp, ref []string, exclusive bool) map[string]float64 {
		return rougeLSummaryLevel(hyp, ref, false, exclusive) // Longest-common-subsequence ROUGE
	},
}
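
// rougeNSketch is a minimal, hypothetical illustration of the n-gram scoring
// behind the rougeN calls above; the real rougeN lives elsewhere in this
// package and additionally handles sentence-level aggregation and the
// exclusive mode. The sketch counts the clipped n-gram overlap between
// hypothesis and reference tokens, then derives precision ("p"), recall ("r"),
// and F1 ("f") — the same keys Compute reads below.
func rougeNSketch(hypTokens, refTokens []string, n int) map[string]float64 {
	count := func(tokens []string) map[string]int {
		grams := make(map[string]int)
		for i := 0; i+n <= len(tokens); i++ {
			key := tokens[i]
			for j := i + 1; j < i+n; j++ {
				key += " " + tokens[j]
			}
			grams[key]++
		}
		return grams
	}
	hypGrams, refGrams := count(hypTokens), count(refTokens)
	overlap, hypTotal, refTotal := 0, 0, 0
	for _, c := range hypGrams {
		hypTotal += c
	}
	for gram, c := range refGrams {
		refTotal += c
		if h := hypGrams[gram]; h > 0 {
			// Clip the match count so repeated n-grams are not over-credited.
			if h < c {
				overlap += h
			} else {
				overlap += c
			}
		}
	}
	var p, r, f float64
	if hypTotal > 0 {
		p = float64(overlap) / float64(hypTotal) // Precision: overlap / hypothesis n-grams
	}
	if refTotal > 0 {
		r = float64(overlap) / float64(refTotal) // Recall: overlap / reference n-grams
	}
	if p+r > 0 {
		f = 2 * p * r / (p + r) // F1: harmonic mean of precision and recall
	}
	return map[string]float64{"p": p, "r": r, "f": f}
}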

// NewRougeMetric creates a new ROUGE metric calculator.
func NewRougeMetric(exclusive bool, metric, stats string) *RougeMetric {
	return &RougeMetric{
		exclusive: exclusive,
		metric:    metric,
		stats:     stats,
	}
}

// Compute calculates the ROUGE score between generated text and reference text.
func (r *RougeMetric) Compute(metricInput *types.MetricInput) float64 {
	hyps := []string{metricInput.GeneratedTexts} // Generated/hypothesis text
	refs := []string{metricInput.GeneratedGT}    // Reference/ground-truth text

	// Look up the ROUGE calculation function once; bail out on an unknown
	// metric name rather than calling a nil function.
	fn, ok := AvailableMetrics[r.metric]
	if !ok {
		return 0
	}

	scores := 0.0
	count := 0
	// Calculate scores for each hypothesis-reference pair
	for i := 0; i < len(hyps); i++ {
		hyp := splitSentences(hyps[i]) // Split into sentences
		ref := splitSentences(refs[i])
		sc := fn(hyp, ref, r.exclusive)
		scores += sc[r.stats] // Accumulate the requested statistic ("f", "p", or "r")
		count++
	}
	if count == 0 {
		return 0 // Avoid division by zero
	}
	return scores / float64(count) // Average over all pairs
}
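
// splitSentencesSketch is a hypothetical stand-in for the splitSentences
// helper called in Compute; the real helper is defined elsewhere in this
// package. This sketch naively breaks text on '.', '!', and '?' and does not
// trim surrounding whitespace.
func splitSentencesSketch(text string) []string {
	var sents []string
	start := 0
	for i, ch := range text {
		if ch == '.' || ch == '!' || ch == '?' {
			sents = append(sents, text[start:i+1])
			start = i + 1
		}
	}
	if start < len(text) {
		sents = append(sents, text[start:]) // Trailing fragment without a terminator
	}
	return sents
}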