feat(api): 优化字段去重逻辑并添加标签去重处理

在 exports.go 中增强字段去重逻辑:确保主表字段优先于副表字段,并对相同列名(中文标签)的字段进行去重,提升导出数据的准确性和一致性。同时,在全部去重完成后进行字段匹配校验:最终字段数量与模板不一致时记录 ERROR 日志(仅记录,不中断导出)。
This commit is contained in:
zhouyonggao 2025-12-15 15:11:01 +08:00
parent 3046274e24
commit 61e8cc04e7
1 changed files with 136 additions and 99 deletions

View File

@ -207,103 +207,141 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
}
}
}
// Normalize template fields preserving order
normalized := make([]string, 0, len(fs))
for _, tf := range fs {
if ds == "ymt" && strings.HasPrefix(tf, "order_info.") {
tf = strings.Replace(tf, "order_info.", "order.", 1)
}
if ds == "marketing" && tf == "order_voucher.channel_batch_no" {
tf = "order_voucher.channel_activity_id"
}
normalized = append(normalized, tf)
}
// 移除 YMT 无效字段key批次
if ds == "ymt" {
tmp := make([]string, 0, len(normalized))
for _, tf := range normalized {
if tf == "order.key_batch_id" || tf == "order.key_batch_name" {
continue
}
tmp = append(tmp, tf)
}
normalized = tmp
}
// whitelist validation & soft removal of disallowed fields
bad := []string{}
filtered = make([]string, 0, len(normalized))
for _, tf := range normalized {
if !wl[tf] {
bad = append(bad, tf)
continue
}
filtered = append(filtered, tf)
}
if len(bad) > 0 {
logging.JSON("ERROR", map[string]interface{}{"event": "fields_not_whitelisted", "removed": bad})
}
// 字段去重:移除完全重复的字段(包括主表自身的重复)
{
seen := make(map[string]bool)
deduped := make([]string, 0, len(filtered))
removed := []string{}
for _, tf := range filtered {
if seen[tf] {
removed = append(removed, tf)
continue
}
seen[tf] = true
deduped = append(deduped, tf)
}
if len(removed) > 0 {
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_exact", "removed": removed, "reason": "移除完全重复的字段"})
}
filtered = deduped
}
// 主表和副表相同字段去重:以主表为主,移除副表的重复字段
if ds == "ymt" && (main == "order" || main == "order_info") {
mainTableFields := make(map[string]bool)
// 先收集主表的所有字段名
for _, tf := range filtered {
parts := strings.Split(tf, ".")
if len(parts) == 2 && parts[0] == "order" {
mainTableFields[parts[1]] = true
}
}
if len(mainTableFields) > 0 {
deduped := make([]string, 0, len(filtered))
removed := []string{}
for _, tf := range filtered {
parts := strings.Split(tf, ".")
if len(parts) == 2 {
if parts[0] == "order" {
// 主表字段,保留
deduped = append(deduped, tf)
} else {
// 副表字段,检查是否与主表字段重复
if mainTableFields[parts[1]] {
// 字段名重复,移除副表字段
removed = append(removed, tf)
continue
}
// 字段名不重复,保留
deduped = append(deduped, tf)
}
} else {
// 格式不正确,保留原样
deduped = append(deduped, tf)
}
}
if len(removed) > 0 {
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated", "removed": removed, "reason": "主表和副表存在相同字段,以主表为主"})
}
filtered = deduped
}
}
// 字段匹配校验(数量与顺序)
if len(filtered) != len(fs) {
logging.JSON("ERROR", map[string]interface{}{"event": "field_count_mismatch", "template_count": len(fs), "final_count": len(filtered)})
}
// Normalize template fields preserving order
normalized := make([]string, 0, len(fs))
for _, tf := range fs {
if ds == "ymt" && strings.HasPrefix(tf, "order_info.") {
tf = strings.Replace(tf, "order_info.", "order.", 1)
}
if ds == "marketing" && tf == "order_voucher.channel_batch_no" {
tf = "order_voucher.channel_activity_id"
}
normalized = append(normalized, tf)
}
// 移除 YMT 无效字段key批次
if ds == "ymt" {
tmp := make([]string, 0, len(normalized))
for _, tf := range normalized {
if tf == "order.key_batch_id" || tf == "order.key_batch_name" {
continue
}
tmp = append(tmp, tf)
}
normalized = tmp
}
// whitelist validation & soft removal of disallowed fields
bad := []string{}
filtered = make([]string, 0, len(normalized))
for _, tf := range normalized {
if !wl[tf] {
bad = append(bad, tf)
continue
}
filtered = append(filtered, tf)
}
if len(bad) > 0 {
logging.JSON("ERROR", map[string]interface{}{"event": "fields_not_whitelisted", "removed": bad})
}
// 字段去重:移除完全重复的字段(包括主表自身的重复)
{
seen := make(map[string]bool)
deduped := make([]string, 0, len(filtered))
removed := []string{}
for _, tf := range filtered {
if seen[tf] {
removed = append(removed, tf)
continue
}
seen[tf] = true
deduped = append(deduped, tf)
}
if len(removed) > 0 {
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_exact", "removed": removed, "reason": "移除完全重复的字段"})
}
filtered = deduped
}
// 主表和副表相同字段去重:以主表为主,移除副表的重复字段
if ds == "ymt" && (main == "order" || main == "order_info") {
mainTableFields := make(map[string]bool)
// 先收集主表的所有字段名
for _, tf := range filtered {
parts := strings.Split(tf, ".")
if len(parts) == 2 && parts[0] == "order" {
mainTableFields[parts[1]] = true
}
}
if len(mainTableFields) > 0 {
deduped := make([]string, 0, len(filtered))
removed := []string{}
for _, tf := range filtered {
parts := strings.Split(tf, ".")
if len(parts) == 2 {
if parts[0] == "order" {
// 主表字段,保留
deduped = append(deduped, tf)
} else {
// 副表字段,检查是否与主表字段重复
if mainTableFields[parts[1]] {
// 字段名重复,移除副表字段
removed = append(removed, tf)
continue
}
// 字段名不重复,保留
deduped = append(deduped, tf)
}
} else {
// 格式不正确,保留原样
deduped = append(deduped, tf)
}
}
if len(removed) > 0 {
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated", "removed": removed, "reason": "主表和副表存在相同字段,以主表为主"})
}
filtered = deduped
}
}
labels := FieldLabels()
// 相同列名(中文标签)去重:如果多个表的字段共享同一列名,优先保留主表字段
{
labelIdx := map[string]int{}
deduped := make([]string, 0, len(filtered))
removed := []string{}
for _, tf := range filtered {
label := labels[tf]
if label == "" {
label = tf
}
parts := strings.Split(tf, ".")
isMain := len(parts) == 2 && parts[0] == main
if idx, ok := labelIdx[label]; ok {
prev := deduped[idx]
prevParts := strings.Split(prev, ".")
prevMain := len(prevParts) == 2 && prevParts[0] == main
switch {
case prevMain:
removed = append(removed, tf)
continue
case isMain:
removed = append(removed, prev)
deduped[idx] = tf
continue
default:
removed = append(removed, tf)
continue
}
}
labelIdx[label] = len(deduped)
deduped = append(deduped, tf)
}
if len(removed) > 0 {
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_by_label", "removed": removed, "reason": "同名列优先保留主表字段"})
}
filtered = deduped
}
// 字段匹配校验(数量与顺序)
if len(filtered) != len(fs) {
logging.JSON("ERROR", map[string]interface{}{"event": "field_count_mismatch", "template_count": len(fs), "final_count": len(filtered)})
}
// relax: creator_in 非必填,若权限中提供其他边界将被合并为等值过滤
req := exporter.BuildRequest{MainTable: main, Datasource: ds, Fields: filtered, Filters: p.Filters}
q, args, err := rrepo.Build(req, wl)
@ -329,7 +367,6 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
}
var estimate int64
estimate = rrepo.EstimateFastChunked(dataDB, ds, main, p.Filters)
labels := FieldLabels()
hdrs := make([]string, len(filtered))
for i, tf := range filtered {
if v, ok := labels[tf]; ok {
@ -347,7 +384,7 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
for i := range hdrs {
if cnt[hdrs[i]] > 1 {
parts := strings.Split(filtered[i], ".")
if len(parts) == 2 && parts[0] != "order" {
if len(parts) == 2 && parts[0] != main {
hdrs[i] = tableLabel(parts[0]) + "." + hdrs[i]
}
}