feat(api): 优化字段去重逻辑并添加标签去重处理
在 exports.go 中增强字段去重逻辑,确保主表字段优先于副表字段,并对相同列名(中文标签)的字段进行去重,提升导出数据的准确性和一致性。同时,添加字段数量校验:当最终字段数量与模板不一致时记录错误日志。
This commit is contained in:
parent
3046274e24
commit
61e8cc04e7
|
|
@ -207,103 +207,141 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Normalize template fields preserving order
|
||||
normalized := make([]string, 0, len(fs))
|
||||
for _, tf := range fs {
|
||||
if ds == "ymt" && strings.HasPrefix(tf, "order_info.") {
|
||||
tf = strings.Replace(tf, "order_info.", "order.", 1)
|
||||
}
|
||||
if ds == "marketing" && tf == "order_voucher.channel_batch_no" {
|
||||
tf = "order_voucher.channel_activity_id"
|
||||
}
|
||||
normalized = append(normalized, tf)
|
||||
}
|
||||
// 移除 YMT 无效字段(key批次)
|
||||
if ds == "ymt" {
|
||||
tmp := make([]string, 0, len(normalized))
|
||||
for _, tf := range normalized {
|
||||
if tf == "order.key_batch_id" || tf == "order.key_batch_name" {
|
||||
continue
|
||||
}
|
||||
tmp = append(tmp, tf)
|
||||
}
|
||||
normalized = tmp
|
||||
}
|
||||
// whitelist validation & soft removal of disallowed fields
|
||||
bad := []string{}
|
||||
filtered = make([]string, 0, len(normalized))
|
||||
for _, tf := range normalized {
|
||||
if !wl[tf] {
|
||||
bad = append(bad, tf)
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, tf)
|
||||
}
|
||||
if len(bad) > 0 {
|
||||
logging.JSON("ERROR", map[string]interface{}{"event": "fields_not_whitelisted", "removed": bad})
|
||||
}
|
||||
// 字段去重:移除完全重复的字段(包括主表自身的重复)
|
||||
{
|
||||
seen := make(map[string]bool)
|
||||
deduped := make([]string, 0, len(filtered))
|
||||
removed := []string{}
|
||||
for _, tf := range filtered {
|
||||
if seen[tf] {
|
||||
removed = append(removed, tf)
|
||||
continue
|
||||
}
|
||||
seen[tf] = true
|
||||
deduped = append(deduped, tf)
|
||||
}
|
||||
if len(removed) > 0 {
|
||||
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_exact", "removed": removed, "reason": "移除完全重复的字段"})
|
||||
}
|
||||
filtered = deduped
|
||||
}
|
||||
// 主表和副表相同字段去重:以主表为主,移除副表的重复字段
|
||||
if ds == "ymt" && (main == "order" || main == "order_info") {
|
||||
mainTableFields := make(map[string]bool)
|
||||
// 先收集主表的所有字段名
|
||||
for _, tf := range filtered {
|
||||
parts := strings.Split(tf, ".")
|
||||
if len(parts) == 2 && parts[0] == "order" {
|
||||
mainTableFields[parts[1]] = true
|
||||
}
|
||||
}
|
||||
if len(mainTableFields) > 0 {
|
||||
deduped := make([]string, 0, len(filtered))
|
||||
removed := []string{}
|
||||
for _, tf := range filtered {
|
||||
parts := strings.Split(tf, ".")
|
||||
if len(parts) == 2 {
|
||||
if parts[0] == "order" {
|
||||
// 主表字段,保留
|
||||
deduped = append(deduped, tf)
|
||||
} else {
|
||||
// 副表字段,检查是否与主表字段重复
|
||||
if mainTableFields[parts[1]] {
|
||||
// 字段名重复,移除副表字段
|
||||
removed = append(removed, tf)
|
||||
continue
|
||||
}
|
||||
// 字段名不重复,保留
|
||||
deduped = append(deduped, tf)
|
||||
}
|
||||
} else {
|
||||
// 格式不正确,保留原样
|
||||
deduped = append(deduped, tf)
|
||||
}
|
||||
}
|
||||
if len(removed) > 0 {
|
||||
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated", "removed": removed, "reason": "主表和副表存在相同字段,以主表为主"})
|
||||
}
|
||||
filtered = deduped
|
||||
}
|
||||
}
|
||||
// 字段匹配校验(数量与顺序)
|
||||
if len(filtered) != len(fs) {
|
||||
logging.JSON("ERROR", map[string]interface{}{"event": "field_count_mismatch", "template_count": len(fs), "final_count": len(filtered)})
|
||||
}
|
||||
// Normalize template fields preserving order
|
||||
normalized := make([]string, 0, len(fs))
|
||||
for _, tf := range fs {
|
||||
if ds == "ymt" && strings.HasPrefix(tf, "order_info.") {
|
||||
tf = strings.Replace(tf, "order_info.", "order.", 1)
|
||||
}
|
||||
if ds == "marketing" && tf == "order_voucher.channel_batch_no" {
|
||||
tf = "order_voucher.channel_activity_id"
|
||||
}
|
||||
normalized = append(normalized, tf)
|
||||
}
|
||||
// 移除 YMT 无效字段(key批次)
|
||||
if ds == "ymt" {
|
||||
tmp := make([]string, 0, len(normalized))
|
||||
for _, tf := range normalized {
|
||||
if tf == "order.key_batch_id" || tf == "order.key_batch_name" {
|
||||
continue
|
||||
}
|
||||
tmp = append(tmp, tf)
|
||||
}
|
||||
normalized = tmp
|
||||
}
|
||||
// whitelist validation & soft removal of disallowed fields
|
||||
bad := []string{}
|
||||
filtered = make([]string, 0, len(normalized))
|
||||
for _, tf := range normalized {
|
||||
if !wl[tf] {
|
||||
bad = append(bad, tf)
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, tf)
|
||||
}
|
||||
if len(bad) > 0 {
|
||||
logging.JSON("ERROR", map[string]interface{}{"event": "fields_not_whitelisted", "removed": bad})
|
||||
}
|
||||
// 字段去重:移除完全重复的字段(包括主表自身的重复)
|
||||
{
|
||||
seen := make(map[string]bool)
|
||||
deduped := make([]string, 0, len(filtered))
|
||||
removed := []string{}
|
||||
for _, tf := range filtered {
|
||||
if seen[tf] {
|
||||
removed = append(removed, tf)
|
||||
continue
|
||||
}
|
||||
seen[tf] = true
|
||||
deduped = append(deduped, tf)
|
||||
}
|
||||
if len(removed) > 0 {
|
||||
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_exact", "removed": removed, "reason": "移除完全重复的字段"})
|
||||
}
|
||||
filtered = deduped
|
||||
}
|
||||
// 主表和副表相同字段去重:以主表为主,移除副表的重复字段
|
||||
if ds == "ymt" && (main == "order" || main == "order_info") {
|
||||
mainTableFields := make(map[string]bool)
|
||||
// 先收集主表的所有字段名
|
||||
for _, tf := range filtered {
|
||||
parts := strings.Split(tf, ".")
|
||||
if len(parts) == 2 && parts[0] == "order" {
|
||||
mainTableFields[parts[1]] = true
|
||||
}
|
||||
}
|
||||
if len(mainTableFields) > 0 {
|
||||
deduped := make([]string, 0, len(filtered))
|
||||
removed := []string{}
|
||||
for _, tf := range filtered {
|
||||
parts := strings.Split(tf, ".")
|
||||
if len(parts) == 2 {
|
||||
if parts[0] == "order" {
|
||||
// 主表字段,保留
|
||||
deduped = append(deduped, tf)
|
||||
} else {
|
||||
// 副表字段,检查是否与主表字段重复
|
||||
if mainTableFields[parts[1]] {
|
||||
// 字段名重复,移除副表字段
|
||||
removed = append(removed, tf)
|
||||
continue
|
||||
}
|
||||
// 字段名不重复,保留
|
||||
deduped = append(deduped, tf)
|
||||
}
|
||||
} else {
|
||||
// 格式不正确,保留原样
|
||||
deduped = append(deduped, tf)
|
||||
}
|
||||
}
|
||||
if len(removed) > 0 {
|
||||
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated", "removed": removed, "reason": "主表和副表存在相同字段,以主表为主"})
|
||||
}
|
||||
filtered = deduped
|
||||
}
|
||||
}
|
||||
labels := FieldLabels()
|
||||
// 相同列名(中文标签)去重:如果多个表的字段共享同一列名,优先保留主表字段
|
||||
{
|
||||
labelIdx := map[string]int{}
|
||||
deduped := make([]string, 0, len(filtered))
|
||||
removed := []string{}
|
||||
for _, tf := range filtered {
|
||||
label := labels[tf]
|
||||
if label == "" {
|
||||
label = tf
|
||||
}
|
||||
parts := strings.Split(tf, ".")
|
||||
isMain := len(parts) == 2 && parts[0] == main
|
||||
if idx, ok := labelIdx[label]; ok {
|
||||
prev := deduped[idx]
|
||||
prevParts := strings.Split(prev, ".")
|
||||
prevMain := len(prevParts) == 2 && prevParts[0] == main
|
||||
switch {
|
||||
case prevMain:
|
||||
removed = append(removed, tf)
|
||||
continue
|
||||
case isMain:
|
||||
removed = append(removed, prev)
|
||||
deduped[idx] = tf
|
||||
continue
|
||||
default:
|
||||
removed = append(removed, tf)
|
||||
continue
|
||||
}
|
||||
}
|
||||
labelIdx[label] = len(deduped)
|
||||
deduped = append(deduped, tf)
|
||||
}
|
||||
if len(removed) > 0 {
|
||||
logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_by_label", "removed": removed, "reason": "同名列优先保留主表字段"})
|
||||
}
|
||||
filtered = deduped
|
||||
}
|
||||
// 字段匹配校验(数量与顺序)
|
||||
if len(filtered) != len(fs) {
|
||||
logging.JSON("ERROR", map[string]interface{}{"event": "field_count_mismatch", "template_count": len(fs), "final_count": len(filtered)})
|
||||
}
|
||||
// relax: creator_in 非必填,若权限中提供其他边界将被合并为等值过滤
|
||||
req := exporter.BuildRequest{MainTable: main, Datasource: ds, Fields: filtered, Filters: p.Filters}
|
||||
q, args, err := rrepo.Build(req, wl)
|
||||
|
|
@ -329,7 +367,6 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
var estimate int64
|
||||
estimate = rrepo.EstimateFastChunked(dataDB, ds, main, p.Filters)
|
||||
labels := FieldLabels()
|
||||
hdrs := make([]string, len(filtered))
|
||||
for i, tf := range filtered {
|
||||
if v, ok := labels[tf]; ok {
|
||||
|
|
@ -347,7 +384,7 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
|
|||
for i := range hdrs {
|
||||
if cnt[hdrs[i]] > 1 {
|
||||
parts := strings.Split(filtered[i], ".")
|
||||
if len(parts) == 2 && parts[0] != "order" {
|
||||
if len(parts) == 2 && parts[0] != main {
|
||||
hdrs[i] = tableLabel(parts[0]) + "." + hdrs[i]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue