feat(api): improve field dedup logic and add label-based dedup
Strengthen the field deduplication logic in exports.go so that main-table fields take priority over secondary-table fields, and deduplicate fields that share the same column name, improving the accuracy and consistency of exported data. Also add a field-count check to ensure the final number of fields matches the template.
This commit is contained in:
parent 3046274e24
commit 61e8cc04e7
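For reference, below is a minimal standalone sketch of the label-priority rule this commit adds: when two fields map to the same column label, the main-table field is kept and the secondary-table field is dropped. The dedupeByLabel helper, the sample field names, and the labels are invented for illustration only; the actual implementation is the block added to exports.go in the diff that follows.

package main

import (
	"fmt"
	"strings"
)

// dedupeByLabel keeps one field per column label, letting the main table win.
// Illustrative sketch only: the helper, field names, and labels are made up;
// the real logic lives in the ExportsAPI.create handler in exports.go.
func dedupeByLabel(fields []string, labels map[string]string, main string) []string {
	labelIdx := map[string]int{} // column label -> index of the field kept so far
	out := make([]string, 0, len(fields))
	for _, f := range fields {
		label := labels[f]
		if label == "" {
			label = f // fields without a label are keyed by their own name
		}
		isMain := strings.HasPrefix(f, main+".")
		if idx, ok := labelIdx[label]; ok {
			// Label already taken: replace the kept field only if the new one
			// belongs to the main table and the kept one does not.
			if isMain && !strings.HasPrefix(out[idx], main+".") {
				out[idx] = f
			}
			continue
		}
		labelIdx[label] = len(out)
		out = append(out, f)
	}
	return out
}

func main() {
	labels := map[string]string{
		"order.amount":         "金额", // main-table field
		"order_voucher.amount": "金额", // secondary-table field sharing the same column label
		"order_voucher.code":   "券码",
	}
	fields := []string{"order_voucher.amount", "order.amount", "order_voucher.code"}
	fmt.Println(dedupeByLabel(fields, labels, "order"))
	// Output: [order.amount order_voucher.code]
}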
@@ -207,103 +207,141 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
 			}
 		}
 	}
 	// Normalize template fields preserving order
 	normalized := make([]string, 0, len(fs))
 	for _, tf := range fs {
 		if ds == "ymt" && strings.HasPrefix(tf, "order_info.") {
 			tf = strings.Replace(tf, "order_info.", "order.", 1)
 		}
 		if ds == "marketing" && tf == "order_voucher.channel_batch_no" {
 			tf = "order_voucher.channel_activity_id"
 		}
 		normalized = append(normalized, tf)
 	}
 	// Remove invalid YMT fields (key batches)
 	if ds == "ymt" {
 		tmp := make([]string, 0, len(normalized))
 		for _, tf := range normalized {
 			if tf == "order.key_batch_id" || tf == "order.key_batch_name" {
 				continue
 			}
 			tmp = append(tmp, tf)
 		}
 		normalized = tmp
 	}
 	// whitelist validation & soft removal of disallowed fields
 	bad := []string{}
 	filtered = make([]string, 0, len(normalized))
 	for _, tf := range normalized {
 		if !wl[tf] {
 			bad = append(bad, tf)
 			continue
 		}
 		filtered = append(filtered, tf)
 	}
 	if len(bad) > 0 {
 		logging.JSON("ERROR", map[string]interface{}{"event": "fields_not_whitelisted", "removed": bad})
 	}
 	// Field dedup: drop exact duplicates (including duplicates within the main table itself)
 	{
 		seen := make(map[string]bool)
 		deduped := make([]string, 0, len(filtered))
 		removed := []string{}
 		for _, tf := range filtered {
 			if seen[tf] {
 				removed = append(removed, tf)
 				continue
 			}
 			seen[tf] = true
 			deduped = append(deduped, tf)
 		}
 		if len(removed) > 0 {
 			logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_exact", "removed": removed, "reason": "removed exact duplicate fields"})
 		}
 		filtered = deduped
 	}
 	// Dedup of fields shared by main and secondary tables: the main table wins, drop the secondary table's duplicates
 	if ds == "ymt" && (main == "order" || main == "order_info") {
 		mainTableFields := make(map[string]bool)
 		// Collect all main-table field names first
 		for _, tf := range filtered {
 			parts := strings.Split(tf, ".")
 			if len(parts) == 2 && parts[0] == "order" {
 				mainTableFields[parts[1]] = true
 			}
 		}
 		if len(mainTableFields) > 0 {
 			deduped := make([]string, 0, len(filtered))
 			removed := []string{}
 			for _, tf := range filtered {
 				parts := strings.Split(tf, ".")
 				if len(parts) == 2 {
 					if parts[0] == "order" {
 						// Main-table field: keep it
 						deduped = append(deduped, tf)
 					} else {
 						// Secondary-table field: check whether it duplicates a main-table field
 						if mainTableFields[parts[1]] {
 							// Duplicate field name: drop the secondary-table field
 							removed = append(removed, tf)
 							continue
 						}
 						// No duplicate: keep it
 						deduped = append(deduped, tf)
 					}
 				} else {
 					// Unexpected format: keep as-is
 					deduped = append(deduped, tf)
 				}
 			}
 			if len(removed) > 0 {
 				logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated", "removed": removed, "reason": "main and secondary tables share a field; the main table wins"})
 			}
 			filtered = deduped
 		}
 	}
-	// Field match validation (count and order)
-	if len(filtered) != len(fs) {
-		logging.JSON("ERROR", map[string]interface{}{"event": "field_count_mismatch", "template_count": len(fs), "final_count": len(filtered)})
-	}
+	labels := FieldLabels()
+	// Dedup of identical column names (Chinese labels): if fields from several tables share the same column name, keep the main-table field
+	{
+		labelIdx := map[string]int{}
+		deduped := make([]string, 0, len(filtered))
+		removed := []string{}
+		for _, tf := range filtered {
+			label := labels[tf]
+			if label == "" {
+				label = tf
+			}
+			parts := strings.Split(tf, ".")
+			isMain := len(parts) == 2 && parts[0] == main
+			if idx, ok := labelIdx[label]; ok {
+				prev := deduped[idx]
+				prevParts := strings.Split(prev, ".")
+				prevMain := len(prevParts) == 2 && prevParts[0] == main
+				switch {
+				case prevMain:
+					removed = append(removed, tf)
+					continue
+				case isMain:
+					removed = append(removed, prev)
+					deduped[idx] = tf
+					continue
+				default:
+					removed = append(removed, tf)
+					continue
+				}
+			}
+			labelIdx[label] = len(deduped)
+			deduped = append(deduped, tf)
+		}
+		if len(removed) > 0 {
+			logging.JSON("INFO", map[string]interface{}{"event": "fields_deduplicated_by_label", "removed": removed, "reason": "columns with the same name keep the main-table field"})
+		}
+		filtered = deduped
+	}
+	// Field match validation (count and order)
+	if len(filtered) != len(fs) {
+		logging.JSON("ERROR", map[string]interface{}{"event": "field_count_mismatch", "template_count": len(fs), "final_count": len(filtered)})
+	}
 	// relax: creator_in is optional; if the permission grant supplies other bounds, they are merged as equality filters
 	req := exporter.BuildRequest{MainTable: main, Datasource: ds, Fields: filtered, Filters: p.Filters}
 	q, args, err := rrepo.Build(req, wl)
@@ -329,7 +367,6 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
 	}
 	var estimate int64
 	estimate = rrepo.EstimateFastChunked(dataDB, ds, main, p.Filters)
-	labels := FieldLabels()
 	hdrs := make([]string, len(filtered))
 	for i, tf := range filtered {
 		if v, ok := labels[tf]; ok {
@@ -347,7 +384,7 @@ func (a *ExportsAPI) create(w http.ResponseWriter, r *http.Request) {
 	for i := range hdrs {
 		if cnt[hdrs[i]] > 1 {
 			parts := strings.Split(filtered[i], ".")
-			if len(parts) == 2 && parts[0] != "order" {
+			if len(parts) == 2 && parts[0] != main {
 				hdrs[i] = tableLabel(parts[0]) + "." + hdrs[i]
 			}
 		}