package exporter

import (
	"database/sql"
	"log"
	"strings"
	"time"

	"server/internal/logging"
	"server/internal/schema"
	"server/internal/utils"
)

// CursorSQL rewrites a base SELECT so it can be paginated with a keyset
// cursor on (create_time, order_number) instead of LIMIT/OFFSET.
type CursorSQL struct{ ds, main, mt, tsCol, pkCol string }

func NewCursorSQL(ds, main string) *CursorSQL {
	sch := schema.Get(ds, main)
	mt := sch.TableName(main)
	ts, _ := sch.MapField(main, "create_time")
	pk, _ := sch.MapField(main, "order_number")
	return &CursorSQL{ds: ds, main: main, mt: mt, tsCol: ts, pkCol: pk}
}

// InjectSelect prepends the cursor columns (__ts, __pk) to the SELECT list so
// the scan loop can read them without knowing the caller's column layout.
// FROM is matched case-insensitively; queries that do not look like a SELECT
// with a FROM clause are returned unchanged.
func (c *CursorSQL) InjectSelect(base string) string {
	u := strings.ToUpper(base)
	if strings.Index(u, " FROM ") <= 0 || !strings.HasPrefix(u, "SELECT ") {
		return base
	}
	prefix := "SELECT "
	if strings.HasPrefix(u, "SELECT DISTINCT ") {
		prefix = "SELECT DISTINCT "
	} else if strings.HasPrefix(u, "SELECT SQL_NO_CACHE ") {
		prefix = "SELECT SQL_NO_CACHE "
	}
	return prefix + "`" + c.mt + "`." + c.tsCol + " AS __ts, `" + c.mt + "`." + c.pkCol + " AS __pk, " + base[len(prefix):]
}

// AddOrder appends the deterministic sort order the cursor relies on.
func (c *CursorSQL) AddOrder(base string) string {
	return base + " ORDER BY `" + c.mt + "`." + c.tsCol + ", `" + c.mt + "`." + c.pkCol
}

// AddCursor appends the keyset predicate (ts, pk) > (lastTs, lastPk), spelled
// out without row-value syntax for broader compatibility.
func (c *CursorSQL) AddCursor(base string) string {
	cond := " AND ((`" + c.mt + "`." + c.tsCol + ") > ? OR ((`" + c.mt + "`." + c.tsCol + ") = ? AND (`" + c.mt + "`." + c.pkCol + ") > ?))"
	if strings.Contains(strings.ToUpper(base), " WHERE ") {
		return base + cond
	}
	// The base query has no WHERE clause yet, so introduce one rather than
	// emitting a dangling condition.
	return base + " WHERE " + strings.TrimPrefix(cond, " AND ")
}
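// buildCursorPage is a hypothetical illustration (not used elsewhere in this
// package) of how the three helpers above compose into a single keyset page
// query, mirroring the loop in StreamWithCursor below. The parameter names
// are illustrative assumptions.
func buildCursorPage(cur *CursorSQL, base string, args []interface{}, lastTs, lastPk string, batch int) (string, []interface{}) {
	q := cur.InjectSelect(base) // expose __ts/__pk in the SELECT list
	if lastTs != "" || lastPk != "" {
		q = cur.AddCursor(q) // resume strictly after (lastTs, lastPk)
	}
	q = cur.AddOrder(q) + " LIMIT ?"
	out := append([]interface{}{}, args...)
	if lastTs != "" || lastPk != "" {
		out = append(out, lastTs, lastTs, lastPk)
	}
	return q, append(out, batch)
}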
// CountRows counts the rows a base query would return. To keep the wrapped
// subquery cheap it strips trailing ORDER BY / LIMIT / OFFSET clauses and
// replaces the SELECT list with a constant before wrapping it in COUNT(1).
func CountRows(db *sql.DB, base string, args []interface{}) int64 {
	u := strings.ToUpper(base)
	idx := strings.Index(u, " FROM ")
	cut := len(base)
	if idx > 0 {
		for _, tok := range []string{" ORDER BY ", " LIMIT ", " OFFSET "} {
			if p := strings.Index(u[idx:], tok); p >= 0 {
				if cp := idx + p; cp < cut {
					cut = cp
				}
			}
		}
	}
	minimal := base
	if idx > 0 {
		minimal = "SELECT 1" + base[idx:cut]
	}
	q := "SELECT COUNT(1) FROM (" + minimal + ") AS sub"
	var c int64
	if err := db.QueryRow(q, args...).Scan(&c); err != nil {
		logging.JSON("ERROR", map[string]interface{}{"event": "count_error", "error": err.Error(), "sql": q, "args": args})
		log.Printf("count_error sql=%s args=%v err=%v", q, args, err)
		return 0
	}
	return c
}

// CountRowsFast estimates the row count with a single-table query against the
// main table, applying only the filters that map onto it.
func CountRowsFast(db *sql.DB, ds, main string, filters map[string]interface{}) int64 {
	sch := schema.Get(ds, main)
	mt := sch.TableName(main)
	q := "SELECT COUNT(1) FROM `" + mt + "` WHERE 1=1"
	args := []interface{}{}
	// addIn appends an IN (...) predicate for the supported slice types;
	// empty slices add no predicate.
	addIn := func(col string, v interface{}) {
		var items []interface{}
		switch t := v.(type) {
		case []interface{}:
			items = t
		case []string:
			for _, s := range t {
				items = append(items, s)
			}
		case []int:
			for _, n := range t {
				items = append(items, n)
			}
		case []int64:
			for _, n := range t {
				items = append(items, n)
			}
		}
		if len(items) == 0 {
			return
		}
		ph := make([]string, len(items))
		for i := range items {
			ph[i] = "?"
			args = append(args, items[i])
		}
		q += " AND `" + col + "` IN (" + strings.Join(ph, ",") + ")"
	}
	for k, v := range filters {
		tbl, col, ok := sch.FilterColumn(k)
		if !ok || tbl != "order" {
			continue
		}
		switch k {
		case "creator_in":
			addIn(col, v)
		case "create_time_between":
			switch t := v.(type) {
			case []interface{}:
				if len(t) == 2 {
					q += " AND `" + col + "` BETWEEN ? AND ?"
					args = append(args, t[0], t[1])
				}
			case []string:
				if len(t) == 2 {
					q += " AND `" + col + "` BETWEEN ? AND ?"
					args = append(args, t[0], t[1])
				}
			}
		default:
			q += " AND `" + col + "` = ?"
			args = append(args, v)
		}
	}
	// Log the counting SQL so estimation problems can be traced.
	logging.JSON("INFO", map[string]interface{}{
		"event":      "count_fast_query",
		"datasource": ds,
		"main":       main,
		"sql":        q,
		"args":       args,
		"filters":    filters,
	})
	var c int64
	if err := db.QueryRow(q, args...).Scan(&c); err != nil {
		logging.JSON("ERROR", map[string]interface{}{"event": "count_fast_error", "error": err.Error(), "sql": q, "args": args})
		log.Printf("count_fast_error sql=%s args=%v err=%v", q, args, err)
		return 0
	}
	return c
}

// CountRowsFastChunked splits a long create_time range into smaller windows
// and sums the per-window counts, keeping each COUNT scan bounded.
func CountRowsFastChunked(db *sql.DB, ds, main string, filters map[string]interface{}) int64 {
	start, end := "", ""
	if v, ok := filters["create_time_between"]; ok {
		switch t := v.(type) {
		case []interface{}:
			if len(t) == 2 {
				start = utils.ToString(t[0])
				end = utils.ToString(t[1])
			}
		case []string:
			if len(t) == 2 {
				start = t[0]
				end = t[1]
			}
		}
	}
	if start == "" || end == "" {
		return CountRowsFast(db, ds, main, filters)
	}
	// Work out the span of the range in days.
	layout := "2006-01-02 15:04:05"
	st, err1 := time.Parse(layout, start)
	en, err2 := time.Parse(layout, end)
	if err1 != nil || err2 != nil {
		return CountRowsFast(db, ds, main, filters)
	}
	daysDiff := int(en.Sub(st).Hours() / 24)
	// Adaptive chunking by span: up to 15 days query directly, up to 30 days
	// split into 15-day chunks, up to 90 days split by week, and anything
	// longer split by calendar month.
	var ranges [][2]string
	if daysDiff <= 15 {
		return CountRowsFast(db, ds, main, filters)
	} else if daysDiff <= 30 {
		ranges = SplitByDays(start, end, 15)
	} else if daysDiff <= 90 {
		ranges = SplitByWeeks(start, end)
	} else {
		ranges = SplitByMonths(start, end)
		logging.JSON("INFO", map[string]interface{}{
			"event":      "count_chunked_by_months",
			"datasource": ds,
			"main":       main,
			"days_diff":  daysDiff,
			"chunks":     len(ranges),
		})
	}
	var total int64
	for _, rg := range ranges {
		fl := map[string]interface{}{}
		for k, v := range filters {
			fl[k] = v
		}
		fl["create_time_between"] = []string{rg[0], rg[1]}
		total += CountRowsFast(db, ds, main, fl)
	}
	return total
}

// SplitByDays splits [startStr, endStr] into consecutive chunks of stepDays
// days. Adjacent chunks share a boundary timestamp; combined with the
// inclusive BETWEEN in CountRowsFast, a row falling exactly on a boundary is
// counted in both neighbouring chunks.
func SplitByDays(startStr, endStr string, stepDays int) [][2]string {
	layout := "2006-01-02 15:04:05"
	s := strings.TrimSpace(startStr)
	e := strings.TrimSpace(endStr)
	st, err1 := time.Parse(layout, s)
	en, err2 := time.Parse(layout, e)
	if err1 != nil || err2 != nil || !en.After(st) || stepDays <= 0 {
		return [][2]string{{s, e}}
	}
	var out [][2]string
	cur := st
	step := time.Duration(stepDays) * 24 * time.Hour
	for cur.Before(en) {
		nxt := cur.Add(step)
		if nxt.After(en) {
			nxt = en
		}
		out = append(out, [2]string{cur.Format(layout), nxt.Format(layout)})
		cur = nxt
	}
	return out
}

// SplitByWeeks splits the range into consecutive 7-day chunks.
func SplitByWeeks(startStr, endStr string) [][2]string {
	layout := "2006-01-02 15:04:05"
	s := strings.TrimSpace(startStr)
	e := strings.TrimSpace(endStr)
	st, err1 := time.Parse(layout, s)
	en, err2 := time.Parse(layout, e)
	if err1 != nil || err2 != nil || !en.After(st) {
		return [][2]string{{s, e}}
	}
	var out [][2]string
	cur := st
	weekDuration := 7 * 24 * time.Hour
	for cur.Before(en) {
		nxt := cur.Add(weekDuration)
		if nxt.After(en) {
			nxt = en
		}
		out = append(out, [2]string{cur.Format(layout), nxt.Format(layout)})
		cur = nxt
	}
	return out
}
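// Hypothetical illustration of the splitters' output (example values only,
// not real data): endpoints of adjacent chunks touch, which is why rows
// landing exactly on a boundary can be double-counted under inclusive
// BETWEEN.
//
//	SplitByWeeks("2024-01-01 00:00:00", "2024-01-18 00:00:00")
//	// → ["2024-01-01 00:00:00", "2024-01-08 00:00:00"]
//	//   ["2024-01-08 00:00:00", "2024-01-15 00:00:00"]
//	//   ["2024-01-15 00:00:00", "2024-01-18 00:00:00"]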
// SplitByMonths splits the range into calendar-month chunks, for chunked
// counting over very long spans.
func SplitByMonths(startStr, endStr string) [][2]string {
	layout := "2006-01-02 15:04:05"
	s := strings.TrimSpace(startStr)
	e := strings.TrimSpace(endStr)
	st, err1 := time.Parse(layout, s)
	en, err2 := time.Parse(layout, e)
	if err1 != nil || err2 != nil || !en.After(st) {
		return [][2]string{{s, e}}
	}
	var out [][2]string
	cur := st
	for cur.Before(en) {
		// First day of the following month.
		nxt := time.Date(cur.Year(), cur.Month()+1, 1, 0, 0, 0, 0, cur.Location())
		if nxt.After(en) {
			nxt = en
		}
		out = append(out, [2]string{cur.Format(layout), nxt.Format(layout)})
		cur = nxt
	}
	return out
}

type RowTransform func([]string) []string
type RollCallback func(path string, size int64, partRows int64) error
type ProgressCallback func(totalRows int64) error

// StreamWithCursor exports the base query in keyset-paginated batches,
// rolling to a new output file every maxRowsPerFile rows. If the cursor
// query or the row scan fails, it falls back to LIMIT/OFFSET pagination,
// restarting the export from the beginning.
func StreamWithCursor(db *sql.DB, base string, args []interface{}, cur *CursorSQL, batch int, cols []string,
	newWriter func() (RowWriter, error), transform RowTransform, maxRowsPerFile int64,
	onRoll RollCallback, onProgress ProgressCallback) (int64, []string, error) {
	w, err := newWriter()
	if err != nil {
		return 0, nil, err
	}
	_ = w.WriteHeader(cols)
	if onProgress != nil {
		_ = onProgress(0)
	}
	// Two extra slots for the injected __ts and __pk cursor columns.
	out := make([]interface{}, len(cols)+2)
	dest := make([]interface{}, len(cols)+2)
	for i := range out {
		dest[i] = &out[i]
	}
	var total, part, tick int64
	files := []string{}
	lastTs, lastPk := "", ""
	for {
		q2 := cur.InjectSelect(base)
		if lastTs != "" || lastPk != "" {
			q2 = cur.AddCursor(q2)
		}
		q2 = cur.AddOrder(q2) + " LIMIT ?"
		args2 := append([]interface{}{}, args...)
		if lastTs != "" || lastPk != "" {
			args2 = append(args2, lastTs, lastTs, lastPk)
		}
		args2 = append(args2, batch)
		rows, e := db.Query(q2, args2...)
		if e != nil {
			logging.JSON("ERROR", map[string]interface{}{"event": "cursor_query_error", "sql": q2, "args": args2, "error": e.Error()})
			log.Printf("cursor_query_error sql=%s args=%v err=%v", q2, args2, e)
			// Fall back to LIMIT/OFFSET pagination when the cursor query fails.
			_, _, _ = w.Close()
			return pagedOffset(db, base, args, batch, cols, newWriter, transform, maxRowsPerFile, onRoll, onProgress)
		}
		fetched := false
		for rows.Next() {
			fetched = true
			if e := rows.Scan(dest...); e != nil {
				rows.Close()
				// Fall back to LIMIT/OFFSET when the scan fails (likely a column mismatch).
				logging.JSON("ERROR", map[string]interface{}{"event": "cursor_scan_error", "error": e.Error()})
				log.Printf("cursor_scan_error err=%v", e)
				_, _, _ = w.Close()
				return pagedOffset(db, base, args, batch, cols, newWriter, transform, maxRowsPerFile, onRoll, onProgress)
			}
			vals := make([]string, len(cols))
			for i := 0; i < len(cols); i++ {
				// Skip the injected cursor columns (__ts, __pk) at positions 0 and 1.
				idx := i + 2
				if b, ok := out[idx].([]byte); ok {
					vals[i] = string(b)
				} else if out[idx] == nil {
					vals[i] = ""
				} else {
					vals[i] = utils.ToString(out[idx])
				}
			}
			if transform != nil {
				vals = transform(vals)
			}
			_ = w.WriteRow(vals)
			total++
			part++
			tick++
			// Advance the cursor from the injected columns.
			lastTs = utils.ToString(out[0])
			lastPk = utils.ToString(out[1])
			if onProgress != nil && (tick == 1 || tick%200 == 0) {
				_ = onProgress(total)
				logging.JSON("INFO", map[string]interface{}{"event": "progress_tick", "total_rows": total})
			}
			if part >= maxRowsPerFile {
				p, sz, _ := w.Close()
				files = append(files, p)
				if onRoll != nil {
					_ = onRoll(p, sz, part)
				}
				w, e = newWriter()
				if e != nil {
					rows.Close()
					return total, files, e
				}
				_ = w.WriteHeader(cols)
				part = 0
			}
		}
		rows.Close()
		if !fetched {
			break
		}
	}
	p, sz, _ := w.Close()
	if part > 0 || len(files) == 0 {
		files = append(files, p)
		if onRoll != nil {
			_ = onRoll(p, sz, part)
		}
	}
	if onProgress != nil {
		_ = onProgress(total)
	}
	return total, files, nil
}
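// runExport is a hypothetical sketch of wiring StreamWithCursor together; it
// is not called anywhere in this package, and the batch size, roll threshold,
// and callbacks are illustrative assumptions rather than fixed values.
func runExport(db *sql.DB, ds, main, base string, args []interface{}, cols []string,
	newWriter func() (RowWriter, error)) (int64, []string, error) {
	cur := NewCursorSQL(ds, main)
	return StreamWithCursor(db, base, args, cur,
		1000, // rows fetched per keyset batch (assumed)
		cols, newWriter,
		nil,    // no per-row transform
		500000, // roll to a new file every 500k rows (assumed)
		func(path string, size, partRows int64) error { // log each completed part
			log.Printf("export_part path=%s bytes=%d rows=%d", path, size, partRows)
			return nil
		},
		nil) // no progress callback
}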
// pagedOffset is the fallback export path: it pages through the base query
// with LIMIT/OFFSET and without the injected cursor columns. The base query
// carries no ORDER BY at this point, so page boundaries are only stable if
// the underlying plan is deterministic.
func pagedOffset(db *sql.DB, base string, args []interface{}, batch int, cols []string,
	newWriter func() (RowWriter, error), transform RowTransform, maxRowsPerFile int64,
	onRoll RollCallback, onProgress ProgressCallback) (int64, []string, error) {
	w, err := newWriter()
	if err != nil {
		return 0, nil, err
	}
	_ = w.WriteHeader(cols)
	if onProgress != nil {
		_ = onProgress(0)
	}
	files := []string{}
	var total, part, tick int64
	for off := 0; ; off += batch {
		q := "SELECT * FROM (" + base + ") AS sub LIMIT ? OFFSET ?"
		args2 := append(append([]interface{}{}, args...), batch, off)
		rows, e := db.Query(q, args2...)
		if e != nil {
			logging.JSON("ERROR", map[string]interface{}{"event": "offset_query_error", "sql": q, "args": args2, "error": e.Error()})
			log.Printf("offset_query_error sql=%s args=%v err=%v", q, args2, e)
			return total, files, e
		}
		fetched := false
		out := make([]interface{}, len(cols))
		dest := make([]interface{}, len(cols))
		for i := range out {
			dest[i] = &out[i]
		}
		for rows.Next() {
			fetched = true
			if e := rows.Scan(dest...); e != nil {
				rows.Close()
				logging.JSON("ERROR", map[string]interface{}{"event": "offset_scan_error", "error": e.Error()})
				log.Printf("offset_scan_error err=%v", e)
				return total, files, e
			}
			vals := make([]string, len(cols))
			for i := 0; i < len(cols); i++ {
				if b, ok := out[i].([]byte); ok {
					vals[i] = string(b)
				} else if out[i] == nil {
					vals[i] = ""
				} else {
					vals[i] = utils.ToString(out[i])
				}
			}
			if transform != nil {
				vals = transform(vals)
			}
			_ = w.WriteRow(vals)
			total++
			part++
			tick++
			if onProgress != nil && (tick == 1 || tick%200 == 0) {
				_ = onProgress(total)
				logging.JSON("INFO", map[string]interface{}{"event": "progress_tick", "total_rows": total})
			}
			if part >= maxRowsPerFile {
				p, sz, _ := w.Close()
				files = append(files, p)
				if onRoll != nil {
					_ = onRoll(p, sz, part)
				}
				w, e = newWriter()
				if e != nil {
					rows.Close()
					return total, files, e
				}
				_ = w.WriteHeader(cols)
				part = 0
			}
		}
		rows.Close()
		if !fetched {
			break
		}
	}
	p, sz, _ := w.Close()
	if part > 0 || len(files) == 0 {
		files = append(files, p)
		if onRoll != nil {
			_ = onRoll(p, sz, part)
		}
	}
	if onProgress != nil {
		_ = onProgress(total)
	}
	return total, files, nil
}
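// offsetPageSQL is a hypothetical helper (unused by this package) that spells
// out the per-batch query pagedOffset issues, to make the fallback's shape
// explicit: the whole base query is wrapped as a derived table and paged with
// LIMIT/OFFSET, so each later page rescans the rows the earlier pages skipped.
func offsetPageSQL(base string, args []interface{}, batch, off int) (string, []interface{}) {
	q := "SELECT * FROM (" + base + ") AS sub LIMIT ? OFFSET ?"
	return q, append(append([]interface{}{}, args...), batch, off)
}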