add: 新增Word文件分析服务及路由

This commit is contained in:
renzhiyuan 2026-01-29 10:58:18 +08:00
parent e5bbddd58d
commit 27b7191865
10 changed files with 588 additions and 0 deletions

View File

@ -0,0 +1,50 @@
package third_party
import (
"context"
"time"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
// Hsyq holds Volcano Engine (Ark) runtime clients so they can be looked up by
// key instead of being rebuilt for every request.
type Hsyq struct {
// mapClient stores one Ark client per key handed to getClient.
mapClient map[string]*arkruntime.Client
}
// NewHsyq returns an Hsyq whose client map is allocated and empty.
func NewHsyq() *Hsyq {
	clients := map[string]*arkruntime.Client{}
	return &Hsyq{mapClient: clients}
}
// getClient returns the Ark runtime client associated with key, creating and
// caching it on first use.
//
// A cached client is returned as-is. On a cache miss a new client is built for
// the "cn-beijing" region with a two-minute timeout and two retries, stored in
// mapClient, and returned. The map is allocated lazily so a zero-value Hsyq
// does not panic on the first store.
//
// NOTE(review): mapClient is accessed without locking; confirm callers do not
// invoke this concurrently, or guard the map with a mutex.
func (h *Hsyq) getClient(key string) *arkruntime.Client {
	// Hit: reuse the existing client. The lookup must succeed (ok == true)
	// for the cached value to be meaningful.
	if client, ok := h.mapClient[key]; ok {
		return client
	}

	if h.mapClient == nil {
		h.mapClient = make(map[string]*arkruntime.Client)
	}

	// Miss: build the client once and remember it for later calls.
	client := arkruntime.NewClientWithApiKey(
		key,
		arkruntime.WithRegion("cn-beijing"),
		arkruntime.WithTimeout(2*time.Minute),
		arkruntime.WithRetryTimes(2),
	)
	h.mapClient[key] = client
	return client
}
// RequestHsyq sends a chat-completion request to Volcano Engine.
//
// key selects the client obtained from getClient, modelName is placed in the
// request's Model field and prompt in its Messages field. On failure the zero
// ChatCompletionResponse is returned together with the error.
func (h *Hsyq) RequestHsyq(ctx context.Context, key string, modelName string, prompt []*model.ChatCompletionMessage) (model.ChatCompletionResponse, error) {
	client := h.getClient(key)
	resp, err := client.CreateChatCompletion(ctx, model.CreateChatCompletionRequest{
		Model:    modelName,
		Messages: prompt,
	})
	if err != nil {
		return model.ChatCompletionResponse{}, err
	}
	return resp, nil
}

View File

@ -0,0 +1,18 @@
package impl
import (
"ai_scheduler/internal/data/model"
"ai_scheduler/tmpl/dataTemp"
"ai_scheduler/utils"
)
// AdviceAdvicerImpl is the repository type for advisor records. It embeds
// dataTemp.DataTemp and BaseRepository[model.AiTask].
//
// NOTE(review): the constructor binds DataTemp to model.AiAdviceAdvicer while
// the embedded BaseRepository is parameterised with model.AiTask — confirm the
// type argument is intentional.
type AdviceAdvicerImpl struct {
dataTemp.DataTemp
BaseRepository[model.AiTask]
}
// NewAdviceAdvicerImplImpl builds an AdviceAdvicerImpl whose embedded DataTemp
// is created from db and a fresh model.AiAdviceAdvicer. The embedded
// BaseRepository is left at its zero value.
func NewAdviceAdvicerImplImpl(db *utils.Db) *AdviceAdvicerImpl {
	tmpl := dataTemp.NewDataTemp(db, &model.AiAdviceAdvicer{})
	repo := new(AdviceAdvicerImpl)
	repo.DataTemp = *tmpl
	return repo
}

View File

@ -0,0 +1,18 @@
package impl
import (
"ai_scheduler/internal/data/model"
"ai_scheduler/tmpl/dataTemp"
"ai_scheduler/utils"
)
// AdviceProjectImpl is the repository type for project records. It embeds
// dataTemp.DataTemp and BaseRepository[model.AiTask].
//
// NOTE(review): the constructor binds DataTemp to model.AiAdviceProject while
// the embedded BaseRepository is parameterised with model.AiTask — confirm the
// type argument is intentional.
type AdviceProjectImpl struct {
dataTemp.DataTemp
BaseRepository[model.AiTask]
}
// NewAdviceProjectImpl builds an AdviceProjectImpl whose embedded DataTemp is
// created from db and a fresh model.AiAdviceProject. The embedded
// BaseRepository is left at its zero value.
func NewAdviceProjectImpl(db *utils.Db) *AdviceProjectImpl {
	tmpl := dataTemp.NewDataTemp(db, &model.AiAdviceProject{})
	repo := new(AdviceProjectImpl)
	repo.DataTemp = *tmpl
	return repo
}

View File

@ -0,0 +1,18 @@
package impl
import (
"ai_scheduler/internal/data/model"
"ai_scheduler/tmpl/dataTemp"
"ai_scheduler/utils"
)
// AdviceTalkImpl is the repository type for sales-talk records. It embeds
// dataTemp.DataTemp and BaseRepository[model.AiTask].
//
// NOTE(review): the constructor binds DataTemp to model.AiAdviceTalk while the
// embedded BaseRepository is parameterised with model.AiTask — confirm the
// type argument is intentional.
type AdviceTalkImpl struct {
dataTemp.DataTemp
BaseRepository[model.AiTask]
}
// NewAdviceTalkImpl builds an AdviceTalkImpl whose embedded DataTemp is
// created from db and a fresh model.AiAdviceTalk. The embedded BaseRepository
// is left at its zero value.
func NewAdviceTalkImpl(db *utils.Db) *AdviceTalkImpl {
	tmpl := dataTemp.NewDataTemp(db, &model.AiAdviceTalk{})
	repo := new(AdviceTalkImpl)
	repo.DataTemp = *tmpl
	return repo
}

View File

@ -0,0 +1,33 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
package model
import (
"time"
)
// TableNameAiAdviceAdvicer is the database table that AiAdviceAdvicer maps to.
const TableNameAiAdviceAdvicer = "ai_advice_advicer"
// AiAdviceAdvicer mapped from table <ai_advice_advicer>
type AiAdviceAdvicer struct {
AdvicerID int32 `gorm:"column:advicer_id;primaryKey;autoIncrement:true" json:"advicer_id"`
Name string `gorm:"column:name;not null;comment:姓名" json:"name"` // name
Birth time.Time `gorm:"column:birth;not null;comment:用户名称" json:"birth"` // column comment reads "user name"; NOTE(review): looks stale for a birth column — confirm in the schema
Gender int32 `gorm:"column:gender;not null;comment:1:男2女" json:"gender"` // 1: male, 2: female
WorkingYears int32 `gorm:"column:working_years;not null;default:1;comment:工作年限" json:"working_years"` // years of work experience
ContactTags string `gorm:"column:contact_tags;not null;comment:联系方式" json:"contact_tags"` // contact information
NativeRegion string `gorm:"column:native_region;not null;comment:籍贯" json:"native_region"` // native place
DialectFeatures string `gorm:"column:dialect_features;not null;comment:语言风格" json:"dialect_features"` // language style
SentencePatterns string `gorm:"column:sentence_patterns;comment:句子模式" json:"sentence_patterns"` // sentence patterns
ToneTags string `gorm:"column:tone_tags;comment:语气标签" json:"tone_tags"` // tone tags
PersonalityTags string `gorm:"column:personality_tags;not null;comment:个性标签" json:"personality_tags"` // personality tags
SignatureDialogues string `gorm:"column:signature_dialogues;comment:代表性对话示例" json:"signature_dialogues"` // representative dialogue examples
CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"`
}
// TableName returns TableNameAiAdviceAdvicer, the table name for
// AiAdviceAdvicer.
func (*AiAdviceAdvicer) TableName() string {
return TableNameAiAdviceAdvicer
}

View File

@ -0,0 +1,28 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
package model
import (
"time"
)
// TableNameAiAdviceProject is the database table that AiAdviceProject maps to.
const TableNameAiAdviceProject = "ai_advice_project"
// AiAdviceProject mapped from table <ai_advice_project>
type AiAdviceProject struct {
ProjectID int32 `gorm:"column:project_id;primaryKey;autoIncrement:true" json:"project_id"`
Name string `gorm:"column:name;not null;comment:姓名" json:"name"` // name (the column comment uses the word for a person's name)
RegionValue string `gorm:"column:region_value;comment:区域价值话术库" json:"region_value"` // regional-value talking points library
CompetitionComparison string `gorm:"column:competition_comparison;comment:竞品对比话术" json:"competition_comparison"` // competitor-comparison talking points
CoreSellingPoints string `gorm:"column:core_selling_points;comment:项目核心卖点" json:"core_selling_points"` // core selling points of the project
SupportingFacilities string `gorm:"column:supporting_facilities;comment:配套体系" json:"supporting_facilities"` // supporting facilities
DeveloperBacking string `gorm:"column:developer_backing;comment:开发商背书" json:"developer_backing"` // developer endorsement
CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"`
}
// TableName returns TableNameAiAdviceProject, the table name for
// AiAdviceProject.
func (*AiAdviceProject) TableName() string {
return TableNameAiAdviceProject
}

View File

@ -0,0 +1,27 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
package model
import (
"time"
)
// TableNameAiAdviceTalk is the database table that AiAdviceTalk maps to.
const TableNameAiAdviceTalk = "ai_advice_talk"
// AiAdviceTalk mapped from table <ai_advice_talk>
type AiAdviceTalk struct {
TalkID int32 `gorm:"column:talk_id;primaryKey;autoIncrement:true" json:"talk_id"`
NeedsMining string `gorm:"column:needs_mining;comment:需求挖掘话术" json:"needs_mining"` // needs-discovery talking points
PainPointResponse string `gorm:"column:pain_point_response;comment:痛点应对策略" json:"pain_point_response"` // pain-point response strategies
ValueBuilding string `gorm:"column:value_building;comment:价值塑造技巧" json:"value_building"` // value-building techniques
ClosingTechniques string `gorm:"column:closing_techniques;comment:促单话术" json:"closing_techniques"` // deal-closing talking points
CommunicationRhythm string `gorm:"column:communication_rhythm;comment:沟通节奏控制" json:"communication_rhythm"` // communication pacing control
CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"`
}
// TableName returns TableNameAiAdviceTalk, the table name for AiAdviceTalk.
func (*AiAdviceTalk) TableName() string {
return TableNameAiAdviceTalk
}

135
internal/entitys/advicer.go Normal file
View File

@ -0,0 +1,135 @@
package entitys
// WordAnaReq is a request payload carrying the URL of a Word file; it is
// encoded in JSON under the key "word_file_url".
type WordAnaReq struct {
WordFileUrl string `json:"word_file_url"`
}
// ------- Advisor

// DialectFeatures describes the dialect an advisor speaks.
//
// All three JSON keys are snake_case/lowercase so that the sample produced by
// Example decodes into this struct without losing the keyword list.
type DialectFeatures struct {
	Region    string   `json:"region"`    // dialect name/region, e.g. "四川成都话"
	Intensity float64  `json:"intensity"` // how strongly the dialect is used, 0-1
	KeyWords  []string `json:"key_words"` // characteristic dialect words and phrases
}

// Example returns a sample JSON document for DialectFeatures. Its keys match
// the struct's JSON tags, so it round-trips through encoding/json.
func (e *DialectFeatures) Example() string {
	return `{"region":"四川成都话","intensity":0.4,"key_words":["噻","要得","没得","不晓得","是不是","对的嘛","好嘛","晓得嘛","真的","正儿八经","说实话"]}`
}
// SentencePatterns groups an advisor's habitual phrases by the role they play
// in a conversation.
type SentencePatterns struct {
	OpeningMode      []string `json:"openingMode"`      // opening phrases
	ExplanationMode  []string `json:"explanationMode"`  // explanation phrases
	ConfirmationMode []string `json:"confirmationMode"` // confirmation phrases
	SummaryMode      []string `json:"summaryMode"`      // summary phrases
	TransitionMode   []string `json:"transitionMode"`   // transition phrases
}

// Example returns a sample JSON document for SentencePatterns.
func (*SentencePatterns) Example() string {
	const sample = `{"openingMode":["我是你们的职业顾问","我给你介绍一下","我们先来看一下"],"explanationMode":["是这样的","我跟你讲","因为...所以...","你发现没得"],"confirmationMode":["对吧?","是不是嘛?","你晓得不?","明白了噻?"],"summaryMode":["所以说","总结一下哈","简单说就是"],"transitionMode":["然后的话","再其次","除此之外","还有一点"]}`
	return sample
}
// PersonalityTags is a list of personality labels.
type PersonalityTags []string

// Example returns a sample JSON array for PersonalityTags.
func (*PersonalityTags) Example() string {
	const sample = `["耐心细致","本地通","数据控","关系型","细节控","乐观积极","诚实可信"]`
	return sample
}
// ToneTags scores the tone of an advisor's speech along five dimensions.
type ToneTags struct {
	Enthusiasm   float64 `json:"enthusiasm"`
	Patience     float64 `json:"patience"`
	Confidence   float64 `json:"confidence"`
	Friendliness float64 `json:"friendliness"`
	Persuasion   float64 `json:"persuasion"`
}

// Example returns a sample JSON document for ToneTags.
func (*ToneTags) Example() string {
	const sample = `{"enthusiasm":0.8,"patience":0.9,"confidence":0.85,"friendliness":0.75,"persuasion":0.7}`
	return sample
}
// SignatureDialogues is one representative dialogue sample: the situation and
// what the advisor said.
//
// NOTE(review): Example returns a JSON array of these objects, so it decodes
// into []SignatureDialogues rather than a single value — confirm callers
// expect that.
type SignatureDialogues struct {
	Context  string `json:"context"`  // situation in which the line was spoken
	Dialogue string `json:"dialogue"` // the advisor's explanation/reply
}

// Example returns a sample JSON array of SignatureDialogues objects.
func (*SignatureDialogues) Example() string {
	const sample = `[{"context":"客户质疑地块大小","dialogue":"哥14亩确实不大但你要在成都是2.5环内城买房这种是个普遍存在的一个现象。你看万景和绿城都是13亩中铁建只有8.8亩339那个帮泰只有11亩。我们虽然地小但楼间距开阔啊看过去都是200多米"},{"context":"客户担心物业费高","dialogue":"姐我懂你意思我们也觉得物业费是有点贵。但招商物业是铂金服务有管家送外卖、免费宠物喂养这些增值服务。你算一下就算贵一块钱十年也就多14000但好物业让房子增值不止这点"},{"context":"客户犹豫价格","dialogue":"说实话这个地段的地价都比28板块贵5000多但我们单价只贵3000。你看龙湖滨江云河颂套内单价都36000了我们才33000真的性价比高现在不买以后这个板块可能就买不起了。"},{"context":"介绍项目优势","dialogue":"我跟你讲我们项目就三个核心地段在2.5环内槐树店板块,产品是全玻璃幕墙+三层中空玻璃价格是板块最低门槛。花400多万买这里真的是可上可下"},{"context":"处理客户异议","dialogue":"我懂你的担心很多客户刚开始也这样想。但你看嘛我们旁边那块48亩地还没拍以后肯定也是大品牌开发商来做豪宅。到时候我们这边全是高端盘价格只会往上走"}]`
	return sample
}
// ------- Project

// RegionValue is the regional-value talking points library.
//
// NOTE(review): Example's values are string arrays, which do not decode into
// map[string]string — confirm the intended value type.
type RegionValue map[string]string

// Example returns a sample JSON document of regional-value talking points,
// keyed by topic with a list of phrases per topic.
func (*RegionValue) Example() string {
	const sample = `{"区位层级":["成华区2.5环内侧,这个位置真的稀缺","槐树店板块现在是成华区的number one板块","北接三板桥商圈,西靠万象城,东临火车东站","属于淮舜板块,万象城东的核心位置"],"地价论证":["我们地价19500华晨府20400棕榈也是2万+","2.5环内现在地价没有低于19000的","面粉贵了,面包不可能便宜"],"板块热度":["从21年新希望锦麟一品开始这边全是高端盘","龙湖最高端的滨江系列在这里,新希望的锦麟系列也在这里","各大品牌开发商争相恐后都在这边拿地"],"发展规划":["槐树店板块是棋盘成钢之后第二个富人区","整个板块都是300万到900万的总价段","未来全是改善型住宅,没有刚需盘"]}`
	return sample
}
// CompetitionComparison holds competitor-comparison talking points.
//
// NOTE(review): Example's values are nested objects, which do not decode into
// map[string]string — confirm the intended value type.
type CompetitionComparison map[string]string

// Example returns a sample JSON document of competitor comparisons, keyed by
// competitor name with a map of talking points per competitor.
func (*CompetitionComparison) Example() string {
	const sample = `{"龙湖滨江云河颂":{"优点承认":"龙湖位置确实好,看沙河公园","价格对比":"他们单价32000-35000但得房率只有95%套内算下来36000+","优势突出":"我们得房率118平实得132平套内单价才33000"},"邦泰云锦":{"定位相似":"邦泰也是首个项目,要打造口碑","价格参考":"他们当时12800拿地现在卖34000","品质对比":"我们外立面全玻璃幕墙比他们成本高30%"},"招商景程序":{"板块差异":"他们在28板块我们是槐树店板块","地价对比":"他们地价13000左右我们19500","价值判断":"地价比我们便宜6000但单价差不多我们更值"},"万景/绿城":{"地块对比":"他们都是13-15亩我们14亩差不多","楼间距":"他们楼间距只有30米我们有200多米","产品力":"我们全屋木饰面交付,他们很多是简装"}}`
	return sample
}
// CoreSellingPoints holds the core selling points of a project.
//
// NOTE(review): Example's values are nested objects, which do not decode into
// map[string]string — confirm the intended value type.
type CoreSellingPoints map[string]string

// Example returns a sample JSON document of core selling points, keyed by
// category with a map of details per category.
func (*CoreSellingPoints) Example() string {
	const sample = `{"规划设计":{"楼间距":"一号楼看公园280米二号楼看邦泰190米三号楼间距71米","布局":"L型布局最大化利用公园景观","容积率":"2.0,在主城区非常低","绿化率":"35%加公园绿化实际超过50%"},"建筑品质":{"外立面":"全玻璃幕墙+铝单板,浅金色铝板","玻璃":"三层中空氩气玻璃(非双层),成本高一倍","层高":"3.2米层高豪宅标准很多盘只有3.05米)","架空层":"6米挑高全架空设计"},"户型产品":{"118户型":"118平做四房三卫成都唯一实得132平","140户型":"270度全景舱两面采光","得房率":"赠送多,实得率高","功能设计":"动静分区,双套房设计"},"装修标准":{"三大件":"菲斯曼地暖,日立中央空调,霍尼韦尔新风","厨房":"方太Y9烟灶20套洗碗机安吉尔净水器","卫浴":"高仪全套,无缝拼接台盆","地面":"简一大理石瓷砖,无缝拼接","墙面":"全屋木饰面,部分硬质软包"}}`
	return sample
}
// SupportingFacilities describes the facilities surrounding a project.
//
// NOTE(review): Example's values are nested objects, which do not decode into
// map[string]string — confirm the intended value type.
type SupportingFacilities map[string]string

// Example returns a sample JSON document of supporting facilities, keyed by
// facility category with a map of details per category.
func (*SupportingFacilities) Example() string {
	const sample = `{"交通配套":{"地铁":"双店路站350米7号线槐树店站550米4号线未来12号线","道路":"中环路、成洛大道到春熙路5个站","通达性":"到火车东站2个站到华西30分钟内"},"商业配套":{"高端商圈":"万象城1.6公里三板桥1.8公里","社区商业":"成华奥园广场、十里风荷底商","未来商业":"上东里商业(明年开业,有永辉超市)"},"生态配套":{"公园体系":"楼下40亩槐树店公园400米沙河公园700米多宝寺公园","绿道系统":"沿沙河跑到塔山公园20多公里","环境优势":"2.5环内唯一有300亩生态带的楼盘"},"教育配套":{"幼儿园":"楼下公立幼儿园明年9月招生","小学":"城市附小锦汇东城(成华区生源最好的学校)","生源优势":"周边新盘都是300万+,生源纯粹"},"医疗配套":{"三甲医院":"市六医院、市二医院3公里内","顶尖医疗":"华西医院锦江院区30分钟车程","便利性":"到华西本部也是30分钟内"}}`
	return sample
}
// DeveloperBacking holds developer-endorsement talking points, one string per
// topic.
type DeveloperBacking map[string]string

// Example returns a sample JSON document for DeveloperBacking.
func (*DeveloperBacking) Example() string {
	const sample = `{"公司实力":"中信资产,多元化民营企业","资金安全":"在河南渑池有铝土矿每年稳定收入10亿","开发经验":"宜宾有5个项目贵州2个成都是首个项目","合作方":"招商铂金物业,首次与外部企业合作"}`
	return sample
}
// ------- Sales talk

// NeedsMining holds needs-discovery talking points.
//
// NOTE(review): Example's values are string arrays, which do not decode into
// map[string]string — confirm the intended value type.
type NeedsMining map[string]string

// Example returns a sample JSON document of needs-discovery questions, keyed
// by need category with a list of questions per category.
func (*NeedsMining) Example() string {
	const sample = `{"预算需求":["你们总价想控制在多少以内?","是考虑按揭还是一次性?","月供能接受多少范围?"],"居住需求":["几个人住?有老人小孩吗?","主要是自住还是考虑投资?","现在住哪里?想改善哪些方面?"],"通勤需求":["在哪个位置上班?","主要开车还是坐地铁?","对地铁距离有要求吗?"],"偏好需求":["更看重地段还是产品本身?","喜欢安静的还是热闹的?","对楼层、朝向有偏好吗?"],"时间需求":["打算什么时候入住?","现在看到什么阶段了?","决策需要跟家人商量吗?"]}`
	return sample
}
// PainPointResponse holds pain-point response strategies.
//
// NOTE(review): Example's values are nested objects, which do not decode into
// map[string]string — confirm the intended value type.
type PainPointResponse map[string]string

// Example returns a sample JSON document of objection-handling strategies,
// keyed by customer concern with a map of response steps per concern.
func (*PainPointResponse) Example() string {
	const sample = `{"地块太小":{"承认事实":"14亩确实不大","普遍现象":"2.5环内都是小地块万景13亩中铁建8.8亩","转化优势":"但人少安静,楼间距反而更开阔","对比竞品":"339的邦泰才11亩人家上千万豪宅"},"物业费高":{"理解感受":"我懂你,我们也觉得有点贵","价值分析":"但6块里3块是增值服务保洁、送外卖、宠物喂养","价格补贴":"前三年补贴到5块跟其他盘差不多","保值论证":"好物业让房子增值,紫东星座就是例子"},"开发商不知名":{"坦诚相告":"没听说过很正常,我们是成都首个项目","实力展示":"但中信主业是矿产房地产只占20%,资金安全","案例对比":"邦泰、奥兰刚开始也没人知道,现在都认可了","品质承诺":"首个项目更要做好口碑,不计成本打造"},"周边有社区用地":{"明确规划":"那边是社区服务中心,最多三层楼","距离保证":"离我们有100米不影响采光","未来价值":"社区配套齐全,生活更方便","对比安慰":"总比修高楼挡光好嘛"},"价格偏高":{"地段价值":"地段值这个价2.5环内没得选","产品价值":"全玻璃幕墙、3.2米层高,成本就高","比较价值":"比龙湖、邦泰单价都低","门槛价值":"板块最低总价,以后更买不起"}}`
	return sample
}
// ValueBuilding holds value-building techniques.
//
// NOTE(review): Example's values are string arrays, which do not decode into
// map[string]string — confirm the intended value type.
type ValueBuilding map[string]string

// Example returns a sample JSON document of value-building phrases, keyed by
// the kind of value being built with a list of phrases per kind.
func (*ValueBuilding) Example() string {
	const sample = `{"地段价值塑造":["买房最重要的是地段、地段、还是地段","核心地段的核心资产才保值增值","2.5环内的地卖一块少一块,不可再生"],"产品价值塑造":["我们是用改善的价格,买豪宅的标准","很多细节都是3000万豪宅才有的配置","外立面成本比竞品高30%,但单价差不多"],"稀缺性塑造":["整个槐树店板块我们是唯一有400万以下产品的","118四房三卫全成都找不出第二个","200多米楼间距主城区绝版"],"未来价值塑造":["现在垫垫脚买了,未来换房可上可下","旁边48亩地以后拍出来肯定刷新地价","板块全部交付后,城市界面会完全不一样"]}`
	return sample
}
// ClosingTechniques holds deal-closing talking points.
//
// NOTE(review): Example's values are objects whose own values are string
// arrays, which do not decode into map[string]string — confirm the intended
// value type.
type ClosingTechniques map[string]string

// Example returns a sample JSON document of closing techniques: technique
// group, then tactic, then a list of phrases.
func (*ClosingTechniques) Example() string {
	const sample = `{"紧迫感营造":{"时间紧迫":["今天是月底最后一天,领导有压力价格可谈","我们刚刚开盘,还有额外优惠","月底冲业绩,价格最有弹性"],"房源稀缺":["118只剩20多套了好楼层不多","这栋楼就60户卖一套少一套","特价房只有这几套,今天不定可能就没了"]},"优惠策略":{"价格优惠":["今天定的话,我可以跟领导申请额外折扣","买车位的话,总价多给两个点优惠","一次性付款再优惠一个点"],"附加价值":["送一年物业费","送品牌家电礼包","优先选车位"]},"决策推动":{"小步推进":["要不先交个小定保留房源?","可以先排个号,有优惠优先通知你","今天不定的话,我帮你留意好楼层"],"风险规避":["现在不定,下次来可能就不是这个价了","好楼层不等人,上次有客户犹豫一天就没了","月底优惠政策可能明天就没了"]},"成交确认":{"二选一":["您是选902还是1002","倾向118还是140","喜欢东边户还是西边户?"],"假设成交":["那我给您准备合同了","车位您选哪个位置?","按揭资料我带您去准备"]}}`
	return sample
}
// CommunicationRhythm describes how a sales conversation is paced.
//
// NOTE(review): Example's values are nested objects, which do not decode into
// map[string]string — confirm the intended value type.
type CommunicationRhythm map[string]string

// Example returns a sample JSON document of conversation stages, each with
// its share of time, goal and key actions.
func (*CommunicationRhythm) Example() string {
	const sample = `{"开场阶段":{"时间占比":"5%","目标":"建立关系,了解需求","关键动作":"亲切称呼,简单寒暄,确认看房重点"},"沙盘讲解":{"时间占比":"30%","目标":"建立价值认知","关键动作":"板块价值→周边配套→项目亮点→开发商介绍"},"样板间体验":{"时间占比":"35%","目标":"激发购买欲望","关键动作":"让客户亲自体验(开关窗、看视野、感受层高)"},"价格谈判":{"时间占比":"25%","目标":"促成决策","关键动作":"算价→解释优惠→对比竞品→促单"},"结束跟进":{"时间占比":"5%","目标":"建立长期联系","关键动作":"送资料,加微信,约定下次,小礼物"}}`
	return sample
}

View File

@ -0,0 +1,246 @@
package file_download
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"mime"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/unidoc/unioffice/document"
)
// DownloadFile fetches url with an HTTP GET (30-second client timeout) and
// returns the response body together with a file name derived from the
// response or the URL.
//
// A non-200 status is reported as an error. When validFunc is non-nil it is
// called with the response before the body is read, and any error it returns
// aborts the download.
func DownloadFile(url string, validFunc func(resp *http.Response) error) ([]byte, string, error) {
	httpClient := http.Client{Timeout: 30 * time.Second}

	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		return nil, "", fmt.Errorf("HTTP %d: %s", code, resp.Status)
	}

	// Let the caller reject the response (e.g. by content type) before the
	// body is pulled into memory.
	if validFunc != nil {
		if verr := validFunc(resp); verr != nil {
			return nil, "", verr
		}
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, "", err
	}
	return body, getFilenameFromURL(url, resp), nil
}
// getFilenameFromURL chooses a file name for a downloaded resource.
//
// Sources are tried in order:
//  1. the filename parameter of the Content-Disposition response header,
//  2. the last element of the URL path,
//  3. a generated default of the form "word_<unix-seconds>.docx".
//
// Names from sources 1 and 2 are passed through sanitizeFilename.
func getFilenameFromURL(urlStr string, resp *http.Response) string {
	// 1. Prefer the name advertised by the server. An empty name is treated
	// as absent so the URL can still provide one.
	if name := filenameFromContentDisposition(resp.Header.Get("Content-Disposition")); name != "" {
		return sanitizeFilename(name)
	}

	// 2. Fall back to the last path element of the URL.
	if parsedURL, err := url.Parse(urlStr); err == nil && parsedURL.Path != "" {
		name := filepath.Base(parsedURL.Path)
		if name != "" && name != "." && name != "/" && name != string(filepath.Separator) {
			return sanitizeFilename(name)
		}
	}

	// 3. Nothing usable: generate a default name.
	return fmt.Sprintf("word_%d.docx", time.Now().Unix())
}

// filenameFromContentDisposition extracts the file name from a
// Content-Disposition header value, returning "" when there is none.
//
// The header is parsed with mime.ParseMediaType so that quoted values,
// additional parameters after the file name, and RFC 2231 "filename*" values
// are handled. If the header is malformed, a best-effort scan for "filename="
// is used instead.
func filenameFromContentDisposition(header string) string {
	if header == "" {
		return ""
	}
	if _, params, err := mime.ParseMediaType(header); err == nil {
		return strings.TrimSpace(params["filename"])
	}

	// Malformed header: take whatever follows "filename=" up to the next
	// parameter separator and strip quotes and spaces.
	idx := strings.Index(header, "filename=")
	if idx < 0 {
		return ""
	}
	rest := header[idx+len("filename="):]
	if end := strings.IndexByte(rest, ';'); end >= 0 {
		rest = rest[:end]
	}
	return strings.Trim(rest, `"' `)
}

// sanitizeFilename replaces characters that are illegal in file names
// (/ \ : * ? " < > |) with "_" and appends ".docx" when the name contains no
// dot.
func sanitizeFilename(filename string) string {
	const illegalChars = `/\:*?"<>|`
	filename = strings.Map(func(r rune) rune {
		if strings.ContainsRune(illegalChars, r) {
			return '_'
		}
		return r
	}, filename)

	// Make sure the name carries an extension.
	if !strings.Contains(filename, ".") {
		filename += ".docx"
	}
	return filename
}
// GetWordTextFromURL downloads the Word document at url and returns its plain
// text followed by the file name chosen for the download.
//
// validFunc, when non-nil, is forwarded to DownloadFile to vet the HTTP
// response. Download and parse failures are wrapped so the failing stage is
// visible in the error message.
func GetWordTextFromURL(url string, validFunc func(resp *http.Response) error) (string, string, error) {
	// Stage 1: fetch the raw bytes.
	raw, name, err := DownloadFile(url, validFunc)
	if err != nil {
		return "", "", fmt.Errorf("下载失败: %w", err)
	}

	// Stage 2: extract the text.
	content, err := parseWordContent(raw)
	if err != nil {
		return "", "", fmt.Errorf("解析失败: %w", err)
	}

	return content, name, nil
}
// parseWordContent extracts the text of a Word document held in memory.
//
// This is the simple variant: it concatenates the text of every run in each
// paragraph returned by doc.Paragraphs(), ends each paragraph with a newline,
// and trims surrounding whitespace from the final result.
func parseWordContent(data []byte) (string, error) {
	doc, err := document.Read(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return "", err
	}
	defer doc.Close()

	var sb strings.Builder
	for _, para := range doc.Paragraphs() {
		runs := para.Runs()
		for i := range runs {
			sb.WriteString(runs[i].Text())
		}
		// One line per paragraph.
		sb.WriteByte('\n')
	}

	return strings.TrimSpace(sb.String()), nil
}
// parseWordFile inspects the Word file at filePath and returns a summary map.
//
// The map always contains "filepath" and "format" (the lower-cased
// extension). For ".docx" it adds "paragraphs" (non-blank paragraph texts),
// "tables" (one map per table with "rows" and "row_count"), "paragraph_count"
// and "table_count". For ".doc" it only records "binary_size" and a "note",
// since that format is not parsed here. When the file can be stat'ed,
// "filesize" and "modified" are added as well.
func parseWordFile(filePath string) (map[string]interface{}, error) {
	ext := strings.ToLower(filepath.Ext(filePath))
	result := map[string]interface{}{"filepath": filePath, "format": ext}

	switch ext {
	case ".docx":
		doc, err := document.Open(filePath)
		if err != nil {
			return nil, err
		}
		defer doc.Close()

		// Paragraph text: join the runs, keep only non-blank paragraphs.
		var paragraphs []string
		for _, para := range doc.Paragraphs() {
			var line strings.Builder
			for _, run := range para.Runs() {
				line.WriteString(run.Text())
			}
			if text := line.String(); strings.TrimSpace(text) != "" {
				paragraphs = append(paragraphs, text)
			}
		}

		// Table content: each row is a map from cell index to cell text.
		var tables []map[string]interface{}
		for _, tbl := range doc.Tables() {
			var rows []map[int]string
			for _, row := range tbl.Rows() {
				cells := make(map[int]string)
				for idx, cell := range row.Cells() {
					var cellText strings.Builder
					for _, para := range cell.Paragraphs() {
						for _, run := range para.Runs() {
							cellText.WriteString(run.Text())
						}
					}
					cells[idx] = cellText.String()
				}
				rows = append(rows, cells)
			}
			tables = append(tables, map[string]interface{}{
				"rows":      rows,
				"row_count": len(rows),
			})
		}

		result["paragraphs"] = paragraphs
		result["tables"] = tables
		result["paragraph_count"] = len(paragraphs)
		result["table_count"] = len(tables)

	case ".doc":
		// Legacy .doc needs a dedicated parser; only its size is recorded.
		raw, err := os.ReadFile(filePath)
		if err != nil {
			return nil, err
		}
		result["binary_size"] = len(raw)
		result["note"] = ".doc 文件需要专门的解析库"
	}

	// File metadata is best-effort: a failed stat just omits the two keys.
	info, _ := os.Stat(filePath)
	if info == nil {
		return result, nil
	}
	result["filesize"] = info.Size()
	result["modified"] = info.ModTime()
	return result, nil
}
// IsWordFile reports whether resp looks like a Word document, judged by its
// Content-Type header. It returns nil when the lower-cased header contains one
// of the accepted media types and an error otherwise.
func IsWordFile(resp *http.Response) error {
	ct := strings.ToLower(resp.Header.Get("Content-Type"))

	switch {
	case strings.Contains(ct, "application/msword"),
		strings.Contains(ct, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
		strings.Contains(ct, "application/vnd.ms-word"),
		// Some servers serve Word files as a generic binary stream.
		strings.Contains(ct, "application/octet-stream"):
		return nil
	}
	return errors.New("错误的文件类型")
}

View File

@ -0,0 +1,15 @@
package services
import (
"testing"
)
// Test_WordAna calls Run and then invokes advicer.WordAnat on a .docx hosted
// on an Aliyun OSS URL. Nothing returned by the call is checked, so the test
// only fails if one of the calls panics.
//
// NOTE(review): Run is defined outside this file; presumably it initialises
// the package-level advicer, which is otherwise nil here — confirm.
// NOTE(review): the target is a remote URL, so this presumably needs network
// access — confirm before running it in CI.
func Test_WordAna(t *testing.T) {
Run()
advicer.WordAnat("https://attachment-public.oss-cn-hangzhou.aliyuncs.com/ai-scheduler/data-analytics/word/content.docx")
}
// Package-level fixtures used by the tests in this file.
var (
// advicer is the service under test. It is declared without a value here.
// NOTE(review): presumably assigned by Run — confirm.
advicer *AdviceService
)