diff --git a/internal/biz/llm_service/third_party/hsyq.go b/internal/biz/llm_service/third_party/hsyq.go
new file mode 100644
index 0000000..763db1d
--- /dev/null
+++ b/internal/biz/llm_service/third_party/hsyq.go
@@ -0,0 +1,65 @@
+package third_party
+
+import (
+	"context"
+	"time"
+
+	"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
+	"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
+)
+
+// Hsyq calls the Volcengine Ark runtime and keeps one SDK client per API key
+// so that repeated requests with the same key reuse it.
+type Hsyq struct {
+	mapClient map[string]*arkruntime.Client // API key -> client
+}
+
+// NewHsyq returns an Hsyq with an empty client cache.
+func NewHsyq() *Hsyq {
+	return &Hsyq{
+		mapClient: make(map[string]*arkruntime.Client),
+	}
+}
+
+// getClient returns the client cached for key, creating and caching one on
+// first use.
+//
+// NOTE(review): mapClient is accessed without a lock; if one Hsyq is shared
+// between goroutines this needs a mutex — confirm against callers.
+func (h *Hsyq) getClient(key string) *arkruntime.Client {
+	// Cache hit: reuse the existing client.
+	if client, ok := h.mapClient[key]; ok {
+		return client
+	}
+
+	// A zero-value Hsyq has a nil map, and writing to a nil map panics.
+	if h.mapClient == nil {
+		h.mapClient = make(map[string]*arkruntime.Client)
+	}
+
+	client := arkruntime.NewClientWithApiKey(
+		key,
+		arkruntime.WithRegion("cn-beijing"),
+		arkruntime.WithTimeout(2*time.Minute),
+		arkruntime.WithRetryTimes(2),
+	)
+	h.mapClient[key] = client
+	return client
+}
+
+// RequestHsyq sends prompt to the Ark chat-completion API using the model
+// modelName and the API key key. On failure it returns an empty response
+// together with the SDK error.
+func (h *Hsyq) RequestHsyq(ctx context.Context, key string, modelName string, prompt []*model.ChatCompletionMessage) (model.ChatCompletionResponse, error) {
+	req := model.CreateChatCompletionRequest{
+		Model:    modelName,
+		Messages: prompt,
+	}
+
+	resp, err := h.getClient(key).CreateChatCompletion(ctx, req)
+	if err != nil {
+		return model.ChatCompletionResponse{}, err
+	}
+
+	return resp, nil
+}
diff --git a/internal/data/impl/advice_advicer_impl.go b/internal/data/impl/advice_advicer_impl.go
new file mode 100644
index 0000000..ffbbeaa
--- /dev/null
+++ b/internal/data/impl/advice_advicer_impl.go
@@ -0,0 +1,25 @@
+package impl
+
+import (
+	"ai_scheduler/internal/data/model"
+	"ai_scheduler/tmpl/dataTemp"
+	"ai_scheduler/utils"
+)
+
+// AdviceAdvicerImpl embeds a DataTemp and a BaseRepository.
+//
+// NOTE(review): BaseRepository is instantiated with model.AiTask, not
+// model.AiAdviceAdvicer, and the constructor leaves it at its zero value —
+// confirm this is intended.
+type AdviceAdvicerImpl struct {
+	dataTemp.DataTemp
+	BaseRepository[model.AiTask]
+}
+
+// NewAdviceAdvicerImplImpl returns an AdviceAdvicerImpl whose DataTemp is
+// built from db and a new model.AiAdviceAdvicer.
+func NewAdviceAdvicerImplImpl(db *utils.Db) *AdviceAdvicerImpl {
+	return &AdviceAdvicerImpl{
+		DataTemp: *dataTemp.NewDataTemp(db, new(model.AiAdviceAdvicer)),
+	}
+}
diff --git a/internal/data/impl/advice_project_impl.go b/internal/data/impl/advice_project_impl.go
new file mode 100644
index 0000000..ca75899
--- /dev/null
+++ b/internal/data/impl/advice_project_impl.go
@@ -0,0 +1,22 @@
+package impl
+
+import (
+	"ai_scheduler/internal/data/model"
+	"ai_scheduler/tmpl/dataTemp"
+	"ai_scheduler/utils"
+)
+
+// AdviceProjectImpl embeds a DataTemp and a BaseRepository instantiated for
+// model.AiTask.
+type AdviceProjectImpl struct {
+	dataTemp.DataTemp
+	BaseRepository[model.AiTask]
+}
+
+// NewAdviceProjectImpl returns an AdviceProjectImpl whose DataTemp is built
+// from db and a new model.AiAdviceProject; BaseRepository keeps its zero value.
+func NewAdviceProjectImpl(db *utils.Db) *AdviceProjectImpl {
+	repo := new(AdviceProjectImpl)
+	repo.DataTemp = *dataTemp.NewDataTemp(db, new(model.AiAdviceProject))
+	return repo
+}
diff --git a/internal/data/impl/advice_talk_impl.go b/internal/data/impl/advice_talk_impl.go
new file mode 100644
index 0000000..aaf9ba3
--- /dev/null
+++ b/internal/data/impl/advice_talk_impl.go
@@ -0,0 +1,22 @@
+package impl
+
+import (
+	"ai_scheduler/internal/data/model"
+	"ai_scheduler/tmpl/dataTemp"
+	"ai_scheduler/utils"
+)
+
+// AdviceTalkImpl embeds a DataTemp and a BaseRepository instantiated for
+// model.AiTask.
+type AdviceTalkImpl struct {
+	dataTemp.DataTemp
+	BaseRepository[model.AiTask]
+}
+
+// NewAdviceTalkImpl returns an AdviceTalkImpl whose DataTemp is built from db
+// and a new model.AiAdviceTalk; BaseRepository keeps its zero value.
+func NewAdviceTalkImpl(db *utils.Db) *AdviceTalkImpl {
+	repo := new(AdviceTalkImpl)
+	repo.DataTemp = *dataTemp.NewDataTemp(db, new(model.AiAdviceTalk))
+	return repo
+}
diff --git a/internal/data/model/ai_advice_advicer.gen.go b/internal/data/model/ai_advice_advicer.gen.go
new file mode 100644
index 0000000..c525ec6
--- /dev/null
+++ b/internal/data/model/ai_advice_advicer.gen.go
@@ -0,0 +1,33 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+ +package model + +import ( + "time" +) + +const TableNameAiAdviceAdvicer = "ai_advice_advicer" + +// AiAdviceAdvicer mapped from table +type AiAdviceAdvicer struct { + AdvicerID int32 `gorm:"column:advicer_id;primaryKey;autoIncrement:true" json:"advicer_id"` + Name string `gorm:"column:name;not null;comment:姓名" json:"name"` // 姓名 + Birth time.Time `gorm:"column:birth;not null;comment:用户名称" json:"birth"` // 用户名称 + Gender int32 `gorm:"column:gender;not null;comment:1:男,2:女" json:"gender"` // 1:男,2:女 + WorkingYears int32 `gorm:"column:working_years;not null;default:1;comment:工作年限" json:"working_years"` // 工作年限 + ContactTags string `gorm:"column:contact_tags;not null;comment:联系方式" json:"contact_tags"` // 联系方式 + NativeRegion string `gorm:"column:native_region;not null;comment:籍贯" json:"native_region"` // 籍贯 + DialectFeatures string `gorm:"column:dialect_features;not null;comment:语言风格" json:"dialect_features"` // 语言风格 + SentencePatterns string `gorm:"column:sentence_patterns;comment:句子模式" json:"sentence_patterns"` // 句子模式 + ToneTags string `gorm:"column:tone_tags;comment:语气标签" json:"tone_tags"` // 语气标签 + PersonalityTags string `gorm:"column:personality_tags;not null;comment:个性标签" json:"personality_tags"` // 个性标签 + SignatureDialogues string `gorm:"column:signature_dialogues;comment:代表性对话示例" json:"signature_dialogues"` // 代表性对话示例 + CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"` +} + +// TableName AiAdviceAdvicer's table name +func (*AiAdviceAdvicer) TableName() string { + return TableNameAiAdviceAdvicer +} diff --git a/internal/data/model/ai_advice_project.gen.go b/internal/data/model/ai_advice_project.gen.go new file mode 100644 index 0000000..5829e36 --- /dev/null +++ b/internal/data/model/ai_advice_project.gen.go @@ -0,0 +1,28 @@ +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. 
+ +package model + +import ( + "time" +) + +const TableNameAiAdviceProject = "ai_advice_project" + +// AiAdviceProject mapped from table +type AiAdviceProject struct { + ProjectID int32 `gorm:"column:project_id;primaryKey;autoIncrement:true" json:"project_id"` + Name string `gorm:"column:name;not null;comment:姓名" json:"name"` // 姓名 + RegionValue string `gorm:"column:region_value;comment:区域价值话术库" json:"region_value"` // 区域价值话术库 + CompetitionComparison string `gorm:"column:competition_comparison;comment:竞品对比话术" json:"competition_comparison"` // 竞品对比话术 + CoreSellingPoints string `gorm:"column:core_selling_points;comment:项目核心卖点" json:"core_selling_points"` // 项目核心卖点 + SupportingFacilities string `gorm:"column:supporting_facilities;comment:配套体系" json:"supporting_facilities"` // 配套体系 + DeveloperBacking string `gorm:"column:developer_backing;comment:开发商背书" json:"developer_backing"` // 开发商背书 + CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"` +} + +// TableName AiAdviceProject's table name +func (*AiAdviceProject) TableName() string { + return TableNameAiAdviceProject +} diff --git a/internal/data/model/ai_advice_talk.gen.go b/internal/data/model/ai_advice_talk.gen.go new file mode 100644 index 0000000..f465123 --- /dev/null +++ b/internal/data/model/ai_advice_talk.gen.go @@ -0,0 +1,27 @@ +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. 
+
+package model
+
+import (
+	"time"
+)
+
+const TableNameAiAdviceTalk = "ai_advice_talk"
+
+// AiAdviceTalk mapped from table
+type AiAdviceTalk struct {
+	TalkID              int32     `gorm:"column:talk_id;primaryKey;autoIncrement:true" json:"talk_id"`
+	NeedsMining         string    `gorm:"column:needs_mining;comment:需求挖掘话术" json:"needs_mining"` // 需求挖掘话术
+	PainPointResponse   string    `gorm:"column:pain_point_response;comment:痛点应对策略" json:"pain_point_response"` // 痛点应对策略
+	ValueBuilding       string    `gorm:"column:value_building;comment:价值塑造技巧" json:"value_building"` // 价值塑造技巧
+	ClosingTechniques   string    `gorm:"column:closing_techniques;comment:促单话术" json:"closing_techniques"` // 促单话术
+	CommunicationRhythm string    `gorm:"column:communication_rhythm;comment:沟通节奏控制" json:"communication_rhythm"` // 沟通节奏控制
+	CreateAt            time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"`
+}
+
+// TableName AiAdviceTalk's table name
+func (*AiAdviceTalk) TableName() string {
+	return TableNameAiAdviceTalk
+}
diff --git a/internal/entitys/advicer.go b/internal/entitys/advicer.go
new file mode 100644
index 0000000..aaa92f1
--- /dev/null
+++ b/internal/entitys/advicer.go
@@ -0,0 +1,168 @@
+package entitys
+
+// WordAnaReq carries the URL of a Word file (JSON key "word_file_url").
+type WordAnaReq struct {
+	WordFileUrl string `json:"word_file_url"`
+}
+
+// ------- Advisor
+
+// DialectFeatures describes an advisor's dialect.
+//
+// NOTE(review): KeyWords is tagged "KeyWords" while Example uses the key
+// "key_words", so decoding Example with encoding/json leaves KeyWords empty —
+// confirm which spelling the producer of this JSON uses.
+type DialectFeatures struct {
+	Region    string   `json:"region"`    // dialect region (Example: a Sichuan/Chengdu dialect)
+	Intensity float64  `json:"intensity"` // dialect usage intensity (0-1)
+	KeyWords  []string `json:"KeyWords"`
+}
+
+// Example returns a sample JSON document for DialectFeatures.
+func (e *DialectFeatures) Example() string {
+	return `{"region":"四川成都话","intensity":0.4,"key_words":["噻","要得","没得","不晓得","是不是","对的嘛","好嘛","晓得嘛","真的","正儿八经","说实话"]}`
+}
+
+// SentencePatterns groups an advisor's typical phrases by their role in a
+// conversation.
+type SentencePatterns struct {
+	OpeningMode      []string `json:"openingMode"`      // opening phrases
+	ExplanationMode  []string `json:"explanationMode"`  // explanation phrases
+	ConfirmationMode []string `json:"confirmationMode"` // confirmation phrases
+	SummaryMode      []string `json:"summaryMode"`      // summary phrases
+	TransitionMode   []string `json:"transitionMode"`   // transition phrases
+}
+
+// Example returns a sample JSON document for SentencePatterns.
+func (e *SentencePatterns) Example() string {
+	return `{"openingMode":["我是你们的职业顾问","我给你介绍一下","我们先来看一下"],"explanationMode":["是这样的","我跟你讲","因为...所以...","你发现没得"],"confirmationMode":["对吧?","是不是嘛?","你晓得不?","明白了噻?"],"summaryMode":["所以说","总结一下哈","简单说就是"],"transitionMode":["然后的话","再其次","除此之外","还有一点"]}`
+}
+
+// PersonalityTags is a list of personality labels.
+type PersonalityTags []string
+
+// Example returns a sample JSON array for PersonalityTags.
+func (e *PersonalityTags) Example() string {
+	return `["耐心细致","本地通","数据控","关系型","细节控","乐观积极","诚实可信"]`
+}
+
+// ToneTags holds one score per tone dimension; every score in Example lies
+// between 0 and 1.
+type ToneTags struct {
+	Enthusiasm   float64 `json:"enthusiasm"`
+	Patience     float64 `json:"patience"`
+	Confidence   float64 `json:"confidence"`
+	Friendliness float64 `json:"friendliness"`
+	Persuasion   float64 `json:"persuasion"`
+}
+
+// Example returns a sample JSON document for ToneTags.
+func (e *ToneTags) Example() string {
+	return `{"enthusiasm":0.8,"patience":0.9,"confidence":0.85,"friendliness":0.75,"persuasion":0.7}`
+}
+
+// SignatureDialogues is a representative dialogue sample.
+//
+// NOTE(review): Example returns a JSON array of such objects rather than a
+// single object — confirm callers decode it into a slice.
+type SignatureDialogues struct {
+	Context  string `json:"context"`
+	Dialogue string `json:"dialogue"` // explanation
+}
+
+// Example returns a sample JSON array of SignatureDialogues objects.
+func (e *SignatureDialogues) Example() string {
+	return `[{"context":"客户质疑地块大小","dialogue":"哥,14亩确实不大,但你要在成都是2.5环内城买房,这种是个普遍存在的一个现象。你看万景和绿城都是13亩,中铁建只有8.8亩,339那个帮泰只有11亩。我们虽然地小,但楼间距开阔啊,看过去都是200多米!"},{"context":"客户担心物业费高","dialogue":"姐,我懂你意思,我们也觉得物业费是有点贵。但招商物业是铂金服务,有管家送外卖、免费宠物喂养这些增值服务。你算一下,就算贵一块钱,十年也就多14000,但好物业让房子增值不止这点!"},{"context":"客户犹豫价格","dialogue":"说实话,这个地段的地价都比28板块贵5000多,但我们单价只贵3000。你看龙湖滨江云河颂套内单价都36000了,我们才33000,真的性价比高!现在不买,以后这个板块可能就买不起了。"},{"context":"介绍项目优势","dialogue":"我跟你讲,我们项目就三个核心:地段在2.5环内槐树店板块,产品是全玻璃幕墙+三层中空玻璃,价格是板块最低门槛。花400多万买这里,真的是可上可下!"},{"context":"处理客户异议","dialogue":"我懂你的担心,很多客户刚开始也这样想。但你看嘛,我们旁边那块48亩地还没拍,以后肯定也是大品牌开发商来做豪宅。到时候我们这边全是高端盘,价格只会往上走!"}]`
+}
+
+// ------- Project
+//
+// NOTE(review): apart from DeveloperBacking, the Example documents of the
+// map[string]string types below use JSON arrays or nested objects as values,
+// so an Example cannot be decoded into its own type — confirm how these
+// types are populated.
+
+// RegionValue is the regional-value talking-point library.
+type RegionValue map[string]string
+
+// Example returns a sample JSON document for RegionValue.
+func (e *RegionValue) Example() string {
+	return `{"区位层级":["成华区2.5环内侧,这个位置真的稀缺","槐树店板块现在是成华区的number one板块","北接三板桥商圈,西靠万象城,东临火车东站","属于淮舜板块,万象城东的核心位置"],"地价论证":["我们地价19500,华晨府20400,棕榈也是2万+","2.5环内现在地价没有低于19000的","面粉贵了,面包不可能便宜"],"板块热度":["从21年新希望锦麟一品开始,这边全是高端盘","龙湖最高端的滨江系列在这里,新希望的锦麟系列也在这里","各大品牌开发商争相恐后都在这边拿地"],"发展规划":["槐树店板块是棋盘成钢之后第二个富人区","整个板块都是300万到900万的总价段","未来全是改善型住宅,没有刚需盘"]}`
+}
+
+// CompetitionComparison holds competitor-comparison talking points.
+type CompetitionComparison map[string]string
+
+// Example returns a sample JSON document for CompetitionComparison.
+func (e *CompetitionComparison) Example() string {
+	return `{"龙湖滨江云河颂":{"优点承认":"龙湖位置确实好,看沙河公园","价格对比":"他们单价32000-35000,但得房率只有95%,套内算下来36000+","优势突出":"我们得房率118平实得132平,套内单价才33000"},"邦泰云锦":{"定位相似":"邦泰也是首个项目,要打造口碑","价格参考":"他们当时12800拿地,现在卖34000","品质对比":"我们外立面全玻璃幕墙,比他们成本高30%"},"招商景程序":{"板块差异":"他们在28板块,我们是槐树店板块","地价对比":"他们地价13000左右,我们19500","价值判断":"地价比我们便宜6000,但单价差不多,我们更值"},"万景/绿城":{"地块对比":"他们都是13-15亩,我们14亩差不多","楼间距":"他们楼间距只有30米,我们有200多米","产品力":"我们全屋木饰面交付,他们很多是简装"}}`
+}
+
+// CoreSellingPoints holds the project's core selling points.
+type CoreSellingPoints map[string]string
+
+// Example returns a sample JSON document for CoreSellingPoints.
+func (e *CoreSellingPoints) Example() string {
+	return `{"规划设计":{"楼间距":"一号楼看公园280米,二号楼看邦泰190米,三号楼间距71米","布局":"L型布局,最大化利用公园景观","容积率":"2.0,在主城区非常低","绿化率":"35%,加公园绿化实际超过50%"},"建筑品质":{"外立面":"全玻璃幕墙+铝单板,浅金色铝板","玻璃":"三层中空氩气玻璃(非双层),成本高一倍","层高":"3.2米层高,豪宅标准(很多盘只有3.05米)","架空层":"6米挑高,全架空设计"},"户型产品":{"118户型":"118平做四房三卫,成都唯一,实得132平","140户型":"270度全景舱,两面采光","得房率":"赠送多,实得率高","功能设计":"动静分区,双套房设计"},"装修标准":{"三大件":"菲斯曼地暖,日立中央空调,霍尼韦尔新风","厨房":"方太Y9烟灶,20套洗碗机,安吉尔净水器","卫浴":"高仪全套,无缝拼接台盆","地面":"简一大理石瓷砖,无缝拼接","墙面":"全屋木饰面,部分硬质软包"}}`
+}
+
+// SupportingFacilities describes the facilities surrounding the project.
+type SupportingFacilities map[string]string
+
+// Example returns a sample JSON document for SupportingFacilities.
+func (e *SupportingFacilities) Example() string {
+	return `{"交通配套":{"地铁":"双店路站350米(7号线),槐树店站550米(4号线),未来12号线","道路":"中环路、成洛大道,到春熙路5个站","通达性":"到火车东站2个站,到华西30分钟内"},"商业配套":{"高端商圈":"万象城1.6公里,三板桥1.8公里","社区商业":"成华奥园广场、十里风荷底商","未来商业":"上东里商业(明年开业,有永辉超市)"},"生态配套":{"公园体系":"楼下40亩槐树店公园,400米沙河公园,700米多宝寺公园","绿道系统":"沿沙河跑到塔山公园20多公里","环境优势":"2.5环内唯一有300亩生态带的楼盘"},"教育配套":{"幼儿园":"楼下公立幼儿园(明年9月招生)","小学":"城市附小锦汇东城(成华区生源最好的学校)","生源优势":"周边新盘都是300万+,生源纯粹"},"医疗配套":{"三甲医院":"市六医院、市二医院3公里内","顶尖医疗":"华西医院锦江院区30分钟车程","便利性":"到华西本部也是30分钟内"}}`
+}
+
+// DeveloperBacking holds developer-endorsement talking points.
+type DeveloperBacking map[string]string
+
+// Example returns a sample JSON document for DeveloperBacking.
+func (e *DeveloperBacking) Example() string {
+	return `{"公司实力":"中信资产,多元化民营企业","资金安全":"在河南渑池有铝土矿,每年稳定收入10亿","开发经验":"宜宾有5个项目,贵州2个,成都是首个项目","合作方":"招商铂金物业,首次与外部企业合作"}`
+}
+
+// ------- Sales talk
+//
+// NOTE(review): the same mismatch between map[string]string and the Example
+// value shapes applies to every type in this section.
+
+// NeedsMining holds needs-discovery questions.
+type NeedsMining map[string]string
+
+// Example returns a sample JSON document for NeedsMining.
+func (e *NeedsMining) Example() string {
+	return `{"预算需求":["你们总价想控制在多少以内?","是考虑按揭还是一次性?","月供能接受多少范围?"],"居住需求":["几个人住?有老人小孩吗?","主要是自住还是考虑投资?","现在住哪里?想改善哪些方面?"],"通勤需求":["在哪个位置上班?","主要开车还是坐地铁?","对地铁距离有要求吗?"],"偏好需求":["更看重地段还是产品本身?","喜欢安静的还是热闹的?","对楼层、朝向有偏好吗?"],"时间需求":["打算什么时候入住?","现在看到什么阶段了?","决策需要跟家人商量吗?"]}`
+}
+
+// PainPointResponse holds strategies for answering customer pain points.
+type PainPointResponse map[string]string
+
+// Example returns a sample JSON document for PainPointResponse.
+func (e *PainPointResponse) Example() string {
+	return `{"地块太小":{"承认事实":"14亩确实不大","普遍现象":"2.5环内都是小地块,万景13亩,中铁建8.8亩","转化优势":"但人少安静,楼间距反而更开阔","对比竞品":"339的邦泰才11亩,人家上千万豪宅"},"物业费高":{"理解感受":"我懂你,我们也觉得有点贵","价值分析":"但6块里3块是增值服务(保洁、送外卖、宠物喂养)","价格补贴":"前三年补贴到5块,跟其他盘差不多","保值论证":"好物业让房子增值,紫东星座就是例子"},"开发商不知名":{"坦诚相告":"没听说过很正常,我们是成都首个项目","实力展示":"但中信主业是矿产,房地产只占20%,资金安全","案例对比":"邦泰、奥兰刚开始也没人知道,现在都认可了","品质承诺":"首个项目更要做好口碑,不计成本打造"},"周边有社区用地":{"明确规划":"那边是社区服务中心,最多三层楼","距离保证":"离我们有100米,不影响采光","未来价值":"社区配套齐全,生活更方便","对比安慰":"总比修高楼挡光好嘛"},"价格偏高":{"地段价值":"地段值这个价,2.5环内没得选","产品价值":"全玻璃幕墙、3.2米层高,成本就高","比较价值":"比龙湖、邦泰单价都低","门槛价值":"板块最低总价,以后更买不起"}}`
+}
+
+// ValueBuilding holds value-building techniques.
+type ValueBuilding map[string]string
+
+// Example returns a sample JSON document for ValueBuilding.
+func (e *ValueBuilding) Example() string {
+	return `{"地段价值塑造":["买房最重要的是地段、地段、还是地段","核心地段的核心资产才保值增值","2.5环内的地卖一块少一块,不可再生"],"产品价值塑造":["我们是用改善的价格,买豪宅的标准","很多细节都是3000万豪宅才有的配置","外立面成本比竞品高30%,但单价差不多"],"稀缺性塑造":["整个槐树店板块,我们是唯一有400万以下产品的","118四房三卫,全成都找不出第二个","200多米楼间距,主城区绝版"],"未来价值塑造":["现在垫垫脚买了,未来换房可上可下","旁边48亩地以后拍出来,肯定刷新地价","板块全部交付后,城市界面会完全不一样"]}`
+}
+
+// ClosingTechniques holds deal-closing talking points.
+type ClosingTechniques map[string]string
+
+// Example returns a sample JSON document for ClosingTechniques.
+func (e *ClosingTechniques) Example() string {
+	return `{"紧迫感营造":{"时间紧迫":["今天是月底最后一天,领导有压力价格可谈","我们刚刚开盘,还有额外优惠","月底冲业绩,价格最有弹性"],"房源稀缺":["118只剩20多套了,好楼层不多","这栋楼就60户,卖一套少一套","特价房只有这几套,今天不定可能就没了"]},"优惠策略":{"价格优惠":["今天定的话,我可以跟领导申请额外折扣","买车位的话,总价多给两个点优惠","一次性付款再优惠一个点"],"附加价值":["送一年物业费","送品牌家电礼包","优先选车位"]},"决策推动":{"小步推进":["要不先交个小定保留房源?","可以先排个号,有优惠优先通知你","今天不定的话,我帮你留意好楼层"],"风险规避":["现在不定,下次来可能就不是这个价了","好楼层不等人,上次有客户犹豫一天就没了","月底优惠政策可能明天就没了"]},"成交确认":{"二选一":["您是选902还是1002?","倾向118还是140?","喜欢东边户还是西边户?"],"假设成交":["那我给您准备合同了","车位您选哪个位置?","按揭资料我带您去准备"]}}`
+}
+
+// CommunicationRhythm describes how a conversation is paced across its stages.
+type CommunicationRhythm map[string]string
+
+// Example returns a sample JSON document for CommunicationRhythm.
+func (e *CommunicationRhythm) Example() string {
+	return `{"开场阶段":{"时间占比":"5%","目标":"建立关系,了解需求","关键动作":"亲切称呼,简单寒暄,确认看房重点"},"沙盘讲解":{"时间占比":"30%","目标":"建立价值认知","关键动作":"板块价值→周边配套→项目亮点→开发商介绍"},"样板间体验":{"时间占比":"35%","目标":"激发购买欲望","关键动作":"让客户亲自体验(开关窗、看视野、感受层高)"},"价格谈判":{"时间占比":"25%","目标":"促成决策","关键动作":"算价→解释优惠→对比竞品→促单"},"结束跟进":{"时间占比":"5%","目标":"建立长期联系","关键动作":"送资料,加微信,约定下次,小礼物"}}`
+}
diff --git a/internal/pkg/file_download/file_download.go b/internal/pkg/file_download/file_download.go
new file mode 100644
index 0000000..26fc5c2
--- /dev/null
+++ b/internal/pkg/file_download/file_download.go
@@ -0,0 +1,258 @@
+package file_download
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"mime"
+	"net/http"
+	"net/url"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/unidoc/unioffice/document"
+)
+
+// downloadClient is shared by all downloads so connections can be reused. Its
+// timeout covers the whole exchange, including reading the response body.
+var downloadClient = &http.Client{Timeout: 30 * time.Second}
+
+// illegalFilenameChars maps every character sanitizeFilename rejects to "_".
+var illegalFilenameChars = strings.NewReplacer(
+	"/", "_", "\\", "_", ":", "_", "*", "_", "?", "_",
+	"\"", "_", "<", "_", ">", "_", "|", "_",
+)
+
+// DownloadFile fetches url with an HTTP GET and returns the response body
+// together with a file name derived from the response headers or the URL.
+//
+// Any status other than 200 is returned as an error. When validFunc is
+// non-nil it is called with the response before the body is read, and an
+// error from it aborts the download.
+func DownloadFile(url string, validFunc func(resp *http.Response) error) ([]byte, string, error) {
+	resp, err := downloadClient.Get(url)
+	if err != nil {
+		return nil, "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
+	}
+
+	if validFunc != nil {
+		if err := validFunc(resp); err != nil {
+			return nil, "", err
+		}
+	}
+
+	data, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, "", err
+	}
+
+	return data, getFilenameFromURL(url, resp), nil
+}
+
+// getFilenameFromURL picks a file name for a downloaded resource. It tries, in
+// order, the filename parameter of the Content-Disposition header, the last
+// element of the URL path, and finally a generated "word_<unix time>.docx".
+func getFilenameFromURL(urlStr string, resp *http.Response) string {
+	// 1. Content-Disposition header.
+	if name := filenameFromContentDisposition(resp.Header.Get("Content-Disposition")); name != "" {
+		return sanitizeFilename(name)
+	}
+
+	// 2. Last element of the URL path; url.Parse has already percent-decoded it.
+	if parsedURL, err := url.Parse(urlStr); err == nil && parsedURL.Path != "" {
+		switch name := path.Base(parsedURL.Path); name {
+		case "", ".", "..", "/":
+			// The path carries no usable name.
+		default:
+			return sanitizeFilename(name)
+		}
+	}
+
+	// 3. Generated default.
+	return fmt.Sprintf("word_%d.docx", time.Now().Unix())
+}
+
+// filenameFromContentDisposition returns the filename parameter of a
+// Content-Disposition header value, or "" when it has none.
+func filenameFromContentDisposition(header string) string {
+	if header == "" {
+		return ""
+	}
+
+	// mime.ParseMediaType handles quoting, parameters that follow the file
+	// name, and RFC 2231 encoded names (filename*=UTF-8''...).
+	if _, params, err := mime.ParseMediaType(header); err == nil {
+		return params["filename"]
+	}
+
+	// Malformed header: fall back to a lenient scan that stops at the next
+	// parameter, so a trailing "; size=..." is not glued onto the name.
+	_, rest, found := strings.Cut(header, "filename=")
+	if !found {
+		return ""
+	}
+	value, _, _ := strings.Cut(rest, ";")
+	return strings.Trim(value, `"' `)
+}
+
+// sanitizeFilename replaces the characters / \ : * ? " < > | with "_" and
+// appends ".docx" when the result contains no dot.
+func sanitizeFilename(filename string) string {
+	filename = illegalFilenameChars.Replace(filename)
+
+	if !strings.Contains(filename, ".") {
+		filename += ".docx"
+	}
+
+	return filename
+}
+
+// GetWordTextFromURL downloads the Word document at url and returns its plain
+// text and the file name chosen for it. validFunc is handed to DownloadFile
+// unchanged.
+func GetWordTextFromURL(url string, validFunc func(resp *http.Response) error) (string, string, error) {
+	data, fileName, err := DownloadFile(url, validFunc)
+	if err != nil {
+		return "", "", fmt.Errorf("下载失败: %w", err)
+	}
+
+	text, err := parseWordContent(data)
+	if err != nil {
+		return "", "", fmt.Errorf("解析失败: %w", err)
+	}
+
+	return text, fileName, nil
+}
+
+// parseWordContent reads data as a Word document and returns the text of the
+// paragraphs reported by doc.Paragraphs, one per line, with surrounding
+// whitespace trimmed from the result.
+func parseWordContent(data []byte) (string, error) {
+	doc, err := document.Read(bytes.NewReader(data), int64(len(data)))
+	if err != nil {
+		return "", err
+	}
+	defer doc.Close()
+
+	var text strings.Builder
+	for _, paragraph := range doc.Paragraphs() {
+		for _, run := range paragraph.Runs() {
+			text.WriteString(run.Text())
+		}
+		text.WriteString("\n")
+	}
+
+	return strings.TrimSpace(text.String()), nil
+}
+
+// parseWordFile summarises the Word file at filePath as a generic map.
+//
+// The map always holds "filepath" and "format" (the lower-cased extension).
+// For ".docx" it adds the non-blank paragraph texts, the tables (one map per
+// table with "rows" and "row_count") and both counts. For ".doc" it adds only
+// the file size and a note, because that format is not parsed here.
+// "filesize" and "modified" are added whenever os.Stat succeeds.
+func parseWordFile(filePath string) (map[string]interface{}, error) {
+	ext := strings.ToLower(filepath.Ext(filePath))
+
+	result := map[string]interface{}{
+		"filepath": filePath,
+		"format":   ext,
+	}
+
+	switch ext {
+	case ".docx":
+		doc, err := document.Open(filePath)
+		if err != nil {
+			return nil, err
+		}
+		defer doc.Close()
+
+		// Paragraph texts, skipping blank paragraphs.
+		var paragraphs []string
+		for _, p := range doc.Paragraphs() {
+			var text strings.Builder
+			for _, run := range p.Runs() {
+				text.WriteString(run.Text())
+			}
+			if strings.TrimSpace(text.String()) != "" {
+				paragraphs = append(paragraphs, text.String())
+			}
+		}
+
+		// Tables: one map per table, every row keyed by cell index.
+		var tables []map[string]interface{}
+		for _, table := range doc.Tables() {
+			var rows []map[int]string
+			for _, row := range table.Rows() {
+				rowData := make(map[int]string)
+				for cellIdx, cell := range row.Cells() {
+					var cellText strings.Builder
+					for _, p := range cell.Paragraphs() {
+						for _, run := range p.Runs() {
+							cellText.WriteString(run.Text())
+						}
+					}
+					rowData[cellIdx] = cellText.String()
+				}
+				rows = append(rows, rowData)
+			}
+
+			tables = append(tables, map[string]interface{}{
+				"rows":      rows,
+				"row_count": len(rows),
+			})
+		}
+
+		result["paragraphs"] = paragraphs
+		result["tables"] = tables
+		result["paragraph_count"] = len(paragraphs)
+		result["table_count"] = len(tables)
+
+	case ".doc":
+		// Legacy binary format: not parsed, only its size is reported.
+		data, err := os.ReadFile(filePath)
+		if err != nil {
+			return nil, err
+		}
+		result["binary_size"] = len(data)
+		result["note"] = ".doc 文件需要专门的解析库"
+	}
+
+	// File metadata is best-effort: if Stat fails the keys are left unset.
+	if fileInfo, err := os.Stat(filePath); err == nil {
+		result["filesize"] = fileInfo.Size()
+		result["modified"] = fileInfo.ModTime()
+	}
+
+	return result, nil
+}
+
+// IsWordFile returns nil when the response's Content-Type contains one of the
+// MIME types accepted for Word documents, and an error otherwise. Its
+// signature matches the validFunc parameter of DownloadFile.
+func IsWordFile(resp *http.Response) error {
+	wordContentTypes := []string{
+		"application/msword",
+		"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+		"application/vnd.ms-word",
+		"application/octet-stream", // some servers return this generic type
+	}
+
+	contentType := strings.ToLower(resp.Header.Get("Content-Type"))
+	for _, ct := range wordContentTypes {
+		if strings.Contains(contentType, ct) {
+			return nil
+		}
+	}
+	return errors.New("错误的文件类型")
+}
diff --git a/internal/services/advicer_test.go b/internal/services/advicer_test.go
new file mode 100644
index 0000000..bce9307
--- /dev/null
+++ b/internal/services/advicer_test.go
@@ -0,0 +1,22 @@
+package services
+
+import (
+	"testing"
+)
+
+// Test_WordAna calls AdviceService.WordAnat with the URL of a hosted .docx
+// file.
+//
+// NOTE(review): the test needs network access and asserts nothing. advicer is
+// never assigned in this file, so unless Run sets it the call below is made on
+// a nil *AdviceService — confirm.
+func Test_WordAna(t *testing.T) {
+	Run()
+
+	advicer.WordAnat("https://attachment-public.oss-cn-hangzhou.aliyuncs.com/ai-scheduler/data-analytics/word/content.docx")
+}
+
+// advicer is the service exercised by the tests in this file.
+var (
+	advicer *AdviceService
+)