Compare commits

...

2 Commits

13 changed files with 585 additions and 1 deletions

1
go.mod
View File

@ -26,6 +26,7 @@ require (
github.com/gofiber/websocket/v2 v2.2.1 github.com/gofiber/websocket/v2 v2.2.1
github.com/google/uuid v1.6.0 github.com/google/uuid v1.6.0
github.com/google/wire v0.7.0 github.com/google/wire v0.7.0
github.com/lukasjarosch/go-docx v0.5.0
github.com/ollama/ollama v0.12.7 github.com/ollama/ollama v0.12.7
github.com/redis/go-redis/v9 v9.16.0 github.com/redis/go-redis/v9 v9.16.0
github.com/robfig/cron/v3 v3.0.1 github.com/robfig/cron/v3 v3.0.1

3
go.sum
View File

@ -327,6 +327,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/lukasjarosch/go-docx v0.5.0 h1:4vU+gJ4WMdqwRvRVFF+XMw3rPfUGSXlToPJIX3mHQsQ=
github.com/lukasjarosch/go-docx v0.5.0/go.mod h1:ka/NZgDIJId48vMvcfWfduVTY7uV0/f8EgsmCjuS9X0=
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
@ -582,6 +584,7 @@ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200925080053-05aa5d4ee321/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=

View File

@ -64,7 +64,7 @@ func (a *AdviceBiz) WordAna(ctx context.Context, wordContent string) error {
func (a *AdviceBiz) callLlm(ctx context.Context, prompt string) (string, error) { func (a *AdviceBiz) callLlm(ctx context.Context, prompt string) (string, error) {
var message = make([]*model.ChatCompletionMessage, 1) var message = make([]*model.ChatCompletionMessage, 1)
message[0] = &model.ChatCompletionMessage{ message[0] = &model.ChatCompletionMessage{
Role: model.ChatMessageRoleSystem, Role: model.ChatMessageRoleUser,
Content: &model.ChatCompletionMessageContent{ Content: &model.ChatCompletionMessageContent{
StringValue: volcengine.String(prompt), StringValue: volcengine.String(prompt),
}, },

View File

@ -0,0 +1,52 @@
package third_party
import (
"context"
"time"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
)
// Hsyq holds arkruntime clients keyed by the API key they were created with.
type Hsyq struct {
// mapClient is the per-key client cache that getClient reads and fills.
mapClient map[string]*arkruntime.Client
}
// NewHsyq returns a Hsyq whose client cache is empty and ready to be written to.
func NewHsyq() *Hsyq {
	cache := map[string]*arkruntime.Client{}
	return &Hsyq{mapClient: cache}
}
// getClient returns the client cached for key. The first time a key is seen it
// builds a client for the cn-beijing region with a 2-minute timeout and 2
// retries, stores it in the cache, and returns it.
//
// NOTE(review): the cache map is read and written without a lock here —
// confirm that callers never invoke this from multiple goroutines at once.
func (h *Hsyq) getClient(key string) *arkruntime.Client {
	if cached, found := h.mapClient[key]; found {
		return cached
	}

	created := arkruntime.NewClientWithApiKey(
		key,
		arkruntime.WithRegion("cn-beijing"),
		arkruntime.WithTimeout(2*time.Minute),
		arkruntime.WithRetryTimes(2),
	)
	h.mapClient[key] = created
	return created
}
// RequestHsyq sends a chat completion request to Volcano Engine (火山引擎).
//
// The request is built with Stream pointing at false and Thinking set to
// ThinkingTypeDisabled, and is sent through the client cached for key.
// On failure the zero-value response is returned together with the error.
func (h *Hsyq) RequestHsyq(ctx context.Context, key string, modelName string, prompt []*model.ChatCompletionMessage) (model.ChatCompletionResponse, error) {
	streaming := false
	request := model.CreateChatCompletionRequest{
		Model:    modelName,
		Messages: prompt,
		Stream:   &streaming,
		Thinking: &model.Thinking{Type: model.ThinkingTypeDisabled},
	}

	client := h.getClient(key)
	response, err := client.CreateChatCompletion(ctx, request)
	if err != nil {
		return model.ChatCompletionResponse{}, err
	}
	return response, nil
}

View File

@ -0,0 +1,18 @@
package impl
import (
"ai_scheduler/internal/data/model"
"ai_scheduler/tmpl/dataTemp"
"ai_scheduler/utils"
)
// AdviceAdvicerImpl is the data-access type for advisers; it embeds
// dataTemp.DataTemp and BaseRepository[model.AiTask].
// NOTE(review): the constructor in this file binds DataTemp to
// model.AiAdviceAdvicer but never sets BaseRepository, and the repository is
// typed on model.AiTask — confirm this is intended and not a copy-paste leftover.
type AdviceAdvicerImpl struct {
dataTemp.DataTemp
BaseRepository[model.AiTask]
}
// NewAdviceAdvicerImplImpl builds an AdviceAdvicerImpl whose DataTemp is bound
// to db and the AiAdviceAdvicer model. The embedded BaseRepository keeps its
// zero value.
func NewAdviceAdvicerImplImpl(db *utils.Db) *AdviceAdvicerImpl {
	tmpl := dataTemp.NewDataTemp(db, &model.AiAdviceAdvicer{})
	return &AdviceAdvicerImpl{DataTemp: *tmpl}
}

View File

@ -0,0 +1,18 @@
package impl
import (
"ai_scheduler/internal/data/model"
"ai_scheduler/tmpl/dataTemp"
"ai_scheduler/utils"
)
// AdviceProjectImpl is the data-access type for projects; it embeds
// dataTemp.DataTemp and BaseRepository[model.AiTask].
// NOTE(review): the constructor in this file binds DataTemp to
// model.AiAdviceProject but never sets BaseRepository, and the repository is
// typed on model.AiTask — confirm this is intended and not a copy-paste leftover.
type AdviceProjectImpl struct {
dataTemp.DataTemp
BaseRepository[model.AiTask]
}
// NewAdviceProjectImpl builds an AdviceProjectImpl whose DataTemp is bound to
// db and the AiAdviceProject model. The embedded BaseRepository keeps its zero
// value.
func NewAdviceProjectImpl(db *utils.Db) *AdviceProjectImpl {
	tmpl := dataTemp.NewDataTemp(db, &model.AiAdviceProject{})
	return &AdviceProjectImpl{DataTemp: *tmpl}
}

View File

@ -0,0 +1,18 @@
package impl
import (
"ai_scheduler/internal/data/model"
"ai_scheduler/tmpl/dataTemp"
"ai_scheduler/utils"
)
// AdviceTalkImpl is the data-access type for sales talk scripts; it embeds
// dataTemp.DataTemp and BaseRepository[model.AiTask].
// NOTE(review): the constructor in this file binds DataTemp to
// model.AiAdviceTalk but never sets BaseRepository, and the repository is
// typed on model.AiTask — confirm this is intended and not a copy-paste leftover.
type AdviceTalkImpl struct {
dataTemp.DataTemp
BaseRepository[model.AiTask]
}
// NewAdviceTalkImpl builds an AdviceTalkImpl whose DataTemp is bound to db and
// the AiAdviceTalk model. The embedded BaseRepository keeps its zero value.
func NewAdviceTalkImpl(db *utils.Db) *AdviceTalkImpl {
	tmpl := dataTemp.NewDataTemp(db, &model.AiAdviceTalk{})
	return &AdviceTalkImpl{DataTemp: *tmpl}
}

View File

@ -0,0 +1,33 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
package model
import (
"time"
)
// TableNameAiAdviceAdvicer is the database table backing AiAdviceAdvicer.
const TableNameAiAdviceAdvicer = "ai_advice_advicer"
// AiAdviceAdvicer mapped from table <ai_advice_advicer>
type AiAdviceAdvicer struct {
AdvicerID int32 `gorm:"column:advicer_id;primaryKey;autoIncrement:true" json:"advicer_id"`
Name string `gorm:"column:name;not null;comment:姓名" json:"name"` // name
Birth time.Time `gorm:"column:birth;not null;comment:用户名称" json:"birth"` // column comment reads "user name"; NOTE(review): looks like a stale DB comment for a birth date — confirm
Gender int32 `gorm:"column:gender;not null;comment:1:男2女" json:"gender"` // 1: male, 2: female
WorkingYears int32 `gorm:"column:working_years;not null;default:1;comment:工作年限" json:"working_years"` // years of work experience
ContactTags string `gorm:"column:contact_tags;not null;comment:联系方式" json:"contact_tags"` // contact information
NativeRegion string `gorm:"column:native_region;not null;comment:籍贯" json:"native_region"` // native place
DialectFeatures string `gorm:"column:dialect_features;not null;comment:语言风格" json:"dialect_features"` // language style
SentencePatterns string `gorm:"column:sentence_patterns;comment:句子模式" json:"sentence_patterns"` // sentence patterns
ToneTags string `gorm:"column:tone_tags;comment:语气标签" json:"tone_tags"` // tone tags
PersonalityTags string `gorm:"column:personality_tags;not null;comment:个性标签" json:"personality_tags"` // personality tags
SignatureDialogues string `gorm:"column:signature_dialogues;comment:代表性对话示例" json:"signature_dialogues"` // representative dialogue examples
CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"`
}
// TableName AiAdviceAdvicer's table name
func (*AiAdviceAdvicer) TableName() string {
return TableNameAiAdviceAdvicer
}

View File

@ -0,0 +1,28 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
package model
import (
"time"
)
// TableNameAiAdviceProject is the database table backing AiAdviceProject.
const TableNameAiAdviceProject = "ai_advice_project"
// AiAdviceProject mapped from table <ai_advice_project>
type AiAdviceProject struct {
ProjectID int32 `gorm:"column:project_id;primaryKey;autoIncrement:true" json:"project_id"`
Name string `gorm:"column:name;not null;comment:姓名" json:"name"` // name
RegionValue string `gorm:"column:region_value;comment:区域价值话术库" json:"region_value"` // regional value script library
CompetitionComparison string `gorm:"column:competition_comparison;comment:竞品对比话术" json:"competition_comparison"` // competitor comparison scripts
CoreSellingPoints string `gorm:"column:core_selling_points;comment:项目核心卖点" json:"core_selling_points"` // core selling points of the project
SupportingFacilities string `gorm:"column:supporting_facilities;comment:配套体系" json:"supporting_facilities"` // supporting facilities
DeveloperBacking string `gorm:"column:developer_backing;comment:开发商背书" json:"developer_backing"` // developer backing
CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"`
}
// TableName AiAdviceProject's table name
func (*AiAdviceProject) TableName() string {
return TableNameAiAdviceProject
}

View File

@ -0,0 +1,27 @@
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
// Code generated by gorm.io/gen. DO NOT EDIT.
package model
import (
"time"
)
// TableNameAiAdviceTalk is the database table backing AiAdviceTalk.
const TableNameAiAdviceTalk = "ai_advice_talk"
// AiAdviceTalk mapped from table <ai_advice_talk>
type AiAdviceTalk struct {
TalkID int32 `gorm:"column:talk_id;primaryKey;autoIncrement:true" json:"talk_id"`
NeedsMining string `gorm:"column:needs_mining;comment:需求挖掘话术" json:"needs_mining"` // needs-discovery scripts
PainPointResponse string `gorm:"column:pain_point_response;comment:痛点应对策略" json:"pain_point_response"` // pain-point response strategies
ValueBuilding string `gorm:"column:value_building;comment:价值塑造技巧" json:"value_building"` // value-building techniques
ClosingTechniques string `gorm:"column:closing_techniques;comment:促单话术" json:"closing_techniques"` // closing scripts
CommunicationRhythm string `gorm:"column:communication_rhythm;comment:沟通节奏控制" json:"communication_rhythm"` // communication rhythm control
CreateAt time.Time `gorm:"column:create_at;default:CURRENT_TIMESTAMP" json:"create_at"`
}
// TableName AiAdviceTalk's table name
func (*AiAdviceTalk) TableName() string {
return TableNameAiAdviceTalk
}

135
internal/entitys/advicer.go Normal file
View File

@ -0,0 +1,135 @@
package entitys
// WordAnaReq is a request payload holding a single Word file URL, serialized
// under the JSON key "word_file_url".
type WordAnaReq struct {
WordFileUrl string `json:"word_file_url"`
}
// ------- Advisor
// DialectFeatures describes the dialect traits of a speaker.
type DialectFeatures struct {
	Region    string   `json:"region"`    // dialect region, e.g. "四川成都话"
	Intensity float64  `json:"intensity"` // dialect usage intensity, 0-1
	KeyWords  []string `json:"KeyWords"`  // characteristic dialect words
}

// Example returns a sample JSON document for DialectFeatures.
//
// The word list is keyed "KeyWords" so that it matches the struct tag: the
// previous sample used "key_words", which encoding/json does not map onto the
// KeyWords field (its key matching ignores case, not underscores), so data
// shaped like the sample decoded with an empty word list.
func (d *DialectFeatures) Example() string {
	return `{"region":"四川成都话","intensity":0.4,"KeyWords":["噻","要得","没得","不晓得","是不是"]}`
}
// SentencePatterns groups the stock phrases a speaker uses at each stage of
// an explanation.
type SentencePatterns struct {
	OpeningMode      []string `json:"openingMode"`      // opening phrases
	ExplanationMode  []string `json:"explanationMode"`  // explanation phrases
	ConfirmationMode []string `json:"confirmationMode"` // confirmation phrases
	SummaryMode      []string `json:"summaryMode"`      // summary phrases
	TransitionMode   []string `json:"transitionMode"`   // transition phrases
}

// Example returns a sample JSON document for SentencePatterns.
func (sp *SentencePatterns) Example() string {
	return `{"openingMode":["我给你介绍一下","我们先来看一下"],"explanationMode":["是这样的","我跟你讲","你发现没得"],"confirmationMode":["对吧?","是不是嘛?","你晓得不?","明白了噻?"],"summaryMode":["所以说","简单说就是"],"transitionMode":["然后的话","再其次","还有一点"]}`
}
// PersonalityTags is a list of personality labels.
type PersonalityTags []string

// Example returns a sample JSON array for PersonalityTags.
func (pt *PersonalityTags) Example() string {
	return `["耐心细致","细节控"]`
}
// ToneTags scores a speaker's tone along five dimensions.
// NOTE(review): the sample values all lie in 0-1; presumably that is the
// intended range — confirm.
type ToneTags struct {
	Enthusiasm   float64 `json:"enthusiasm"`
	Patience     float64 `json:"patience"`
	Confidence   float64 `json:"confidence"`
	Friendliness float64 `json:"friendliness"`
	Persuasion   float64 `json:"persuasion"`
}

// Example returns a sample JSON document for ToneTags.
func (tt *ToneTags) Example() string {
	return `{"enthusiasm":0.8,"patience":0.9,"confidence":0.85,"friendliness":0.75,"persuasion":0.7}`
}
// SignatureDialogues is one representative dialogue: the situation it arose in
// and what was said.
type SignatureDialogues struct {
	Context  string `json:"context"`  // situation that prompted the dialogue
	Dialogue string `json:"dialogue"` // the explanation given
}

// Example returns sample JSON for signature dialogues.
//
// NOTE(review): the sample is a JSON array of objects, so it decodes into
// []SignatureDialogues rather than a single SignatureDialogues — confirm how
// callers consume it.
func (sd *SignatureDialogues) Example() string {
	return `[{"context":"客户质疑地块大小","dialogue":"哥14亩确实不大但你要在成都是2.5环内城买房这种是个普遍存在的一个现象。你看万景和绿城都是13亩中铁建只有8.8亩339那个帮泰只有11亩。我们虽然地小但楼间距开阔啊看过去都是200多米"},{"context":"客户担心物业费高","dialogue":"姐我懂你意思我们也觉得物业费是有点贵。但招商物业是铂金服务有管家送外卖、免费宠物喂养这些增值服务。你算一下就算贵一块钱十年也就多14000但好物业让房子增值不止这点"},{"context":"客户犹豫价格","dialogue":"说实话这个地段的地价都比28板块贵5000多但我们单价只贵3000。你看龙湖滨江云河颂套内单价都36000了我们才33000真的性价比高现在不买以后这个板块可能就买不起了。"}]`
}
// ------- Project
// RegionValue is the regional-value script library, keyed by topic.
type RegionValue map[string]string

// Example returns sample JSON for the regional-value script library.
//
// NOTE(review): every value in the sample is a JSON array of strings, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (rv *RegionValue) Example() string {
	return `{"区位层级":["成华区2.5环内侧,这个位置真的稀缺","槐树店板块现在是成华区的number one板块","北接三板桥商圈,西靠万象城,东临火车东站","属于淮舜板块,万象城东的核心位置"],"地价论证":["我们地价19500华晨府20400棕榈也是2万+","2.5环内现在地价没有低于19000的","面粉贵了,面包不可能便宜"],"板块热度":["从21年新希望锦麟一品开始这边全是高端盘","龙湖最高端的滨江系列在这里,新希望的锦麟系列也在这里","各大品牌开发商争相恐后都在这边拿地"],"发展规划":["槐树店板块是棋盘成钢之后第二个富人区","整个板块都是300万到900万的总价段","未来全是改善型住宅,没有刚需盘"]}`
}
// CompetitionComparison holds competitor-comparison scripts, keyed by
// competitor.
type CompetitionComparison map[string]string

// Example returns sample JSON for competitor-comparison scripts.
//
// NOTE(review): every value in the sample is a nested JSON object, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (cc *CompetitionComparison) Example() string {
	return `{"龙湖滨江云河颂":{"优点承认":"龙湖位置确实好,看沙河公园","价格对比":"他们单价32000-35000但得房率只有95%套内算下来36000+","优势突出":"我们得房率118平实得132平套内单价才33000"},"邦泰云锦":{"定位相似":"邦泰也是首个项目,要打造口碑","价格参考":"他们当时12800拿地现在卖34000","品质对比":"我们外立面全玻璃幕墙比他们成本高30%"}}`
}
// CoreSellingPoints holds the project's core selling points, keyed by topic.
type CoreSellingPoints map[string]string

// Example returns sample JSON for core selling points.
//
// The literal previously lacked its final closing brace and was therefore not
// valid JSON; the brace has been restored.
//
// NOTE(review): the sample text is the same as the competitor-comparison
// sample, not selling points, and its values are nested objects that
// encoding/json cannot decode into map[string]string — confirm the intended
// content and shape.
func (cs *CoreSellingPoints) Example() string {
	return `{"龙湖滨江云河颂":{"优点承认":"龙湖位置确实好,看沙河公园","价格对比":"他们单价32000-35000但得房率只有95%套内算下来36000+","优势突出":"我们得房率118平实得132平套内单价才33000"},"邦泰云锦":{"定位相似":"邦泰也是首个项目,要打造口碑","价格参考":"他们当时12800拿地现在卖34000","品质对比":"我们外立面全玻璃幕墙比他们成本高30%"}}`
}
// SupportingFacilities describes the facilities around the project, keyed by
// category.
type SupportingFacilities map[string]string

// Example returns sample JSON for supporting facilities.
//
// NOTE(review): every value in the sample is a nested JSON object, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (sf *SupportingFacilities) Example() string {
	return `{"交通配套":{"地铁":"双店路站350米7号线槐树店站550米4号线未来12号线","道路":"中环路、成洛大道到春熙路5个站","通达性":"到火车东站2个站到华西30分钟内"},"教育配套":{"幼儿园":"楼下公立幼儿园","小学":"城市附小锦汇东城(成华区生源最好的学校)","生源优势":"周边新盘都是300万+,生源纯粹"},"医疗配套":{"三甲医院":"市六医院、市二医院3公里内","顶尖医疗":"华西医院锦江院区30分钟车程","便利性":"到华西本部也是30分钟内"}}`
}
// DeveloperBacking holds talking points about the developer's credentials,
// keyed by topic.
type DeveloperBacking map[string]string

// Example returns sample JSON for developer backing; the sample is a flat
// object of string values.
func (db *DeveloperBacking) Example() string {
	return `{"公司实力":"中信资产,多元化民营企业","资金安全":"在河南渑池有铝土矿每年稳定收入10亿","开发经验":"宜宾有5个项目贵州2个成都是首个项目","合作方":"招商铂金物业,首次与外部企业合作"}`
}
// ------- Sales scripts
// NeedsMining holds needs-discovery questions, keyed by the kind of need.
type NeedsMining map[string]string

// Example returns sample JSON for needs-discovery scripts.
//
// NOTE(review): every value in the sample is a JSON array of strings, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (nm *NeedsMining) Example() string {
	return `{"预算需求":["你们总价想控制在多少以内?","是考虑按揭还是一次性?","月供能接受多少范围?"],"居住需求":["几个人住?有老人小孩吗?","主要是自住还是考虑投资?","现在住哪里?想改善哪些方面?"],"通勤需求":["在哪个位置上班?","主要开车还是坐地铁?","对地铁距离有要求吗?"]}`
}
// PainPointResponse holds response strategies for customer objections, keyed
// by objection.
type PainPointResponse map[string]string

// Example returns sample JSON for pain-point response strategies.
//
// NOTE(review): every value in the sample is a nested JSON object, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (pr *PainPointResponse) Example() string {
	return `{"地块太小":{"承认事实":"14亩确实不大","普遍现象":"2.5环内都是小地块万景13亩中铁建8.8亩","转化优势":"但人少安静,楼间距反而更开阔","对比竞品":"339的邦泰才11亩人家上千万豪宅"},"物业费高":{"理解感受":"我懂你,我们也觉得有点贵","价值分析":"但6块里3块是增值服务保洁、送外卖","价格补贴":"前三年补贴到5块跟其他盘差不多"}}`
}
// ValueBuilding holds value-building talking points, keyed by the kind of
// value being built.
type ValueBuilding map[string]string

// Example returns sample JSON for value-building techniques.
//
// NOTE(review): every value in the sample is a JSON array of strings, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (vb *ValueBuilding) Example() string {
	return `{"地段价值塑造":["买房最重要的是地段、地段、还是地段","核心地段的核心资产才保值增值","2.5环内的地卖一块少一块,不可再生"],"产品价值塑造":["我们是用改善的价格,买豪宅的标准","很多细节都是3000万豪宅才有的配置","外立面成本比竞品高30%,但单价差不多"]}`
}
// ClosingTechniques holds deal-closing scripts, keyed by technique.
type ClosingTechniques map[string]string

// Example returns sample JSON for closing scripts.
//
// NOTE(review): every value in the sample is a nested JSON object, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (ct *ClosingTechniques) Example() string {
	return `{"紧迫感营造":{"时间紧迫":["今天是月底最后一天,领导有压力价格可谈","我们刚刚开盘,还有额外优惠","月底冲业绩,价格最有弹性"],"房源稀缺":["118只剩20多套了好楼层不多","这栋楼就60户卖一套少一套","特价房只有这几套,今天不定可能就没了"]},"优惠策略":{"价格优惠":["今天定的话,我可以跟领导申请额外折扣","买车位的话,总价多给两个点优惠","一次性付款再优惠一个点"],"附加价值":["送一年物业费","送品牌家电礼包","优先选车位"]},"决策推动":{"小步推进":["要不先交个小定保留房源?","可以先排个号,有优惠优先通知你","今天不定的话,我帮你留意好楼层"]}}`
}
// CommunicationRhythm describes how a conversation is paced, keyed by stage.
type CommunicationRhythm map[string]string

// Example returns sample JSON for communication rhythm control.
//
// NOTE(review): every value in the sample is a nested JSON object, which
// encoding/json cannot decode into map[string]string — confirm whether this
// type is ever used as an unmarshal target.
func (cr *CommunicationRhythm) Example() string {
	return `{"开场阶段":{"时间占比":"5%","目标":"建立关系,了解需求","关键动作":"亲切称呼,简单寒暄,确认看房重点"},"沙盘讲解":{"时间占比":"30%","目标":"建立价值认知","关键动作":"板块价值→周边配套→项目亮点→开发商介绍"}}`
}

View File

@ -0,0 +1,236 @@
package file_download
import (
	"archive/zip"
	"bytes"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"mime"
	"net/http"
	"net/url"
	"path/filepath"
	"strings"
	"time"
)
// DownloadFile fetches url over HTTP GET with a 30-second client timeout.
//
// A status other than 200 is reported as an error. When validFunc is non-nil
// it is called with the response before the body is read, and any error it
// returns aborts the download. On success the whole body is returned together
// with a file name chosen by getFilenameFromURL.
func DownloadFile(url string, validFunc func(resp *http.Response) error) ([]byte, string, error) {
	httpClient := http.Client{Timeout: 30 * time.Second}

	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
	}

	// Let the caller veto the response (for example by content type) before
	// any of the body is consumed.
	if validFunc != nil {
		if invalid := validFunc(resp); invalid != nil {
			return nil, "", invalid
		}
	}

	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, "", err
	}
	return payload, getFilenameFromURL(url, resp), nil
}
// getFilenameFromURL chooses a file name for a downloaded response.
//
// Sources are tried in order:
//  1. the filename parameter of the Content-Disposition header,
//  2. the last segment of the URL path,
//  3. a generated default of the form word_<unix-seconds>.docx.
//
// Names taken from the header or the URL are passed through sanitizeFilename.
func getFilenameFromURL(urlStr string, resp *http.Response) string {
	// 1. Content-Disposition header.
	if name := dispositionFilename(resp.Header.Get("Content-Disposition")); name != "" {
		return sanitizeFilename(name)
	}

	// 2. Last segment of the URL path.
	if parsedURL, err := url.Parse(urlStr); err == nil && parsedURL.Path != "" {
		base := filepath.Base(parsedURL.Path)
		if base != "" && base != "." && base != "/" {
			return sanitizeFilename(base)
		}
	}

	// 3. Generated default.
	return fmt.Sprintf("word_%d.docx", time.Now().Unix())
}

// dispositionFilename extracts the file name from a Content-Disposition header
// value, returning "" when the header carries none.
func dispositionFilename(header string) string {
	if header == "" {
		return ""
	}

	// Standard parsing copes with parameters that follow the file name
	// (`filename="a.docx"; size=1`) and with RFC 2231 `filename*=` encodings,
	// both of which a plain split on "filename=" gets wrong.
	if _, params, err := mime.ParseMediaType(header); err == nil {
		if name := strings.TrimSpace(params["filename"]); name != "" {
			return name
		}
	}

	// Lenient fallback for headers the strict parser rejects, such as an
	// unquoted name containing spaces.
	_, rest, found := strings.Cut(header, "filename=")
	if !found {
		return ""
	}
	if end := strings.IndexByte(rest, ';'); end >= 0 {
		rest = rest[:end]
	}
	return strings.Trim(rest, `"' `)
}
// sanitizeFilename makes filename safe to use as a single path component.
//
// Each of the characters / \ : * ? " < > | is replaced with an underscore, and
// ".docx" is appended when the result contains no dot at all.
func sanitizeFilename(filename string) string {
	illegal := strings.NewReplacer(
		"/", "_",
		"\\", "_",
		":", "_",
		"*", "_",
		"?", "_",
		"\"", "_",
		"<", "_",
		">", "_",
		"|", "_",
	)
	cleaned := illegal.Replace(filename)

	// A name without any dot is treated as having no extension.
	if strings.IndexByte(cleaned, '.') < 0 {
		cleaned += ".docx"
	}
	return cleaned
}
// GetWordTextFromURL downloads the Word file at url and returns its plain text
// together with the file name chosen for the download.
//
// validFunc is forwarded to DownloadFile. Download and parse failures are
// wrapped so the underlying error stays reachable via errors.Is / errors.As.
func GetWordTextFromURL(url string, validFunc func(resp *http.Response) error) (string, string, error) {
	// Step 1: fetch the raw bytes.
	raw, name, downloadErr := DownloadFile(url, validFunc)
	if downloadErr != nil {
		return "", "", fmt.Errorf("下载失败: %w", downloadErr)
	}

	// Step 2: extract the text from the .docx archive.
	content, parseErr := parseWordContent(raw)
	if parseErr != nil {
		return "", "", fmt.Errorf("解析失败: %w", parseErr)
	}
	return content, name, nil
}
// 解析Word内容 - 简单版本,只提取文字
func parseWordContent(data []byte) (string, error) {
reader := bytes.NewReader(data)
zipReader, err := zip.NewReader(reader, int64(len(data)))
if err != nil {
return "", fmt.Errorf("解压docx失败: %v", err)
}
var textBuilder strings.Builder
// 遍历 ZIP 文件中的文件
for _, file := range zipReader.File {
// 只处理文档主体文件
if file.Name == "word/document.xml" {
rc, err := file.Open()
if err != nil {
return "", fmt.Errorf("打开文档文件失败: %v", err)
}
defer rc.Close()
// 读取 XML 内容
xmlData, err := io.ReadAll(rc)
if err != nil {
return "", fmt.Errorf("读取XML失败: %v", err)
}
// 提取文本
text, err := parseWordXML(xmlData)
if err != nil {
return "", fmt.Errorf("解析XML失败: %v", err)
}
textBuilder.WriteString(text)
break // 找到主文档后退出循环
}
}
return textBuilder.String(), nil
}
// parseWordXML converts the XML of a Word main document part into plain text.
//
// It decodes <document>/<body>/<p>/<r>/<t> and concatenates every text element
// of every run, writing a newline after each paragraph. Only paragraphs that
// are direct children of <body>, and runs that are direct children of those
// paragraphs, are visited.
//
// If the XML cannot be unmarshalled into that shape, the text is taken from
// extractTextSimple instead. The returned error is always nil.
func parseWordXML(xmlData []byte) (string, error) {
	type wordDocument struct {
		XMLName xml.Name `xml:"document"`
		Body    struct {
			Paragraphs []struct {
				Runs []struct {
					// A run may hold several <t> elements; a plain string
					// field would keep only the last one decoded.
					Texts []string `xml:"t"`
				} `xml:"r"`
			} `xml:"p"`
		} `xml:"body"`
	}

	var doc wordDocument
	if err := xml.Unmarshal(xmlData, &doc); err != nil {
		// Fall back to the tolerant token scan.
		return extractTextSimple(xmlData), nil
	}

	var sb strings.Builder
	for _, para := range doc.Body.Paragraphs {
		for _, run := range para.Runs {
			for _, text := range run.Texts {
				sb.WriteString(text)
			}
		}
		sb.WriteString("\n")
	}
	return sb.String(), nil
}
// extractTextSimple scans xmlData token by token and concatenates the
// character data that immediately follows every start element whose local
// name is "t", at any depth. No paragraph separators are inserted.
//
// Scanning stops at the first decoder error and the text gathered so far is
// returned. A syntax error is returned again by every later Token call, so
// skipping it with continue would never terminate on malformed or truncated
// XML.
func extractTextSimple(xmlData []byte) string {
	var sb strings.Builder
	decoder := xml.NewDecoder(bytes.NewReader(xmlData))

	for {
		token, err := decoder.Token()
		if err != nil {
			// io.EOF at the end of input, or a syntax error.
			break
		}

		start, isStart := token.(xml.StartElement)
		if !isStart || start.Name.Local != "t" {
			continue
		}

		// The token right after <t> is its text, when it has any.
		next, err := decoder.Token()
		if err != nil {
			break
		}
		if chars, isText := next.(xml.CharData); isText {
			sb.Write(chars)
		}
	}
	return sb.String()
}
// IsWordFile reports whether resp looks like a Word document, judging only by
// its Content-Type header.
//
// The header is lower-cased and accepted when it contains any of the known
// Word media types, or application/octet-stream, which some servers send for
// arbitrary downloads. It returns nil on a match and an error otherwise.
func IsWordFile(resp *http.Response) error {
	contentType := strings.ToLower(resp.Header.Get("Content-Type"))

	accepted := [...]string{
		"application/msword",
		"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
		"application/vnd.ms-word",
		"application/octet-stream", // generic type some servers return
	}
	for i := range accepted {
		if strings.Contains(contentType, accepted[i]) {
			return nil
		}
	}
	return errors.New("错误的文件类型")
}

View File

@ -0,0 +1,15 @@
package services
import (
"testing"
)
// Test_WordAna calls Run and then invokes WordAnat on the package-level
// advicer with the URL of a sample .docx file. It makes no assertions and
// never uses t.
// NOTE(review): advicer is declared in this file without an initializer;
// presumably Run() assigns it — confirm, otherwise this is a method call on a
// nil *AdviceService.
// NOTE(review): anything WordAnat returns is discarded, so a failure inside it
// cannot fail this test unless it panics — confirm whether that is intended.
func Test_WordAna(t *testing.T) {
Run()
advicer.WordAnat("https://attachment-public.oss-cn-hangzhou.aliyuncs.com/ai-scheduler/data-analytics/word/content.docx")
}
// Package-level fixtures shared by the tests in this file.
var (
// advicer is the service under test. It has no initializer here.
// NOTE(review): presumably set up by Run() — confirm.
advicer *AdviceService
)