结构优化与图片识别增强
This commit is contained in:
parent
7076d6a918
commit
92218ceb4d
|
|
@ -53,3 +53,21 @@ tools:
|
|||
enabled: true
|
||||
DingTalkBot:
|
||||
enabled: true
|
||||
|
||||
|
||||
default_prompt:
|
||||
img_recognize:
|
||||
system_prompt:
|
||||
'你是一个具备图像理解与用户意图分析能力的智能助手。当用户提供一张图片时,请完成以下任务:
|
||||
1.图像内容识别:
|
||||
识别并描述图片中的主要对象、场景、文字(如存在)、颜色、布局等基本信息。
|
||||
如果图片中包含表格、图表、二维码、标志(Logo)、菜单、票据等内容,请特别指出。
|
||||
2.关键信息提取:
|
||||
提取出图片中对用户可能有用的关键信息(例如金额、日期、标题、编号、联系信息、商品名称等)。
|
||||
若图片为文档类(如合同、发票、收据),请结构化输出关键字段(如客户名称、金额、开票日期等)。
|
||||
3.用户需求预测:
|
||||
根据图片内容和常见使用场景,推测用户可能想要执行的操作或提出的问题。
|
||||
例如:是否需要翻译图片中的文字?是否需要提取表格数据?是否需要分析图表趋势?是否需要识别某个标志的含义?
|
||||
输出你预测的 2~3 个用户可能的需求,并简要说明理由。
|
||||
'
|
||||
user_prompt: '识别图片内容, 以markdown格式输出'
|
||||
|
|
@ -110,15 +110,21 @@ func (d *Do) getImgData() (err error) {
|
|||
if len(imgs) == 0 {
|
||||
return
|
||||
}
|
||||
if err = pkg.ValidateImageURL(d.Ctx.Req.Img); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for k, img := range imgs {
|
||||
baseErr := "获取第" + strconv.Itoa(k+1) + "张图片失败:"
|
||||
entitys.ResLog(d.Ctx.Ch, "", "获取第"+strconv.Itoa(k+1)+"张图片")
|
||||
entitys.ResLog(d.Ctx.Ch, "img_get_start", "正在获取第"+strconv.Itoa(k+1)+"张图片")
|
||||
if err = pkg.ValidateImageURL(img); err != nil {
|
||||
entitys.ResLog(d.Ctx.Ch, "", baseErr+":expected image content")
|
||||
continue
|
||||
}
|
||||
req := l_request.Request{
|
||||
Method: "GET",
|
||||
Url: img,
|
||||
Headers: map[string]string{
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||
"Accept": "image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
},
|
||||
}
|
||||
res, _err := req.Send()
|
||||
if _err != nil {
|
||||
|
|
@ -135,6 +141,7 @@ func (d *Do) getImgData() (err error) {
|
|||
}
|
||||
d.Ctx.ImgByte = append(d.Ctx.ImgByte, res.Content)
|
||||
d.Ctx.ImgUrls = append(d.Ctx.ImgUrls, img)
|
||||
entitys.ResLog(d.Ctx.Ch, "img_get_end", "第"+strconv.Itoa(k+1)+"张图片获取成功")
|
||||
}
|
||||
|
||||
return
|
||||
|
|
|
|||
|
|
@ -44,15 +44,15 @@ func NewHandle(
|
|||
}
|
||||
|
||||
func (r *Handle) Recognize(ctx context.Context, requireData *entitys.RequireData) (err error) {
|
||||
entitys.ResLog(requireData.Ch, "", "准备意图识别")
|
||||
entitys.ResLog(requireData.Ch, "recognize_start", "准备意图识别")
|
||||
|
||||
//意图识别
|
||||
recognizeMsg, err := r.Ollama.IntentRecognize(ctx, requireData)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
entitys.ResLog(requireData.Ch, "", recognizeMsg)
|
||||
entitys.ResLog(requireData.Ch, "", "意图识别结束")
|
||||
entitys.ResLog(requireData.Ch, "recognize", recognizeMsg)
|
||||
entitys.ResLog(requireData.Ch, "recognize_end", "意图识别结束")
|
||||
|
||||
var match entitys.Match
|
||||
if err = json.Unmarshal([]byte(recognizeMsg), &match); err != nil {
|
||||
|
|
|
|||
|
|
@ -98,20 +98,20 @@ func (r *OllamaService) RecognizeWithImg(ctx context.Context, requireData *entit
|
|||
if requireData.ImgByte == nil {
|
||||
return
|
||||
}
|
||||
entitys.ResLog(requireData.Ch, "", "图片识别中。。。")
|
||||
entitys.ResLog(requireData.Ch, "recognize_img_start", "图片识别中...")
|
||||
|
||||
desc, err = r.client.Generation(ctx, &api.GenerateRequest{
|
||||
Model: r.config.Ollama.VlModel,
|
||||
Stream: new(bool),
|
||||
System: "完整提取出图片中的文字以及重要信息,并对用户的需求进行预测",
|
||||
Prompt: "完整提取出图片中的文字以及重要信息,并对用户的需求进行预测", //requireData.Req.Text,
|
||||
System: r.config.DefaultPrompt.ImgRecognize.SystemPrompt,
|
||||
Prompt: r.config.DefaultPrompt.ImgRecognize.UserPrompt,
|
||||
Images: requireData.ImgByte,
|
||||
KeepAlive: &api.Duration{Duration: 3600 * time.Second},
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
entitys.ResLog(requireData.Ch, "", "图片识别完成,识别内容:"+desc.Response)
|
||||
entitys.ResLog(requireData.Ch, "recognize_img_end", "图片识别完成,识别内容:"+desc.Response)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ func (r *AiRouterBiz) RouteWithSocket(c *websocket.Conn, req *entitys.ChatSockRe
|
|||
return
|
||||
}
|
||||
|
||||
//初始化通道/上下文
|
||||
//意图识别
|
||||
if err = r.handle.Recognize(ctx, dos.Ctx); err != nil {
|
||||
log.Errorf("意图识别失败: %s", err.Error())
|
||||
return
|
||||
|
|
|
|||
|
|
@ -9,16 +9,26 @@ import (
|
|||
|
||||
// Config is the application configuration. Each field is mapped from the
// configuration key named in its mapstructure tag.
|
||||
type Config struct {
|
||||
Server ServerConfig `mapstructure:"server"`
|
||||
Ollama OllamaConfig `mapstructure:"ollama"`
|
||||
Sys SysConfig `mapstructure:"sys"`
|
||||
Tools ToolsConfig `mapstructure:"tools"`
|
||||
Logging LoggingConfig `mapstructure:"logging"`
|
||||
Redis Redis `mapstructure:"redis"`
|
||||
DB DB `mapstructure:"db"`
|
||||
Server ServerConfig `mapstructure:"server"`
|
||||
Ollama OllamaConfig `mapstructure:"ollama"`
|
||||
Sys SysConfig `mapstructure:"sys"`
|
||||
Tools ToolsConfig `mapstructure:"tools"`
|
||||
Logging LoggingConfig `mapstructure:"logging"`
|
||||
Redis Redis `mapstructure:"redis"`
|
||||
DB DB `mapstructure:"db"`
|
||||
// DefaultPrompt holds the default prompt sets, mapped from the
// "default_prompt" key.
DefaultPrompt SysPrompt `mapstructure:"default_prompt"`
|
||||
// LLM *LLM `mapstructure:"llm"`
|
||||
}
|
||||
|
||||
// SysPrompt groups the default prompt sets. Config.DefaultPrompt maps it
// from the "default_prompt" configuration key.
type SysPrompt struct {
|
||||
// ImgRecognize is the prompt pair mapped from the "img_recognize" key.
ImgRecognize DefaultPrompt `mapstructure:"img_recognize"`
|
||||
}
|
||||
|
||||
// DefaultPrompt is a system/user prompt pair for a single task.
type DefaultPrompt struct {
|
||||
// SystemPrompt is mapped from the "system_prompt" key.
SystemPrompt string `mapstructure:"system_prompt"`
|
||||
// UserPrompt is mapped from the "user_prompt" key.
UserPrompt string `mapstructure:"user_prompt"`
|
||||
}
|
||||
|
||||
// LLM holds a model name mapped from the "model" key.
// NOTE(review): the Config field that would reference this type is commented
// out — confirm whether the type is still in use.
type LLM struct {
|
||||
// Model is mapped from the "model" key.
Model string `mapstructure:"model"`
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,22 +35,22 @@ func ValidateImageURL(rawURL string) error {
|
|||
// 1. 基础格式验证
|
||||
parsed, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid URL format: %v", err)
|
||||
return fmt.Errorf("未知的图片格式: %v", err)
|
||||
}
|
||||
|
||||
// 2. 检查协议是否为 http/https
|
||||
if parsed.Scheme != "http" && parsed.Scheme != "https" {
|
||||
return errors.New("URL must use http or https protocol")
|
||||
return errors.New("必须是http/https结构")
|
||||
}
|
||||
|
||||
// 3. 检查是否有空的主机名
|
||||
if parsed.Host == "" {
|
||||
return errors.New("URL missing host")
|
||||
return errors.New("未知的url地址")
|
||||
}
|
||||
|
||||
// 4. 检查路径是否为空(可选)
|
||||
// 4. 检查路径是否为空
|
||||
if strings.TrimSpace(parsed.Path) == "" {
|
||||
return errors.New("URL path is empty")
|
||||
return errors.New("url为空")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
|||
Loading…
Reference in New Issue