结构优化与图片识别增强
This commit is contained in:
parent
7076d6a918
commit
92218ceb4d
|
|
@ -53,3 +53,21 @@ tools:
|
||||||
enabled: true
|
enabled: true
|
||||||
DingTalkBot:
|
DingTalkBot:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
|
||||||
|
|
||||||
|
default_prompt:
|
||||||
|
img_recognize:
|
||||||
|
system_prompt:
|
||||||
|
'你是一个具备图像理解与用户意图分析能力的智能助手。当用户提供一张图片时,请完成以下任务:
|
||||||
|
1.图像内容识别:
|
||||||
|
识别并描述图片中的主要对象、场景、文字(如存在)、颜色、布局等基本信息。
|
||||||
|
如果图片中包含表格、图表、二维码、标志(Logo)、菜单、票据等内容,请特别指出。
|
||||||
|
2.关键信息提取:
|
||||||
|
提取出图片中对用户可能有用的关键信息(例如金额、日期、标题、编号、联系信息、商品名称等)。
|
||||||
|
若图片为文档类(如合同、发票、收据),请结构化输出关键字段(如客户名称、金额、开票日期等)。
|
||||||
|
3.用户需求预测:
|
||||||
|
根据图片内容和常见使用场景,推测用户可能想要执行的操作或提出的问题。
|
||||||
|
例如:是否需要翻译图片中的文字?是否需要提取表格数据?是否需要分析图表趋势?是否需要识别某个标志的含义?
|
||||||
|
输出你预测的 2~3 个用户可能的需求,并简要说明理由。
|
||||||
|
'
|
||||||
|
user_prompt: '识别图片内容, 以markdown格式输出'
|
||||||
|
|
@ -110,15 +110,21 @@ func (d *Do) getImgData() (err error) {
|
||||||
if len(imgs) == 0 {
|
if len(imgs) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err = pkg.ValidateImageURL(d.Ctx.Req.Img); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for k, img := range imgs {
|
for k, img := range imgs {
|
||||||
baseErr := "获取第" + strconv.Itoa(k+1) + "张图片失败:"
|
baseErr := "获取第" + strconv.Itoa(k+1) + "张图片失败:"
|
||||||
entitys.ResLog(d.Ctx.Ch, "", "获取第"+strconv.Itoa(k+1)+"张图片")
|
entitys.ResLog(d.Ctx.Ch, "img_get_start", "正在获取第"+strconv.Itoa(k+1)+"张图片")
|
||||||
|
if err = pkg.ValidateImageURL(img); err != nil {
|
||||||
|
entitys.ResLog(d.Ctx.Ch, "", baseErr+":expected image content")
|
||||||
|
continue
|
||||||
|
}
|
||||||
req := l_request.Request{
|
req := l_request.Request{
|
||||||
Method: "GET",
|
Method: "GET",
|
||||||
Url: img,
|
Url: img,
|
||||||
|
Headers: map[string]string{
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
||||||
|
"Accept": "image/webp,image/apng,image/*,*/*;q=0.8",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
res, _err := req.Send()
|
res, _err := req.Send()
|
||||||
if _err != nil {
|
if _err != nil {
|
||||||
|
|
@ -135,6 +141,7 @@ func (d *Do) getImgData() (err error) {
|
||||||
}
|
}
|
||||||
d.Ctx.ImgByte = append(d.Ctx.ImgByte, res.Content)
|
d.Ctx.ImgByte = append(d.Ctx.ImgByte, res.Content)
|
||||||
d.Ctx.ImgUrls = append(d.Ctx.ImgUrls, img)
|
d.Ctx.ImgUrls = append(d.Ctx.ImgUrls, img)
|
||||||
|
entitys.ResLog(d.Ctx.Ch, "img_get_end", "第"+strconv.Itoa(k+1)+"张图片获取成功")
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -44,15 +44,15 @@ func NewHandle(
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Handle) Recognize(ctx context.Context, requireData *entitys.RequireData) (err error) {
|
func (r *Handle) Recognize(ctx context.Context, requireData *entitys.RequireData) (err error) {
|
||||||
entitys.ResLog(requireData.Ch, "", "准备意图识别")
|
entitys.ResLog(requireData.Ch, "recognize_start", "准备意图识别")
|
||||||
|
|
||||||
//意图识别
|
//意图识别
|
||||||
recognizeMsg, err := r.Ollama.IntentRecognize(ctx, requireData)
|
recognizeMsg, err := r.Ollama.IntentRecognize(ctx, requireData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
entitys.ResLog(requireData.Ch, "", recognizeMsg)
|
entitys.ResLog(requireData.Ch, "recognize", recognizeMsg)
|
||||||
entitys.ResLog(requireData.Ch, "", "意图识别结束")
|
entitys.ResLog(requireData.Ch, "recognize_end", "意图识别结束")
|
||||||
|
|
||||||
var match entitys.Match
|
var match entitys.Match
|
||||||
if err = json.Unmarshal([]byte(recognizeMsg), &match); err != nil {
|
if err = json.Unmarshal([]byte(recognizeMsg), &match); err != nil {
|
||||||
|
|
|
||||||
|
|
@ -98,20 +98,20 @@ func (r *OllamaService) RecognizeWithImg(ctx context.Context, requireData *entit
|
||||||
if requireData.ImgByte == nil {
|
if requireData.ImgByte == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
entitys.ResLog(requireData.Ch, "", "图片识别中。。。")
|
entitys.ResLog(requireData.Ch, "recognize_img_start", "图片识别中...")
|
||||||
|
|
||||||
desc, err = r.client.Generation(ctx, &api.GenerateRequest{
|
desc, err = r.client.Generation(ctx, &api.GenerateRequest{
|
||||||
Model: r.config.Ollama.VlModel,
|
Model: r.config.Ollama.VlModel,
|
||||||
Stream: new(bool),
|
Stream: new(bool),
|
||||||
System: "完整提取出图片中的文字以及重要信息,并对用户的需求进行预测",
|
System: r.config.DefaultPrompt.ImgRecognize.SystemPrompt,
|
||||||
Prompt: "完整提取出图片中的文字以及重要信息,并对用户的需求进行预测", //requireData.Req.Text,
|
Prompt: r.config.DefaultPrompt.ImgRecognize.UserPrompt,
|
||||||
Images: requireData.ImgByte,
|
Images: requireData.ImgByte,
|
||||||
KeepAlive: &api.Duration{Duration: 3600 * time.Second},
|
KeepAlive: &api.Duration{Duration: 3600 * time.Second},
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
entitys.ResLog(requireData.Ch, "", "图片识别完成,识别内容:"+desc.Response)
|
entitys.ResLog(requireData.Ch, "recognize_img_end", "图片识别完成,识别内容:"+desc.Response)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ func (r *AiRouterBiz) RouteWithSocket(c *websocket.Conn, req *entitys.ChatSockRe
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
//初始化通道/上下文
|
//意图识别
|
||||||
if err = r.handle.Recognize(ctx, dos.Ctx); err != nil {
|
if err = r.handle.Recognize(ctx, dos.Ctx); err != nil {
|
||||||
log.Errorf("意图识别失败: %s", err.Error())
|
log.Errorf("意图识别失败: %s", err.Error())
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -9,16 +9,26 @@ import (
|
||||||
|
|
||||||
// Config 应用配置
|
// Config 应用配置
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Server ServerConfig `mapstructure:"server"`
|
Server ServerConfig `mapstructure:"server"`
|
||||||
Ollama OllamaConfig `mapstructure:"ollama"`
|
Ollama OllamaConfig `mapstructure:"ollama"`
|
||||||
Sys SysConfig `mapstructure:"sys"`
|
Sys SysConfig `mapstructure:"sys"`
|
||||||
Tools ToolsConfig `mapstructure:"tools"`
|
Tools ToolsConfig `mapstructure:"tools"`
|
||||||
Logging LoggingConfig `mapstructure:"logging"`
|
Logging LoggingConfig `mapstructure:"logging"`
|
||||||
Redis Redis `mapstructure:"redis"`
|
Redis Redis `mapstructure:"redis"`
|
||||||
DB DB `mapstructure:"db"`
|
DB DB `mapstructure:"db"`
|
||||||
|
DefaultPrompt SysPrompt `mapstructure:"default_prompt"`
|
||||||
// LLM *LLM `mapstructure:"llm"`
|
// LLM *LLM `mapstructure:"llm"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type SysPrompt struct {
|
||||||
|
ImgRecognize DefaultPrompt `mapstructure:"img_recognize"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type DefaultPrompt struct {
|
||||||
|
SystemPrompt string `mapstructure:"system_prompt"`
|
||||||
|
UserPrompt string `mapstructure:"user_prompt"`
|
||||||
|
}
|
||||||
|
|
||||||
type LLM struct {
|
type LLM struct {
|
||||||
Model string `mapstructure:"model"`
|
Model string `mapstructure:"model"`
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,22 +35,22 @@ func ValidateImageURL(rawURL string) error {
|
||||||
// 1. 基础格式验证
|
// 1. 基础格式验证
|
||||||
parsed, err := url.Parse(rawURL)
|
parsed, err := url.Parse(rawURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("invalid URL format: %v", err)
|
return fmt.Errorf("未知的图片格式: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. 检查协议是否为 http/https
|
// 2. 检查协议是否为 http/https
|
||||||
if parsed.Scheme != "http" && parsed.Scheme != "https" {
|
if parsed.Scheme != "http" && parsed.Scheme != "https" {
|
||||||
return errors.New("URL must use http or https protocol")
|
return errors.New("必须是http/https结构")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. 检查是否有空的主机名
|
// 3. 检查是否有空的主机名
|
||||||
if parsed.Host == "" {
|
if parsed.Host == "" {
|
||||||
return errors.New("URL missing host")
|
return errors.New("未知的url地址")
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. 检查路径是否为空(可选)
|
// 4. 检查路径是否为空
|
||||||
if strings.TrimSpace(parsed.Path) == "" {
|
if strings.TrimSpace(parsed.Path) == "" {
|
||||||
return errors.New("URL path is empty")
|
return errors.New("url为空")
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue