fix: 1. 调整vllm配置及其相关 2. 意图识别模型切换 ollama -> vllm

2026-02-28 14:16:53 +08:00 · 2026-02-28 14:16:53 +08:00 · ca671694f9
parent 376c08e836
commit ca671694f9
8 changed files with 237 additions and 24 deletions
--- a/config/config.yaml
+++ b/config/config.yaml
@ -17,8 +17,14 @@ ollama:
  format: "json"
 vllm:
-  base_url: "http://172.17.0.1:8001/v1"
+  vl_model:
-  vl_model: "qwen2.5-vl-3b-awq"
+    base_url: "http://192.168.6.115:8001/v1"
    model: "qwen2.5-vl-3b-awq"
    timeout: "120s"
    level: "info"
  text_model:
    base_url: "http://192.168.6.115:8002/v1"
    model: "qwen3-8b-fp8"
    timeout: "120s"
    level: "info"
--- a/config/config_env.yaml
+++ b/config/config_env.yaml
@ -14,8 +14,14 @@ ollama:
  format: "json"
 vllm:
-  base_url: "http://117.175.169.61:16001/v1"
+  vl_model:
-  vl_model: "qwen2.5-vl-3b-awq"
+    base_url: "http://192.168.6.115:8001/v1"
    model: "qwen2.5-vl-3b-awq"
    timeout: "120s"
    level: "info"
  text_model:
    base_url: "http://192.168.6.115:8002/v1"
    model: "qwen3-8b-fp8"
    timeout: "120s"
    level: "info"
--- a/config/config_test.yaml
+++ b/config/config_test.yaml
@ -14,8 +14,14 @@ ollama:
  format: "json"
 vllm:
-  base_url: "http://host.docker.internal:8001/v1"
+  vl_model:
-  vl_model: "qwen2.5-vl-3b-awq"
+    base_url: "http://192.168.6.115:8001/v1"
    model: "qwen2.5-vl-3b-awq"
    timeout: "120s"
    level: "info"
  text_model:
    base_url: "http://192.168.6.115:8002/v1"
    model: "qwen3-8b-fp8"
    timeout: "120s"
    level: "info"
--- a/internal/biz/do/handle.go
+++ b/internal/biz/do/handle.go
@ -36,6 +36,7 @@ import (
 type Handle struct {
 	Ollama                *llm_service.OllamaService
 	Vllm                  *llm_service.VllmService
 	toolManager           *tools.Manager
 	conf                  *config.Config
 	sessionImpl           *impl.SessionImpl
@ -47,6 +48,7 @@ type Handle struct {
 func NewHandle(
 	Ollama *llm_service.OllamaService,
 	Vllm *llm_service.VllmService,
 	toolManager *tools.Manager,
 	conf *config.Config,
 	sessionImpl *impl.SessionImpl,
@ -57,6 +59,7 @@ func NewHandle(
 ) *Handle {
 	return &Handle{
 		Ollama:                Ollama,
 		Vllm:                  Vllm,
 		toolManager:           toolManager,
 		conf:                  conf,
 		sessionImpl:           sessionImpl,
@ -72,7 +75,8 @@ func (r *Handle) Recognize(ctx context.Context, rec *entitys.Recognize, promptPr
 	prompt, err := promptProcessor.CreatePrompt(ctx, rec)
 	//意图识别
-	recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
+	// recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
 	recognizeMsg, err := r.Vllm.IntentRecognize(ctx, &entitys.ToolSelect{
 		Prompt: prompt,
 		Tools:  rec.Tasks,
 	})
--- a/internal/biz/llm_service/vllm.go
+++ b/internal/biz/llm_service/vllm.go
@ -0,0 +1,153 @@
 package llm_service
 import (
 	"ai_scheduler/internal/config"
 	"ai_scheduler/internal/entitys"
 	"ai_scheduler/internal/pkg"
 	"ai_scheduler/internal/pkg/utils_vllm"
 	"context"
 	"encoding/base64"
 	"errors"
 	"fmt"
 	"strings"
 	"github.com/cloudwego/eino/schema"
 	"github.com/ollama/ollama/api"
 )
 // VllmService runs LLM requests against a vLLM backend through utils_vllm.Client.
 type VllmService struct {
 	// client is the vLLM client used to issue tool-selection requests.
 	client *utils_vllm.Client
 	// config is the application configuration handed to the constructor.
 	config *config.Config
 }
 // NewVllmService builds a VllmService that holds the given vLLM client and
 // application configuration.
 func NewVllmService(
 	client *utils_vllm.Client,
 	config *config.Config,
 ) *VllmService {
 	svc := new(VllmService)
 	svc.client = client
 	svc.config = config
 	return svc
 }
 // IntentRecognize sends the prompt and the candidate tools to the vLLM client
 // and returns the recognition result as a string.
 //
 // Result selection:
 //   - a non-empty response Content is returned verbatim;
 //   - otherwise the first tool call is wrapped in an entitys.Match
 //     (Confidence 1, IsMatch true) and returned as JSON;
 //   - if there is neither content nor a tool call, an error is returned.
 //
 // Errors from the underlying ToolSelect call are returned unchanged.
 func (s *VllmService) IntentRecognize(ctx context.Context, req *entitys.ToolSelect) (msg string, err error) {
 	resp, err := s.client.ToolSelect(ctx, s.convertMessages(req.Prompt), s.convertTools(req.Tools))
 	if err != nil {
 		return "", err
 	}
 	switch {
 	case resp.Content != "":
 		// Plain text answer: pass it through untouched.
 		return resp.Content, nil
 	case len(resp.ToolCalls) == 0:
 		// Nothing usable came back from the model.
 		return "", errors.New("不太明白你想表达的意思呢，可以在仔细描述一下您所需要的内容吗，感谢感谢")
 	}
 	// Only the first tool call is considered.
 	first := resp.ToolCalls[0]
 	matched := &entitys.Match{
 		Confidence: 1,
 		Index:      first.Function.Name,
 		Parameters: first.Function.Arguments,
 		IsMatch:    true,
 	}
 	return pkg.JsonStringIgonErr(matched), nil
 }
 // convertMessages translates Ollama-style chat messages into eino schema
 // messages, copying role and text content. Messages that carry images also
 // get a multi-part user input: one text part followed by one base64 image
 // part per image, each labelled "image/jpeg".
 func (s *VllmService) convertMessages(prompts []api.Message) []*schema.Message {
 	out := make([]*schema.Message, 0, len(prompts))
 	for i := range prompts {
 		src := &prompts[i]
 		converted := &schema.Message{
 			Role:    schema.RoleType(src.Role),
 			Content: src.Content,
 		}
 		if len(src.Images) == 0 {
 			out = append(out, converted)
 			continue
 		}
 		// In practice this branch is not expected to be reached.
 		parts := make([]schema.MessageInputPart, 0, len(src.Images)+1)
 		parts = append(parts, schema.MessageInputPart{
 			Type: schema.ChatMessagePartTypeText,
 			Text: src.Content,
 		})
 		for _, raw := range src.Images {
 			// A fresh variable per image so every part points at its own string.
 			encoded := base64.StdEncoding.EncodeToString(raw)
 			parts = append(parts, schema.MessageInputPart{
 				Type: schema.ChatMessagePartTypeImageURL,
 				Image: &schema.MessageInputImage{
 					MessagePartCommon: schema.MessagePartCommon{
 						MIMEType:   "image/jpeg",
 						Base64Data: &encoded,
 					},
 				},
 			})
 		}
 		converted.UserInputMultiContent = parts
 		out = append(out, converted)
 	}
 	return out
 }
 // convertTools turns registered tasks into eino tool definitions. Each task
 // becomes one schema.ToolInfo whose parameters are built from the task's
 // Param.Properties:
 //   - the parameter type is derived from the property's Type (see
 //     vllmParamDataType);
 //   - the description is the property's Description, with " Enum: a, b, ..."
 //     appended when enum values are declared;
 //   - a parameter is required when its name is listed in Param.Required.
 func (s *VllmService) convertTools(tasks []entitys.RegistrationTask) []*schema.ToolInfo {
 	tools := make([]*schema.ToolInfo, 0, len(tasks))
 	for _, task := range tasks {
 		spec := task.TaskConfigDetail.Param
 		// Index the required names once per task instead of rescanning the
 		// list for every property.
 		required := make(map[string]struct{}, len(spec.Required))
 		for _, name := range spec.Required {
 			required[name] = struct{}{}
 		}
 		params := make(map[string]*schema.ParameterInfo, len(spec.Properties))
 		for name, prop := range spec.Properties {
 			desc := prop.Description
 			if len(prop.Enum) > 0 {
 				enumStrs := make([]string, 0, len(prop.Enum))
 				for _, e := range prop.Enum {
 					enumStrs = append(enumStrs, fmt.Sprintf("%v", e))
 				}
 				desc += " Enum: " + strings.Join(enumStrs, ", ")
 			}
 			_, isRequired := required[name]
 			params[name] = &schema.ParameterInfo{
 				// fmt.Sprintf("%v") renders both a plain string and a list of
 				// strings, so the exact declared type of prop.Type does not
 				// matter here.
 				Type:     vllmParamDataType(fmt.Sprintf("%v", prop.Type)),
 				Desc:     desc,
 				Required: isRequired,
 			}
 		}
 		tools = append(tools, &schema.ToolInfo{
 			Name:        task.Name,
 			Desc:        task.Desc,
 			ParamsOneOf: schema.NewParamsOneOfByParams(params),
 		})
 	}
 	return tools
 }
 // vllmParamDataType maps a printed JSON-schema type onto an eino data type.
 // raw is either a single name ("integer") or a printed list ("[integer null]").
 // The first recognised name wins, so a nullable declaration such as
 // "[integer null]" or "[null integer]" resolves to the non-null type instead
 // of silently degrading to string. Unknown or empty input yields schema.String.
 func vllmParamDataType(raw string) schema.DataType {
 	for _, name := range strings.Fields(strings.Trim(raw, "[]")) {
 		switch name {
 		case "string":
 			return schema.String
 		case "integer", "int":
 			return schema.Integer
 		case "number", "float":
 			return schema.Number
 		case "boolean", "bool":
 			return schema.Boolean
 		case "object":
 			return schema.Object
 		case "array":
 			return schema.Array
 		}
 	}
 	return schema.String
 }
--- a/internal/biz/provider_set.go
+++ b/internal/biz/provider_set.go
@ -13,6 +13,7 @@ var ProviderSetBiz = wire.NewSet(
 	NewChatHistoryBiz,
 	//llm_service.NewLangChainGenerate,
 	llm_service.NewOllamaGenerate,
 	llm_service.NewVllmService,
 	//handle.NewHandle,
 	do.NewDo,
 	do.NewHandle,
--- a/internal/config/config.go
+++ b/internal/config/config.go
@ -122,8 +122,13 @@ type OllamaConfig struct {
 }
 // VllmConfig groups the vLLM settings per model. Each field is decoded from
 // the sub-key named in its mapstructure tag.
 type VllmConfig struct {
 	// VLModel is read from the "vl_model" key.
 	VLModel   VllmModel `mapstructure:"vl_model"`
 	// TextModel is read from the "text_model" key.
 	TextModel VllmModel `mapstructure:"text_model"`
 }
 // VllmModel holds the settings of a single vLLM-served model: the endpoint
 // base URL, the model name, and the request timeout.
 // NOTE(review): Level is presumably a log level (the configs set "info") — confirm.
 type VllmModel struct {
 	BaseURL string        `mapstructure:"base_url"`
-	VlModel string        `mapstructure:"vl_model"`
+	Model   string        `mapstructure:"model"`
 	Timeout time.Duration `mapstructure:"timeout"`
 	Level   string        `mapstructure:"level"`
 }
--- a/internal/pkg/utils_vllm/client.go
+++ b/internal/pkg/utils_vllm/client.go
@ -7,33 +7,63 @@ import (
 	"encoding/base64"
 	"github.com/cloudwego/eino-ext/components/model/openai"
 	"github.com/cloudwego/eino/components/model"
 	"github.com/cloudwego/eino/schema"
 )
 // Client bundles the chat models used to talk to vLLM together with the
 // application configuration they were built from.
 type Client struct {
-	model  *openai.ChatModel
+	vlModel       *openai.ChatModel
 	generateModel *openai.ChatModel
 	config        *config.Config
 }
 // NewClient creates the two chat models via openai.NewChatModel: the vision
 // model from config.Vllm.VLModel and the generation model from
 // config.Vllm.TextModel. The generation model is created with
 // chat_template_kwargs.enable_thinking set to false in ExtraFields.
 // It returns the client, a cleanup function that currently does nothing, and
 // the first error hit while constructing either model.
 func NewClient(config *config.Config) (*Client, func(), error) {
-	m, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
+	// Initialize the vision model.
-		BaseURL: config.Vllm.BaseURL,
+	vl, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
-		Model:   config.Vllm.VlModel,
+		BaseURL: config.Vllm.VLModel.BaseURL,
-		Timeout: config.Vllm.Timeout,
+		Model:   config.Vllm.VLModel.Model,
 		Timeout: config.Vllm.VLModel.Timeout,
 	})
 	if err != nil {
 		return nil, nil, err
 	}
-	c := &Client{model: m, config: config}
+
 	// Initialize the generation model.
 	gen, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
 		BaseURL: config.Vllm.TextModel.BaseURL,
 		Model:   config.Vllm.TextModel.Model,
 		Timeout: config.Vllm.TextModel.Timeout,
 		ExtraFields: map[string]any{
 			"chat_template_kwargs": map[string]any{
 				"enable_thinking": false,
 			},
 		},
 	})
 	if err != nil {
 		return nil, nil, err
 	}
 	c := &Client{
 		vlModel:       vl,
 		generateModel: gen,
 		config:        config,
 	}
 	cleanup := func() {}
 	return c, cleanup, nil
 }
 // Chat sends msgs to the generation model and returns its reply.
 func (c *Client) Chat(ctx context.Context, msgs []*schema.Message) (*schema.Message, error) {
-	return c.model.Generate(ctx, msgs)
+	// Plain chat defaults to the generation model.
 	return c.generateModel.Generate(ctx, msgs)
 }
 // ToolSelect sends msgs to the generation model with the given tools attached
 // as a call option and returns the model's reply.
 func (c *Client) ToolSelect(ctx context.Context, msgs []*schema.Message, tools []*schema.ToolInfo) (*schema.Message, error) {
 	// Tool selection runs on the generation model.
 	withTools := model.WithTools(tools)
 	return c.generateModel.Generate(ctx, msgs, withTools)
 }
 func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt string, imgURLs []string) (*schema.Message, error) {
 	// 图片识别使用视觉模型
 	in := []*schema.Message{
 		{
 			Role:    schema.System,
@ -58,11 +88,12 @@ func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt
 	}
 	in[1].UserInputMultiContent = parts
-	return c.model.Generate(ctx, in)
+	return c.vlModel.Generate(ctx, in)
 }
 // 识别图片by二进制文件
 func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPrompt string, imgBytes []byte, imgType string) (*schema.Message, error) {
 	// 图片识别使用视觉模型
 	in := []*schema.Message{
 		{
 			Role:    schema.System,
@ -82,9 +113,10 @@ func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPr
 				MIMEType:   imgType,
 				Base64Data: util.AnyToPoint(base64.StdEncoding.EncodeToString(imgBytes)),
 			},
 			Detail: schema.ImageURLDetailHigh,
 		},
 	})
 	in[1].UserInputMultiContent = parts
-	return c.model.Generate(ctx, in)
+	return c.vlModel.Generate(ctx, in)
 }