8 changed files with 24 additions and 237 deletions
--- a/config/config.yaml
+++ b/config/config.yaml
@ -17,14 +17,8 @@ ollama:
  format: "json"
 vllm:
-  vl_model:
+  base_url: "http://172.17.0.1:8001/v1"
-    base_url: "http://192.168.6.115:8001/v1"
+  vl_model: "qwen2.5-vl-3b-awq"
    model: "qwen2.5-vl-3b-awq"
    timeout: "120s"
    level: "info"
  text_model:
    base_url: "http://192.168.6.115:8002/v1"
    model: "qwen3-8b-fp8"
  timeout: "120s"
  level: "info"
--- a/config/config_env.yaml
+++ b/config/config_env.yaml
@ -14,14 +14,8 @@ ollama:
  format: "json"
 vllm:
-  vl_model:
+  base_url: "http://117.175.169.61:16001/v1"
-    base_url: "http://192.168.6.115:8001/v1"
+  vl_model: "qwen2.5-vl-3b-awq"
    model: "qwen2.5-vl-3b-awq"
    timeout: "120s"
    level: "info"
  text_model:
    base_url: "http://192.168.6.115:8002/v1"
    model: "qwen3-8b-fp8"
  timeout: "120s"
  level: "info"
--- a/config/config_test.yaml
+++ b/config/config_test.yaml
@ -14,14 +14,8 @@ ollama:
  format: "json"
 vllm:
-  vl_model:
+  base_url: "http://host.docker.internal:8001/v1"
-    base_url: "http://192.168.6.115:8001/v1"
+  vl_model: "qwen2.5-vl-3b-awq"
    model: "qwen2.5-vl-3b-awq"
    timeout: "120s"
    level: "info"
  text_model:
    base_url: "http://192.168.6.115:8002/v1"
    model: "qwen3-8b-fp8"
  timeout: "120s"
  level: "info"
--- a/internal/biz/do/handle.go
+++ b/internal/biz/do/handle.go
@ -36,7 +36,6 @@ import (
 type Handle struct {
 	Ollama                *llm_service.OllamaService
 	Vllm                  *llm_service.VllmService
 	toolManager           *tools.Manager
 	conf                  *config.Config
 	sessionImpl           *impl.SessionImpl
@ -48,7 +47,6 @@ type Handle struct {
 func NewHandle(
 	Ollama *llm_service.OllamaService,
 	Vllm *llm_service.VllmService,
 	toolManager *tools.Manager,
 	conf *config.Config,
 	sessionImpl *impl.SessionImpl,
@ -59,7 +57,6 @@ func NewHandle(
 ) *Handle {
 	return &Handle{
 		Ollama:                Ollama,
 		Vllm:                  Vllm,
 		toolManager:           toolManager,
 		conf:                  conf,
 		sessionImpl:           sessionImpl,
@ -75,8 +72,7 @@ func (r *Handle) Recognize(ctx context.Context, rec *entitys.Recognize, promptPr
 	prompt, err := promptProcessor.CreatePrompt(ctx, rec)
 	//意图识别
-	// recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
+	recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
 	recognizeMsg, err := r.Vllm.IntentRecognize(ctx, &entitys.ToolSelect{
 		Prompt: prompt,
 		Tools:  rec.Tasks,
 	})
--- a/internal/biz/llm_service/vllm.go
+++ b/internal/biz/llm_service/vllm.go
@ -1,153 +0,0 @@
 package llm_service
 import (
 	"ai_scheduler/internal/config"
 	"ai_scheduler/internal/entitys"
 	"ai_scheduler/internal/pkg"
 	"ai_scheduler/internal/pkg/utils_vllm"
 	"context"
 	"encoding/base64"
 	"errors"
 	"fmt"
 	"strings"
 	"github.com/cloudwego/eino/schema"
 	"github.com/ollama/ollama/api"
 )
 // VllmService pairs a vLLM client with the application configuration.
 // Its methods convert prompts and registered tasks into eino schema values
 // before handing them to the client.
 type VllmService struct {
 	// client is the vLLM client that IntentRecognize calls for tool selection.
 	client *utils_vllm.Client
 	// config is the application configuration supplied to NewVllmService.
 	config *config.Config
 }
 // NewVllmService builds a VllmService from an already-constructed vLLM
 // client and the application configuration. Both pointers are stored as
 // given; nothing is validated or copied.
 func NewVllmService(
 	client *utils_vllm.Client,
 	config *config.Config,
 ) *VllmService {
 	svc := new(VllmService)
 	svc.client = client
 	svc.config = config
 	return svc
 }
 // IntentRecognize asks the vLLM client to select a tool for the given prompt.
 //
 // The prompt and the registered tasks in req are converted to eino schema
 // values and passed to the client's ToolSelect. The reply is resolved as:
 //   - non-empty response content is returned as-is;
 //   - otherwise, if there is at least one tool call, the first one is wrapped
 //     in an entitys.Match (Confidence 1, IsMatch true) and returned after
 //     serialization with pkg.JsonStringIgonErr;
 //   - otherwise, an error asking the user to rephrase is returned.
 //
 // An error from the client is returned unchanged together with an empty msg.
 func (s *VllmService) IntentRecognize(ctx context.Context, req *entitys.ToolSelect) (msg string, err error) {
 	resp, err := s.client.ToolSelect(ctx, s.convertMessages(req.Prompt), s.convertTools(req.Tools))
 	if err != nil {
 		return "", err
 	}

 	switch {
 	case resp.Content != "":
 		// The model answered with plain content; pass it through untouched.
 		return resp.Content, nil
 	case len(resp.ToolCalls) == 0:
 		// Neither content nor a tool call: nothing usable came back.
 		return "", errors.New("不太明白你想表达的意思呢，可以在仔细描述一下您所需要的内容吗，感谢感谢")
 	}

 	// Only the first tool call is considered; any further calls are ignored.
 	first := resp.ToolCalls[0]
 	match := &entitys.Match{
 		Confidence: 1,
 		Index:      first.Function.Name,
 		Parameters: first.Function.Arguments,
 		IsMatch:    true,
 	}
 	return pkg.JsonStringIgonErr(match), nil
 }
 // convertMessages maps the given api.Message prompts onto eino schema messages.
 //
 // Role and Content are copied across for every message. When a message
 // carries images, a multi-part input is attached as well: one text part
 // holding the content, followed by one image part per image. Each image is
 // base64-encoded with the standard encoding and always labelled
 // "image/jpeg", regardless of the actual image data.
 func (s *VllmService) convertMessages(prompts []api.Message) []*schema.Message {
 	out := make([]*schema.Message, 0, len(prompts))
 	for i := range prompts {
 		src := &prompts[i]
 		converted := &schema.Message{
 			Role:    schema.RoleType(src.Role),
 			Content: src.Content,
 		}
 		if len(src.Images) == 0 {
 			out = append(out, converted)
 			continue
 		}

 		// In practice this path is not expected to be taken.
 		multi := []schema.MessageInputPart{
 			{Type: schema.ChatMessagePartTypeText, Text: src.Content},
 		}
 		for _, raw := range src.Images {
 			// A fresh variable per image, because its address is stored below.
 			encoded := base64.StdEncoding.EncodeToString(raw)
 			common := schema.MessagePartCommon{
 				MIMEType:   "image/jpeg",
 				Base64Data: &encoded,
 			}
 			multi = append(multi, schema.MessageInputPart{
 				Type:  schema.ChatMessagePartTypeImageURL,
 				Image: &schema.MessageInputImage{MessagePartCommon: common},
 			})
 		}
 		converted.UserInputMultiContent = multi
 		out = append(out, converted)
 	}
 	return out
 }
 // convertTools translates registered tasks into eino tool descriptions.
 //
 // Each property of a task's parameter schema becomes a schema.ParameterInfo:
 //   - the declared type is rendered with %v and stripped of leading and
 //     trailing square brackets, so "string" and "[string]" are treated the
 //     same; type names that are not recognised fall back to schema.String;
 //   - a property is marked required when its name appears in the task's
 //     Required list;
 //   - enum values, if any, are appended to the description in the form
 //     " Enum: a, b, c".
 //
 // The tool's name and description are taken from the task's Name and Desc.
 func (s *VllmService) convertTools(tasks []entitys.RegistrationTask) []*schema.ToolInfo {
 	infos := make([]*schema.ToolInfo, 0, len(tasks))
 	for _, task := range tasks {
 		params := make(map[string]*schema.ParameterInfo)
 		for name, prop := range task.TaskConfigDetail.Param.Properties {
 			// prop.Type is formatted generically so that both a single value
 			// and a slice rendering such as "[string]" normalise to the
 			// bare type name.
 			kind := schema.String
 			switch strings.Trim(fmt.Sprintf("%v", prop.Type), "[]") {
 			case "integer", "int":
 				kind = schema.Integer
 			case "number", "float":
 				kind = schema.Number
 			case "boolean", "bool":
 				kind = schema.Boolean
 			case "object":
 				kind = schema.Object
 			case "array":
 				kind = schema.Array
 			}

 			mandatory := false
 			for _, reqName := range task.TaskConfigDetail.Param.Required {
 				if reqName == name {
 					mandatory = true
 					break
 				}
 			}

 			text := prop.Description
 			if len(prop.Enum) > 0 {
 				rendered := make([]string, 0, len(prop.Enum))
 				for _, choice := range prop.Enum {
 					rendered = append(rendered, fmt.Sprintf("%v", choice))
 				}
 				text = text + " Enum: " + strings.Join(rendered, ", ")
 			}

 			params[name] = &schema.ParameterInfo{
 				Type:     kind,
 				Desc:     text,
 				Required: mandatory,
 			}
 		}

 		info := &schema.ToolInfo{
 			Name:        task.Name,
 			Desc:        task.Desc,
 			ParamsOneOf: schema.NewParamsOneOfByParams(params),
 		}
 		infos = append(infos, info)
 	}
 	return infos
 }
--- a/internal/biz/provider_set.go
+++ b/internal/biz/provider_set.go
@ -13,7 +13,6 @@ var ProviderSetBiz = wire.NewSet(
 	NewChatHistoryBiz,
 	//llm_service.NewLangChainGenerate,
 	llm_service.NewOllamaGenerate,
 	llm_service.NewVllmService,
 	//handle.NewHandle,
 	do.NewDo,
 	do.NewHandle,
--- a/internal/config/config.go
+++ b/internal/config/config.go
@ -122,13 +122,8 @@ type OllamaConfig struct {
 }
 // VllmConfig holds the vLLM settings as two VllmModel entries, decoded from
 // the "vl_model" and "text_model" configuration keys.
 type VllmConfig struct {
 	// VLModel is decoded from the "vl_model" key.
 	VLModel   VllmModel `mapstructure:"vl_model"`
 	// TextModel is decoded from the "text_model" key.
 	TextModel VllmModel `mapstructure:"text_model"`
 }
 type VllmModel struct {
 	BaseURL string        `mapstructure:"base_url"`
-	Model   string        `mapstructure:"model"`
+	VlModel string        `mapstructure:"vl_model"`
 	Timeout time.Duration `mapstructure:"timeout"`
 	Level   string        `mapstructure:"level"`
 }
--- a/internal/pkg/utils_vllm/client.go
+++ b/internal/pkg/utils_vllm/client.go
@ -7,63 +7,33 @@ import (
 	"encoding/base64"
 	"github.com/cloudwego/eino-ext/components/model/openai"
 	"github.com/cloudwego/eino/components/model"
 	"github.com/cloudwego/eino/schema"
 )
 type Client struct {
-	vlModel       *openai.ChatModel
+	model  *openai.ChatModel
 	generateModel *openai.ChatModel
 	config *config.Config
 }
 func NewClient(config *config.Config) (*Client, func(), error) {
-	// 初始化视觉模型
+	m, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
-	vl, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
+		BaseURL: config.Vllm.BaseURL,
-		BaseURL: config.Vllm.VLModel.BaseURL,
+		Model:   config.Vllm.VlModel,
-		Model:   config.Vllm.VLModel.Model,
+		Timeout: config.Vllm.Timeout,
 		Timeout: config.Vllm.VLModel.Timeout,
 	})
 	if err != nil {
 		return nil, nil, err
 	}
-
+	c := &Client{model: m, config: config}
 	// 初始化生成模型
 	gen, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
 		BaseURL: config.Vllm.TextModel.BaseURL,
 		Model:   config.Vllm.TextModel.Model,
 		Timeout: config.Vllm.TextModel.Timeout,
 		ExtraFields: map[string]any{
 			"chat_template_kwargs": map[string]any{
 				"enable_thinking": false,
 			},
 		},
 	})
 	if err != nil {
 		return nil, nil, err
 	}
 	c := &Client{
 		vlModel:       vl,
 		generateModel: gen,
 		config:        config,
 	}
 	cleanup := func() {}
 	return c, cleanup, nil
 }
 func (c *Client) Chat(ctx context.Context, msgs []*schema.Message) (*schema.Message, error) {
-	// 默认聊天使用生成模型
+	return c.model.Generate(ctx, msgs)
 	return c.generateModel.Generate(ctx, msgs)
 }
 // ToolSelect performs tool selection on the generation model: msgs are sent
 // to generateModel with the given tools attached through model.WithTools,
 // and the model's reply and error are returned unchanged.
 func (c *Client) ToolSelect(ctx context.Context, msgs []*schema.Message, tools []*schema.ToolInfo) (*schema.Message, error) {
 	withTools := model.WithTools(tools)
 	return c.generateModel.Generate(ctx, msgs, withTools)
 }
 func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt string, imgURLs []string) (*schema.Message, error) {
 	// 图片识别使用视觉模型
 	in := []*schema.Message{
 		{
 			Role:    schema.System,
@ -88,12 +58,11 @@ func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt
 	}
 	in[1].UserInputMultiContent = parts
-	return c.vlModel.Generate(ctx, in)
+	return c.model.Generate(ctx, in)
 }
 // 识别图片by二进制文件
 func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPrompt string, imgBytes []byte, imgType string) (*schema.Message, error) {
 	// 图片识别使用视觉模型
 	in := []*schema.Message{
 		{
 			Role:    schema.System,
@ -113,10 +82,9 @@ func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPr
 				MIMEType:   imgType,
 				Base64Data: util.AnyToPoint(base64.StdEncoding.EncodeToString(imgBytes)),
 			},
 			Detail: schema.ImageURLDetailHigh,
 		},
 	})
 	in[1].UserInputMultiContent = parts
-	return c.vlModel.Generate(ctx, in)
+	return c.model.Generate(ctx, in)
 }