fix: 1. 调整vllm配置及其相关 2. 意图识别模型切换 ollama -> vllm
This commit is contained in:
parent
376c08e836
commit
ca671694f9
|
|
@ -17,8 +17,14 @@ ollama:
|
|||
format: "json"
|
||||
|
||||
vllm:
|
||||
base_url: "http://172.17.0.1:8001/v1"
|
||||
vl_model: "qwen2.5-vl-3b-awq"
|
||||
vl_model:
|
||||
base_url: "http://192.168.6.115:8001/v1"
|
||||
model: "qwen2.5-vl-3b-awq"
|
||||
timeout: "120s"
|
||||
level: "info"
|
||||
text_model:
|
||||
base_url: "http://192.168.6.115:8002/v1"
|
||||
model: "qwen3-8b-fp8"
|
||||
timeout: "120s"
|
||||
level: "info"
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,14 @@ ollama:
|
|||
format: "json"
|
||||
|
||||
vllm:
|
||||
base_url: "http://117.175.169.61:16001/v1"
|
||||
vl_model: "qwen2.5-vl-3b-awq"
|
||||
vl_model:
|
||||
base_url: "http://192.168.6.115:8001/v1"
|
||||
model: "qwen2.5-vl-3b-awq"
|
||||
timeout: "120s"
|
||||
level: "info"
|
||||
text_model:
|
||||
base_url: "http://192.168.6.115:8002/v1"
|
||||
model: "qwen3-8b-fp8"
|
||||
timeout: "120s"
|
||||
level: "info"
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,14 @@ ollama:
|
|||
format: "json"
|
||||
|
||||
vllm:
|
||||
base_url: "http://host.docker.internal:8001/v1"
|
||||
vl_model: "qwen2.5-vl-3b-awq"
|
||||
vl_model:
|
||||
base_url: "http://192.168.6.115:8001/v1"
|
||||
model: "qwen2.5-vl-3b-awq"
|
||||
timeout: "120s"
|
||||
level: "info"
|
||||
text_model:
|
||||
base_url: "http://192.168.6.115:8002/v1"
|
||||
model: "qwen3-8b-fp8"
|
||||
timeout: "120s"
|
||||
level: "info"
|
||||
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import (
|
|||
|
||||
type Handle struct {
|
||||
Ollama *llm_service.OllamaService
|
||||
Vllm *llm_service.VllmService
|
||||
toolManager *tools.Manager
|
||||
conf *config.Config
|
||||
sessionImpl *impl.SessionImpl
|
||||
|
|
@ -47,6 +48,7 @@ type Handle struct {
|
|||
|
||||
func NewHandle(
|
||||
Ollama *llm_service.OllamaService,
|
||||
Vllm *llm_service.VllmService,
|
||||
toolManager *tools.Manager,
|
||||
conf *config.Config,
|
||||
sessionImpl *impl.SessionImpl,
|
||||
|
|
@ -57,6 +59,7 @@ func NewHandle(
|
|||
) *Handle {
|
||||
return &Handle{
|
||||
Ollama: Ollama,
|
||||
Vllm: Vllm,
|
||||
toolManager: toolManager,
|
||||
conf: conf,
|
||||
sessionImpl: sessionImpl,
|
||||
|
|
@ -72,7 +75,8 @@ func (r *Handle) Recognize(ctx context.Context, rec *entitys.Recognize, promptPr
|
|||
|
||||
prompt, err := promptProcessor.CreatePrompt(ctx, rec)
|
||||
//意图识别
|
||||
recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
|
||||
// recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
|
||||
recognizeMsg, err := r.Vllm.IntentRecognize(ctx, &entitys.ToolSelect{
|
||||
Prompt: prompt,
|
||||
Tools: rec.Tasks,
|
||||
})
|
||||
|
|
|
|||
|
|
@ -0,0 +1,153 @@
|
|||
package llm_service
|
||||
|
||||
import (
|
||||
"ai_scheduler/internal/config"
|
||||
"ai_scheduler/internal/entitys"
|
||||
"ai_scheduler/internal/pkg"
|
||||
"ai_scheduler/internal/pkg/utils_vllm"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/cloudwego/eino/schema"
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
type VllmService struct {
|
||||
client *utils_vllm.Client
|
||||
config *config.Config
|
||||
}
|
||||
|
||||
func NewVllmService(
|
||||
client *utils_vllm.Client,
|
||||
config *config.Config,
|
||||
) *VllmService {
|
||||
return &VllmService{
|
||||
client: client,
|
||||
config: config,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *VllmService) IntentRecognize(ctx context.Context, req *entitys.ToolSelect) (msg string, err error) {
|
||||
msgs := s.convertMessages(req.Prompt)
|
||||
tools := s.convertTools(req.Tools)
|
||||
|
||||
resp, err := s.client.ToolSelect(ctx, msgs, tools)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if resp.Content == "" {
|
||||
if len(resp.ToolCalls) > 0 {
|
||||
call := resp.ToolCalls[0]
|
||||
var matchFromTools = &entitys.Match{
|
||||
Confidence: 1,
|
||||
Index: call.Function.Name,
|
||||
Parameters: call.Function.Arguments,
|
||||
IsMatch: true,
|
||||
}
|
||||
msg = pkg.JsonStringIgonErr(matchFromTools)
|
||||
} else {
|
||||
err = errors.New("不太明白你想表达的意思呢,可以在仔细描述一下您所需要的内容吗,感谢感谢")
|
||||
return
|
||||
}
|
||||
} else {
|
||||
msg = resp.Content
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (s *VllmService) convertMessages(prompts []api.Message) []*schema.Message {
|
||||
msgs := make([]*schema.Message, 0, len(prompts))
|
||||
for _, p := range prompts {
|
||||
msg := &schema.Message{
|
||||
Role: schema.RoleType(p.Role),
|
||||
Content: p.Content,
|
||||
}
|
||||
|
||||
// 这里实际应该不会走进来
|
||||
if len(p.Images) > 0 {
|
||||
parts := []schema.MessageInputPart{
|
||||
{Type: schema.ChatMessagePartTypeText, Text: p.Content},
|
||||
}
|
||||
for _, imgData := range p.Images {
|
||||
b64 := base64.StdEncoding.EncodeToString(imgData)
|
||||
mimeType := "image/jpeg"
|
||||
parts = append(parts, schema.MessageInputPart{
|
||||
Type: schema.ChatMessagePartTypeImageURL,
|
||||
Image: &schema.MessageInputImage{
|
||||
MessagePartCommon: schema.MessagePartCommon{
|
||||
MIMEType: mimeType,
|
||||
Base64Data: &b64,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
msg.UserInputMultiContent = parts
|
||||
}
|
||||
msgs = append(msgs, msg)
|
||||
}
|
||||
return msgs
|
||||
}
|
||||
|
||||
func (s *VllmService) convertTools(tasks []entitys.RegistrationTask) []*schema.ToolInfo {
|
||||
tools := make([]*schema.ToolInfo, 0, len(tasks))
|
||||
for _, task := range tasks {
|
||||
params := make(map[string]*schema.ParameterInfo)
|
||||
for k, v := range task.TaskConfigDetail.Param.Properties {
|
||||
dt := schema.String
|
||||
|
||||
// Handle v.Type dynamically to support both string and []string (compiler suggests []string)
|
||||
// Using fmt.Sprint handles both cases safely without knowing exact type structure
|
||||
typeStr := fmt.Sprintf("%v", v.Type)
|
||||
typeStr = strings.Trim(typeStr, "[]") // normalize "[string]" -> "string"
|
||||
|
||||
switch typeStr {
|
||||
case "string":
|
||||
dt = schema.String
|
||||
case "integer", "int":
|
||||
dt = schema.Integer
|
||||
case "number", "float":
|
||||
dt = schema.Number
|
||||
case "boolean", "bool":
|
||||
dt = schema.Boolean
|
||||
case "object":
|
||||
dt = schema.Object
|
||||
case "array":
|
||||
dt = schema.Array
|
||||
}
|
||||
|
||||
required := false
|
||||
for _, r := range task.TaskConfigDetail.Param.Required {
|
||||
if r == k {
|
||||
required = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
desc := v.Description
|
||||
if len(v.Enum) > 0 {
|
||||
var enumStrs []string
|
||||
for _, e := range v.Enum {
|
||||
enumStrs = append(enumStrs, fmt.Sprintf("%v", e))
|
||||
}
|
||||
desc += " Enum: " + strings.Join(enumStrs, ", ")
|
||||
}
|
||||
|
||||
params[k] = &schema.ParameterInfo{
|
||||
Type: dt,
|
||||
Desc: desc,
|
||||
Required: required,
|
||||
}
|
||||
}
|
||||
|
||||
tools = append(tools, &schema.ToolInfo{
|
||||
Name: task.Name,
|
||||
Desc: task.Desc,
|
||||
ParamsOneOf: schema.NewParamsOneOfByParams(params),
|
||||
})
|
||||
}
|
||||
return tools
|
||||
}
|
||||
|
|
@ -13,6 +13,7 @@ var ProviderSetBiz = wire.NewSet(
|
|||
NewChatHistoryBiz,
|
||||
//llm_service.NewLangChainGenerate,
|
||||
llm_service.NewOllamaGenerate,
|
||||
llm_service.NewVllmService,
|
||||
//handle.NewHandle,
|
||||
do.NewDo,
|
||||
do.NewHandle,
|
||||
|
|
|
|||
|
|
@ -122,8 +122,13 @@ type OllamaConfig struct {
|
|||
}
|
||||
|
||||
type VllmConfig struct {
|
||||
VLModel VllmModel `mapstructure:"vl_model"`
|
||||
TextModel VllmModel `mapstructure:"text_model"`
|
||||
}
|
||||
|
||||
type VllmModel struct {
|
||||
BaseURL string `mapstructure:"base_url"`
|
||||
VlModel string `mapstructure:"vl_model"`
|
||||
Model string `mapstructure:"model"`
|
||||
Timeout time.Duration `mapstructure:"timeout"`
|
||||
Level string `mapstructure:"level"`
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,33 +7,63 @@ import (
|
|||
"encoding/base64"
|
||||
|
||||
"github.com/cloudwego/eino-ext/components/model/openai"
|
||||
"github.com/cloudwego/eino/components/model"
|
||||
"github.com/cloudwego/eino/schema"
|
||||
)
|
||||
|
||||
type Client struct {
|
||||
model *openai.ChatModel
|
||||
vlModel *openai.ChatModel
|
||||
generateModel *openai.ChatModel
|
||||
config *config.Config
|
||||
}
|
||||
|
||||
func NewClient(config *config.Config) (*Client, func(), error) {
|
||||
m, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
|
||||
BaseURL: config.Vllm.BaseURL,
|
||||
Model: config.Vllm.VlModel,
|
||||
Timeout: config.Vllm.Timeout,
|
||||
// 初始化视觉模型
|
||||
vl, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
|
||||
BaseURL: config.Vllm.VLModel.BaseURL,
|
||||
Model: config.Vllm.VLModel.Model,
|
||||
Timeout: config.Vllm.VLModel.Timeout,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
c := &Client{model: m, config: config}
|
||||
|
||||
// 初始化生成模型
|
||||
gen, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
|
||||
BaseURL: config.Vllm.TextModel.BaseURL,
|
||||
Model: config.Vllm.TextModel.Model,
|
||||
Timeout: config.Vllm.TextModel.Timeout,
|
||||
ExtraFields: map[string]any{
|
||||
"chat_template_kwargs": map[string]any{
|
||||
"enable_thinking": false,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
c := &Client{
|
||||
vlModel: vl,
|
||||
generateModel: gen,
|
||||
config: config,
|
||||
}
|
||||
cleanup := func() {}
|
||||
return c, cleanup, nil
|
||||
}
|
||||
|
||||
func (c *Client) Chat(ctx context.Context, msgs []*schema.Message) (*schema.Message, error) {
|
||||
return c.model.Generate(ctx, msgs)
|
||||
// 默认聊天使用生成模型
|
||||
return c.generateModel.Generate(ctx, msgs)
|
||||
}
|
||||
|
||||
func (c *Client) ToolSelect(ctx context.Context, msgs []*schema.Message, tools []*schema.ToolInfo) (*schema.Message, error) {
|
||||
// 工具选择使用生成模型
|
||||
return c.generateModel.Generate(ctx, msgs, model.WithTools(tools))
|
||||
}
|
||||
|
||||
func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt string, imgURLs []string) (*schema.Message, error) {
|
||||
// 图片识别使用视觉模型
|
||||
in := []*schema.Message{
|
||||
{
|
||||
Role: schema.System,
|
||||
|
|
@ -58,11 +88,12 @@ func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt
|
|||
}
|
||||
|
||||
in[1].UserInputMultiContent = parts
|
||||
return c.model.Generate(ctx, in)
|
||||
return c.vlModel.Generate(ctx, in)
|
||||
}
|
||||
|
||||
// 识别图片by二进制文件
|
||||
func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPrompt string, imgBytes []byte, imgType string) (*schema.Message, error) {
|
||||
// 图片识别使用视觉模型
|
||||
in := []*schema.Message{
|
||||
{
|
||||
Role: schema.System,
|
||||
|
|
@ -82,9 +113,10 @@ func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPr
|
|||
MIMEType: imgType,
|
||||
Base64Data: util.AnyToPoint(base64.StdEncoding.EncodeToString(imgBytes)),
|
||||
},
|
||||
Detail: schema.ImageURLDetailHigh,
|
||||
},
|
||||
})
|
||||
|
||||
in[1].UserInputMultiContent = parts
|
||||
return c.model.Generate(ctx, in)
|
||||
return c.vlModel.Generate(ctx, in)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue