fix: 1. 调整vllm配置及其相关 2. 意图识别模型切换 ollama -> vllm
This commit is contained in:
parent
376c08e836
commit
ca671694f9
|
|
@ -17,8 +17,14 @@ ollama:
|
||||||
format: "json"
|
format: "json"
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
base_url: "http://172.17.0.1:8001/v1"
|
vl_model:
|
||||||
vl_model: "qwen2.5-vl-3b-awq"
|
base_url: "http://192.168.6.115:8001/v1"
|
||||||
|
model: "qwen2.5-vl-3b-awq"
|
||||||
|
timeout: "120s"
|
||||||
|
level: "info"
|
||||||
|
text_model:
|
||||||
|
base_url: "http://192.168.6.115:8002/v1"
|
||||||
|
model: "qwen3-8b-fp8"
|
||||||
timeout: "120s"
|
timeout: "120s"
|
||||||
level: "info"
|
level: "info"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,14 @@ ollama:
|
||||||
format: "json"
|
format: "json"
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
base_url: "http://117.175.169.61:16001/v1"
|
vl_model:
|
||||||
vl_model: "qwen2.5-vl-3b-awq"
|
base_url: "http://192.168.6.115:8001/v1"
|
||||||
|
model: "qwen2.5-vl-3b-awq"
|
||||||
|
timeout: "120s"
|
||||||
|
level: "info"
|
||||||
|
text_model:
|
||||||
|
base_url: "http://192.168.6.115:8002/v1"
|
||||||
|
model: "qwen3-8b-fp8"
|
||||||
timeout: "120s"
|
timeout: "120s"
|
||||||
level: "info"
|
level: "info"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,14 @@ ollama:
|
||||||
format: "json"
|
format: "json"
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
base_url: "http://host.docker.internal:8001/v1"
|
vl_model:
|
||||||
vl_model: "qwen2.5-vl-3b-awq"
|
base_url: "http://192.168.6.115:8001/v1"
|
||||||
|
model: "qwen2.5-vl-3b-awq"
|
||||||
|
timeout: "120s"
|
||||||
|
level: "info"
|
||||||
|
text_model:
|
||||||
|
base_url: "http://192.168.6.115:8002/v1"
|
||||||
|
model: "qwen3-8b-fp8"
|
||||||
timeout: "120s"
|
timeout: "120s"
|
||||||
level: "info"
|
level: "info"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,7 @@ import (
|
||||||
|
|
||||||
type Handle struct {
|
type Handle struct {
|
||||||
Ollama *llm_service.OllamaService
|
Ollama *llm_service.OllamaService
|
||||||
|
Vllm *llm_service.VllmService
|
||||||
toolManager *tools.Manager
|
toolManager *tools.Manager
|
||||||
conf *config.Config
|
conf *config.Config
|
||||||
sessionImpl *impl.SessionImpl
|
sessionImpl *impl.SessionImpl
|
||||||
|
|
@ -47,6 +48,7 @@ type Handle struct {
|
||||||
|
|
||||||
func NewHandle(
|
func NewHandle(
|
||||||
Ollama *llm_service.OllamaService,
|
Ollama *llm_service.OllamaService,
|
||||||
|
Vllm *llm_service.VllmService,
|
||||||
toolManager *tools.Manager,
|
toolManager *tools.Manager,
|
||||||
conf *config.Config,
|
conf *config.Config,
|
||||||
sessionImpl *impl.SessionImpl,
|
sessionImpl *impl.SessionImpl,
|
||||||
|
|
@ -57,6 +59,7 @@ func NewHandle(
|
||||||
) *Handle {
|
) *Handle {
|
||||||
return &Handle{
|
return &Handle{
|
||||||
Ollama: Ollama,
|
Ollama: Ollama,
|
||||||
|
Vllm: Vllm,
|
||||||
toolManager: toolManager,
|
toolManager: toolManager,
|
||||||
conf: conf,
|
conf: conf,
|
||||||
sessionImpl: sessionImpl,
|
sessionImpl: sessionImpl,
|
||||||
|
|
@ -72,7 +75,8 @@ func (r *Handle) Recognize(ctx context.Context, rec *entitys.Recognize, promptPr
|
||||||
|
|
||||||
prompt, err := promptProcessor.CreatePrompt(ctx, rec)
|
prompt, err := promptProcessor.CreatePrompt(ctx, rec)
|
||||||
//意图识别
|
//意图识别
|
||||||
recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
|
// recognizeMsg, err := r.Ollama.IntentRecognize(ctx, &entitys.ToolSelect{
|
||||||
|
recognizeMsg, err := r.Vllm.IntentRecognize(ctx, &entitys.ToolSelect{
|
||||||
Prompt: prompt,
|
Prompt: prompt,
|
||||||
Tools: rec.Tasks,
|
Tools: rec.Tasks,
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,153 @@
|
||||||
|
package llm_service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"ai_scheduler/internal/config"
|
||||||
|
"ai_scheduler/internal/entitys"
|
||||||
|
"ai_scheduler/internal/pkg"
|
||||||
|
"ai_scheduler/internal/pkg/utils_vllm"
|
||||||
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/cloudwego/eino/schema"
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
)
|
||||||
|
|
||||||
|
type VllmService struct {
|
||||||
|
client *utils_vllm.Client
|
||||||
|
config *config.Config
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewVllmService(
|
||||||
|
client *utils_vllm.Client,
|
||||||
|
config *config.Config,
|
||||||
|
) *VllmService {
|
||||||
|
return &VllmService{
|
||||||
|
client: client,
|
||||||
|
config: config,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *VllmService) IntentRecognize(ctx context.Context, req *entitys.ToolSelect) (msg string, err error) {
|
||||||
|
msgs := s.convertMessages(req.Prompt)
|
||||||
|
tools := s.convertTools(req.Tools)
|
||||||
|
|
||||||
|
resp, err := s.client.ToolSelect(ctx, msgs, tools)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.Content == "" {
|
||||||
|
if len(resp.ToolCalls) > 0 {
|
||||||
|
call := resp.ToolCalls[0]
|
||||||
|
var matchFromTools = &entitys.Match{
|
||||||
|
Confidence: 1,
|
||||||
|
Index: call.Function.Name,
|
||||||
|
Parameters: call.Function.Arguments,
|
||||||
|
IsMatch: true,
|
||||||
|
}
|
||||||
|
msg = pkg.JsonStringIgonErr(matchFromTools)
|
||||||
|
} else {
|
||||||
|
err = errors.New("不太明白你想表达的意思呢,可以在仔细描述一下您所需要的内容吗,感谢感谢")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
msg = resp.Content
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *VllmService) convertMessages(prompts []api.Message) []*schema.Message {
|
||||||
|
msgs := make([]*schema.Message, 0, len(prompts))
|
||||||
|
for _, p := range prompts {
|
||||||
|
msg := &schema.Message{
|
||||||
|
Role: schema.RoleType(p.Role),
|
||||||
|
Content: p.Content,
|
||||||
|
}
|
||||||
|
|
||||||
|
// 这里实际应该不会走进来
|
||||||
|
if len(p.Images) > 0 {
|
||||||
|
parts := []schema.MessageInputPart{
|
||||||
|
{Type: schema.ChatMessagePartTypeText, Text: p.Content},
|
||||||
|
}
|
||||||
|
for _, imgData := range p.Images {
|
||||||
|
b64 := base64.StdEncoding.EncodeToString(imgData)
|
||||||
|
mimeType := "image/jpeg"
|
||||||
|
parts = append(parts, schema.MessageInputPart{
|
||||||
|
Type: schema.ChatMessagePartTypeImageURL,
|
||||||
|
Image: &schema.MessageInputImage{
|
||||||
|
MessagePartCommon: schema.MessagePartCommon{
|
||||||
|
MIMEType: mimeType,
|
||||||
|
Base64Data: &b64,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
msg.UserInputMultiContent = parts
|
||||||
|
}
|
||||||
|
msgs = append(msgs, msg)
|
||||||
|
}
|
||||||
|
return msgs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *VllmService) convertTools(tasks []entitys.RegistrationTask) []*schema.ToolInfo {
|
||||||
|
tools := make([]*schema.ToolInfo, 0, len(tasks))
|
||||||
|
for _, task := range tasks {
|
||||||
|
params := make(map[string]*schema.ParameterInfo)
|
||||||
|
for k, v := range task.TaskConfigDetail.Param.Properties {
|
||||||
|
dt := schema.String
|
||||||
|
|
||||||
|
// Handle v.Type dynamically to support both string and []string (compiler suggests []string)
|
||||||
|
// Using fmt.Sprint handles both cases safely without knowing exact type structure
|
||||||
|
typeStr := fmt.Sprintf("%v", v.Type)
|
||||||
|
typeStr = strings.Trim(typeStr, "[]") // normalize "[string]" -> "string"
|
||||||
|
|
||||||
|
switch typeStr {
|
||||||
|
case "string":
|
||||||
|
dt = schema.String
|
||||||
|
case "integer", "int":
|
||||||
|
dt = schema.Integer
|
||||||
|
case "number", "float":
|
||||||
|
dt = schema.Number
|
||||||
|
case "boolean", "bool":
|
||||||
|
dt = schema.Boolean
|
||||||
|
case "object":
|
||||||
|
dt = schema.Object
|
||||||
|
case "array":
|
||||||
|
dt = schema.Array
|
||||||
|
}
|
||||||
|
|
||||||
|
required := false
|
||||||
|
for _, r := range task.TaskConfigDetail.Param.Required {
|
||||||
|
if r == k {
|
||||||
|
required = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
desc := v.Description
|
||||||
|
if len(v.Enum) > 0 {
|
||||||
|
var enumStrs []string
|
||||||
|
for _, e := range v.Enum {
|
||||||
|
enumStrs = append(enumStrs, fmt.Sprintf("%v", e))
|
||||||
|
}
|
||||||
|
desc += " Enum: " + strings.Join(enumStrs, ", ")
|
||||||
|
}
|
||||||
|
|
||||||
|
params[k] = &schema.ParameterInfo{
|
||||||
|
Type: dt,
|
||||||
|
Desc: desc,
|
||||||
|
Required: required,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tools = append(tools, &schema.ToolInfo{
|
||||||
|
Name: task.Name,
|
||||||
|
Desc: task.Desc,
|
||||||
|
ParamsOneOf: schema.NewParamsOneOfByParams(params),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return tools
|
||||||
|
}
|
||||||
|
|
@ -13,6 +13,7 @@ var ProviderSetBiz = wire.NewSet(
|
||||||
NewChatHistoryBiz,
|
NewChatHistoryBiz,
|
||||||
//llm_service.NewLangChainGenerate,
|
//llm_service.NewLangChainGenerate,
|
||||||
llm_service.NewOllamaGenerate,
|
llm_service.NewOllamaGenerate,
|
||||||
|
llm_service.NewVllmService,
|
||||||
//handle.NewHandle,
|
//handle.NewHandle,
|
||||||
do.NewDo,
|
do.NewDo,
|
||||||
do.NewHandle,
|
do.NewHandle,
|
||||||
|
|
|
||||||
|
|
@ -122,8 +122,13 @@ type OllamaConfig struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type VllmConfig struct {
|
type VllmConfig struct {
|
||||||
|
VLModel VllmModel `mapstructure:"vl_model"`
|
||||||
|
TextModel VllmModel `mapstructure:"text_model"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type VllmModel struct {
|
||||||
BaseURL string `mapstructure:"base_url"`
|
BaseURL string `mapstructure:"base_url"`
|
||||||
VlModel string `mapstructure:"vl_model"`
|
Model string `mapstructure:"model"`
|
||||||
Timeout time.Duration `mapstructure:"timeout"`
|
Timeout time.Duration `mapstructure:"timeout"`
|
||||||
Level string `mapstructure:"level"`
|
Level string `mapstructure:"level"`
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,33 +7,63 @@ import (
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
|
||||||
"github.com/cloudwego/eino-ext/components/model/openai"
|
"github.com/cloudwego/eino-ext/components/model/openai"
|
||||||
|
"github.com/cloudwego/eino/components/model"
|
||||||
"github.com/cloudwego/eino/schema"
|
"github.com/cloudwego/eino/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Client struct {
|
type Client struct {
|
||||||
model *openai.ChatModel
|
vlModel *openai.ChatModel
|
||||||
|
generateModel *openai.ChatModel
|
||||||
config *config.Config
|
config *config.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewClient(config *config.Config) (*Client, func(), error) {
|
func NewClient(config *config.Config) (*Client, func(), error) {
|
||||||
m, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
|
// 初始化视觉模型
|
||||||
BaseURL: config.Vllm.BaseURL,
|
vl, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
|
||||||
Model: config.Vllm.VlModel,
|
BaseURL: config.Vllm.VLModel.BaseURL,
|
||||||
Timeout: config.Vllm.Timeout,
|
Model: config.Vllm.VLModel.Model,
|
||||||
|
Timeout: config.Vllm.VLModel.Timeout,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
c := &Client{model: m, config: config}
|
|
||||||
|
// 初始化生成模型
|
||||||
|
gen, err := openai.NewChatModel(context.Background(), &openai.ChatModelConfig{
|
||||||
|
BaseURL: config.Vllm.TextModel.BaseURL,
|
||||||
|
Model: config.Vllm.TextModel.Model,
|
||||||
|
Timeout: config.Vllm.TextModel.Timeout,
|
||||||
|
ExtraFields: map[string]any{
|
||||||
|
"chat_template_kwargs": map[string]any{
|
||||||
|
"enable_thinking": false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
c := &Client{
|
||||||
|
vlModel: vl,
|
||||||
|
generateModel: gen,
|
||||||
|
config: config,
|
||||||
|
}
|
||||||
cleanup := func() {}
|
cleanup := func() {}
|
||||||
return c, cleanup, nil
|
return c, cleanup, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) Chat(ctx context.Context, msgs []*schema.Message) (*schema.Message, error) {
|
func (c *Client) Chat(ctx context.Context, msgs []*schema.Message) (*schema.Message, error) {
|
||||||
return c.model.Generate(ctx, msgs)
|
// 默认聊天使用生成模型
|
||||||
|
return c.generateModel.Generate(ctx, msgs)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) ToolSelect(ctx context.Context, msgs []*schema.Message, tools []*schema.ToolInfo) (*schema.Message, error) {
|
||||||
|
// 工具选择使用生成模型
|
||||||
|
return c.generateModel.Generate(ctx, msgs, model.WithTools(tools))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt string, imgURLs []string) (*schema.Message, error) {
|
func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt string, imgURLs []string) (*schema.Message, error) {
|
||||||
|
// 图片识别使用视觉模型
|
||||||
in := []*schema.Message{
|
in := []*schema.Message{
|
||||||
{
|
{
|
||||||
Role: schema.System,
|
Role: schema.System,
|
||||||
|
|
@ -58,11 +88,12 @@ func (c *Client) RecognizeWithImg(ctx context.Context, systemPrompt, userPrompt
|
||||||
}
|
}
|
||||||
|
|
||||||
in[1].UserInputMultiContent = parts
|
in[1].UserInputMultiContent = parts
|
||||||
return c.model.Generate(ctx, in)
|
return c.vlModel.Generate(ctx, in)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 识别图片by二进制文件
|
// 识别图片by二进制文件
|
||||||
func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPrompt string, imgBytes []byte, imgType string) (*schema.Message, error) {
|
func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPrompt string, imgBytes []byte, imgType string) (*schema.Message, error) {
|
||||||
|
// 图片识别使用视觉模型
|
||||||
in := []*schema.Message{
|
in := []*schema.Message{
|
||||||
{
|
{
|
||||||
Role: schema.System,
|
Role: schema.System,
|
||||||
|
|
@ -82,9 +113,10 @@ func (c *Client) RecognizeWithImgBytes(ctx context.Context, systemPrompt, userPr
|
||||||
MIMEType: imgType,
|
MIMEType: imgType,
|
||||||
Base64Data: util.AnyToPoint(base64.StdEncoding.EncodeToString(imgBytes)),
|
Base64Data: util.AnyToPoint(base64.StdEncoding.EncodeToString(imgBytes)),
|
||||||
},
|
},
|
||||||
|
Detail: schema.ImageURLDetailHigh,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
in[1].UserInputMultiContent = parts
|
in[1].UserInputMultiContent = parts
|
||||||
return c.model.Generate(ctx, in)
|
return c.vlModel.Generate(ctx, in)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue