fix: 1. add think parameter 2. remove unexpected enable_cot argument

fuzhongyun 2026-01-15 18:13:43 +08:00
parent 1b641e6cd6
commit 13bc171e9d
2 changed files with 5 additions and 1 deletion


@@ -34,6 +34,7 @@ class QueryRequest(BaseModel):
     mode: str = "hybrid"  # options: naive, local, global, hybrid
     top_k: int = 5
     stream: bool = False
+    think: bool = False

 class IngestResponse(BaseModel):
     filename: str
@@ -111,6 +112,7 @@ async def query_knowledge_base(
         request.query,
         system_prompt=sys_prompt,
         stream=True,
+        think=request.think,
         hashing_kv=rag.llm_response_cache
     )
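
The new field lets API callers opt into model "thinking" per request. A minimal
sketch of exercising it from a client, assuming the service runs on
localhost:8000 and query_knowledge_base is mounted at /query (a hypothetical
path; the route decorator is not shown in this diff):

import httpx

payload = {
    "query": "What does the knowledge base say about X?",
    "mode": "hybrid",  # options: naive, local, global, hybrid
    "top_k": 5,
    "stream": True,
    "think": True,  # new field added by this commit
}

# Stream the response line by line; timeout=None keeps long generations alive.
with httpx.stream("POST", "http://localhost:8000/query", json=payload, timeout=None) as resp:
    for line in resp.iter_lines():
        print(line)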


@@ -19,11 +19,13 @@ async def llm_func(prompt, system_prompt=None, history_messages=[], **kwargs) ->
     # Remove any pre-existing `model` argument to avoid conflicts
     kwargs.pop('model', None)
     kwargs.pop('hashing_kv', None)
+    kwargs.pop('enable_cot', None)  # drop this unsupported argument
     keyword_extraction = kwargs.pop("keyword_extraction", False)
     if keyword_extraction:
         kwargs["format"] = "json"
     stream = kwargs.pop("stream", False)
+    think = kwargs.pop("think", False)
     # Debug: inspect the streaming flag
     if stream:
         logging.info("LLM called with stream=True")
@@ -40,7 +42,7 @@ async def llm_func(prompt, system_prompt=None, history_messages=[], **kwargs) ->
     if stream:
         async def inner():
             # Pass options through via **kwargs so top-level parameters such as `format` take effect
-            response = await client.chat(model=settings.LLM_MODEL, messages=messages, stream=True, **kwargs)
+            response = await client.chat(model=settings.LLM_MODEL, messages=messages, stream=True, think=think, **kwargs)
             async for chunk in response:
                 msg = chunk.get("message", {})
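
Taken together, the two edits follow one pattern: pop think (and the
unsupported enable_cot) out of kwargs before they are forwarded wholesale, then
pass think explicitly to client.chat. A standalone sketch of that pattern,
assuming the ollama AsyncClient used above (a version whose chat() accepts a
think keyword) and a placeholder model name:

import asyncio
from ollama import AsyncClient

async def ask(prompt: str, **kwargs) -> str:
    think = kwargs.pop("think", False)  # consume before the **kwargs pass-through
    kwargs.pop("enable_cot", None)      # drop an argument client.chat would reject
    client = AsyncClient()
    response = await client.chat(
        model="qwen3",  # placeholder model name, not from this diff
        messages=[{"role": "user", "content": prompt}],
        stream=True,
        think=think,
        **kwargs,
    )
    parts = []
    # Dict-style chunk access mirrors the handling in the diff above.
    async for chunk in response:
        msg = chunk.get("message", {})
        parts.append(msg.get("content", "") or "")
    return "".join(parts)

if __name__ == "__main__":
    print(asyncio.run(ask("Why is the sky blue?", think=True)))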