From 4257ff57688521b0836d15b667e1d4a63d7705f9 Mon Sep 17 00:00:00 2001
From: fuzhongyun <15339891972@163.com>
Date: Wed, 28 Jan 2026 13:47:59 +0800
Subject: [PATCH] fix: 1. optimize configuration options 2. fix the abnormal
 response returned when no context is retrieved
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env.example      |  5 +++--
 app/api/routes.py | 19 ++++++++++++++++---
 app/config.py     |  1 +
 app/core/rag.py   |  2 +-
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/.env.example b/.env.example
index e2fdce6..dc4c689 100644
--- a/.env.example
+++ b/.env.example
@@ -7,8 +7,9 @@ PORT=9600
 # LLM(Text) Configuration
 LLM_BINDING=vllm # ollama, vllm, openai
 LLM_BINDING_HOST=http://192.168.6.115:8002/v1 # vLLM OpenAI API base
-LLM_MODEL=qwen2.5-7b-awq
+LLM_MODEL=qwen3-8b-fp8
 LLM_KEY=EMPTY # vLLM default key
+LLM_MODEL_MAX_ASYNC=4 # vLLM handles concurrency well; safe to raise
 
 # LLM(Vision) Configuration
 VL_BINDING=vllm # ollama, vllm, openai
@@ -35,4 +36,4 @@ DATA_DIR=./index_data
 EMBEDDING_DIM=1024
 MAX_TOKEN_SIZE=8192
 MAX_RAG_INSTANCES=5 # maximum number of active RAG instances
-COSINE_THRESHOLD=0.4 # cosine similarity threshold
\ No newline at end of file
+COSINE_THRESHOLD=0.6 # cosine similarity threshold
\ No newline at end of file
diff --git a/app/api/routes.py b/app/api/routes.py
index 24f0ee1..c758a00 100644
--- a/app/api/routes.py
+++ b/app/api/routes.py
@@ -125,13 +125,26 @@ async def query_knowledge_base(
 
         # Run the context retrieval
         context_resp = await rag.aquery(request.query, param=context_param)
+
+        logging.info(f"Context response: {context_resp}")
 
-        # Determine the retrieval hit status
-        rag_status = "miss"
+        # Determine the retrieval status
         has_context = False
+
+        # 1. Basic check: rule out empty strings and the explicit no-context marker
         if context_resp and "[no-context]" not in context_resp and "None" not in context_resp:
+            # 2. Strict check: only count a valid hit when concrete Document Chunks (source passages) are present
+            # Entities are prone to spurious matches on generic terms and must not count as a hit on their own
+            if "Document Chunks" in context_resp:
+                chunks_part = context_resp.split("Document Chunks")[1]
+                # Check whether the chunks section contains JSON-style content fields
+                if '"content":' in chunks_part or '"text":' in chunks_part:
+                    has_context = True
+
+        if has_context:
             rag_status = "hit"
-            has_context = True
+        else:
+            rag_status = "miss"
 
         # Handle streaming output (SSE protocol - OpenAI-compatible format)
         if request.stream:
diff --git a/app/config.py b/app/config.py
index 0c5faf5..11c220d 100644
--- a/app/config.py
+++ b/app/config.py
@@ -16,6 +16,7 @@ class Settings(BaseSettings):
     LLM_BINDING_HOST: str = "http://192.168.6.115:8002/v1" # vLLM OpenAI API base
     LLM_MODEL: str = "qwen2.5-7b-awq"
     LLM_KEY: str = "EMPTY" # vLLM default key
+    LLM_MODEL_MAX_ASYNC: int = 4 # vLLM handles concurrency well; safe to raise
 
     # LLM (Vision) - vLLM
     VL_BINDING: str = "vllm" # ollama, vllm, openai
diff --git a/app/core/rag.py b/app/core/rag.py
index 4ce2c28..0f25894 100644
--- a/app/core/rag.py
+++ b/app/core/rag.py
@@ -234,7 +234,7 @@ class RAGManager:
             "working_dir": user_data_dir,
             "llm_model_func": llm_func,
             "llm_model_name": settings.LLM_MODEL,
-            "llm_model_max_async": 4, # vLLM handles concurrency well; safe to raise
+            "llm_model_max_async": settings.LLM_MODEL_MAX_ASYNC, # vLLM handles concurrency well; safe to raise
             "max_parallel_insert": 1,
             "embedding_func": EmbeddingFunc(
                 embedding_dim=settings.EMBEDDING_DIM,
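
Note on the retrieval-status check in app/api/routes.py: a response is now counted as a hit only when it carries a Document Chunks section with real content fields, since entity matches alone are unreliable. Below is a minimal sketch of that logic as a standalone, testable function. It assumes the context-string format implied by the diff (the "Document Chunks" header, the "[no-context]" marker, and the '"content":' / '"text":' field names); the sample responses are hypothetical.

    def detect_context_hit(context_resp: str | None) -> bool:
        """Mirror of the hit check in routes.py: entities alone never count."""
        # Basic check: empty response or explicit no-context markers mean a miss.
        if not context_resp or "[no-context]" in context_resp or "None" in context_resp:
            return False
        # Strict check: require a Document Chunks section with JSON content fields.
        if "Document Chunks" not in context_resp:
            return False
        chunks_part = context_resp.split("Document Chunks")[1]
        return '"content":' in chunks_part or '"text":' in chunks_part

    # Hypothetical sample responses, for illustration only.
    assert detect_context_hit('Document Chunks: [{"content": "a passage"}]')
    assert not detect_context_hit("[no-context]")
    assert not detect_context_hit('Entities: [{"entity": "foo"}]')  # entities only

One caveat the sketch makes visible: the "None" substring test also rejects any response whose chunk text happens to contain the word None, so that check may be worth scoping more tightly in a follow-up.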
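
Note on the LLM_MODEL_MAX_ASYNC plumbing: the value now flows from the environment (or .env) through Settings into the RAGManager kwargs, replacing the hard-coded 4 in app/core/rag.py. A minimal sketch of that flow, assuming the pydantic-settings v2 import path; the trimmed Settings class stands in for the real one in app/config.py.

    import os
    from pydantic_settings import BaseSettings

    class Settings(BaseSettings):
        # Only the field this patch adds; app/config.py declares many more.
        LLM_MODEL_MAX_ASYNC: int = 4  # default when the variable is unset

    os.environ["LLM_MODEL_MAX_ASYNC"] = "8"  # normally set via .env
    settings = Settings()

    # RAGManager reads the setting instead of hard-coding 4, so LLM
    # concurrency can be tuned per deployment without a code change.
    rag_kwargs = {"llm_model_max_async": settings.LLM_MODEL_MAX_ASYNC}
    assert rag_kwargs["llm_model_max_async"] == 8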
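
Note on raising COSINE_THRESHOLD from 0.4 to 0.6: chunks whose embedding similarity to the query falls below the threshold are filtered out before the context string is assembled, so the higher value tightens retrieval and complements the stricter hit check. A toy illustration of the thresholding; the vectors are made up, and the real comparison happens inside the vector store.

    import math

    def cosine_similarity(a: list[float], b: list[float]) -> float:
        dot = sum(x * y for x, y in zip(a, b))
        return dot / (math.hypot(*a) * math.hypot(*b))

    query = [1.0, 0.0]
    chunks = {
        "on_topic": [0.9, 0.3],     # similarity ~0.95: kept at either threshold
        "borderline": [0.5, 0.87],  # similarity ~0.50: passed 0.4, dropped at 0.6
        "off_topic": [0.1, 0.99],   # similarity ~0.10: always dropped
    }
    threshold = 0.6  # the new COSINE_THRESHOLD

    kept = {name for name, vec in chunks.items()
            if cosine_similarity(query, vec) >= threshold}
    assert kept == {"on_topic"}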