fix: 1. Optimize configuration options and settings 2. Fix the abnormal response when no context is retrieved

commit 4257ff5768 (parent be9801898e)
@@ -7,8 +7,9 @@ PORT=9600
 # LLM(Text) Configuration
 LLM_BINDING=vllm # ollama, vllm, openai
 LLM_BINDING_HOST=http://192.168.6.115:8002/v1 # vLLM OpenAI API base
-LLM_MODEL=qwen2.5-7b-awq
+LLM_MODEL=qwen3-8b-fp8
 LLM_KEY=EMPTY # vLLM default key
+LLM_MODEL_MAX_ASYNC=4 # vLLM handles high concurrency well; can be raised
 
 # LLM(Vision) Configuration
 VL_BINDING=vllm # ollama, vllm, openai
@@ -35,4 +36,4 @@ DATA_DIR=./index_data
 EMBEDDING_DIM=1024
 MAX_TOKEN_SIZE=8192
 MAX_RAG_INSTANCES=5 # maximum number of active RAG instances
-COSINE_THRESHOLD=0.4 # cosine similarity threshold
+COSINE_THRESHOLD=0.6 # cosine similarity threshold
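Note: the keys above mirror the Settings fields touched later in this commit. A minimal sketch of how they load, assuming pydantic-settings with an env_file pointing at this .env (the wiring is an assumption; only the field names and defaults come from the diff, and COSINE_THRESHOLD as a Settings field is inferred from the .env key):

    from pydantic_settings import BaseSettings, SettingsConfigDict

    class Settings(BaseSettings):
        model_config = SettingsConfigDict(env_file=".env")  # assumed wiring

        LLM_MODEL: str = "qwen2.5-7b-awq"   # .env overrides this to qwen3-8b-fp8
        LLM_MODEL_MAX_ASYNC: int = 4        # the knob added in this commit
        COSINE_THRESHOLD: float = 0.6       # inferred field; raised from 0.4

    settings = Settings()
    print(settings.LLM_MODEL, settings.LLM_MODEL_MAX_ASYNC, settings.COSINE_THRESHOLD)
    # -> qwen3-8b-fp8 4 0.6 (with the .env above present)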
@@ -125,13 +125,26 @@ async def query_knowledge_base(
 
     # Run the context retrieval
     context_resp = await rag.aquery(request.query, param=context_param)
 
-    # Determine retrieval hit status
-    rag_status = "miss"
+    logging.info(f"Context response: {context_resp}")
+
+    # Determine retrieval status
     has_context = False
+
+    # 1. Basic check: rule out empty strings and explicit no-context markers
     if context_resp and "[no-context]" not in context_resp and "None" not in context_resp:
-        rag_status = "hit"
-        has_context = True
+        # 2. Strict check: only concrete Document Chunks (source-text fragments) count as a valid hit
+        # Entities match generic terms too easily to serve as a hit signal on their own
+        if "Document Chunks" in context_resp:
+            chunks_part = context_resp.split("Document Chunks")[1]
+            # Check whether the chunks section carries JSON content fields
+            if '"content":' in chunks_part or '"text":' in chunks_part:
+                has_context = True
+
+    if has_context:
+        rag_status = "hit"
+    else:
+        rag_status = "miss"
 
     # Handle streaming output (SSE protocol - OpenAI-compatible format)
     if request.stream:
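Note: the new hit check is pure string matching, so it is worth pinning down. Below is the same rule factored into a self-contained helper with a quick self-test; the function name is hypothetical, while the marker strings ("[no-context]", "Document Chunks", '"content":', '"text":') come straight from the diff. One caution: the "None" guard also rejects any legitimate chunk that merely contains the word None.

    def is_context_hit(context_resp) -> bool:
        # 1. Basic check: empty response or explicit no-context markers -> miss
        if not context_resp or "[no-context]" in context_resp or "None" in context_resp:
            return False
        # 2. Strict check: entities alone are too noisy; require a Document Chunks section
        if "Document Chunks" not in context_resp:
            return False
        chunks_part = context_resp.split("Document Chunks")[1]
        # A hit needs JSON content fields inside the chunks section
        return '"content":' in chunks_part or '"text":' in chunks_part

    # Quick self-test with made-up responses (illustrative only):
    assert not is_context_hit("")                                  # empty -> miss
    assert not is_context_hit("[no-context]")                      # marker -> miss
    assert not is_context_hit("Entities: alpha, beta")             # entities only -> miss
    assert is_context_hit('Document Chunks\n{"content": "..."}')   # chunks -> hit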
@@ -16,6 +16,7 @@ class Settings(BaseSettings):
     LLM_BINDING_HOST: str = "http://192.168.6.115:8002/v1" # vLLM OpenAI API base
     LLM_MODEL: str = "qwen2.5-7b-awq"
     LLM_KEY: str = "EMPTY" # vLLM default key
+    LLM_MODEL_MAX_ASYNC: int = 4 # vLLM handles high concurrency well; can be raised
 
     # LLM (Vision) - vLLM
     VL_BINDING: str = "vllm" # ollama, vllm, openai
@@ -234,7 +234,7 @@ class RAGManager:
             "working_dir": user_data_dir,
             "llm_model_func": llm_func,
             "llm_model_name": settings.LLM_MODEL,
-            "llm_model_max_async": 4, # vLLM handles high concurrency well; can be raised
+            "llm_model_max_async": settings.LLM_MODEL_MAX_ASYNC, # vLLM handles high concurrency well; can be raised
             "max_parallel_insert": 1,
             "embedding_func": EmbeddingFunc(
                 embedding_dim=settings.EMBEDDING_DIM,
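Note: replacing the hard-coded 4 with settings.LLM_MODEL_MAX_ASYNC means the concurrency limit added to .env and Settings above actually reaches the RAG instance, so it can be tuned per deployment without touching code. A rough sketch of the override path (the rag_kwargs name is hypothetical, since the diff does not show what the dict is called; the env-reload behavior is standard pydantic, not something this commit adds):

    import os

    os.environ["LLM_MODEL_MAX_ASYNC"] = "8"  # e.g. exported by the service unit

    settings = Settings()  # pydantic re-reads the environment on instantiation
    rag_kwargs = {
        # ... the other kwargs shown in the diff above ...
        "llm_model_max_async": settings.LLM_MODEL_MAX_ASYNC,
    }
    assert rag_kwargs["llm_model_max_async"] == 8  # string parsed to int by pydantic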