diff --git a/.env.example b/.env.example
index e2fdce6..dc4c689 100644
--- a/.env.example
+++ b/.env.example
@@ -7,8 +7,9 @@ PORT=9600
 # LLM(Text) Configuration
 LLM_BINDING=vllm # ollama, vllm, openai
 LLM_BINDING_HOST=http://192.168.6.115:8002/v1 # vLLM OpenAI API base
-LLM_MODEL=qwen2.5-7b-awq
+LLM_MODEL=qwen3-8b-fp8
 LLM_KEY=EMPTY # vLLM default key
+LLM_MODEL_MAX_ASYNC=4 # vLLM handles concurrency well; can be raised
 
 # LLM(Vision) Configuration
 VL_BINDING=vllm # ollama, vllm, openai
@@ -35,4 +36,4 @@ DATA_DIR=./index_data
 EMBEDDING_DIM=1024
 MAX_TOKEN_SIZE=8192
 MAX_RAG_INSTANCES=5 # max number of active RAG instances
-COSINE_THRESHOLD=0.4 # cosine similarity threshold
\ No newline at end of file
+COSINE_THRESHOLD=0.6 # cosine similarity threshold
\ No newline at end of file
diff --git a/app/api/routes.py b/app/api/routes.py
index 24f0ee1..c758a00 100644
--- a/app/api/routes.py
+++ b/app/api/routes.py
@@ -125,13 +125,26 @@ async def query_knowledge_base(
 
     # Perform context retrieval
     context_resp = await rag.aquery(request.query, param=context_param)
+
+    logging.info(f"Context response: {context_resp}")
 
-    # Determine retrieval hit status
-    rag_status = "miss"
+    # Determine retrieval status
     has_context = False
+
+    # 1. Basic check: rule out empty strings and the explicit no-context marker
     if context_resp and "[no-context]" not in context_resp and "None" not in context_resp:
+        # 2. Strict check: only a response containing concrete Document Chunks (source passages) counts as a valid hit.
+        #    Entities alone tend to match generic terms, so they are not sufficient evidence of a hit.
+        if "Document Chunks" in context_resp:
+            chunks_part = context_resp.split("Document Chunks")[1]
+            # Check whether the Chunks section contains JSON content fields
+            if '"content":' in chunks_part or '"text":' in chunks_part:
+                has_context = True
+
+    if has_context:
         rag_status = "hit"
-        has_context = True
+    else:
+        rag_status = "miss"
 
     # Handle streaming output (SSE protocol - OpenAI-compatible format)
     if request.stream:
diff --git a/app/config.py b/app/config.py
index 0c5faf5..11c220d 100644
--- a/app/config.py
+++ b/app/config.py
@@ -16,6 +16,7 @@ class Settings(BaseSettings):
     LLM_BINDING_HOST: str = "http://192.168.6.115:8002/v1" # vLLM OpenAI API base
     LLM_MODEL: str = "qwen2.5-7b-awq"
     LLM_KEY: str = "EMPTY" # vLLM default key
+    LLM_MODEL_MAX_ASYNC: int = 4 # vLLM handles concurrency well; can be raised
 
     # LLM (Vision) - vLLM
     VL_BINDING: str = "vllm" # ollama, vllm, openai
diff --git a/app/core/rag.py b/app/core/rag.py
index 4ce2c28..0f25894 100644
--- a/app/core/rag.py
+++ b/app/core/rag.py
@@ -234,7 +234,7 @@ class RAGManager:
             "working_dir": user_data_dir,
             "llm_model_func": llm_func,
             "llm_model_name": settings.LLM_MODEL,
-            "llm_model_max_async": 4, # vLLM handles concurrency well; can be raised
+            "llm_model_max_async": settings.LLM_MODEL_MAX_ASYNC, # vLLM handles concurrency well; can be raised
             "max_parallel_insert": 1,
             "embedding_func": EmbeddingFunc(
                 embedding_dim=settings.EMBEDDING_DIM,
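
Note on the `COSINE_THRESHOLD` bump from 0.4 to 0.6: the threshold filters retrieved chunks by the cosine similarity between the query embedding and each chunk embedding, so raising it trades recall for precision. A toy illustration with random 1024-dim vectors (matching `EMBEDDING_DIM=1024`; not the project's actual embedding model):

```python
import numpy as np

def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    # Cosine similarity: dot product over the product of the vector norms
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

rng = np.random.default_rng(0)
query = rng.normal(size=1024)
related = query + 0.5 * rng.normal(size=1024)  # noisy copy, stands in for an on-topic chunk
unrelated = rng.normal(size=1024)              # independent vector, stands in for an off-topic chunk

for name, vec in [("related", related), ("unrelated", unrelated)]:
    score = cosine_sim(query, vec)
    kept = score >= 0.6  # COSINE_THRESHOLD=0.6
    print(f"{name}: {score:.3f} -> {'kept' if kept else 'filtered'}")
```

A moderately noisy match scoring around 0.4-0.5 would have passed the old cutoff; at 0.6 only substantially closer embeddings survive.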
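The hit/miss heuristic added to `routes.py` can also be expressed as a pure function, which makes it straightforward to unit-test against sample context strings. A minimal sketch assuming the context format shown in the diff; `has_valid_context` is a hypothetical helper, not part of the codebase:

```python
def has_valid_context(context_resp: str | None) -> bool:
    """Return True only when the retrieval response contains concrete chunk content."""
    # 1. Basic check: empty responses and explicit no-context markers are misses
    if not context_resp or "[no-context]" in context_resp or "None" in context_resp:
        return False
    # 2. Strict check: require a Document Chunks section, since entity matches
    #    alone can fire on generic terms
    if "Document Chunks" not in context_resp:
        return False
    chunks_part = context_resp.split("Document Chunks", 1)[1]
    # Only JSON content fields inside the Chunks section count as a hit
    return '"content":' in chunks_part or '"text":' in chunks_part

# Quick checks with made-up context strings
assert not has_valid_context(None)
assert not has_valid_context("[no-context]")
assert not has_valid_context("Entities\nfoo, bar")  # entities alone: still a miss
assert has_valid_context('Document Chunks\n[{"content": "some passage"}]')
```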
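Moving the hard-coded `llm_model_max_async: 4` into `Settings` means the concurrency knob is now tunable per deployment via `.env`. A minimal sketch of the override mechanism, assuming `Settings` is a pydantic-settings `BaseSettings` subclass (as the `class Settings(BaseSettings)` line suggests):

```python
import os

from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    # Defaults to 4; an LLM_MODEL_MAX_ASYNC environment variable (or .env entry) overrides it
    LLM_MODEL_MAX_ASYNC: int = 4

os.environ["LLM_MODEL_MAX_ASYNC"] = "8"
print(Settings().LLM_MODEL_MAX_ASYNC)  # -> 8
```

Referencing `settings.LLM_MODEL_MAX_ASYNC` in `rag.py` then keeps the value in one place instead of a magic number.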