From 41ddbcde2ee9b2e4d4e1e5a9a71d279d64094084 Mon Sep 17 00:00:00 2001
From: fuzhongyun <15339891972@163.com>
Date: Fri, 23 Jan 2026 10:16:16 +0800
Subject: [PATCH] =?UTF-8?q?fix:=201.=E5=A2=9E=E5=8A=A0=E6=98=AF=E5=90=A6?=
 =?UTF-8?q?=E4=BB=85=E4=BD=BF=E7=94=A8RAG=E6=A3=80=E7=B4=A2=E7=BB=93?=
 =?UTF-8?q?=E6=9E=9C=E5=8F=82=E6=95=B0=202.=E8=A7=84=E8=8C=83=E7=9F=A5?=
 =?UTF-8?q?=E8=AF=86=E5=BA=93=E8=BE=93=E5=87=BA=EF=BC=8C=E4=BD=BF=E7=94=A8?=
 =?UTF-8?q?openai=E6=A0=87=E5=87=86=E7=BB=93=E6=9E=84=203.=E5=89=8D?=
 =?UTF-8?q?=E7=AB=AF=E7=9B=B8=E5=BA=94=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/api/routes.py     | 85 +++++++++++++++++++++++++++++--------------
 app/static/admin.html | 75 ++++++++++++++++++++++++--------------
 2 files changed, 105 insertions(+), 55 deletions(-)

diff --git a/app/api/routes.py b/app/api/routes.py
index cc4a59d..43735b7 100644
--- a/app/api/routes.py
+++ b/app/api/routes.py
@@ -34,6 +34,7 @@ class QueryRequest(BaseModel):
     top_k: int = 5
     stream: bool = False
     think: bool = False
+    only_rag: bool = False  # Use only RAG retrieval results; no LLM fallback
 
 class IngestResponse(BaseModel):
     filename: str
@@ -121,16 +122,41 @@ async def query_knowledge_base(
             enable_rerank=settings.RERANK_ENABLED
         )
 
-        # 处理流式输出 (SSE 协议)
+        # 处理流式输出 (SSE 协议 - OpenAI 兼容格式)
         if request.stream:
+            import time
             async def stream_generator():
-                # SSE 格式化辅助函数
-                def sse_pack(event: str, text: str) -> str:
-                    # 使用 JSON 包装 data 内容，确保换行符和特殊字符被正确转义
-                    data = json.dumps({"text": text}, ensure_ascii=False)
-                    return f"event: {event}\ndata: {data}\n\n"
+                chat_id = f"chatcmpl-{secrets.token_hex(12)}"
+                created_time = int(time.time())
+                model_name = settings.LLM_MODEL
 
-                yield sse_pack("thinking", "1. 上下文检索中...\n")
+                # Helper: serialize one OpenAI-compatible "chat.completion.chunk" as an SSE "data:" frame.
+                def openai_chunk(content=None, reasoning_content=None, finish_reason=None, extra_delta=None):
+                    delta = {}
+                    if content:  # falsy values (None, "") are left out of the delta
+                        delta["content"] = content
+                    if reasoning_content:  # progress/thinking text travels in its own delta field
+                        delta["reasoning_content"] = reasoning_content
+                    if extra_delta:  # extension keys (e.g. x_rag_status) are merged into the delta as-is
+                        delta.update(extra_delta)
+                    # The delta may stay empty, e.g. for the final chunk that only carries finish_reason.
+                    chunk = {
+                        "id": chat_id,  # id, timestamp and model are fixed once per stream
+                        "object": "chat.completion.chunk",
+                        "created": created_time,
+                        "model": model_name,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": delta,
+                                "finish_reason": finish_reason
+                            }
+                        ]
+                    }
+                    return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"  # blank line terminates the SSE event
+
+                # 1. 发送检索状态 (作为思考过程的一部分)
+                yield openai_chunk(reasoning_content="1. 上下文检索中...\n")
                 
                 context_param = QueryParam(
                     mode=request.mode, 
@@ -139,9 +165,8 @@ async def query_knowledge_base(
                     enable_rerank=settings.RERANK_ENABLED
                 )
                 
-                # 获取上下文 (这步耗时较长，包含图遍历)
+                # 获取上下文
                 context_resp = await rag.aquery(request.query, param=context_param)
-
                 logging.info(f"Context Response: {context_resp}")
                 
                 # 判断检索状态
@@ -153,17 +178,28 @@ async def query_knowledge_base(
                 think = request.think
 
                 if has_context:
-                    yield sse_pack("system", "retrieved") # 发送系统事件：已检索到信息
-                    yield sse_pack("thinking", f"2. 上下文已检索 (长度: {len(context_resp)} 字符).\n")
+                    yield openai_chunk(
+                        reasoning_content=f"2. 上下文已检索 (长度: {len(context_resp)} 字符).\n",
+                        extra_delta={"x_rag_status": "hit"}
+                    )
                 else:
-                    yield sse_pack("system", "missed") # 发送系统事件：未检索到信息
-                    yield sse_pack("thinking", "2. 未找到相关上下文，将依赖 LLM 自身知识\n")
+                    yield openai_chunk(
+                        reasoning_content="2. 未找到相关上下文\n",
+                        extra_delta={"x_rag_status": "miss"}
+                    )
+                    
+                    # 如果开启了仅RAG模式且未找到上下文，则直接结束
+                    if request.only_rag:
+                        yield openai_chunk(content="未找到相关知识库内容。", finish_reason="stop")
+                        yield "data: [DONE]\n\n"
+                        return
+
+                    yield openai_chunk(reasoning_content="   (将依赖 LLM 自身知识)\n")
                     think = False
 
-                yield sse_pack("thinking", "3. 答案生成中...\n")
+                yield openai_chunk(reasoning_content="3. 答案生成中...\n")
                 
                 # 2. 生成答案
-                # 手动构建 System Prompt
                 sys_prompt = CUSTOM_RAG_RESPONSE_PROMPT.format(
                     context_data=context_resp, 
                     response_type="Multiple Paragraphs",
@@ -179,23 +215,18 @@ async def query_knowledge_base(
                     hashing_kv=rag.llm_response_cache
                 )
                 
-                thinkState = 0 # think 状态 0: 未开始 1: 开始 2: 结束
                 async for chunk in stream_resp:
                     if isinstance(chunk, dict):
                         if chunk.get("type") == "thinking":
-                            if thinkState == 0:
-                                yield sse_pack("thinking", "\n思考:\n")
-                                thinkState = 1
-
-                            yield sse_pack("thinking", chunk["content"])
+                            yield openai_chunk(reasoning_content=chunk["content"])
                         elif chunk.get("type") == "content":
-                            if thinkState == 1:
-                                yield sse_pack("none", "\n\n\n")
-                                thinkState = 2
-
-                            yield sse_pack("answer", chunk["content"])
+                            yield openai_chunk(content=chunk["content"])
                     elif chunk:
-                        yield sse_pack("answer", chunk)
+                        yield openai_chunk(content=chunk)
+                
+                # 发送结束标记
+                yield openai_chunk(finish_reason="stop")
+                yield "data: [DONE]\n\n"
             
             # 使用 text/event-stream Content-Type
             return StreamingResponse(stream_generator(), media_type="text/event-stream")
diff --git a/app/static/admin.html b/app/static/admin.html
index 9607935..fb17267 100644
--- a/app/static/admin.html
+++ b/app/static/admin.html
@@ -103,8 +103,9 @@
                                 <div class="message-content markdown-body" v-html="renderMarkdown(msg.content, msg.thinking, msg.retrievalStatus)"></div>
                             </div>
                         </div>
-                        <div style="display: flex; gap: 10px;">
-                            <el-input v-model="queryInput" placeholder="请输入问题..." @keyup.enter="sendQuery"></el-input>
+                        <div style="display: flex; gap: 10px; align-items: center;">
+                            <el-checkbox v-model="onlyRag" label="仅使用知识库" border></el-checkbox>
+                            <el-input v-model="queryInput" placeholder="请输入问题..." @keyup.enter="sendQuery" style="flex: 1;"></el-input>
                             <el-button type="primary" :loading="chatLoading" @click="sendQuery">发送</el-button>
                         </div>
                     </el-tab-pane>
@@ -201,6 +202,7 @@
 
                 // 聊天
                 const queryInput = ref('');
+                const onlyRag = ref(false);
                 const chatHistory = ref([]);
                 const chatLoading = ref(false);
                 const chatBox = ref(null);
@@ -375,7 +377,13 @@
                                 'Content-Type': 'application/json',
                                 'X-Tenant-ID': currentTenantId.value
                             },
-                            body: JSON.stringify({ query: q, stream: true, mode: 'mix', think: true })
+                            body: JSON.stringify({ 
+                                query: q, 
+                                stream: true, 
+                                mode: 'mix', 
+                                think: true,
+                                only_rag: onlyRag.value
+                            })
                         });
 
                         const reader = response.body.getReader();
@@ -397,32 +405,43 @@
                             buffer = blocks.pop(); // 保留最后一个可能不完整的块
                             
                             for (const block of blocks) {
+                                if (!block.trim() || block.trim() === 'data: [DONE]') continue;
+
                                 const lines = block.split('\n');
-                                let eventType = 'answer';
-                                let dataText = '';
-                                
                                 for (const line of lines) {
-                                    if (line.startsWith('event: ')) {
-                                        eventType = line.slice(7).trim();
-                                    } else if (line.startsWith('data: ')) {
+                                    if (line.startsWith('data: ')) {
                                         try {
-                                            const data = JSON.parse(line.slice(6));
-                                            dataText = data.text;
-                                        } catch (e) {}
+                                            const jsonStr = line.slice(6);
+                                            const chunk = JSON.parse(jsonStr);
+                                            
+                                            // 解析 OpenAI 兼容格式
+                                            if (chunk.choices && chunk.choices[0].delta) {
+                                                const delta = chunk.choices[0].delta;
+                                                
+                                                // 处理 x_rag_status
+                                                if (delta.x_rag_status) {
+                                                    assistantMsg.retrievalStatus = delta.x_rag_status;
+                                                }
+                                                
+                                                // 处理思考过程
+                                                if (delta.reasoning_content) {
+                                                    assistantMsg.thinking += delta.reasoning_content;
+                                                }
+                                                
+                                                // 处理正文内容
+                                                if (delta.content) {
+                                                    assistantMsg.content += delta.content;
+                                                }
+                                            }
+                                            
+                                            // 滚动到底部
+                                            if (chatBox.value) chatBox.value.scrollTop = chatBox.value.scrollHeight;
+                                            
+                                        } catch (e) {
+                                            console.error('JSON parse error:', e);
+                                        }
                                     }
                                 }
-                                
-                                if (eventType === 'system') {
-                                    assistantMsg.retrievalStatus = dataText;
-                                } else if (dataText) {
-                                    if (eventType === 'thinking') {
-                                        assistantMsg.thinking += dataText;
-                                    } else if (eventType === 'answer') {
-                                        assistantMsg.content += dataText;
-                                    }
-                                    // 滚动到底部
-                                    if (chatBox.value) chatBox.value.scrollTop = chatBox.value.scrollHeight;
-                                }
                             }
                         }
                     } catch (e) {
@@ -436,9 +455,9 @@
                     let html = '';
                     
                     if (retrievalStatus) {
-                         const color = retrievalStatus === 'retrieved' ? '#67c23a' : '#e6a23c';
-                         const text = retrievalStatus === 'retrieved' ? '已检索到相关知识' : '未检索到相关知识，使用通用知识回答';
-                         const icon = retrievalStatus === 'retrieved' ? '✔️' : '⚠️';
+                         const color = retrievalStatus === 'hit' ? '#67c23a' : '#e6a23c';
+                         const text = retrievalStatus === 'hit' ? '已检索到相关知识' : '未检索到相关知识';
+                         const icon = retrievalStatus === 'hit' ? '✔️' : '⚠️';
                          html += `<div style="margin-bottom: 8px; font-size: 12px; color: ${color}; font-weight: bold;">${icon} ${text}</div>`;
                     }
 
@@ -468,7 +487,7 @@
                     goHome, refreshTenants, enterTenant, fetchDocuments,
                     viewDocument, deleteDocument, deleteCurrentDoc,
                     uploadFile, uploadText, addQA, removeQA, uploadQA,
-                    sendQuery, renderMarkdown, formatDate, isAdmin
+                    sendQuery, renderMarkdown, formatDate, isAdmin, onlyRag
                 };
             }
         });