fix: 1. add an option to use only RAG retrieval results 2. standardize knowledge base output to the OpenAI chat-completion chunk structure 3. matching frontend adjustments

fuzhongyun 2026-01-23 10:16:16 +08:00
parent eaa6fc6fe7
commit 41ddbcde2e
2 changed files with 105 additions and 55 deletions

View File

@@ -34,6 +34,7 @@ class QueryRequest(BaseModel):
     top_k: int = 5
     stream: bool = False
     think: bool = False
+    only_rag: bool = False  # whether to use only RAG retrieval results, without falling back to the LLM

 class IngestResponse(BaseModel):
     filename: str
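
For reference, the new flag rides along in the normal query payload; a request body matching the frontend fetch call later in this diff would look like the JSON below (the endpoint path itself is not shown in this diff, so treat it as unknown):

    {
        "query": "What does the handbook say about VPN access?",
        "stream": true,
        "mode": "mix",
        "think": true,
        "only_rag": true
    }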
@@ -121,16 +122,41 @@
         enable_rerank=settings.RERANK_ENABLED
     )

-    # Handle streaming output (SSE protocol)
+    # Handle streaming output (SSE protocol - OpenAI-compatible format)
     if request.stream:
+        import time
         async def stream_generator():
-            # SSE formatting helper
-            def sse_pack(event: str, text: str) -> str:
-                # Wrap the payload in JSON so newlines and special characters are escaped correctly
-                data = json.dumps({"text": text}, ensure_ascii=False)
-                return f"event: {event}\ndata: {data}\n\n"
-
-            yield sse_pack("thinking", "1. Retrieving context...\n")
+            chat_id = f"chatcmpl-{secrets.token_hex(12)}"
+            created_time = int(time.time())
+            model_name = settings.LLM_MODEL
+
+            # Helper: build an OpenAI-compatible chunk
+            def openai_chunk(content=None, reasoning_content=None, finish_reason=None, extra_delta=None):
+                delta = {}
+                if content:
+                    delta["content"] = content
+                if reasoning_content:
+                    delta["reasoning_content"] = reasoning_content
+                if extra_delta:
+                    delta.update(extra_delta)
+                chunk = {
+                    "id": chat_id,
+                    "object": "chat.completion.chunk",
+                    "created": created_time,
+                    "model": model_name,
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": delta,
+                            "finish_reason": finish_reason
+                        }
+                    ]
+                }
+                return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
+
+            # 1. Send retrieval status (as part of the thinking stream)
+            yield openai_chunk(reasoning_content="1. Retrieving context...\n")

             context_param = QueryParam(
                 mode=request.mode,
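
Each value yielded by openai_chunk above is one SSE data line carrying a chat.completion.chunk object. A reasoning chunk would look roughly like this on the wire (the id, created, and model values here are illustrative placeholders):

    data: {"id": "chatcmpl-1a2b3c4d5e6f", "object": "chat.completion.chunk", "created": 1737598576, "model": "<settings.LLM_MODEL>", "choices": [{"index": 0, "delta": {"reasoning_content": "1. Retrieving context...\n"}, "finish_reason": null}]}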
@@ -139,9 +165,8 @@
                 enable_rerank=settings.RERANK_ENABLED
             )

-            # Fetch the context (this step is slow; it includes graph traversal)
+            # Fetch the context
             context_resp = await rag.aquery(request.query, param=context_param)
             logging.info(f"Context Response: {context_resp}")

             # Determine retrieval status
@@ -153,17 +178,28 @@
             think = request.think
             if has_context:
-                yield sse_pack("system", "retrieved")  # send a system event: context retrieved
-                yield sse_pack("thinking", f"2. Context retrieved (length: {len(context_resp)} chars).\n")
+                yield openai_chunk(
+                    reasoning_content=f"2. Context retrieved (length: {len(context_resp)} chars).\n",
+                    extra_delta={"x_rag_status": "hit"}
+                )
             else:
-                yield sse_pack("system", "missed")  # send a system event: nothing retrieved
-                yield sse_pack("thinking", "2. No relevant context found; will rely on the LLM's own knowledge\n")
+                yield openai_chunk(
+                    reasoning_content="2. No relevant context found\n",
+                    extra_delta={"x_rag_status": "miss"}
+                )
+
+                # If RAG-only mode is on and no context was found, end right here
+                if request.only_rag:
+                    yield openai_chunk(content="No relevant knowledge base content found.", finish_reason="stop")
+                    yield "data: [DONE]\n\n"
+                    return
+
+                yield openai_chunk(reasoning_content=" (falling back to the LLM's own knowledge)\n")
                 think = False

-            yield sse_pack("thinking", "3. Generating answer...\n")
+            yield openai_chunk(reasoning_content="3. Generating answer...\n")

             # 2. Generate the answer
-            # Build the system prompt manually
             sys_prompt = CUSTOM_RAG_RESPONSE_PROMPT.format(
                 context_data=context_resp,
                 response_type="Multiple Paragraphs",
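
When only_rag is set and retrieval misses, the branch above closes the stream with a final chunk followed by the [DONE] sentinel, so a compliant client sees a normal end of stream; on the wire (id/created/model fields abbreviated here for brevity):

    data: {"id": "chatcmpl-...", "object": "chat.completion.chunk", ..., "choices": [{"index": 0, "delta": {"content": "No relevant knowledge base content found."}, "finish_reason": "stop"}]}
    data: [DONE]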
@@ -179,23 +215,18 @@
                 hashing_kv=rag.llm_response_cache
             )

-            thinkState = 0  # think state: 0 = not started, 1 = started, 2 = finished
             async for chunk in stream_resp:
                 if isinstance(chunk, dict):
                     if chunk.get("type") == "thinking":
-                        if thinkState == 0:
-                            yield sse_pack("thinking", "\nThinking:\n")
-                            thinkState = 1
-                        yield sse_pack("thinking", chunk["content"])
+                        yield openai_chunk(reasoning_content=chunk["content"])
                     elif chunk.get("type") == "content":
-                        if thinkState == 1:
-                            yield sse_pack("none", "\n\n\n")
-                            thinkState = 2
-                        yield sse_pack("answer", chunk["content"])
+                        yield openai_chunk(content=chunk["content"])
                 elif chunk:
-                    yield sse_pack("answer", chunk)
+                    yield openai_chunk(content=chunk)
+
+            # Send the end-of-stream marker
+            yield openai_chunk(finish_reason="stop")
+            yield "data: [DONE]\n\n"

         # Use the text/event-stream Content-Type
         return StreamingResponse(stream_generator(), media_type="text/event-stream")
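
Because the stream now follows the OpenAI chunk schema, any SSE-aware client can consume it. A minimal Python sketch follows; the URL, endpoint path, and tenant id are assumptions for illustration, while the chunk schema and the X-Tenant-ID header come from this diff:

    import json
    import requests

    # Minimal consumer for the OpenAI-compatible SSE stream above.
    resp = requests.post(
        "http://localhost:8000/query",  # hypothetical endpoint path
        headers={"X-Tenant-ID": "demo"},
        json={"query": "What is covered by the handbook?", "stream": True,
              "mode": "mix", "think": True, "only_rag": False},
        stream=True,
    )
    thinking, answer = [], []
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separators between SSE events
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break  # end-of-stream sentinel
        delta = json.loads(payload)["choices"][0]["delta"]
        if delta.get("x_rag_status"):
            print("retrieval status:", delta["x_rag_status"])  # "hit" or "miss"
        thinking.append(delta.get("reasoning_content", ""))
        answer.append(delta.get("content", ""))
    print("".join(answer))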

View File

@@ -103,8 +103,9 @@
             <div class="message-content markdown-body" v-html="renderMarkdown(msg.content, msg.thinking, msg.retrievalStatus)"></div>
           </div>
         </div>
-        <div style="display: flex; gap: 10px;">
-          <el-input v-model="queryInput" placeholder="Enter your question..." @keyup.enter="sendQuery"></el-input>
+        <div style="display: flex; gap: 10px; align-items: center;">
+          <el-checkbox v-model="onlyRag" label="Knowledge base only" border></el-checkbox>
+          <el-input v-model="queryInput" placeholder="Enter your question..." @keyup.enter="sendQuery" style="flex: 1;"></el-input>
           <el-button type="primary" :loading="chatLoading" @click="sendQuery">Send</el-button>
         </div>
       </el-tab-pane>
@@ -201,6 +202,7 @@
         // Chat
         const queryInput = ref('');
+        const onlyRag = ref(false);
         const chatHistory = ref([]);
         const chatLoading = ref(false);
         const chatBox = ref(null);
@@ -375,7 +377,13 @@
                 'Content-Type': 'application/json',
                 'X-Tenant-ID': currentTenantId.value
             },
-            body: JSON.stringify({ query: q, stream: true, mode: 'mix', think: true })
+            body: JSON.stringify({
+                query: q,
+                stream: true,
+                mode: 'mix',
+                think: true,
+                only_rag: onlyRag.value
+            })
         });

         const reader = response.body.getReader();
@@ -397,32 +405,43 @@
                 buffer = blocks.pop();  // keep the last, possibly incomplete, block

                 for (const block of blocks) {
+                    if (!block.trim() || block.trim() === 'data: [DONE]') continue;
+
                     const lines = block.split('\n');
-                    let eventType = 'answer';
-                    let dataText = '';

                     for (const line of lines) {
-                        if (line.startsWith('event: ')) {
-                            eventType = line.slice(7).trim();
-                        } else if (line.startsWith('data: ')) {
+                        if (line.startsWith('data: ')) {
                             try {
-                                const data = JSON.parse(line.slice(6));
-                                dataText = data.text;
-                            } catch (e) {}
+                                const jsonStr = line.slice(6);
+                                const chunk = JSON.parse(jsonStr);
+
+                                // Parse the OpenAI-compatible format
+                                if (chunk.choices && chunk.choices[0].delta) {
+                                    const delta = chunk.choices[0].delta;
+                                    // Handle x_rag_status
+                                    if (delta.x_rag_status) {
+                                        assistantMsg.retrievalStatus = delta.x_rag_status;
+                                    }
+                                    // Handle the thinking stream
+                                    if (delta.reasoning_content) {
+                                        assistantMsg.thinking += delta.reasoning_content;
+                                    }
+                                    // Handle the answer body
+                                    if (delta.content) {
+                                        assistantMsg.content += delta.content;
+                                    }
+                                }
+
+                                // Scroll to the bottom
+                                if (chatBox.value) chatBox.value.scrollTop = chatBox.value.scrollHeight;
+                            } catch (e) {
+                                console.error('JSON parse error:', e);
+                            }
                         }
                     }
-
-                    if (eventType === 'system') {
-                        assistantMsg.retrievalStatus = dataText;
-                    } else if (dataText) {
-                        if (eventType === 'thinking') {
-                            assistantMsg.thinking += dataText;
-                        } else if (eventType === 'answer') {
-                            assistantMsg.content += dataText;
-                        }
-                        // Scroll to the bottom
-                        if (chatBox.value) chatBox.value.scrollTop = chatBox.value.scrollHeight;
-                    }
                 }
             }
         } catch (e) {
@@ -436,9 +455,9 @@
         let html = '';
         if (retrievalStatus) {
-            const color = retrievalStatus === 'retrieved' ? '#67c23a' : '#e6a23c';
-            const text = retrievalStatus === 'retrieved' ? 'Relevant knowledge retrieved' : 'No relevant knowledge retrieved; answering with general knowledge';
-            const icon = retrievalStatus === 'retrieved' ? '✔️' : '⚠️';
+            const color = retrievalStatus === 'hit' ? '#67c23a' : '#e6a23c';
+            const text = retrievalStatus === 'hit' ? 'Relevant knowledge retrieved' : 'No relevant knowledge retrieved';
+            const icon = retrievalStatus === 'hit' ? '✔️' : '⚠️';
             html += `<div style="margin-bottom: 8px; font-size: 12px; color: ${color}; font-weight: bold;">${icon} ${text}</div>`;
         }
@@ -468,7 +487,7 @@
             goHome, refreshTenants, enterTenant, fetchDocuments,
             viewDocument, deleteDocument, deleteCurrentDoc,
             uploadFile, uploadText, addQA, removeQA, uploadQA,
-            sendQuery, renderMarkdown, formatDate, isAdmin
+            sendQuery, renderMarkdown, formatDate, isAdmin, onlyRag
         };
     }
 });