fix: 1. 增加“是否仅使用 RAG 检索结果”参数 2. 规范知识库输出,使用 OpenAI 标准结构 3. 前端相应调整
This commit is contained in:
parent
eaa6fc6fe7
commit
41ddbcde2e
|
|
@ -34,6 +34,7 @@ class QueryRequest(BaseModel):
|
|||
top_k: int = 5
|
||||
stream: bool = False
|
||||
think: bool = False
|
||||
only_rag: bool = False # 是否仅使用RAG检索结果,不进行LLM兜底
|
||||
|
||||
class IngestResponse(BaseModel):
|
||||
filename: str
|
||||
|
|
@ -121,16 +122,41 @@ async def query_knowledge_base(
|
|||
enable_rerank=settings.RERANK_ENABLED
|
||||
)
|
||||
|
||||
# 处理流式输出 (SSE 协议)
|
||||
# 处理流式输出 (SSE 协议 - OpenAI 兼容格式)
|
||||
if request.stream:
|
||||
import time
|
||||
async def stream_generator():
|
||||
# SSE 格式化辅助函数
|
||||
def sse_pack(event: str, text: str) -> str:
    """Build one Server-Sent Events frame carrying a named event.

    Args:
        event: Value written after ``event: `` in the frame.
        text: Payload text; it is wrapped as ``{"text": ...}`` and JSON-encoded.

    Returns:
        The frame ``event: <event>\\ndata: <json>\\n\\n``.
    """
    # JSON-encoding the payload keeps newlines and special characters escaped,
    # so the data field always stays on a single line.
    payload = json.dumps({"text": text}, ensure_ascii=False)
    return "event: {}\ndata: {}\n\n".format(event, payload)
|
||||
chat_id = f"chatcmpl-{secrets.token_hex(12)}"
|
||||
created_time = int(time.time())
|
||||
model_name = settings.LLM_MODEL
|
||||
|
||||
yield sse_pack("thinking", "1. 上下文检索中...\n")
|
||||
# 辅助函数:构造 OpenAI 兼容的 Chunk
|
||||
def openai_chunk(content=None, reasoning_content=None, finish_reason=None, extra_delta=None):
    """Serialize one ``chat.completion.chunk`` object as an SSE ``data:`` frame.

    Args:
        content: Answer text for the delta; omitted from the delta when falsy.
        reasoning_content: Reasoning text for the delta; omitted when falsy.
        finish_reason: Value stored in the choice's ``finish_reason`` field.
        extra_delta: Optional mapping merged into the delta after the two text
            fields, so its entries take precedence on key clashes.

    Returns:
        The string ``data: <json>\\n\\n``.

    ``chat_id``, ``created_time`` and ``model_name`` are read from the
    surrounding scope.
    """
    candidate_fields = {
        "content": content,
        "reasoning_content": reasoning_content,
    }
    # Keep only the text fields that actually carry something.
    delta_payload = {key: value for key, value in candidate_fields.items() if value}
    if extra_delta:
        delta_payload.update(extra_delta)

    choice = {
        "index": 0,
        "delta": delta_payload,
        "finish_reason": finish_reason,
    }
    payload = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": created_time,
        "model": model_name,
        "choices": [choice],
    }
    return "data: " + json.dumps(payload, ensure_ascii=False) + "\n\n"
|
||||
|
||||
# 1. 发送检索状态 (作为思考过程的一部分)
|
||||
yield openai_chunk(reasoning_content="1. 上下文检索中...\n")
|
||||
|
||||
context_param = QueryParam(
|
||||
mode=request.mode,
|
||||
|
|
@ -139,9 +165,8 @@ async def query_knowledge_base(
|
|||
enable_rerank=settings.RERANK_ENABLED
|
||||
)
|
||||
|
||||
# 获取上下文 (这步耗时较长,包含图遍历)
|
||||
# 获取上下文
|
||||
context_resp = await rag.aquery(request.query, param=context_param)
|
||||
|
||||
logging.info(f"Context Response: {context_resp}")
|
||||
|
||||
# 判断检索状态
|
||||
|
|
@ -153,17 +178,28 @@ async def query_knowledge_base(
|
|||
think = request.think
|
||||
|
||||
if has_context:
|
||||
yield sse_pack("system", "retrieved") # 发送系统事件:已检索到信息
|
||||
yield sse_pack("thinking", f"2. 上下文已检索 (长度: {len(context_resp)} 字符).\n")
|
||||
yield openai_chunk(
|
||||
reasoning_content=f"2. 上下文已检索 (长度: {len(context_resp)} 字符).\n",
|
||||
extra_delta={"x_rag_status": "hit"}
|
||||
)
|
||||
else:
|
||||
yield sse_pack("system", "missed") # 发送系统事件:未检索到信息
|
||||
yield sse_pack("thinking", "2. 未找到相关上下文,将依赖 LLM 自身知识\n")
|
||||
yield openai_chunk(
|
||||
reasoning_content="2. 未找到相关上下文\n",
|
||||
extra_delta={"x_rag_status": "miss"}
|
||||
)
|
||||
|
||||
# 如果开启了仅RAG模式且未找到上下文,则直接结束
|
||||
if request.only_rag:
|
||||
yield openai_chunk(content="未找到相关知识库内容。", finish_reason="stop")
|
||||
yield "data: [DONE]\n\n"
|
||||
return
|
||||
|
||||
yield openai_chunk(reasoning_content=" (将依赖 LLM 自身知识)\n")
|
||||
think = False
|
||||
|
||||
yield sse_pack("thinking", "3. 答案生成中...\n")
|
||||
yield openai_chunk(reasoning_content="3. 答案生成中...\n")
|
||||
|
||||
# 2. 生成答案
|
||||
# 手动构建 System Prompt
|
||||
sys_prompt = CUSTOM_RAG_RESPONSE_PROMPT.format(
|
||||
context_data=context_resp,
|
||||
response_type="Multiple Paragraphs",
|
||||
|
|
@ -179,23 +215,18 @@ async def query_knowledge_base(
|
|||
hashing_kv=rag.llm_response_cache
|
||||
)
|
||||
|
||||
thinkState = 0 # think 状态 0: 未开始 1: 开始 2: 结束
|
||||
async for chunk in stream_resp:
|
||||
if isinstance(chunk, dict):
|
||||
if chunk.get("type") == "thinking":
|
||||
if thinkState == 0:
|
||||
yield sse_pack("thinking", "\n思考:\n")
|
||||
thinkState = 1
|
||||
|
||||
yield sse_pack("thinking", chunk["content"])
|
||||
yield openai_chunk(reasoning_content=chunk["content"])
|
||||
elif chunk.get("type") == "content":
|
||||
if thinkState == 1:
|
||||
yield sse_pack("none", "\n\n\n")
|
||||
thinkState = 2
|
||||
|
||||
yield sse_pack("answer", chunk["content"])
|
||||
yield openai_chunk(content=chunk["content"])
|
||||
elif chunk:
|
||||
yield sse_pack("answer", chunk)
|
||||
yield openai_chunk(content=chunk)
|
||||
|
||||
# 发送结束标记
|
||||
yield openai_chunk(finish_reason="stop")
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
# 使用 text/event-stream Content-Type
|
||||
return StreamingResponse(stream_generator(), media_type="text/event-stream")
|
||||
|
|
|
|||
|
|
@ -103,8 +103,9 @@
|
|||
<div class="message-content markdown-body" v-html="renderMarkdown(msg.content, msg.thinking, msg.retrievalStatus)"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div style="display: flex; gap: 10px;">
|
||||
<el-input v-model="queryInput" placeholder="请输入问题..." @keyup.enter="sendQuery"></el-input>
|
||||
<div style="display: flex; gap: 10px; align-items: center;">
|
||||
<el-checkbox v-model="onlyRag" label="仅使用知识库" border></el-checkbox>
|
||||
<el-input v-model="queryInput" placeholder="请输入问题..." @keyup.enter="sendQuery" style="flex: 1;"></el-input>
|
||||
<el-button type="primary" :loading="chatLoading" @click="sendQuery">发送</el-button>
|
||||
</div>
|
||||
</el-tab-pane>
|
||||
|
|
@ -201,6 +202,7 @@
|
|||
|
||||
// 聊天
|
||||
const queryInput = ref('');
|
||||
const onlyRag = ref(false);
|
||||
const chatHistory = ref([]);
|
||||
const chatLoading = ref(false);
|
||||
const chatBox = ref(null);
|
||||
|
|
@ -375,7 +377,13 @@
|
|||
'Content-Type': 'application/json',
|
||||
'X-Tenant-ID': currentTenantId.value
|
||||
},
|
||||
body: JSON.stringify({ query: q, stream: true, mode: 'mix', think: true })
|
||||
body: JSON.stringify({
|
||||
query: q,
|
||||
stream: true,
|
||||
mode: 'mix',
|
||||
think: true,
|
||||
only_rag: onlyRag.value
|
||||
})
|
||||
});
|
||||
|
||||
const reader = response.body.getReader();
|
||||
|
|
@ -397,31 +405,42 @@
|
|||
buffer = blocks.pop(); // 保留最后一个可能不完整的块
|
||||
|
||||
for (const block of blocks) {
|
||||
if (!block.trim() || block.trim() === 'data: [DONE]') continue;
|
||||
|
||||
const lines = block.split('\n');
|
||||
let eventType = 'answer';
|
||||
let dataText = '';
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('event: ')) {
|
||||
eventType = line.slice(7).trim();
|
||||
} else if (line.startsWith('data: ')) {
|
||||
if (line.startsWith('data: ')) {
|
||||
try {
|
||||
const data = JSON.parse(line.slice(6));
|
||||
dataText = data.text;
|
||||
} catch (e) {}
|
||||
const jsonStr = line.slice(6);
|
||||
const chunk = JSON.parse(jsonStr);
|
||||
|
||||
// 解析 OpenAI 兼容格式
|
||||
if (chunk.choices && chunk.choices[0].delta) {
|
||||
const delta = chunk.choices[0].delta;
|
||||
|
||||
// 处理 x_rag_status
|
||||
if (delta.x_rag_status) {
|
||||
assistantMsg.retrievalStatus = delta.x_rag_status;
|
||||
}
|
||||
|
||||
// 处理思考过程
|
||||
if (delta.reasoning_content) {
|
||||
assistantMsg.thinking += delta.reasoning_content;
|
||||
}
|
||||
|
||||
// 处理正文内容
|
||||
if (delta.content) {
|
||||
assistantMsg.content += delta.content;
|
||||
}
|
||||
}
|
||||
|
||||
if (eventType === 'system') {
|
||||
assistantMsg.retrievalStatus = dataText;
|
||||
} else if (dataText) {
|
||||
if (eventType === 'thinking') {
|
||||
assistantMsg.thinking += dataText;
|
||||
} else if (eventType === 'answer') {
|
||||
assistantMsg.content += dataText;
|
||||
}
|
||||
// 滚动到底部
|
||||
if (chatBox.value) chatBox.value.scrollTop = chatBox.value.scrollHeight;
|
||||
|
||||
} catch (e) {
|
||||
console.error('JSON parse error:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -436,9 +455,9 @@
|
|||
let html = '';
|
||||
|
||||
if (retrievalStatus) {
|
||||
const color = retrievalStatus === 'retrieved' ? '#67c23a' : '#e6a23c';
|
||||
const text = retrievalStatus === 'retrieved' ? '已检索到相关知识' : '未检索到相关知识,使用通用知识回答';
|
||||
const icon = retrievalStatus === 'retrieved' ? '✔️' : '⚠️';
|
||||
const color = retrievalStatus === 'hit' ? '#67c23a' : '#e6a23c';
|
||||
const text = retrievalStatus === 'hit' ? '已检索到相关知识' : '未检索到相关知识';
|
||||
const icon = retrievalStatus === 'hit' ? '✔️' : '⚠️';
|
||||
html += `<div style="margin-bottom: 8px; font-size: 12px; color: ${color}; font-weight: bold;">${icon} ${text}</div>`;
|
||||
}
|
||||
|
||||
|
|
@ -468,7 +487,7 @@
|
|||
goHome, refreshTenants, enterTenant, fetchDocuments,
|
||||
viewDocument, deleteDocument, deleteCurrentDoc,
|
||||
uploadFile, uploadText, addQA, removeQA, uploadQA,
|
||||
sendQuery, renderMarkdown, formatDate, isAdmin
|
||||
sendQuery, renderMarkdown, formatDate, isAdmin, onlyRag
|
||||
};
|
||||
}
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in New Issue