fix: 1. add a flag to use only RAG retrieval results 2. standardize the knowledge-base output to the OpenAI-compatible structure 3. adjust the frontend accordingly
commit 41ddbcde2e
parent eaa6fc6fe7
@@ -34,6 +34,7 @@ class QueryRequest(BaseModel):
     top_k: int = 5
     stream: bool = False
     think: bool = False
+    only_rag: bool = False  # whether to use only RAG retrieval results, with no LLM fallback
 
 class IngestResponse(BaseModel):
     filename: str
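For reference, a minimal sketch of the request body this model now accepts. The `query` and `mode` fields are assumed from their use later in the diff (they sit outside this hunk), and the Pydantic v2 API is assumed:

from pydantic import BaseModel

class QueryRequest(BaseModel):  # trimmed mirror of the model above, for illustration only
    query: str = ""
    mode: str = "mix"   # assumed field, used as request.mode later in the diff
    top_k: int = 5
    stream: bool = False
    think: bool = False
    only_rag: bool = False  # new flag: answer from the knowledge base only, no LLM fallback

req = QueryRequest(query="什么是知识图谱?", stream=True, only_rag=True)
print(req.model_dump_json())  # the JSON body a client would POST to the query endpoint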
@@ -121,16 +122,41 @@ async def query_knowledge_base(
         enable_rerank=settings.RERANK_ENABLED
     )
 
-    # Handle streaming output (SSE protocol)
+    # Handle streaming output (SSE protocol - OpenAI-compatible format)
     if request.stream:
+        import time
         async def stream_generator():
-            # SSE formatting helper
-            def sse_pack(event: str, text: str) -> str:
-                # Wrap the data payload in JSON so newlines and special characters are escaped correctly
-                data = json.dumps({"text": text}, ensure_ascii=False)
-                return f"event: {event}\ndata: {data}\n\n"
-
-            yield sse_pack("thinking", "1. 上下文检索中...\n")
+            chat_id = f"chatcmpl-{secrets.token_hex(12)}"
+            created_time = int(time.time())
+            model_name = settings.LLM_MODEL
+
+            # Helper: build an OpenAI-compatible chunk
+            def openai_chunk(content=None, reasoning_content=None, finish_reason=None, extra_delta=None):
+                delta = {}
+                if content:
+                    delta["content"] = content
+                if reasoning_content:
+                    delta["reasoning_content"] = reasoning_content
+                if extra_delta:
+                    delta.update(extra_delta)
+
+                chunk = {
+                    "id": chat_id,
+                    "object": "chat.completion.chunk",
+                    "created": created_time,
+                    "model": model_name,
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": delta,
+                            "finish_reason": finish_reason
+                        }
+                    ]
+                }
+                return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
+
+            # 1. Emit the retrieval status (as part of the reasoning stream)
+            yield openai_chunk(reasoning_content="1. 上下文检索中...\n")
 
             context_param = QueryParam(
                 mode=request.mode,
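To make the new wire format concrete, here is a standalone sketch of a single frame as `openai_chunk` emits it; the model name is a placeholder for `settings.LLM_MODEL`:

import json
import secrets
import time

chat_id = f"chatcmpl-{secrets.token_hex(12)}"
delta = {"reasoning_content": "1. 上下文检索中...\n"}
chunk = {
    "id": chat_id,
    "object": "chat.completion.chunk",
    "created": int(time.time()),
    "model": "my-llm-model",  # placeholder for settings.LLM_MODEL
    "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
}
# One SSE frame on the wire: a "data: {...}" line followed by a blank line
print(f"data: {json.dumps(chunk, ensure_ascii=False)}\n")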
@@ -139,9 +165,8 @@ async def query_knowledge_base(
                 enable_rerank=settings.RERANK_ENABLED
             )
 
-            # Fetch the context (this step is slow: it includes the graph traversal)
+            # Fetch the context
             context_resp = await rag.aquery(request.query, param=context_param)
 
             logging.info(f"Context Response: {context_resp}")
 
             # Determine the retrieval status
@@ -153,17 +178,28 @@ async def query_knowledge_base(
             think = request.think
 
             if has_context:
-                yield sse_pack("system", "retrieved")  # emit system event: context retrieved
-                yield sse_pack("thinking", f"2. 上下文已检索 (长度: {len(context_resp)} 字符).\n")
+                yield openai_chunk(
+                    reasoning_content=f"2. 上下文已检索 (长度: {len(context_resp)} 字符).\n",
+                    extra_delta={"x_rag_status": "hit"}
+                )
             else:
-                yield sse_pack("system", "missed")  # emit system event: nothing retrieved
-                yield sse_pack("thinking", "2. 未找到相关上下文,将依赖 LLM 自身知识\n")
+                yield openai_chunk(
+                    reasoning_content="2. 未找到相关上下文\n",
+                    extra_delta={"x_rag_status": "miss"}
+                )
+
+                # In RAG-only mode, stop right here when no context was found
+                if request.only_rag:
+                    yield openai_chunk(content="未找到相关知识库内容。", finish_reason="stop")
+                    yield "data: [DONE]\n\n"
+                    return
+
+                yield openai_chunk(reasoning_content=" (将依赖 LLM 自身知识)\n")
                 think = False
 
-            yield sse_pack("thinking", "3. 答案生成中...\n")
+            yield openai_chunk(reasoning_content="3. 答案生成中...\n")
 
             # 2. Generate the answer
-            # Manually build the system prompt
             sys_prompt = CUSTOM_RAG_RESPONSE_PROMPT.format(
                 context_data=context_resp,
                 response_type="Multiple Paragraphs",
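With `only_rag` enabled and no context found, a client therefore sees a `miss` status chunk, one final content chunk with `finish_reason: "stop"`, then `data: [DONE]`. A small helper to detect the miss marker in one SSE line, as a sketch:

import json

def is_rag_miss(sse_line: str) -> bool:
    """Sketch: detect the custom x_rag_status == "miss" marker in a single SSE line."""
    if not sse_line.startswith("data: "):
        return False
    payload = sse_line[len("data: "):].strip()
    if payload == "[DONE]":
        return False
    delta = json.loads(payload)["choices"][0]["delta"]
    return delta.get("x_rag_status") == "miss"

assert is_rag_miss('data: {"choices":[{"index":0,"delta":{"x_rag_status":"miss"},"finish_reason":null}]}')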
@@ -179,23 +215,18 @@ async def query_knowledge_base(
                 hashing_kv=rag.llm_response_cache
             )
 
-            thinkState = 0  # think state -- 0: not started, 1: started, 2: finished
             async for chunk in stream_resp:
                 if isinstance(chunk, dict):
                     if chunk.get("type") == "thinking":
-                        if thinkState == 0:
-                            yield sse_pack("thinking", "\n思考:\n")
-                            thinkState = 1
-
-                        yield sse_pack("thinking", chunk["content"])
+                        yield openai_chunk(reasoning_content=chunk["content"])
                     elif chunk.get("type") == "content":
-                        if thinkState == 1:
-                            yield sse_pack("none", "\n\n\n")
-                            thinkState = 2
-
-                        yield sse_pack("answer", chunk["content"])
+                        yield openai_chunk(content=chunk["content"])
                 elif chunk:
-                    yield sse_pack("answer", chunk)
+                    yield openai_chunk(content=chunk)
+
+            # Emit the end-of-stream markers
+            yield openai_chunk(finish_reason="stop")
+            yield "data: [DONE]\n\n"
 
         # Serve with the text/event-stream Content-Type
         return StreamingResponse(stream_generator(), media_type="text/event-stream")
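End to end, the stream can now be consumed by any OpenAI-style SSE client. A minimal Python consumer mirroring the browser logic (the URL, port, endpoint path, and tenant ID are assumptions for illustration):

import json
import httpx  # assumed available; any streaming HTTP client works

thinking, answer, rag_status = [], [], None
body = {"query": "什么是知识图谱?", "stream": True, "mode": "mix",
        "think": True, "only_rag": False}

with httpx.stream("POST", "http://localhost:8000/query", json=body,
                  headers={"X-Tenant-ID": "demo"}) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break
        delta = json.loads(payload)["choices"][0]["delta"]
        rag_status = delta.get("x_rag_status", rag_status)
        thinking.append(delta.get("reasoning_content", ""))
        answer.append(delta.get("content", ""))

print("rag_status:", rag_status)
print("answer:", "".join(answer))

The frontend hunks below make the same change on the browser side.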
@@ -103,8 +103,9 @@
                 <div class="message-content markdown-body" v-html="renderMarkdown(msg.content, msg.thinking, msg.retrievalStatus)"></div>
             </div>
         </div>
-        <div style="display: flex; gap: 10px;">
-            <el-input v-model="queryInput" placeholder="请输入问题..." @keyup.enter="sendQuery"></el-input>
+        <div style="display: flex; gap: 10px; align-items: center;">
+            <el-checkbox v-model="onlyRag" label="仅使用知识库" border></el-checkbox>
+            <el-input v-model="queryInput" placeholder="请输入问题..." @keyup.enter="sendQuery" style="flex: 1;"></el-input>
             <el-button type="primary" :loading="chatLoading" @click="sendQuery">发送</el-button>
         </div>
     </el-tab-pane>
@@ -201,6 +202,7 @@
         // Chat
         const queryInput = ref('');
+        const onlyRag = ref(false);
         const chatHistory = ref([]);
         const chatLoading = ref(false);
         const chatBox = ref(null);
@@ -375,7 +377,13 @@
                 'Content-Type': 'application/json',
                 'X-Tenant-ID': currentTenantId.value
             },
-            body: JSON.stringify({ query: q, stream: true, mode: 'mix', think: true })
+            body: JSON.stringify({
+                query: q,
+                stream: true,
+                mode: 'mix',
+                think: true,
+                only_rag: onlyRag.value
+            })
         });
 
         const reader = response.body.getReader();
@@ -397,32 +405,43 @@
             buffer = blocks.pop(); // keep the last, possibly incomplete block
 
             for (const block of blocks) {
+                if (!block.trim() || block.trim() === 'data: [DONE]') continue;
+
                 const lines = block.split('\n');
-                let eventType = 'answer';
-                let dataText = '';
 
                 for (const line of lines) {
-                    if (line.startsWith('event: ')) {
-                        eventType = line.slice(7).trim();
-                    } else if (line.startsWith('data: ')) {
+                    if (line.startsWith('data: ')) {
                         try {
-                            const data = JSON.parse(line.slice(6));
-                            dataText = data.text;
-                        } catch (e) {}
+                            const jsonStr = line.slice(6);
+                            const chunk = JSON.parse(jsonStr);
+
+                            // Parse the OpenAI-compatible format
+                            if (chunk.choices && chunk.choices[0].delta) {
+                                const delta = chunk.choices[0].delta;
+
+                                // Handle x_rag_status
+                                if (delta.x_rag_status) {
+                                    assistantMsg.retrievalStatus = delta.x_rag_status;
+                                }
+
+                                // Handle the reasoning stream
+                                if (delta.reasoning_content) {
+                                    assistantMsg.thinking += delta.reasoning_content;
+                                }
+
+                                // Handle the answer content
+                                if (delta.content) {
+                                    assistantMsg.content += delta.content;
+                                }
+                            }
+
+                            // Scroll to the bottom
+                            if (chatBox.value) chatBox.value.scrollTop = chatBox.value.scrollHeight;
+
+                        } catch (e) {
+                            console.error('JSON parse error:', e);
+                        }
                     }
                 }
-
-                if (eventType === 'system') {
-                    assistantMsg.retrievalStatus = dataText;
-                } else if (dataText) {
-                    if (eventType === 'thinking') {
-                        assistantMsg.thinking += dataText;
-                    } else if (eventType === 'answer') {
-                        assistantMsg.content += dataText;
-                    }
-                    // Scroll to the bottom
-                    if (chatBox.value) chatBox.value.scrollTop = chatBox.value.scrollHeight;
-                }
             }
         } catch (e) {
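The delta dispatch above can be factored as a pure function, which makes the protocol easy to unit-test without a browser; a sketch in Python with illustrative names:

def apply_delta(msg: dict, delta: dict) -> dict:
    """Apply one OpenAI-style delta to an accumulating message dict (illustrative)."""
    if "x_rag_status" in delta:
        msg["retrievalStatus"] = delta["x_rag_status"]
    msg["thinking"] = msg.get("thinking", "") + delta.get("reasoning_content", "")
    msg["content"] = msg.get("content", "") + delta.get("content", "")
    return msg

msg = {}
apply_delta(msg, {"reasoning_content": "2. 未找到相关上下文\n", "x_rag_status": "miss"})
apply_delta(msg, {"content": "未找到相关知识库内容。"})
assert msg["retrievalStatus"] == "miss" and msg["content"].startswith("未")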
@@ -436,9 +455,9 @@
             let html = '';
 
             if (retrievalStatus) {
-                const color = retrievalStatus === 'retrieved' ? '#67c23a' : '#e6a23c';
-                const text = retrievalStatus === 'retrieved' ? '已检索到相关知识' : '未检索到相关知识,使用通用知识回答';
-                const icon = retrievalStatus === 'retrieved' ? '✔️' : '⚠️';
+                const color = retrievalStatus === 'hit' ? '#67c23a' : '#e6a23c';
+                const text = retrievalStatus === 'hit' ? '已检索到相关知识' : '未检索到相关知识';
+                const icon = retrievalStatus === 'hit' ? '✔️' : '⚠️';
                 html += `<div style="margin-bottom: 8px; font-size: 12px; color: ${color}; font-weight: bold;">${icon} ${text}</div>`;
             }
@@ -468,7 +487,7 @@
             goHome, refreshTenants, enterTenant, fetchDocuments,
             viewDocument, deleteDocument, deleteCurrentDoc,
             uploadFile, uploadText, addQA, removeQA, uploadQA,
-            sendQuery, renderMarkdown, formatDate, isAdmin
+            sendQuery, renderMarkdown, formatDate, isAdmin, onlyRag
         };
     }
 });