diff --git a/.env.example b/.env.example
index 8525e65..bb77f59 100644
--- a/.env.example
+++ b/.env.example
@@ -33,4 +33,5 @@ DATA_DIR=./index_data
 # RAG Configuration
 EMBEDDING_DIM=1024
 MAX_TOKEN_SIZE=8192
-MAX_RAG_INSTANCES=5 # Maximum number of active RAG instances
\ No newline at end of file
+MAX_RAG_INSTANCES=5 # Maximum number of active RAG instances
+COSINE_THRESHOLD=0.4 # Cosine similarity threshold
\ No newline at end of file
diff --git a/app/api/routes.py b/app/api/routes.py
index 137c4a6..cc4a59d 100644
--- a/app/api/routes.py
+++ b/app/api/routes.py
@@ -48,6 +48,53 @@ class QAPair(BaseModel):
 # Endpoint implementations
 # ==========================================
 
+import secrets
+import string
+
+@router.get("/admin/tenants")
+async def list_tenants(token: str):
+    """
+    Admin endpoint: list all tenants
+    """
+    if token != settings.ADMIN_TOKEN:
+        raise HTTPException(status_code=403, detail="Invalid admin token")
+
+    try:
+        if not os.path.exists(settings.DATA_DIR):
+            return {"tenants": []}
+
+        tenants = []
+        for entry in os.scandir(settings.DATA_DIR):
+            if entry.is_dir() and not entry.name.startswith("."):
+                tenant_id = entry.name
+                secret_file = os.path.join(entry.path, ".secret")
+
+                # Read the tenant-specific secret, or generate one
+                if os.path.exists(secret_file):
+                    with open(secret_file, "r") as f:
+                        secret = f.read().strip()
+                else:
+                    # Generate a 16-character random string
+                    alphabet = string.ascii_letters + string.digits
+                    secret = ''.join(secrets.choice(alphabet) for _ in range(16))
+                    try:
+                        with open(secret_file, "w") as f:
+                            f.write(secret)
+                    except Exception as e:
+                        logging.error(f"Failed to write secret for tenant {tenant_id}: {e}")
+                        continue
+
+                # Build the tenant access token (tenant id + "_" + random string)
+                tenant_token = f"{tenant_id}_{secret}"
+                tenants.append({
+                    "id": tenant_id,
+                    "token": tenant_token
+                })
+        return {"tenants": tenants}
+    except Exception as e:
+        logging.error(f"Failed to list tenants: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
 @router.get("/health")
 async def health_check():
     """Health check endpoint"""
@@ -63,6 +110,7 @@ async def query_knowledge_base(
     - query: the user's question
     - mode: retrieval mode (hybrid recommended for factual queries)
     - stream: whether to stream the output (default False)
+    - think: whether to enable thinking mode (default False)
     """
     try:
         # Build the query parameters
@@ -93,12 +141,24 @@ async def query_knowledge_base(
 
             # Fetch the context (the slow step; includes graph traversal)
             context_resp = await rag.aquery(request.query, param=context_param)
+
+            logging.info(f"Context Response: {context_resp}")
 
-            # Naive check for whether anything was found
-            if not context_resp or "Sorry, I'm not able to answer" in context_resp:
-                yield sse_pack("thinking", " (no relevant context found; will rely on the LLM's own knowledge)")
-            else:
+            # Determine the retrieval status
+            has_context = False
+            if context_resp and "[no-context]" not in context_resp and "None" not in context_resp:
+                has_context = True
+
+            # Decide whether think mode stays enabled
+            think = request.think
+
+            if has_context:
+                yield sse_pack("system", "retrieved")  # system event: context retrieved
                 yield sse_pack("thinking", f"2. Context retrieved (length: {len(context_resp)} chars).\n")
+            else:
+                yield sse_pack("system", "missed")  # system event: no context found
+                yield sse_pack("thinking", "2. No relevant context found; will rely on the LLM's own knowledge\n")
+                think = False
 
             yield sse_pack("thinking", "3. Generating the answer...\n")
@@ -115,7 +175,7 @@ async def query_knowledge_base(
                 request.query,
                 system_prompt=sys_prompt,
                 stream=True,
-                think=request.think,
+                think=think,
                 hashing_kv=rag.llm_response_cache
             )
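Note: the change above composes tenant tokens as "<tenant_id>_<secret>" but does not show how tenant-facing routes verify them. Below is a minimal verification sketch under the same on-disk layout (one .secret file per tenant directory); the verify_tenant_token helper is hypothetical and not part of this diff:

import os
import secrets

def verify_tenant_token(token: str, data_dir: str) -> str | None:
    # The secret is alphanumeric (no underscores), so splitting on the
    # last "_" is safe even if the tenant id itself contains underscores.
    tenant_id, _, secret = token.rpartition("_")
    secret_file = os.path.join(data_dir, tenant_id, ".secret")
    if not tenant_id or not os.path.exists(secret_file):
        return None
    with open(secret_file) as f:
        expected = f.read().strip()
    # Constant-time comparison avoids leaking the secret through timing.
    return tenant_id if secrets.compare_digest(secret, expected) else None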
diff --git a/app/config.py b/app/config.py
index 4fee328..10ee6ef 100644
--- a/app/config.py
+++ b/app/config.py
@@ -38,6 +38,10 @@ class Settings(BaseSettings):
     EMBEDDING_DIM: int = 1024
     MAX_TOKEN_SIZE: int = 8192
     MAX_RAG_INSTANCES: int = 3  # Maximum number of active RAG instances
+    COSINE_THRESHOLD: float = 0.4  # Similarity threshold for vector retrieval
+
+    # Admin & Security
+    ADMIN_TOKEN: str = "fzy"
 
     class Config:
         env_file = ".env"
diff --git a/app/core/ingest.py b/app/core/ingest.py
index a1e1fcc..77a76b5 100644
--- a/app/core/ingest.py
+++ b/app/core/ingest.py
@@ -70,7 +70,7 @@ async def process_pdf_with_images(file_bytes: bytes) -> str:
             text_content += f"--- Page {page_num + 1} Text ---\n{page_text}\n\n"
 
             # 2. Extract images
-            if settings.VL_BINDING_HOST:
+            if False and settings.VL_BINDING_HOST:
                 for count, image_file_object in enumerate(page.images):
                     try:
                         # Get the image data
diff --git a/app/core/rag.py b/app/core/rag.py
index 3734472..4ce2c28 100644
--- a/app/core/rag.py
+++ b/app/core/rag.py
@@ -68,7 +68,9 @@ async def openai_llm_func(prompt, system_prompt=None, history_messages=[], **kwa
     stream = kwargs.pop("stream", False)
 
     # The think parameter is DeepSeek-specific; the standard OpenAI interface does not support it
-    kwargs.pop("think", None)
+    think = kwargs.pop("think", None)
+    # Use Qwen3's chat_template_kwargs to enable/disable thinking mode
+    kwargs["chat_template_kwargs"] = {"enable_thinking": think}
 
     messages = []
     if system_prompt:
@@ -169,16 +171,17 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
 # Rerank Functions
 # ==============================================================================
 
-async def tei_rerank_func(query: str, documents: list[str]) -> np.ndarray:
+async def tei_rerank_func(query: str, documents: list[str], top_n: int = 10) -> list[dict]:
     """TEI rerank implementation"""
     if not documents:
-        return np.array([])
+        return []
 
     url = f"{settings.RERANK_BINDING_HOST}/rerank"
     headers = {"Content-Type": "application/json"}
     if settings.RERANK_KEY and settings.RERANK_KEY != "EMPTY":
         headers["Authorization"] = f"Bearer {settings.RERANK_KEY}"
 
+    # TEI does not support a top_n parameter; truncate manually or ignore it
     payload = {
         "query": query,
         "texts": documents,
@@ -191,14 +194,16 @@ async def tei_rerank_func(query: str, documents: list[str]) -> np.ndarray:
         # TEI returns: [{"index": 0, "score": 0.99}, {"index": 1, "score": 0.5}]
         results = response.json()
 
-        # LightRAG expects an array of scores matching the input document order;
-        # TEI returns results sorted by score, so restore the order via index
-        scores = np.zeros(len(documents))
+        # LightRAG expects a list of dicts with index and relevance_score,
+        # so it can map results back to the original documents and sort them
+        formatted_results = []
         for res in results:
-            idx = res['index']
-            scores[idx] = res['score']
+            formatted_results.append({
+                "index": res['index'],
+                "relevance_score": res['score']
+            })
 
-        return scores
+        return formatted_results
 
 # ==============================================================================
 # RAG Manager
 # ==============================================================================
@@ -237,7 +242,8 @@ class RAGManager:
                 func=embedding_func
             ),
             "embedding_func_max_async": 8,  # TEI handles high concurrency well
-            "enable_llm_cache": True
+            "enable_llm_cache": True,
+            "cosine_threshold": settings.COSINE_THRESHOLD
         }
 
         # If rerank is enabled, inject rerank_model_func
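The tei_rerank_func change swaps the return type from a NumPy score array to the list-of-dicts shape LightRAG expects. A toy illustration of the mapping (scores and documents are made up):

# TEI-style response, already sorted by score:
results = [{"index": 2, "score": 0.91}, {"index": 0, "score": 0.34}]
formatted = [{"index": r["index"], "relevance_score": r["score"]} for r in results]

# LightRAG can now map each entry back to the original document:
documents = ["doc a", "doc b", "doc c"]
ranked = [documents[r["index"]] for r in formatted]  # -> ["doc c", "doc a"]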
diff --git a/app/main.py b/app/main.py
index 4a48978..d456f57 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,10 +1,12 @@
 import logging
 from fastapi import FastAPI
+from fastapi.staticfiles import StaticFiles
 from contextlib import asynccontextmanager
 from app.config import settings
 from app.core.rag import initialize_rag_manager
 from app.core.prompts import patch_prompts
 from app.api.routes import router
+import os
 
 # Configure logging
 logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
@@ -26,6 +28,14 @@ app = FastAPI(
     lifespan=lifespan
 )
 
+# Make sure the static directory exists
+static_dir = os.path.join(os.path.dirname(__file__), "static")
+if not os.path.exists(static_dir):
+    os.makedirs(static_dir)
+
+# Mount the static files
+app.mount("/static", StaticFiles(directory=static_dir), name="static")
+
 app.include_router(router)
 
 if __name__ == "__main__":
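With this mount in place, anything under app/static/ is served at /static/, so the new console page below lands at /static/admin.html. A quick smoke test, assuming the default uvicorn host and port:

import urllib.request

# Raises HTTPError on 404; prints 200 once admin.html is in place.
print(urllib.request.urlopen("http://localhost:8000/static/admin.html").status)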
diff --git a/app/static/admin.html b/app/static/admin.html
new file mode 100644
index 0000000..8413afc
--- /dev/null
+++ b/app/static/admin.html
@@ -0,0 +1,471 @@
[471 added lines of admin console markup omitted; only the template binding "{{ currentDocContent }}" survives in this view.]
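A usage sketch for the new admin endpoint (host, port, and the returned tenant data are illustrative; ADMIN_TOKEN defaults to "fzy" in config.py and should be overridden via .env in any real deployment):

import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/admin/tenants?token=fzy") as resp:
    print(json.load(resp))
# -> {"tenants": [{"id": "tenant1", "token": "tenant1_aB3xK9QmZp7Lw2Rt"}]}

Since the admin token travels in the query string, it will typically appear in access logs; an Authorization header would be the more conventional carrier.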