From de547c99523182f33b9df539ebee487da0c992c2 Mon Sep 17 00:00:00 2001
From: fuzhongyun <15339891972@163.com>
Date: Wed, 28 Jan 2026 14:03:03 +0800
Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E8=B0=83=E6=95=B4=E7=94=9F?=
 =?UTF-8?q?=E4=BA=A7=E9=83=A8=E7=BD=B2=E5=9F=BA=E7=A1=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile       | 20 +++++++++----------
 README.md        | 50 ++++++++++++++++++++++++++++++------------------
 deploy.sh        |  7 +++++--
 requirements.txt |  2 ++
 4 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index a39a782..a0c03d6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,29 +1,29 @@
-# Use an official Python runtime as a parent image
+# 使用官方 Python 3.11 运行环境作为基础镜像
 FROM python:3.11-slim
 
-# Set the working directory in the container
+# 设置工作目录
 WORKDIR /app
 
-# Install system dependencies
-# build-essential contains gcc and other tools often needed for python packages
+# 安装系统依赖
+# build-essential 包含 gcc 等编译工具，某些 Python 包安装时需要
 RUN apt-get update && apt-get install -y --no-install-recommends \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy the requirements file into the container at /app
+# 将依赖文件复制到容器中
 COPY requirements.txt .
 
-# Install any needed packages specified in requirements.txt
+# 安装 requirements.txt 中指定的 Python 包
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the current directory contents into the container at /app
+# 将当前目录内容复制到容器中的 /app 目录下
 COPY . .
 
-# Make port available to the world outside this container
+# 暴露 9600 端口供外部访问
 EXPOSE 9600
 
-# Define environment variable
+# 设置环境变量，确保 Python 输出直接打印到控制台
 ENV PYTHONUNBUFFERED=1
 
-# Run app.py when the container launches
+# 容器启动时运行 uvicorn
 CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "9600"]
diff --git a/README.md b/README.md
index 4a5bfd0..8132f31 100644
--- a/README.md
+++ b/README.md
@@ -53,25 +53,35 @@ docker run -p 9600:9600 --env-file .env lightrag-api
 
 ## 📚 API 文档 (核心)
 
+服务接口的响应格式兼容 OpenAI 标准 (并附带 `reasoning_content`、`x_rag_status` 等扩展字段)，支持流式与非流式输出。
+
 | 接口 | 方法 | 描述 | 示例 |
 | :--- | :--- | :--- | :--- |
-| `/query` | POST | 知识检索 | `{"query": "问题", "mode": "hybrid", "stream": true}` |
+| `/query` | POST | 知识检索 | `{"query": "问题", "mode": "hybrid", "stream": true, "think": true, "only_rag": false}` |
 | `/ingest/file` | POST | 上传文件 | `multipart/form-data`, file=@doc.pdf |
+| `/ingest/text` | POST | 摄入纯文本 | `{"text": "文本内容"}` |
+| `/ingest/batch_qa` | POST | 批量摄入 QA | `[{"question": "Q1", "answer": "A1"}, ...]` |
 | `/documents` | GET | 文档列表 | 查看已索引文档及状态 |
 | `/docs/{id}` | DELETE | 删除文档 | 根据 ID 删除文档及关联图谱数据 |
 
-**检索模式 (Mode) 说明**:
+**`/query` 参数说明**:
 
-- `hybrid` (推荐): 混合检索，同时利用向量相似度(事实)和知识图谱(关系)。
-- `naive`: 纯向量检索，速度快但缺乏深度关联。
-- `local`: 侧重于实体细节的图谱检索。
-- `global`: 侧重于宏观关系的图谱总结。
+- `query`: 用户问题。
+- `mode`: 检索模式 (`hybrid`, `naive`, `local`, `global`)。推荐使用 `hybrid`。
+- `stream`: 是否流式输出 (OpenAI 兼容 Chunk 格式)。
+- `think`: 是否启用思考模式 (DeepSeek 风格，返回 `reasoning_content`)。
+- `only_rag`: **严格模式**。若为 `true`，未从知识库检索到内容时将拒绝回答，不使用 LLM 通用知识。
 
-**租户管理**：
+**响应字段 (流式)**:
+- `delta.content`: 正文回答。
+- `delta.reasoning_content`: 思考过程 (DeepSeek 风格)。
+- `delta.x_rag_status`: 检索命中状态 (`hit` 或 `miss`)。
 
+**租户管理**:
+
+通过 Header `X-Tenant-ID` 进行租户隔离，每个租户拥有独立的存储空间。
 ```bash
-curl -H "X-Tenant-ID: <tenant_id>" http://localhost:9600/xxx
-# <tenant_id> 租户ID 默认值: default
+curl -H "X-Tenant-ID: my_tenant" -H "Content-Type: application/json" http://localhost:9600/query -d '{"query": "..."}'
 ```
 
 ## 🛠️ 项目结构
@@ -79,16 +89,18 @@ curl -H "X-Tenant-ID: <tenant_id>" http://localhost:9600/xxx
 ```text
 /
 ├── app/
-│   ├── api/            # 接口路由定义
-│   ├── core/           # 核心逻辑 (RAG实例, Prompt优化)
-│   ├── config.py       # 配置管理
-│   └── main.py         # 程序入口
-├── index_data/         # [重要] 知识库持久化数据
-│   ├── graph_chunk_entity_relation.graphml  # 完整的知识图谱结构 (NetworkX格式)
-│   ├── kv_store_*.json       # 键值存储 (文档原文, 实体描述, 关系描述等)
-│   ├── vdb_*.json            # 向量数据库 (实体向量, 关系向量, 文本块向量)
-│   └── lightrag_cache.json   # LLM 响应缓存 (加速重复查询)
-├── requirements.txt    # 依赖列表
+│   ├── api/            # 接口路由定义 (OpenAI 标准流式实现)
+│   ├── core/           # 核心逻辑 (RAG Manager, 多租户管理, PDF图文解析)
+│   ├── config.py       # Pydantic-settings 配置管理
+│   └── main.py         # FastAPI 入口
+├── index_data/         # [重要] 知识库持久化数据根目录
+│   └── {tenant_id}/    # 各租户独立文件夹
+│       ├── graph_*.graphml    # 知识图谱结构
+│       ├── kv_store_*.json    # 键值存储 (文本块, 实体描述等)
+│       └── vdb_*.json         # 向量数据库
+├── requirements.txt    # 依赖列表 (包含 Pillow, PyPDF 等)
+├── Dockerfile          # 容器化构建文件 (中文注释)
+├── deploy.sh           # 一键部署脚本 (支持 host-gateway 访问宿主机 Ollama)
 └── .env                # 环境变量配置
 ```
 
diff --git a/deploy.sh b/deploy.sh
index f3eb9d8..c647919 100644
--- a/deploy.sh
+++ b/deploy.sh
@@ -22,13 +22,16 @@ fi
 
 # 3. 启动新容器
 echo "启动服务..."
-# 注意：使用 --env-file 挂载环境变量
-# 使用 -v 挂载数据卷，确保数据持久化
+# 注意：
+# 1. 使用 --env-file 挂载环境变量
+# 2. 使用 -v 挂载数据卷，确保数据持久化
+# 3. 使用 --add-host 允许容器访问宿主机的 Ollama/vLLM 服务 (host.docker.internal)
 docker run -d \
     --name $CONTAINER_NAME \
     -p $PORT:$PORT \
     --env-file .env \
     -v "$DATA_DIR":/app/index_data \
+    --add-host=host.docker.internal:host-gateway \
     --restart unless-stopped \
     $IMAGE_NAME
 
diff --git a/requirements.txt b/requirements.txt
index 77bbc1e..ed5e077 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,5 @@ ollama
 numpy
 httpx
 pydantic-settings
+Pillow
+pydantic