# =========================
# Build stage
# =========================
# Installs Python dependencies, protoc, and the PP-OCRv5 models, then generates
# the protobuf code. Only selected artifacts are copied into the runtime stage.
FROM python:3.10.18-bookworm AS builder

# Point apt at the Tsinghua (TUNA) mirror.
RUN sed -i 's@http://deb.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' /etc/apt/sources.list.d/debian.sources && \
    sed -i 's@http://security.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' /etc/apt/sources.list.d/debian.sources

WORKDIR /app

# Install build dependencies. DEBIAN_FRONTEND is set inline so it never leaks
# into the image environment.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        antiword \
        curl \
        gcc \
        libffi-dev \
        libgl1 \
        libglib2.0-0 \
        libjpeg-dev \
        libpq-dev \
        python3-dev \
        unzip \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install protoc. The release asset is selected from the machine architecture
# so that arm64 builds do not receive the x86_64 binary; unknown architectures
# fail the build. curl -f makes an HTTP error abort the step immediately.
ARG PROTOC_VERSION=3.19.4
RUN set -eu; \
    case "$(uname -m)" in \
        x86_64) protoc_arch="x86_64" ;; \
        aarch64|arm64) protoc_arch="aarch_64" ;; \
        *) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;; \
    esac; \
    protoc_zip="protoc-${PROTOC_VERSION}-linux-${protoc_arch}.zip"; \
    curl -fsSLO "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${protoc_zip}"; \
    unzip "${protoc_zip}" -d /usr/local; \
    chmod +x /usr/local/bin/protoc; \
    rm "${protoc_zip}"

# Copy the dependency manifest on its own so the pip layer stays cached until
# requirements.txt changes.
COPY services/docreader/requirements.txt .

# Install Python dependencies from the TUNA PyPI mirror. --no-cache-dir keeps
# pip's cache out of the layer, so no separate cache purge is needed.
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

# Pre-download the PP-OCRv5 detection and recognition models, unpack them,
# delete the archives in the same layer, and list the directory so the build
# log shows what was installed (ls fails the build if the directory is missing).
RUN set -eu; \
    models_dir=/root/.paddlex/official_models; \
    mkdir -p "${models_dir}"; \
    for model in PP-OCRv5_server_det_infer PP-OCRv5_server_rec_infer; do \
        wget "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/${model}.tar" \
            -O "${models_dir}/${model}.tar"; \
        tar -xf "${models_dir}/${model}.tar" -C "${models_dir}/"; \
        rm -rf "${models_dir}/${model}.tar"; \
    done; \
    ls -la "${models_dir}"

# Copy the source code and the code-generation scripts.
COPY services/docreader/src/ /app/src/
COPY services/docreader/scripts/ /app/scripts/

# Generate the protobuf code.
RUN chmod +x /app/scripts/generate_proto.sh && bash /app/scripts/generate_proto.sh

# =========================
# Runtime stage
# =========================
FROM python:3.10.18-bookworm AS runner

# Point apt at the Tsinghua (TUNA) mirror.
RUN sed -i 's@http://deb.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' /etc/apt/sources.list.d/debian.sources && \
    sed -i 's@http://security.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' /etc/apt/sources.list.d/debian.sources

WORKDIR /app

# Install runtime dependencies (including the distribution's LibreOffice).
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        antiword \
        dpkg \
        gnupg \
        libcairo2 \
        libcups2 \
        libdbus-glib-1-2 \
        libfontconfig1 \
        libgl1 \
        libglib2.0-0 \
        libglu1-mesa \
        libjpeg62-turbo \
        libpq5 \
        libreoffice \
        libsm6 \
        libxinerama1 \
        supervisor \
        tar \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Disabled alternative: download and install upstream LibreOffice .deb packages
# per architecture instead of using the distribution package installed above.
# RUN mkdir -p /tmp/libreoffice && cd /tmp/libreoffice && \
#     if [ "$(uname -m)" = "x86_64" ]; then \
#         wget https://mirrors.tuna.tsinghua.edu.cn/libreoffice/libreoffice/stable/25.2.5/deb/x86_64/LibreOffice_25.2.5_Linux_x86-64_deb.tar.gz && \
#         tar -xzf LibreOffice_25.2.5_Linux_x86-64_deb.tar.gz && \
#         cd LibreOffice_25.2.5.2_Linux_x86-64_deb/DEBS && dpkg -i *.deb; \
#     elif [ "$(uname -m)" = "aarch64" ] || [ "$(uname -m)" = "arm64" ]; then \
#         wget https://mirrors.aliyun.com/libreoffice/testing/25.8.0/deb/aarch64/LibreOffice_25.8.0.3_Linux_aarch64_deb.tar.gz && \
#         tar -xzf LibreOffice_25.8.0.3_Linux_aarch64_deb.tar.gz && \
#         cd LibreOffice_25.8.0.3_Linux_aarch64_deb/DEBS && dpkg -i *.deb; \
#     else \
#         echo "Unsupported architecture: $(uname -m)" && exit 1; \
#     fi && \
#     cd / && rm -rf /tmp/libreoffice

# Disabled: LibreOffice environment variable for the upstream install above.
# RUN echo 'export LIBREOFFICE_PATH=/opt/libreoffice25.2/program/soffice' >> /etc/environment;

# Copy the installed Python packages, console scripts (and protoc), the OCR
# models, and the source tree from the build stage.
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
COPY --from=builder /root/.paddlex/official_models /root/.paddlex/official_models
COPY --from=builder /app/src /app/src

# Install the Playwright WebKit browser together with its system libraries in
# one layer, and drop the apt lists that the dependency installation leaves
# behind so they do not end up in the image.
RUN python -m playwright install --with-deps webkit && \
    rm -rf /var/lib/apt/lists/*

# Make the service sources importable.
ENV PYTHONPATH=/app/src

# Run the project's download_deps module from the source directory, then
# restore /app as the working directory used at container start.
WORKDIR /app/src
RUN python -m download_deps
WORKDIR /app

# Install the supervisor configuration (COPY creates the target directory if
# it does not exist).
COPY services/docreader/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Expose the gRPC port.
EXPOSE 50051

# Start the service under supervisor.
CMD ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]