# Dockerfile (source listing metadata: 135 lines, 4.9 KiB)
# =========================
# Build stage
# =========================
# Python dependencies, OCR models and generated code are produced in this
# stage; the runner stage copies them out with COPY --from=builder.
FROM python:3.10.18-bookworm AS builder

# Switch apt to the Tsinghua (TUNA) mirror. Both substitutions are applied to
# the same sources file in a single sed pass; a host that does not occur in
# the file is simply left untouched.
RUN sed -i \
        -e 's@http://deb.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' \
        -e 's@http://security.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' \
        /etc/apt/sources.list.d/debian.sources

WORKDIR /app

# Install build-time system dependencies.
# --no-install-recommends limits the layer to the packages named here, and the
# apt lists are deleted in the same layer so they never persist in the image.
# Packages are listed alphabetically, one per line, to keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        antiword \
        curl \
        gcc \
        libffi-dev \
        libgl1 \
        libglib2.0-0 \
        libjpeg-dev \
        libpq-dev \
        python3-dev \
        unzip \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install protoc (the protobuf compiler) from the upstream release archive.
# - PROTOC_VERSION defaults to the previously hard-coded 3.19.4 and can be
#   overridden with --build-arg.
# - The release asset is chosen from the build machine architecture; upstream
#   names the 64-bit ARM archive "aarch_64".
# - curl -f makes an HTTP error fail this step instead of saving an error page
#   that would only be detected later by unzip.
ARG PROTOC_VERSION=3.19.4
RUN set -eu; \
    case "$(uname -m)" in \
        x86_64) protoc_arch="x86_64" ;; \
        aarch64|arm64) protoc_arch="aarch_64" ;; \
        *) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;; \
    esac; \
    protoc_zip="protoc-${PROTOC_VERSION}-linux-${protoc_arch}.zip"; \
    curl -fsSLO "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${protoc_zip}"; \
    unzip "${protoc_zip}" -d /usr/local; \
    chmod +x /usr/local/bin/protoc; \
    rm "${protoc_zip}"

# Copy only the dependency manifest first, so the install layer below is
# rebuilt only when requirements.txt changes, not on every source change.
COPY services/docreader/requirements.txt .

# Install the Python dependencies from the Tsinghua PyPI mirror without
# keeping a pip cache in the layer.
RUN pip cache purge && \
    pip install --no-cache-dir \
        --index-url https://pypi.tuna.tsinghua.edu.cn/simple \
        --requirement requirements.txt

# Pre-download the PP-OCRv5 server detection and recognition models.
# Each archive is downloaded, unpacked into the models directory and deleted
# within this one layer, so only the extracted model files remain.
RUN set -eu; \
    models_dir=/root/.paddlex/official_models; \
    base_url=https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0; \
    mkdir -p "${models_dir}"; \
    for model in PP-OCRv5_server_det_infer PP-OCRv5_server_rec_infer; do \
        wget "${base_url}/${model}.tar" -O "${models_dir}/${model}.tar"; \
        tar -xf "${models_dir}/${model}.tar" -C "${models_dir}/"; \
        rm -rf "${models_dir}/${model}.tar"; \
    done

# Copy the service sources and the code-generation scripts.
COPY services/docreader/src/ /app/src/
COPY services/docreader/scripts/ /app/scripts/

# Sanity check: ls exits non-zero, failing the build, if the model directory
# is missing.
RUN ls -la /root/.paddlex/official_models

# Generate the protobuf code.
RUN chmod +x /app/scripts/generate_proto.sh && bash /app/scripts/generate_proto.sh

# =========================
# Runtime stage
# =========================
FROM python:3.10.18-bookworm AS runner

# Switch apt to the Tsinghua (TUNA) mirror. Both substitutions are applied to
# the same sources file in a single sed pass; a host that does not occur in
# the file is simply left untouched.
RUN sed -i \
        -e 's@http://deb.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' \
        -e 's@http://security.debian.org@https://mirrors.tuna.tsinghua.edu.cn@g' \
        /etc/apt/sources.list.d/debian.sources

WORKDIR /app

# Install runtime system dependencies (shared libraries, supervisor and
# LibreOffice). The apt lists are removed in the same layer.
# Packages are listed alphabetically, one per line, to keep diffs small.
# NOTE(review): recommended packages are still installed here on purpose;
# switching to --no-install-recommends may drop packages LibreOffice relies on
# (e.g. fonts) - verify document conversion before changing it. vim looks like
# a debugging convenience - confirm whether it is needed in this image.
RUN apt-get update && apt-get install -y \
        antiword \
        dpkg \
        gnupg \
        libcairo2 \
        libcups2 \
        libdbus-glib-1-2 \
        libfontconfig1 \
        libgl1 \
        libglib2.0-0 \
        libglu1-mesa \
        libjpeg62-turbo \
        libpq5 \
        libreoffice \
        libsm6 \
        libxinerama1 \
        supervisor \
        tar \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# # Download and install LibreOffice (per architecture)
# RUN mkdir -p /tmp/libreoffice && cd /tmp/libreoffice && \
#     if [ "$(uname -m)" = "x86_64" ]; then \
#         wget https://mirrors.tuna.tsinghua.edu.cn/libreoffice/libreoffice/stable/25.2.5/deb/x86_64/LibreOffice_25.2.5_Linux_x86-64_deb.tar.gz && \
#         tar -xzf LibreOffice_25.2.5_Linux_x86-64_deb.tar.gz && \
#         cd LibreOffice_25.2.5.2_Linux_x86-64_deb/DEBS && dpkg -i *.deb; \
#     elif [ "$(uname -m)" = "aarch64" ] || [ "$(uname -m)" = "arm64" ]; then \
#         wget https://mirrors.aliyun.com/libreoffice/testing/25.8.0/deb/aarch64/LibreOffice_25.8.0.3_Linux_aarch64_deb.tar.gz && \
#         tar -xzf LibreOffice_25.8.0.3_Linux_aarch64_deb.tar.gz && \
#         cd LibreOffice_25.8.0.3_Linux_aarch64_deb/DEBS && dpkg -i *.deb; \
#     else \
#         echo "Unsupported architecture: $(uname -m)" && exit 1; \
#     fi && \
#     cd / && rm -rf /tmp/libreoffice

# Set the LibreOffice environment variable
# RUN echo 'export LIBREOFFICE_PATH=/opt/libreoffice25.2/program/soffice' >> /etc/environment;

# Copy the installed dependencies and the generated code from the build stage:
# Python site-packages, everything under /usr/local/bin, the pre-downloaded
# OCR models, and the service sources.
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
COPY --from=builder /root/.paddlex/official_models /root/.paddlex/official_models
COPY --from=builder /app/src /app/src

# Install the Playwright WebKit browser together with its system packages.
# --with-deps performs the install-deps step as part of the same command, so
# the apt lists it downloads can be deleted in this layer instead of
# persisting in the image.
RUN python -m playwright install --with-deps webkit \
    && rm -rf /var/lib/apt/lists/*

# Put the service sources on the Python import path.
ENV PYTHONPATH=/app/src

# Run the download_deps module from the source directory, then restore /app as
# the working directory so the container still starts there.
WORKDIR /app/src
RUN python -m download_deps
WORKDIR /app

# Install the supervisor configuration. COPY creates the destination
# directory if it does not exist, so no separate mkdir step is needed.
COPY services/docreader/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Expose the gRPC port (documentation only; it does not publish the port).
EXPOSE 50051

# Start the service under supervisor. Exec (JSON array) form runs supervisord
# directly rather than through /bin/sh -c.
CMD ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]