commit 80d3ffa0e704764e9715365d0662f5e8a2458397 Author: fuzhongyun <15339891972@163.com> Date: Mon Dec 29 16:36:36 2025 +0800 init diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..b40f5dc --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Python-generated files +__pycache__/ +*.py[oc] +.pytest* +build/ +dist/ +wheels/ +*.egg-info +*.png + +# Virtual environments +.venv diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a7a1263 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +# 使用官方轻量级 Python 镜像 +FROM python:3.12-slim + +# 设置工作目录 +WORKDIR /app + +# 安装依赖 +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# 复制源码和字体文件 +# 注意:我们将当前目录的所有内容复制到 /app +COPY . . + +# 确保 assets 目录存在 (以防本地没有创建但有字体文件散落在根目录的情况,虽然我们在 deploy 前已经整理了) +# 设置字体路径环境变量(可选,如果在代码中使用了默认值) +ENV FONT_PATH_REGULAR=/app/assets/simsun.ttc +ENV FONT_PATH_BOLD=/app/assets/simhei.ttf + +# 暴露端口 +EXPOSE 8000 + +# 启动命令 +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md new file mode 100755 index 0000000..f8cb672 --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +# Excel2Pic API Service + +这是一个轻量级的 Excel 转图片 API 服务,专为插件化部署设计。 +它使用 FastAPI 构建,底层基于 `openpyxl` 和 `Pillow` (PIL) 实现高效的表格渲染,无需依赖任何 Office 组件或浏览器环境。 + +## 特性 + +* 🚀 **高性能**:基于 FastAPI,原生异步支持。 +* 📦 **极轻量**:Docker 镜像体积 < 200MB。 +* 🎨 **纯 Python 实现**:无需 LibreOffice 或 Headless Chrome。 +* 🔠 **中文支持**:内置宋体和黑体支持,解决中文乱码问题。 +* 🐳 **一键部署**:提供 Dockerfile 和一键部署脚本。 + +## 目录结构 + +``` +excel2pic/ +├── app.py # FastAPI 应用入口 +├── core/ # 核心逻辑模块 +│ ├── renderer.py # 渲染引擎 (ExcelRenderer) +│ └── __init__.py +├── assets/ # 静态资源 (字体文件) +│ ├── simsun.ttc # 宋体 +│ └── simhei.ttf # 黑体 +├── tests/ # 单元测试 +├── deploy.sh # 一键部署脚本 +├── Dockerfile # Docker 构建文件 +└── requirements.txt # 项目依赖 +``` + +## 快速开始 + +### 1. 本地运行 + +**前置条件**: Python 3.12+ + +```bash +# 1. 创建并激活虚拟环境 +python -m venv .venv +source .venv/bin/activate # Linux/Mac +# .venv\Scripts\activate # Windows + +# 2. 
@app.post("/api/v1/convert", summary="Convert Excel to Image")
async def convert_excel(
    file: UploadFile = File(..., description="The Excel file to convert"),
    sheet_name: str = Form(None, description="Name of the sheet to convert (optional, defaults to active sheet)"),
):
    """
    Convert an uploaded Excel file to a PNG image.

    Args:
        file: The uploaded workbook. Its name must end in ``.xlsx`` or ``.xls``
            (checked case-insensitively).
        sheet_name: Optional worksheet name; when omitted the renderer picks
            the workbook's active sheet.

    Returns:
        A streaming ``image/png`` response whose ``Content-Disposition`` header
        carries the upload's base name with a ``.png`` extension.

    Raises:
        HTTPException: 400 when the file name has an unsupported extension,
            when the upload is not a readable zip-based workbook, or when the
            renderer raises ``ValueError`` (e.g. unknown sheet name);
            500 for any other failure during conversion.
    """
    # Function-scope imports: these names are only needed by this handler.
    import zipfile
    from pathlib import PurePosixPath
    from urllib.parse import quote

    from fastapi.concurrency import run_in_threadpool

    # The client may omit the filename entirely, so never assume a string.
    upload_name = file.filename or ""
    if not upload_name.lower().endswith(('.xlsx', '.xls')):
        raise HTTPException(status_code=400, detail="Invalid file format. Please upload .xlsx or .xls file.")

    def _render(contents: bytes) -> bytes:
        # Note: In a real deployment, font paths might come from env vars
        renderer = ExcelRenderer(contents)
        return renderer.render_to_bytes(sheet_name=sheet_name)

    try:
        contents = await file.read()
        # Parsing and drawing are synchronous, CPU-bound work; run them in the
        # thread pool so the event loop keeps serving other requests.
        image_bytes = await run_in_threadpool(_render, contents)
    except ValueError as ve:
        # Often raised when sheet name is not found
        logger.warning("Value Error: %s", ve)
        raise HTTPException(status_code=400, detail=str(ve)) from ve
    except zipfile.BadZipFile as bad_zip:
        # The payload is not a zip container (e.g. a legacy binary .xls or a
        # truncated upload); that is a client error, not a server fault.
        logger.warning("Unreadable workbook upload %r: %s", upload_name, bad_zip)
        raise HTTPException(
            status_code=400,
            detail="The uploaded file is not a valid .xlsx workbook.",
        ) from bad_zip
    except Exception as e:
        logger.error("Internal Server Error: %s", e, exc_info=True)
        # Internal exception text stays in the log and is not sent to clients.
        raise HTTPException(status_code=500, detail="An error occurred during conversion.") from e

    # Keep everything before the final extension ("report.v2.xlsx" ->
    # "report.v2.png") and drop any directory part sent by the client.
    stem = PurePosixPath(upload_name.replace("\\", "/")).stem or "result"
    # Percent-encode so non-ASCII (e.g. Chinese) names survive in the header.
    encoded_filename = quote(stem + ".png")

    return StreamingResponse(
        io.BytesIO(image_bytes),
        media_type="image/png",
        headers={"Content-Disposition": f"inline; filename*=utf-8''{encoded_filename}"}
    )
class ExcelRenderer:
    """Render one worksheet of an Excel workbook to a PNG image with Pillow.

    The sheet is drawn as a grid with a fixed row height. Column widths come
    from the worksheet's column dimensions, merged ranges are drawn as a single
    cell, and each cell's fill colour, font weight, font colour and alignment
    are taken from the openpyxl cell style.
    """

    def __init__(
        self,
        file_content: Union[bytes, BinaryIO],
        font_path_regular: str = "assets/simsun.ttc",
        font_path_bold: str = "assets/simhei.ttf",
    ):
        """
        Initialize the renderer with Excel file content and font paths.

        Args:
            file_content: Workbook data as raw bytes or as a binary stream.
            font_path_regular: Path of the font used for normal text.
            font_path_bold: Path of the font used for bold text.

        Raises:
            TypeError: If ``file_content`` is neither bytes-like nor a
                readable binary stream.
        """
        self.file_content = self._to_stream(file_content)
        self.font_path_regular = font_path_regular
        self.font_path_bold = font_path_bold
        self._load_fonts()

    @staticmethod
    def _to_stream(file_content) -> io.BytesIO:
        """Return ``file_content`` as a ``BytesIO`` (reused if it already is one)."""
        if isinstance(file_content, io.BytesIO):
            return file_content
        if isinstance(file_content, (bytes, bytearray, memoryview)):
            return io.BytesIO(file_content)
        # Any other binary stream (open file, BufferedReader, ...) is read
        # fully into memory; io.BytesIO() cannot wrap such objects directly.
        reader = getattr(file_content, "read", None)
        if callable(reader):
            return io.BytesIO(reader())
        raise TypeError(
            "file_content must be bytes or a binary file-like object, "
            f"got {type(file_content).__name__}"
        )

    @staticmethod
    def _load_font(primary_path: str, fallback_name: str, size: int = 12):
        """Load ``primary_path``, then ``fallback_name``, then Pillow's default font."""
        for candidate in (primary_path, fallback_name):
            try:
                return ImageFont.truetype(candidate, size)
            except OSError:
                # Font file missing or unreadable: try the next candidate.
                continue
        return ImageFont.load_default()

    def _load_fonts(self):
        """
        Load fonts with fallback mechanisms.

        Sets ``self.font_regular`` and ``self.font_bold``.
        """
        self.font_regular = self._load_font(self.font_path_regular, "arial.ttf")
        self.font_bold = self._load_font(self.font_path_bold, "arialbd.ttf")

    def render_to_bytes(self, sheet_name: Optional[str] = None, dpi: int = 200, padding: int = 20) -> bytes:
        """
        Render the specified sheet to a PNG image and return bytes.

        Args:
            sheet_name: Worksheet to render; ``None`` selects the active sheet.
            dpi: Resolution passed to the PNG encoder for both axes.
            padding: Blank margin in pixels around the grid.

        Returns:
            The encoded PNG file content.

        Raises:
            ValueError: If ``sheet_name`` is not in the workbook or the
                selected sheet is not a regular worksheet.
        """
        img = self._render_image(sheet_name, padding)

        output = io.BytesIO()
        img.save(output, format='PNG', dpi=(dpi, dpi))
        return output.getvalue()

    def _render_image(self, sheet_name: Optional[str], padding: int) -> "Image.Image":
        """
        Internal method to draw the Excel sheet onto a PIL Image.

        Raises:
            ValueError: If the requested sheet does not exist or cannot be
                rendered as a cell grid.
        """
        # data_only=True yields cached formula results instead of formulas.
        wb = load_workbook(self.file_content, data_only=True)

        if sheet_name is None:
            sheet = wb.active
        elif sheet_name in wb.sheetnames:
            sheet = wb[sheet_name]
        else:
            raise ValueError(f"Sheet '{sheet_name}' not found. Available sheets: {wb.sheetnames}")

        # Chart sheets (or a missing active sheet) have no cell grid to draw.
        if not isinstance(sheet, Worksheet):
            raise ValueError("The selected sheet is not a regular worksheet and cannot be rendered.")

        return self._draw_sheet(sheet, padding)

    def _draw_sheet(self, sheet: "Worksheet", padding: int) -> "Image.Image":
        """Draw every cell of ``sheet`` onto a new white RGB image and return it."""
        # Local import: the module-level imports do not provide this helper.
        from openpyxl.utils import get_column_letter

        cell_height = 40  # Fixed row height in pixels

        max_row = sheet.max_row
        max_col = sheet.max_column

        # Column widths. The letter is derived from the index rather than from
        # a row-1 cell, because cells inside a merged range do not expose
        # ``column_letter``.
        col_widths_pixels = []
        for col in range(1, max_col + 1):
            col_dim = sheet.column_dimensions[get_column_letter(col)]
            col_width_excel = col_dim.width if col_dim.width else 10
            # Excel width to pixels (approximate factor ~7 + padding)
            col_widths_pixels.append(int(col_width_excel * 7) + 5)

        img_width = sum(col_widths_pixels) + 2 * padding
        img_height = max_row * cell_height + 2 * padding

        img = Image.new('RGB', (img_width, img_height), color='white')
        draw = ImageDraw.Draw(img)

        # col_x_positions[i] is the left edge of column i + 1; the final entry
        # is the right edge of the last column.
        col_x_positions = [padding]
        for width in col_widths_pixels:
            col_x_positions.append(col_x_positions[-1] + width)

        # Merged ranges: the top-left (anchor) cell is drawn across the whole
        # range and every other cell of the range is skipped.
        anchor_extent = {}
        covered = set()
        for merged in sheet.merged_cells.ranges:
            anchor = (merged.min_row, merged.min_col)
            anchor_extent[anchor] = (
                min(merged.max_row, max_row),
                min(merged.max_col, max_col),
            )
            for r in range(merged.min_row, merged.max_row + 1):
                for c in range(merged.min_col, merged.max_col + 1):
                    if (r, c) != anchor:
                        covered.add((r, c))

        for row in range(1, max_row + 1):
            for col in range(1, max_col + 1):
                if (row, col) in covered:
                    continue
                last_row, last_col = anchor_extent.get((row, col), (row, col))

                x1 = col_x_positions[col - 1]
                y1 = padding + (row - 1) * cell_height
                x2 = col_x_positions[last_col]
                y2 = padding + last_row * cell_height

                self._draw_cell(draw, sheet.cell(row=row, column=col), x1, y1, x2, y2)

        return img

    def _draw_cell(self, draw: "ImageDraw.ImageDraw", cell, x1, y1, x2, y2):
        """Draw one cell (background, border, text) into the box (x1, y1)-(x2, y2)."""
        # Background colour: only when a fill pattern is actually set; fills
        # without a ``start_color`` (e.g. gradients) fall back to white.
        fill = cell.fill
        fill_color = None
        if getattr(fill, "fill_type", None):
            fill_color = getattr(getattr(fill, "start_color", None), "rgb", None)
        bg_color = self._parse_color(fill_color, default=(255, 255, 255))

        # Draw background and border
        draw.rectangle([x1, y1, x2, y2], fill=bg_color, outline=(200, 200, 200))

        cell_value = cell.value
        if cell_value is None:
            return

        text = self._format_cell_value(cell, cell_value)
        if not text:
            return

        # Font handling
        is_bold = cell.font and cell.font.bold
        current_font = self.font_bold if is_bold else self.font_regular

        # Font colour; non-string colour values are rejected by _parse_color
        # and therefore render in the default black.
        font_color_hex = cell.font.color.rgb if (cell.font and cell.font.color) else None
        text_color = self._parse_color(font_color_hex, default=(0, 0, 0))

        # Alignment
        h_align = cell.alignment.horizontal if (cell.alignment and cell.alignment.horizontal) else 'left'
        v_align = cell.alignment.vertical if (cell.alignment and cell.alignment.vertical) else 'center'

        self._draw_text(draw, text, x1, y1, x2, y2, current_font, text_color, h_align, v_align)

    def _parse_color(self, color_code, default=(0, 0, 0)) -> Tuple[int, int, int]:
        """
        Convert an ``RRGGBB`` or ``AARRGGBB`` hex string to an ``(r, g, b)`` tuple.

        Returns ``default`` for empty values, non-string values, the all-zero
        ARGB value ``'00000000'`` and anything that is not valid 6/8-digit hex.
        """
        if not isinstance(color_code, str) or not color_code or color_code == '00000000':
            return default

        # ARGB: the first two digits are the alpha channel, which is ignored.
        hex_rgb = color_code[2:] if len(color_code) == 8 else color_code
        if len(hex_rgb) != 6:
            return default

        try:
            return tuple(int(hex_rgb[i:i + 2], 16) for i in (0, 2, 4))
        except ValueError:
            return default

    def _format_cell_value(self, cell, value) -> str:
        """
        Return the display text for ``value`` using a simplified reading of
        the cell's number format.

        Only fixed one/two-decimal patterns and percent patterns are
        recognised; every other format falls back to ``str(value)``.
        """
        # bool is a subclass of int but must not be formatted as a number.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return str(value)

        number_format = str(cell.number_format) if cell.number_format else ""
        is_percent = "%" in number_format
        # Percent formats display the stored fraction multiplied by 100.
        number = value * 100 if is_percent else value
        suffix = "%" if is_percent else ""

        if '0.00' in number_format:
            return format(number, '.2f') + suffix
        if '0.0' in number_format:
            return format(number, '.1f') + suffix
        if is_percent:
            return format(number, '.0f') + suffix
        return str(value)

    def _draw_text(self, draw, text, x1, y1, x2, y2, font, color, h_align, v_align):
        """
        Draw ``text`` on a single line inside the box, truncating with ``...``
        when it is wider than the box minus a 5px margin on each side.
        """
        # textlength() rejects multi-line strings, so line breaks inside a
        # cell are collapsed to spaces and the text is drawn on one line.
        text = " ".join(text.splitlines())
        if not text:
            return

        max_width = x2 - x1 - 10
        text_width = draw.textlength(text, font=font)

        if text_width > max_width and text_width > 0 and len(text) > 3:
            # First guess from the average glyph width ...
            keep_chars = int(len(text) * max_width / text_width) - 2
            if keep_chars > 0:
                candidate = text[:keep_chars] + "..."
                candidate_width = draw.textlength(candidate, font=font)
                # ... then shrink until it really fits; glyph widths vary
                # (e.g. CJK vs. Latin), so the estimate can overshoot.
                while candidate_width > max_width and keep_chars > 1:
                    keep_chars -= 1
                    candidate = text[:keep_chars] + "..."
                    candidate_width = draw.textlength(candidate, font=font)
                text, text_width = candidate, candidate_width

        # Horizontal position
        if h_align == 'center':
            text_x = x1 + (x2 - x1 - text_width) / 2
        elif h_align == 'right':
            text_x = x2 - text_width - 5
        else:  # left, and every other alignment value
            text_x = x1 + 5

        # Vertical position (approximate: assumes ~12px glyph height, the
        # size the fonts are loaded at)
        font_height = 12
        if v_align == 'top':
            text_y = y1 + 5
        elif v_align == 'bottom':
            text_y = y2 - font_height - 5
        else:  # center
            text_y = y1 + (y2 - y1 - font_height) / 2

        draw.text((text_x, text_y), text, fill=color, font=font)
def test_convert_specific_sheet(sample_excel_file):
    """Converting an explicitly named, existing sheet returns a PNG image."""
    # No rewind is needed here: the fixture is function-scoped, so this test
    # receives its own stream, and the fixture seeks it to offset 0 before
    # returning it.
    files = {'file': ('test.xlsx', sample_excel_file, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')}
    data = {'sheet_name': 'APITest'}
    response = client.post("/api/v1/convert", files=files, data=data)

    assert response.status_code == 200
    assert response.headers["content-type"] == "image/png"
    # Every PNG file starts with this fixed 8-byte signature.
    assert response.content.startswith(b"\x89PNG\r\n\x1a\n")
def test_render_invalid_sheet(sample_excel_bytes):
    """Asking for a sheet that is not in the workbook raises ValueError."""
    excel_renderer = ExcelRenderer(sample_excel_bytes)
    expected_error = pytest.raises(ValueError, match="Sheet 'NonExistent' not found")
    with expected_error:
        excel_renderer.render_to_bytes(sheet_name="NonExistent")