From eaa6fc6fe7980e510218b78752abebf012b5ccc1 Mon Sep 17 00:00:00 2001 From: fuzhongyun <15339891972@163.com> Date: Mon, 19 Jan 2026 15:26:23 +0800 Subject: [PATCH] =?UTF-8?q?fix:=201.=20=E5=A2=9E=E5=8A=A0=20ollama=20vl=20?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E9=85=8D=E7=BD=AE=202.=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E7=AE=80=E6=98=93=E5=90=8E=E5=8F=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 1 + app/config.py | 1 + app/core/ingest.py | 84 +++++++++++++++++++++++---------------- app/static/admin.html | 91 +++++++++++++++++++++++++------------------ 4 files changed, 105 insertions(+), 72 deletions(-) diff --git a/.env.example b/.env.example index bb77f59..e2fdce6 100644 --- a/.env.example +++ b/.env.example @@ -11,6 +11,7 @@ LLM_MODEL=qwen2.5-7b-awq LLM_KEY=EMPTY # vLLM default key # LLM(Vision) Configuration +VL_BINDING=vllm # ollama, vllm, openai VL_BINDING_HOST=http://192.168.6.115:8001/v1 VL_MODEL=qwen2.5-vl-3b-awq VL_KEY=EMPTY diff --git a/app/config.py b/app/config.py index 10ee6ef..0c5faf5 100644 --- a/app/config.py +++ b/app/config.py @@ -18,6 +18,7 @@ class Settings(BaseSettings): LLM_KEY: str = "EMPTY" # vLLM default key # LLM (Vision) - vLLM + VL_BINDING: str = "vllm" # ollama, vllm, openai VL_BINDING_HOST: str = "http://192.168.6.115:8001/v1" VL_MODEL: str = "qwen2.5-vl-3b-awq" VL_KEY: str = "EMPTY" diff --git a/app/core/ingest.py b/app/core/ingest.py index 77a76b5..7c70498 100644 --- a/app/core/ingest.py +++ b/app/core/ingest.py @@ -6,7 +6,8 @@ from app.config import settings async def vl_image_caption_func(image_data: bytes, prompt: str = "请详细描述这张图片") -> str: """ - 使用 VL 模型 (vLLM OpenAI API) 生成图片描述 + 使用 VL 模型生成图片描述 + 支持 ollama 和 openai/vllm 协议 """ if not settings.VL_BINDING_HOST: return "[Image Processing Skipped: No VL Model Configured]" @@ -15,38 +16,53 @@ async def vl_image_caption_func(image_data: bytes, prompt: str = "请详细描 # 1. 编码图片为 Base64 base64_image = base64.b64encode(image_data).decode('utf-8') - # 2. 构造 OpenAI 格式请求 - # vLLM 支持 OpenAI Vision API - url = f"{settings.VL_BINDING_HOST}/chat/completions" - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {settings.VL_KEY}" - } - - payload = { - "model": settings.VL_MODEL, - "messages": [ - { - "role": "user", - "content": [ - {"type": "text", "text": prompt}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}" - } - } - ] - } - ], - "max_tokens": 300 - } - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.post(url, headers=headers, json=payload) - response.raise_for_status() - result = response.json() - description = result['choices'][0]['message']['content'] + if settings.VL_BINDING == "ollama": + # Ollama 协议 + url = f"{settings.VL_BINDING_HOST}/api/generate" + payload = { + "model": settings.VL_MODEL, + "prompt": prompt, + "images": [base64_image], + "stream": False + } + response = await client.post(url, json=payload) + response.raise_for_status() + result = response.json() + description = result.get('response', '') + + else: + # OpenAI / vLLM 协议 + url = f"{settings.VL_BINDING_HOST}/chat/completions" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {settings.VL_KEY}" + } + + payload = { + "model": settings.VL_MODEL, + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}" + } + } + ] + } + ], + "max_tokens": 300 + } + + response = await client.post(url, headers=headers, json=payload) + response.raise_for_status() + result = response.json() + description = result['choices'][0]['message']['content'] + return f"[Image Description: {description}]" except Exception as e: @@ -70,14 +86,14 @@ async def process_pdf_with_images(file_bytes: bytes) -> str: text_content += f"--- Page {page_num + 1} Text ---\n{page_text}\n\n" # 2. 提取图片 - if False and settings.VL_BINDING_HOST: + if settings.VL_BINDING_HOST: for count, image_file_object in enumerate(page.images): try: # 获取图片数据 image_data = image_file_object.data # 简单验证图片有效性 - # Image.open(BytesIO(image_data)).verify() + Image.open(BytesIO(image_data)).verify() # 调用 VL 模型 caption = await vl_image_caption_func(image_data) diff --git a/app/static/admin.html b/app/static/admin.html index 8413afc..9607935 100644 --- a/app/static/admin.html +++ b/app/static/admin.html @@ -38,7 +38,7 @@