commit 5fc9026222e3b4eb42753f29c1c7c57bd329462a Author: renzhiyuan <465386466@qq.com> Date: Wed Aug 13 11:45:01 2025 +0800 1 diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/brandSelect.iml b/.idea/brandSelect.iml new file mode 100644 index 0000000..3b4d670 --- /dev/null +++ b/.idea/brandSelect.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..cb0f56f --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,14 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..03bec72 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..3692588 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d2ab9ab --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.11-slim + +WORKDIR /app + +# 复制 requirements.txt 并优先安装依赖(利用 Docker 层缓存) +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
+
+# Copy the whole project
+COPY . .
+
+EXPOSE 5001
+
+# Make sure the module name and the FastAPI instance name are correct (default is app:app)
+CMD ["uvicorn", "app:app", "--reload", "--host", "0.0.0.0", "--port", "5001"]
diff --git a/__pycache__/app.cpython-311.pyc b/__pycache__/app.cpython-311.pyc
new file mode 100644
index 0000000..6789c95
Binary files /dev/null and b/__pycache__/app.cpython-311.pyc differ
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..81f59a7
--- /dev/null
+++ b/app.py
@@ -0,0 +1,122 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from modelscope import AutoModelForCausalLM, AutoTokenizer
+import re
+from typing import List
+
+app = FastAPI()
+
+# Load the model and tokenizer once at import time (module-level globals).
+model_name = "Qwen/Qwen3-0.6B"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto",
+)
+
+# Token id located in the generated ids; everything up to and including its
+# last occurrence is discarded before decoding. The Qwen3 model card uses this
+# id as its end-of-thinking marker -- re-check if the model changes.
+THINK_END_TOKEN_ID = 151668
+
+# Upper bound on generated tokens. The answer is a single brand name, so a
+# small cap keeps a runaway generation from stalling the request.
+MAX_NEW_TOKENS = 64
+
+# Value returned when no brand from the list can be determined.
+FAILURE = "失败"
+
+
+class BrandExtractionRequest(BaseModel):
+    """Request body for POST /extract_brand/."""
+
+    goods: str  # product name to extract the brand from
+    brand_list: str  # comma-separated brand names, e.g. "apple,samsung,huawei"
+
+
+def _parse_brand_list(brand_list: str) -> List[str]:
+    """Split a comma-separated string into unique, non-empty brand names.
+
+    First-appearance order is kept so the prompt and the substring fallback
+    are deterministic.
+    """
+    names = (name.strip() for name in brand_list.split(","))
+    return list(dict.fromkeys(name for name in names if name))
+
+
+def _resolve_brand(candidate: str, goods: str, brands: List[str]) -> str:
+    """Map the model output to a listed brand, or to the failure marker."""
+    if candidate in brands:
+        return candidate
+    # Fallback: first listed brand that occurs verbatim in the product name.
+    for brand in brands:
+        if brand in goods:
+            return brand
+    return FAILURE
+
+
+@app.post("/extract_brand/")
+async def extract_brand(request: BrandExtractionRequest):
+    """Pick the brand of ``request.goods`` from ``request.brand_list``.
+
+    Returns ``{"extracted_brand": brand}`` where ``brand`` is one of the
+    listed brands, or "失败" when none can be determined.
+
+    NOTE(review): ``model.generate`` is a blocking call inside an ``async``
+    handler, so the event loop is busy for the whole generation.
+    """
+    goods = request.goods
+    brands = _parse_brand_list(request.brand_list)
+    if not brands:
+        # Nothing to choose from, so skip the model call.
+        return {"extracted_brand": FAILURE}
+
+    # Build the prompt; brands are joined explicitly instead of relying on
+    # the repr of a Python set.
+    brand_options = ",".join(brands)
+    prompt = (
+        f"商品名称:{goods}\n"
+        "-只需要返回一个品牌的名字,去掉多余的描述\n"
+        f"-请在以下品牌中选择:{brand_options}"
+    )
+
+    messages = [
+        {"role": "system",
+         "content": '从商品名称中提取品牌名称,需严格匹配预定义的品牌列表。若未找到匹配品牌,返回 "失败" '},
+        {"role": "user", "content": prompt}
+    ]
+
+    # Render the chat template to text, with thinking mode disabled.
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+        enable_thinking=False
+    )
+
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=MAX_NEW_TOKENS
+    )
+    # Keep only the newly generated tokens (drop the echoed prompt).
+    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+
+    # Skip everything up to and including the last THINK_END_TOKEN_ID, if any.
+    try:
+        index = len(output_ids) - output_ids[::-1].index(THINK_END_TOKEN_ID)
+    except ValueError:
+        index = 0
+
+    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
+
+    # Only ever return a listed brand or the failure marker.
+    return {"extracted_brand": _resolve_brand(content.strip(), goods, brands)}
+
+
+# Optional GET endpoint for a quick manual check.
+@app.get("/")
+async def root():
+    """Report that the API is running."""
+    return {"message": "Brand Extraction API is running"}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..307fdef
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,17 @@
+modelscope
+filelock
+transformers
+torch<=2.3
+fastapi
+numpy<=1.26.4
+starlette
+torchaudio
+uvicorn
+addict
+datasets==2.21.0
+pillow
+simplejson
+sortedcontainers
+loguru
+accelerate
+# Manual install via the Tsinghua mirror: pip install accelerate -i https://pypi.tuna.tsinghua.edu.cn/simple/