From 5fc9026222e3b4eb42753f29c1c7c57bd329462a Mon Sep 17 00:00:00 2001 From: renzhiyuan <465386466@qq.com> Date: Wed, 13 Aug 2025 11:45:01 +0800 Subject: [PATCH] 1 --- .idea/.gitignore | 8 ++ .idea/brandSelect.iml | 8 ++ .idea/inspectionProfiles/Project_Default.xml | 14 +++ .../inspectionProfiles/profiles_settings.xml | 6 ++ .idea/misc.xml | 7 ++ .idea/modules.xml | 8 ++ .idea/vcs.xml | 6 ++ Dockerfile | 15 ++++ __pycache__/app.cpython-311.pyc | Bin 0 -> 3759 bytes app.py | 82 ++++++++++++++++++ requirements.txt | 21 +++++ 11 files changed, 175 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/brandSelect.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 Dockerfile create mode 100644 __pycache__/app.cpython-311.pyc create mode 100644 app.py create mode 100644 requirements.txt diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/brandSelect.iml b/.idea/brandSelect.iml new file mode 100644 index 0000000..3b4d670 --- /dev/null +++ b/.idea/brandSelect.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..cb0f56f --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,14 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ 
b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..03bec72 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..3692588 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d2ab9ab --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.11-slim + +WORKDIR /app + +# 复制 requirements.txt 并优先安装依赖(利用 Docker 层缓存) +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ + +# 复制整个项目 +COPY . . 
+ +EXPOSE 5001 + +# 确保模块名和 Flask 实例名正确(默认是 app:app) +CMD ["uvicorn", "app:app", "--reload", "--host", "0.0.0.0", "--port", "5001"] diff --git a/__pycache__/app.cpython-311.pyc b/__pycache__/app.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6789c958cb80092c4d9998ff02cdc4751a660918 GIT binary patch literal 3759 zcmZ`6ZEO_Bb$0jmzI`7)e*wlACjm?WyFsCVEDEk6V6e?+8l2MA=4LSqcYDX~9$@P( zS%^uDaKNOrc7axI1&#xxrBbCjf;5$yAN_f@PV#9bBunub`&SN`NXUQMH+x?D&ZKiY zZ{EC*d2c@6o4L=6ikt|_hqgPXY61I>bj-reBeUBng!&Oim&hC9qkSZ(MX_%V7Le7*r?r!t?e$LTTi=9qTm_=OAnyt zN_!FT*?c~>VcSFd5Cc+B=cPO}dK+O!H^NSVS;F%Q&Ox6kncw0iwy<09o}_V6w`GZ+ zei)%RU(>{~r_fvM?M6#<1K4r|)%s?LN-d?^_V9|j>)<{>9gVyqv_!BFs-^XkT`@Ic zboNB#U+^)74;^UHiwN~-+c{3+!ve?YPL2yla4bY{ zH^-fg@u7JRqo}g(3x^J5 ztx!LLI=dWJYd@l3o#N;;5OO6!spcvI0aM+COPk!mJf2AdDZ22mG%YzQh1>*L{2c58}_QmlB&0t+50T#Dl2Xj>qs#Q7{@_9?F*QkB7?cUS0~Oe@7mgUWL*hc%f0@Y} zw2GdkZvSy|M4KGB4|310^gg@Q4bqt}`@etEo4NLQ>e`je-CL7i{~qX5 z*RM~1^T@ez`mg`U+!>yHd~b5(YEEf#7V}7@ORp!>8tmq z?%bcc^3P1qqnvO~0KCo3rMf0__ixjGzgJTY3vZDlAwjnVBa$jes&0uXg1qNwEu-7j z`722;DJs8 zs@v!1DCkT`kaUMAMPn)_!p&z@Bjk|jtSDjOg6@2W55Tnw>0zSXb7AikD|;RCx_(*Ck^RZY5LW!8b*wa*djw!R=h zYXZ5Fl>>O-okvZ{z_!uAwp3u-c%We{(D3!KC%+mEG^Ya1+JUDf%Wk%25mRb4K-yWB zWz5b*d1aP0I|f^8$=Xrzs)4g9|LS|M4Te%{8b%JK*8F_5W@oBqXR>A&d?|n9xPRZ6 zf8VHof6BjKYtC93wiuSHula`ergw1t@KN~5!$(s!Z;sb&9jnYxs@fHN$ICtA6?~HR-7u^&Cif z4vc#ak9iI!kNk4f^KQ!XuJ(4;ips0{!=Ht3L~cg3wzREmKup@!!Z%S`l|`&$tpR#= zr@bWuY|jThAEay6_IcC3(mpoJB3~8EeAU{Xwh8yjfe(|ex}>E}A@ALPuiwPj;ad+W1)~>{RrwTiLcx<2~(Iht_G%6x6PmGu2qfXeOSGF;r&<*Z{TWC zsg`4s1PQ_j*c>okY9zyx{1Z{-J%ApBYItNG)6!)5)}(7&(z4CS*R>_O?Ib*``KTz9 zx0h~@c4A&q#h`2pV4Y>#K4*3vp$s1GI3=npkqS9zZ zvhYizs$}7pM%$A=_N9?SGk$4Qo-F((tVLSKMX_7d7&sc9Rhl#HuI*__xod~lkGkJV zx!=+pY5Rtxy)tFrkhXZzzOuBp6k^apKSuyw5TLPH4^5RoNWTIGjz1^V7mF+>qS;R> zE>$F*D@Lix6jhm}H&gY=)j!K3XcxhA2GJ``c2rqr9}OY!fB*mh literal 0 HcmV?d00001 diff --git a/app.py b/app.py new file mode 100644 index 0000000..81f59a7 --- /dev/null +++ b/app.py @@ -0,0 +1,82 @@ 
"""Brand extraction HTTP API backed by a small Qwen3 chat model.

The service exposes ``POST /extract_brand/``: given a product title and a
comma-separated list of candidate brands, it asks the language model to pick
the brand and validates the answer against the candidate list.
"""
import asyncio
import re
import threading
from typing import List

from fastapi import FastAPI
from pydantic import BaseModel
from modelscope import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Model and tokenizer are module-level globals so they are loaded only once
# per process.
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)

# Value returned when no candidate brand can be matched ("failure").
FAILURE_RESULT = "失败"

# Id of the special token after which the final answer starts.
# NOTE(review): presumably Qwen3's end-of-thinking marker (`</think>`) --
# confirm against the tokenizer vocabulary before changing the model.
THINK_END_TOKEN_ID = 151668

# A brand name is only a handful of tokens; a small budget bounds the latency
# of a runaway generation (the previous limit was 32768 tokens).
MAX_NEW_TOKENS = 64

# Generation is moved off the event loop (see `extract_brand`); this lock
# keeps model/tokenizer access serialized, exactly as it was when inference
# ran inline on the single event-loop thread.
_model_lock = threading.Lock()


class BrandExtractionRequest(BaseModel):
    """Request payload for the brand extraction endpoint."""

    # Product title to extract the brand from.
    goods: str
    # Comma-separated candidate brands, e.g. "apple,samsung,huawei".
    brand_list: str


def _parse_brand_list(raw: str) -> List[str]:
    """Split a comma-separated brand string into unique, non-empty names.

    Whitespace around each name is stripped. Empty entries (from trailing or
    doubled commas, or an empty string) are dropped: an empty brand would
    otherwise match every product title, because ``"" in goods`` is always
    true. The order of first occurrence is preserved so later matching is
    deterministic.
    """
    names = (part.strip() for part in raw.split(","))
    return list(dict.fromkeys(name for name in names if name))


def _build_messages(goods: str, brands: List[str]) -> List[dict]:
    """Build the chat messages asking the model to choose one brand."""
    # Render the candidates as plain comma-separated text; interpolating a
    # Python set would leak repr syntax into the prompt and change order
    # between processes.
    brand_options = ", ".join(brands)
    prompt = (
        f"商品名称:{goods}\n"
        "-只需要返回一个品牌的名字,去掉多余的描述\n"
        f"-请在以下品牌中选择:{brand_options}"
    )
    return [
        {"role": "system",
         "content": '从商品名称中提取品牌名称,需严格匹配预定义的品牌列表。若未找到匹配品牌,返回 "失败" '},
        {"role": "user", "content": prompt},
    ]


def _generate_reply(messages: List[dict]) -> str:
    """Run the model on ``messages`` and return its decoded, stripped answer.

    This call is blocking and must not be invoked directly on the event loop.
    """
    with _model_lock:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=MAX_NEW_TOKENS,
        )
        # Drop the prompt tokens; keep only what the model generated.
        prompt_length = len(model_inputs.input_ids[0])
        output_ids = generated_ids[0][prompt_length:].tolist()

        # Keep only what follows the LAST occurrence of the marker token;
        # when the marker is absent the whole output is the answer.
        try:
            start = len(output_ids) - output_ids[::-1].index(THINK_END_TOKEN_ID)
        except ValueError:
            start = 0

        return tokenizer.decode(
            output_ids[start:], skip_special_tokens=True
        ).strip()


def _resolve_brand(candidate: str, goods: str, brands: List[str]) -> str:
    """Validate the model answer against ``brands``.

    Returns ``candidate`` when it is exactly one of the allowed brands.
    Otherwise falls back to the first brand (in request order) that occurs as
    a substring of ``goods``, and finally to ``FAILURE_RESULT``.
    """
    if candidate in brands:
        return candidate
    for brand in brands:
        if brand in goods:
            return brand
    return FAILURE_RESULT


@app.post("/extract_brand/")
async def extract_brand(request: BrandExtractionRequest):
    """Extract the brand of ``request.goods`` from ``request.brand_list``.

    Returns:
        ``{"extracted_brand": <brand>}`` where ``<brand>`` is one of the
        supplied brands, or ``"失败"`` when none matches.
    """
    goods = request.goods
    brands = _parse_brand_list(request.brand_list)

    # With no usable candidates the only possible answer is the failure
    # value, so skip the (expensive) model call entirely.
    if not brands:
        return {"extracted_brand": FAILURE_RESULT}

    messages = _build_messages(goods, brands)
    # Inference is blocking; run it in a worker thread so the event loop can
    # keep serving other requests (e.g. the health check) meanwhile.
    content = await asyncio.to_thread(_generate_reply, messages)

    return {"extracted_brand": _resolve_brand(content, goods, brands)}


# Optional GET endpoint for smoke testing.
@app.get("/")
async def root():
    """Report that the service is up."""
    return {"message": "Brand Extraction API is running"}
end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..307fdef --- /dev/null +++ b/requirements.txt @@ -0,0 +1,21 @@ +modelscope +filelock +transformers +torch<=2.3 +fastapi +numpy<=1.26.4 +starlette +torch<=2.3 +torchaudio +uvicorn +addict +datasets==2.21.0 +pillow +simplejson +sortedcontainers +loguru +accelerate + + + +# pip install accelerate -i https://pypi.tuna.tsinghua.edu.cn/simple/ \ No newline at end of file