commit 5fc9026222e3b4eb42753f29c1c7c57bd329462a
Author: renzhiyuan <465386466@qq.com>
Date: Wed Aug 13 11:45:01 2025 +0800
1
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/brandSelect.iml b/.idea/brandSelect.iml
new file mode 100644
index 0000000..3b4d670
--- /dev/null
+++ b/.idea/brandSelect.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..cb0f56f
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..03bec72
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..3692588
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d2ab9ab
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Copy requirements.txt and install dependencies first (uses the Docker layer cache)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
+
+# Copy the whole project
+COPY . .
+
+EXPOSE 5001
+
+# "app:app" is <module>:<FastAPI instance>; --reload is a development-only option, so it is not used here
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5001"]
diff --git a/__pycache__/app.cpython-311.pyc b/__pycache__/app.cpython-311.pyc
new file mode 100644
index 0000000..6789c95
Binary files /dev/null and b/__pycache__/app.cpython-311.pyc differ
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..81f59a7
--- /dev/null
+++ b/app.py
@@ -0,0 +1,82 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from modelscope import AutoModelForCausalLM, AutoTokenizer
+import re
+from typing import List
+
+app = FastAPI()
+
+# Initialise the model and tokenizer (module-level globals, loaded only once)
+model_name = "Qwen/Qwen3-0.6B"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+ model_name,
+ torch_dtype="auto",
+ device_map="auto",
+)
+
+
+class BrandExtractionRequest(BaseModel):  # request body of POST /extract_brand/
+ goods: str  # product name to extract the brand from
+ brand_list: str  # comma-separated string, e.g. "apple,samsung,huawei"
+
+
+@app.post("/extract_brand/")
+async def extract_brand(request: BrandExtractionRequest):
+    """Pick the brand of ``request.goods`` from ``request.brand_list``.
+
+    Returns ``{"extracted_brand": brand}``; the value is "失败" when no
+    candidate matches. NOTE(review): generation runs synchronously here.
+    """
+    goods = request.goods
+    # Ordered, de-duplicated candidates. Blank entries (empty list, trailing
+    # comma) are dropped: "" is a substring of every string and would
+    # otherwise always win the fallback match below.
+    stripped = (brand.strip() for brand in request.brand_list.split(","))
+    brands = list(dict.fromkeys(brand for brand in stripped if brand))
+    if not brands:
+        return {"extracted_brand": "失败"}  # nothing to choose from
+    brand_set = set(brands)
+
+    prompt = (
+        f"商品名称:{goods}\n"
+        "-只需要返回一个品牌的名字,去掉多余的描述\n"
+        f"-请在以下品牌中选择:{brand_set}"
+    )
+    messages = [
+        {"role": "system",
+         "content": '从商品名称中提取品牌名称,需严格匹配预定义的品牌列表。若未找到匹配品牌,返回 "失败" '},
+        {"role": "user", "content": prompt}
+    ]
+    # Render the chat prompt with the model's thinking mode switched off.
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+        enable_thinking=False
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    # A brand name is a handful of tokens; the previous 32768 cap let a
+    # runaway generation stall the request.
+    generated_ids = model.generate(**model_inputs, max_new_tokens=64)
+    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+
+    # Skip past the last 151668 token if present (presumably Qwen3's </think>).
+    try:
+        index = len(output_ids) - output_ids[::-1].index(151668)
+    except ValueError:
+        index = 0
+    extracted_brand = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip()
+
+    # Accept only listed brands; else fall back to the first listed brand
+    # found in the product name (request order keeps this deterministic).
+    if extracted_brand not in brand_set:
+        extracted_brand = next((brand for brand in brands if brand in goods), "失败")
+
+    return {"extracted_brand": extracted_brand}
+
+
+# GET endpoint for testing (optional)
+@app.get("/")
+async def root():
+ return {"message": "Brand Extraction API is running"}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..307fdef
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,21 @@
+modelscope
+filelock
+transformers
+torch<=2.3
+fastapi
+numpy<=1.26.4
+starlette
+torchaudio
+uvicorn
+addict
+datasets==2.21.0
+pillow
+simplejson
+sortedcontainers
+loguru
+accelerate
+
+# NOTE: only requirement specifiers and pip options belong in this file;
+# a shell command such as "pip install ..." makes `pip install -r` fail.
+# To install everything from the Tsinghua mirror, run:
+#   pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
\ No newline at end of file