From faeef3050eb84516b689ee84ae1dc7b592d561af Mon Sep 17 00:00:00 2001
From: renzhiyuan <465386466@qq.com>
Date: Tue, 5 Aug 2025 09:33:41 +0800
Subject: [PATCH] 1

---
Review notes (this section is ignored by `git am`):
- Dockerfile: install from require.txt (the file this patch creates) and
  use a Python >= 3.9 base image, as required by the pinned packages.
- app.py: set ensure_ascii on the Flask JSON provider (JSON_AS_ASCII was
  removed in Flask 2.3), validate the request body, stop using a bare
  except in the health check, and turn the Werkzeug debugger off.
- other/large.py: run the demo call only when executed as a script.
- Comments are translated to English; runtime strings are unchanged.
- The "index" lines were dropped because the blob ids no longer match.

 Dockerfile     |  25 ++++++++++++
 app.py         | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
 other/large.py |  35 +++++++++++++++++
 other/main.py  |  24 ++++++++++++
 require.txt    |  32 ++++++++++++++++
 5 files changed, 219 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 app.py
 create mode 100644 other/large.py
 create mode 100644 other/main.py
 create mode 100644 require.txt

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+# Official Python base image. The pins in require.txt (Flask 3.1.0,
+# numpy 2.0.1, pandas 2.3.1, torch 2.7.1) all require Python >= 3.9.
+FROM python:3.9-slim
+
+# Set the working directory
+WORKDIR /app
+
+# Install system dependencies (if needed)
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy the project files
+COPY . /app
+
+# Install Python dependencies (the requirements file is named require.txt)
+RUN pip install --no-cache-dir -r require.txt
+
+# Expose the service port
+EXPOSE 5000
+
+# Start command (Gunicorn instead of the Flask development server).
+# NOTE(review): still commented out, as in the original commit -- confirm
+# whether the image is meant to start the server by default.
+#CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
--- /dev/null
+++ b/app.py
@@ -0,0 +1,103 @@
+import logging
+
+from flask import Flask, request, jsonify
+from transformers import pipeline
+
+# Logging configuration
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Load the model once, at import time
+try:
+    classifier = pipeline(
+        "zero-shot-classification",
+        model="./nlp_structbert_zero-shot-classification_chinese-large",
+        device="cpu",
+        max_length=512,
+        ignore_mismatched_sizes=True
+    )
+    logger.info("模型加载成功")
+except Exception as e:
+    logger.error(f"模型加载失败: {str(e)}")
+    raise
+
+# Category labels
+level1 = [
+    "食品", "电器", "洗护", "女装", "手机",
+    "健康", "男装", "美妆", "电脑", "运动",
+    "内衣", "母婴", "数码", "百货", "鞋包",
+    "办公", "家装", "饰品", "车品", "图书",
+    "生鲜", "家纺", "宠物", "奢品", "其它", "药品"
+]
+
+# Create the Flask application
+app = Flask(__name__)
+# Send Chinese text as-is in JSON responses instead of \uXXXX escapes.
+# Flask 2.3 removed the JSON_AS_ASCII config key (it is silently ignored
+# there), so the flag is set on the JSON provider instead.
+app.json.ensure_ascii = False
+
+
+@app.route('/best', methods=['POST'])
+def classify_text():
+    """Classify a text against caller-supplied candidate labels.
+
+    Expects a JSON object with ``text`` (the text to classify) and ``cate``
+    (the candidate labels). Responds with ``{"cate": <top label>}``, with
+    400 when the body is invalid, or with 500 when classification fails.
+    """
+    # silent=True makes a missing or malformed JSON body come back as None,
+    # so it gets the same 400 response as a missing field.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'text' not in data:
+        return jsonify({"error": "缺少text参数"}), 400
+    if 'cate' not in data:
+        return jsonify({"error": "缺少cate参数"}), 400
+    text = data['text']
+    cate = data['cate']
+    # Empty input is a client error; reject it before calling the model.
+    if not text or not cate:
+        return jsonify({"error": "text和cate不能为空"}), 400
+
+    try:
+        # Run the classification
+        result = classifier(
+            text,
+            candidate_labels=cate,
+            truncation=True
+        )
+        # Build the response from the first returned label
+        return jsonify({"cate": result['labels'][0]})
+    except Exception as e:
+        # logger.exception also records the traceback
+        logger.exception(f"分类过程中出错: {str(e)}")
+        return jsonify({"error": str(e)}), 500
+
+
+@app.route('/health')
+def health_check():
+    """Health-check endpoint: run one small classification on the model."""
+    try:
+        # Smoke-test that the model can serve a request
+        classifier(
+            "测试文本",
+            candidate_labels=["食品", "电器"],
+            truncation=True
+        )
+    except Exception:
+        # Catch Exception rather than everything, and log the cause
+        logger.exception("健康检查失败")
+        return jsonify({
+            "status": "unhealthy",
+            "model_loaded": False
+        }), 500
+    return jsonify({
+        "status": "healthy",
+        "model_loaded": True
+    })
+
+
+if __name__ == '__main__':
+    # Development entry point. Debug mode stays off: the server binds to
+    # all interfaces, and the Werkzeug debugger lets clients run code.
+    app.run(host='0.0.0.0', port=5000, debug=False)
\ No newline at end of file
diff --git a/other/large.py b/other/large.py
new file mode 100644
--- /dev/null
+++ b/other/large.py
@@ -0,0 +1,35 @@
+from transformers import pipeline
+
+
+classifier = pipeline(
+    "zero-shot-classification",
+    model="./nlp_structbert_zero-shot-classification_chinese-large",
+    device="cpu",
+    max_length=512,
+    ignore_mismatched_sizes=True  # ignore size-mismatch warnings
+)
+level1 = [
+    "食品", "电器", "洗护", "女装", "手机",
+    "健康", "男装", "美妆", "电脑", "运动",
+    "内衣", "母婴", "数码", "百货", "鞋包",
+    "办公", "家装", "饰品", "车品", "图书",
+    "生鲜", "家纺", "宠物", "奢品", "其它","药品"
+]
+
+
+def theBestAndLow(goods):
+    """Print the highest- and lowest-scoring ``level1`` labels for *goods*."""
+    result = classifier(
+        goods,
+        candidate_labels=level1,
+        truncation=True  # explicitly request truncation
+    )
+    labels = result['labels']
+    scores = result['scores']
+
+    print("最高分标签:", labels[0], "得分:", scores[0])
+    print("最低分标签:", labels[-1], "得分:", scores[-1])
+
+
+if __name__ == "__main__":
+    theBestAndLow( "宇宙超萌儿童辅食有机果蔬蝴蝶面210g 数量:1盒装:")
\ No newline at end of file
diff --git a/other/main.py b/other/main.py
new file mode 100644
--- /dev/null
+++ b/other/main.py
@@ -0,0 +1,24 @@
+from transformers import pipeline
+
+
+classifier = pipeline(
+    "zero-shot-classification",
+    model="./nlp_structbert_zero-shot-classification_chinese-base",
+    device="cpu",
+    max_length=512,
+    ignore_mismatched_sizes=True  # ignore size-mismatch warnings
+)
+level1 = [
+    "食品", "电器", "洗护", "女装", "手机",
+    "健康", "男装", "美妆", "电脑", "运动",
+    "内衣", "母婴", "数码", "百货", "鞋包",
+    "办公", "家装", "饰品", "车品", "图书",
+    "生鲜", "家纺", "宠物", "奢品", "其它"
+]
+result = classifier(
+    "CONBA/康恩贝森兰康牌铁皮石斛西洋参颗粒 规格:3g/包*60包:",
+    candidate_labels=level1,
+    truncation=True  # explicitly request truncation
+)
+
+print(result)
\ No newline at end of file
diff --git a/require.txt b/require.txt
new file mode 100644
--- /dev/null
+++ b/require.txt
@@ -0,0 +1,32 @@
+# Core dependencies
+Flask==3.1.0
+gunicorn==20.0.4  # recommended for production
+transformers==4.26.0
+
+# Data processing
+numpy==2.0.1
+pandas==2.3.1
+
+# Networking and HTTP requests
+requests==2.32.4
+urllib3==2.5.0
+
+# Model-related
+huggingface-hub==0.34.3
+tokenizers==0.13.3
+safetensors==0.5.3
+
+# Utility libraries
+click==8.1.8
+Jinja2==3.1.6
+MarkupSafe==3.0.2
+itsdangerous==2.2.0
+Werkzeug==3.1.3
+pyyaml==6.0.2
+tqdm==4.67.1
+simplejson==3.20.1
+
+
+torch==2.7.1  # if using a GPU or a specific version is required
+datasets==3.3.2  # if dataset processing is needed
+pillow==11.3.0  # if image processing is involved
\ No newline at end of file