1

2025-08-05 09:33:41 +08:00 · 2025-08-05 09:33:41 +08:00 · faeef3050e
commit faeef3050e
5 changed files with 202 additions and 0 deletions
--- a/22
+++ b/22
@ -0,0 +1,22 @@
+# 使用官方 Python 基础镜像
+FROM python:3.8-slim
+
+# 设置工作目录
+WORKDIR /app
+
+# 安装系统依赖（如果需要）
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# 复制项目文件
+COPY . /app
+
+# 安装 Python 依赖
+RUN pip install --no-cache-dir -r requirements.txt
+
+# 暴露端口
+EXPOSE 5000
+
+# 启动命令（使用 Gunicorn 替代 Flask 开发服务器）
+#CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]
--- a/app.py
+++ b/app.py
@ -0,0 +1,89 @@
+from flask import Flask, request, jsonify
+from transformers import pipeline
+import logging
+
+# 配置日志
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# 初始化模型
+try:
+    classifier = pipeline(
+        "zero-shot-classification",
+        model="./nlp_structbert_zero-shot-classification_chinese-large",
+        device="cpu",
+        max_length=512,
+        ignore_mismatched_sizes=True
+    )
+    logger.info("模型加载成功")
+except Exception as e:
+    logger.error(f"模型加载失败: {str(e)}")
+    raise
+
+# 定义类别标签
+level1 = [
+    "食品", "电器", "洗护", "女装", "手机","健康", "男装", "美妆", "电脑", "运动","内衣", "母婴", "数码", "百货", "鞋包","办公", "家装", "饰品", "车品", "图书","生鲜", "家纺", "宠物", "奢品", "其它", "药品"
+]
+
+# 创建Flask应用
+app = Flask(__name__)
+app.config['JSON_AS_ASCII'] = False  # 确保中文能正常显示
+
+
+@app.route('/best', methods=['POST'])
+def classify_text():
+    # 获取请求数据
+    data = request.get_json()
+    if not data or 'text' not in data:
+        return jsonify({"error": "缺少text参数"}), 400
+    if not data or 'cate' not in data:
+        return jsonify({"error": "缺少cate参数"}), 400
+    text = data['text']
+    cate = data['cate']
+
+    try:
+        # 执行分类
+        result = classifier(
+            text,
+            candidate_labels=cate,
+            truncation=True
+        )
+
+        labels = result['labels']
+
+
+        # 构建响应
+        response = {
+            "cate": labels[0],
+        }
+        return jsonify(response)
+
+    except Exception as e:
+        logger.error(f"分类过程中出错: {str(e)}")
+        return jsonify({"error": str(e)}), 500
+
+
+@app.route('/health')
+def health_check():
+    """健康检查端点"""
+    try:
+        # 简单测试模型是否可用
+        test_result = classifier(
+            "测试文本",
+            candidate_labels=["食品", "电器"],
+            truncation=True
+        )
+        return jsonify({
+            "status": "healthy",
+            "model_loaded": True
+        })
+    except:
+        return jsonify({
+            "status": "unhealthy",
+            "model_loaded": False
+        }), 500
+
+
+if __name__ == '__main__':
+    # 运行应用
+    app.run(host='0.0.0.0', port=5000, debug=True)
--- a/other/large.py
+++ b/other/large.py
@ -0,0 +1,32 @@
+from transformers import pipeline
+
+
+classifier = pipeline(
+    "zero-shot-classification",
+    model="./nlp_structbert_zero-shot-classification_chinese-large",
+    device="cpu",
+    max_length=512,
+    ignore_mismatched_sizes=True  # 忽略维度不匹配警告
+)
+level1 = [
+    "食品", "电器", "洗护", "女装", "手机",
+    "健康", "男装", "美妆", "电脑", "运动",
+    "内衣", "母婴", "数码", "百货", "鞋包",
+    "办公", "家装", "饰品", "车品", "图书",
+    "生鲜", "家纺", "宠物", "奢品", "其它","药品"
+]
+
+
+def theBestAndLow(goods):
+    result = classifier(
+        goods,
+        candidate_labels=level1,
+        truncation=True  # 显式指定截断策略
+    )
+    labels = result['labels']
+    scores = result['scores']
+
+    print("最高分标签:", labels[0], "得分:", scores[0])
+    print("最低分标签:", labels[-1], "得分:", scores[-1])
+
+theBestAndLow( "宇宙超萌儿童辅食有机果蔬蝴蝶面210g 数量:1盒装:")
--- a/other/main.py
+++ b/other/main.py
@ -0,0 +1,27 @@
+from transformers import pipeline
+
+
+classifier = pipeline(
+    "zero-shot-classification",
+    model="./nlp_structbert_zero-shot-classification_chinese-base",
+    device="cpu",
+
+    max_length=512,
+    ignore_mismatched_sizes=True  # 忽略维度不匹配警告
+)
+level1 = [
+    "食品", "电器", "洗护", "女装", "手机",
+    "健康", "男装", "美妆", "电脑", "运动",
+    "内衣", "母婴", "数码", "百货", "鞋包",
+    "办公", "家装", "饰品", "车品", "图书",
+    "生鲜", "家纺", "宠物", "奢品", "其它"
+]
+result = classifier(
+    "CONBA/康恩贝森兰康牌铁皮石斛西洋参颗粒 规格:3g/包*60包:",
+    candidate_labels=level1,
+    truncation=True  # 显式指定截断策略
+)
+
+
+
+print(result)
--- a/require.txt
+++ b/require.txt
@ -0,0 +1,32 @@
+# 基础依赖
+Flask==3.1.0
+gunicorn==20.0.4  # 生产环境推荐
+transformers==4.26.0
+
+# 数据处理
+numpy==2.0.1
+pandas==2.3.1
+
+# 网络与请求
+requests==2.32.4
+urllib3==2.5.0
+
+# 模型相关
+huggingface-hub==0.34.3
+tokenizers==0.13.3
+safetensors==0.5.3
+
+# 工具库
+click==8.1.8
+Jinja2==3.1.6
+MarkupSafe==3.0.2
+itsdangerous==2.2.0
+Werkzeug==3.1.3
+pyyaml==6.0.2
+tqdm==4.67.1
+simplejson==3.20.1
+
+
+torch==2.7.1  # 如果使用GPU或需要特定版本
+datasets==3.3.2  # 如果需要数据集处理
+pillow==11.3.0  # 如果涉及图像处理