llm_train/eval_with_lora.py


from transformers import AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from config import Config, Default, Dir
from modelscope import snapshot_download, AutoTokenizer
import torch


def load_model_and_tokenizer():
    print("🧼 Loading model...")
    # Download the Qwen model from ModelScope into a local directory
    model_dir = snapshot_download(Config.MODEL_NAME)
    # Load the tokenizer and weights from the downloaded snapshot with Transformers
    tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False,
                                              trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto",
                                                 torch_dtype=torch.bfloat16)
    # Only required when training with gradient checkpointing; harmless for inference
    model.enable_input_require_grads()
    return model, tokenizer


if __name__ == "__main__":
    model, tokenizer = load_model_and_tokenizer()
    # 2. Load the LoRA adapter config and weights
    peft_model_id = "./lora_adapter"  # path the adapter was saved to after training
    peft_config = PeftConfig.from_pretrained(peft_model_id)
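    # Sanity check (a hedged addition, not in the original script): the adapter's
    # PeftConfig records the base model it was trained on, which can be compared
    # against Config.MODEL_NAME before injecting the weights.
    # print("Adapter was trained on:", peft_config.base_model_name_or_path)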
    # 3. Attach the LoRA weights to the base model (PEFT wraps the target layers
    #    with adapter modules; the original base weights stay frozen)
    model_with_lora = PeftModel.from_pretrained(model, peft_model_id)
    # 4. Run inference (example: text generation)
    # Prompt (in Chinese): "Doctor, while studying the vestibular part of the inner
    # ear I noticed some special structures, such as the vestibular crest. What other
    # special structures are there in the vestibule, and what are their functions?"
    input_text = "医生,我在研究内耳的前庭部分时,发现了一些特殊的结构,比如前庭嵴。请问前庭内还有哪些特殊的结构,它们的作用是什么?"
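    # Alternative prompt formatting (a sketch, assuming a chat-tuned Qwen variant):
    # instruction-tuned checkpoints usually expect their chat template rather than a
    # raw string, which tokenizer.apply_chat_template can build:
    # messages = [{"role": "user", "content": input_text}]
    # input_text = tokenizer.apply_chat_template(messages, tokenize=False,
    #                                            add_generation_prompt=True)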
    inputs = tokenizer(input_text, return_tensors="pt").to(model_with_lora.device)
    outputs = model_with_lora.generate(**inputs, max_new_tokens=100)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
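    # Optional deployment step (a hedged sketch, not part of the original script):
    # PEFT can fold the adapter into the base weights with merge_and_unload(), which
    # returns a plain Transformers model with no adapter indirection at inference
    # time. The "./merged_model" path below is hypothetical.
    # merged_model = model_with_lora.merge_and_unload()
    # merged_model.save_pretrained("./merged_model")
    # tokenizer.save_pretrained("./merged_model")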