from transformers import AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from config import Config, Default, Dir
from modelscope import snapshot_download, AutoTokenizer
import torch

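# NOTE (assumption): `config.py` is a local module that is not shown here.
# A minimal sketch that would satisfy the import above, with a hypothetical
# model id, might look like:
#
#     # config.py
#     class Config:
#         MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"  # hypothetical; use your model id
#
#     class Default:  # placeholder, unused in this script
#         pass
#
#     class Dir:  # placeholder, unused in this script
#         pass
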
def load_model_and_tokenizer():
    print("🧼 Loading model...")
    # Download the Qwen model from ModelScope into a local directory
    model_dir = snapshot_download(Config.MODEL_NAME)
    # Load the tokenizer and model weights with Transformers from the local snapshot
    tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False,
                                              trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto",
                                                 torch_dtype=torch.bfloat16)
    # Required when gradient checkpointing is enabled during training;
    # unnecessary (but harmless) for pure inference
    model.enable_input_require_grads()
    return model, tokenizer

if __name__ == "__main__":
    # 1. Load the base model and tokenizer
    model, tokenizer = load_model_and_tokenizer()

    # 2. Load the LoRA adapter config and weights
    peft_model_id = "./lora_adapter"  # path where the adapter was saved after training
    peft_config = PeftConfig.from_pretrained(peft_model_id)

    # 3. Inject the LoRA weights into the base model (adapters are attached
    #    dynamically; the base weights themselves stay unchanged)
    model_with_lora = PeftModel.from_pretrained(model, peft_model_id)

    # 4. Run inference (example: text generation)
    # Example prompt (Chinese): "Doctor, while studying the vestibular part of
    # the inner ear I found some special structures, such as the vestibular
    # crest. What other structures are inside the vestibule, and what are
    # their functions?"
    input_text = "医生,我在研究内耳的前庭部分时,发现了一些特殊的结构,比如前庭嵴。请问前庭内还有哪些特殊的结构,它们的作用是什么?"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Generate with the adapter-wrapped model, not the bare base model
    outputs = model_with_lora.generate(**inputs, max_new_tokens=100)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
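
    # Optional (sketch): Qwen instruct checkpoints are usually prompted through
    # the tokenizer's chat template rather than raw text. Assuming this
    # tokenizer ships a chat template, the prompt above could be built as:
    #
    #     messages = [{"role": "user", "content": input_text}]
    #     prompt = tokenizer.apply_chat_template(
    #         messages, tokenize=False, add_generation_prompt=True
    #     )
    #     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)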
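
    # Optional (sketch): for deployment, the LoRA weights can be folded into
    # the base model with PEFT's merge_and_unload(), which returns a plain
    # transformers model with no adapter overhead. The output path below is
    # hypothetical:
    #
    #     merged_model = model_with_lora.merge_and_unload()
    #     merged_model.save_pretrained("./merged_model")
    #     tokenizer.save_pretrained("./merged_model")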