from transformers import AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from config import Config
from modelscope import snapshot_download, AutoTokenizer
import torch


def load_model_and_tokenizer():
    print("🧼 Loading model...")
    # Download the Qwen model from ModelScope into a local directory
    model_dir = snapshot_download(Config.MODEL_NAME)
    # Load the tokenizer and model weights from the local snapshot with Transformers
    tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", torch_dtype=torch.bfloat16)
    model.enable_input_require_grads()  # Required when gradient checkpointing is enabled (training only)
    return model, tokenizer


if __name__ == "__main__":
    # 1. Load the base model and tokenizer
    model, tokenizer = load_model_and_tokenizer()

    # 2. Load the LoRA adapter configuration and weights
    peft_model_id = "./lora_adapter"  # path where the adapter was saved after training
    peft_config = PeftConfig.from_pretrained(peft_model_id)

    # 3. Inject the LoRA weights into the base model (dynamic injection; the base weights are not modified)
    model_with_lora = PeftModel.from_pretrained(model, peft_model_id)

    # 4. Run inference (example: text generation)
    input_text = "医生,我在研究内耳的前庭部分时,发现了一些特殊的结构,比如前庭嵴。请问前庭内还有哪些特殊的结构,它们的作用是什么?"
    inputs = tokenizer(input_text, return_tensors="pt").to(model_with_lora.device)
    outputs = model_with_lora.generate(**inputs, max_new_tokens=100)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
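

# --- Optional: merge the adapter into the base weights for deployment ---
# A minimal sketch, not invoked by the script above, of how the dynamically
# injected LoRA weights could be folded into the base model using PEFT's
# merge_and_unload(), so the merged checkpoint can be served without the peft
# wrapper. The function name and output directory below are placeholders, not
# paths used elsewhere in this repo.
def merge_lora_for_deployment(model_with_lora: PeftModel, tokenizer, out_dir: str = "./qwen_lora_merged"):
    merged_model = model_with_lora.merge_and_unload()  # fold LoRA deltas into the base weights
    merged_model.save_pretrained(out_dir)              # save the merged model weights
    tokenizer.save_pretrained(out_dir)                 # save the tokenizer alongside for easy loading
    return merged_model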