Das Objekt "MistralforcausAllm" hat kein Attribut "merge_and_unload".Python


Post by Anonymous »

I have fine-tuned (or rather further pre-trained) the OpenChat model (a Mistral 7B fine-tune) on my own data. This worked well, and inference produces nice results. Now I would like to merge the adapter weights into the original model so that I can quantize the model in a further step. The problem is that calling model.merge_and_unload() raises the following error:

Code:

 "AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload".
Is there a way to fix this, or another method to merge my adapter weights into the original model?
Training

Code:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "openchat/openchat-3.5-1210"

# 4-bit NF4 quantization config (QLoRA-style training)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})

from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)

data_train = ...

import transformers
from trl import SFTTrainer
from transformers import TrainingArguments

tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=data_train,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        #eval_steps=100,
        logging_dir="./logs",
        #max_steps=10,
        num_train_epochs=1,
        #evaluation_strategy="steps",
        logging_strategy="steps",
        learning_rate=2e-4,
        fp16=True,
        logging_steps=5,
        save_total_limit=3,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False

trainer.train()

# With a PeftModel, this writes the LoRA adapter (not the full base model) to "pretrained_model"
trainer.save_model("pretrained_model")
tokenizer.save_pretrained("pretrained_model")
Merging

Code:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)

model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_name = "pretrained_model"

# Read the adapter config saved during training to find the base model
config = PeftConfig.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# This call raises the AttributeError quoted above
model.merge_and_unload()
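
For context: in PEFT, merge_and_unload() is exposed on a PeftModel that wraps the base model, not on the plain MistralForCausalLM returned by AutoModelForCausalLM.from_pretrained, which is why the call above fails. Below is a minimal sketch of that pattern, assuming the adapter was saved to "pretrained_model" as in the training script; the output directory "merged_model" and the bfloat16 dtype are illustrative choices, and the base model is loaded without 4-bit quantization here because merging into a bitsandbytes-quantized model may not be supported depending on the PEFT version.

Code:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "openchat/openchat-3.5-1210"
adapter_path = "pretrained_model"   # directory written by trainer.save_model(...) above

# Load the base model unquantized in bfloat16 for merging
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Wrap the base model with the saved adapter; the resulting PeftModel
# is the object that provides merge_and_unload()
peft_model = PeftModel.from_pretrained(base_model, adapter_path)

# Fold the LoRA weights into the base weights and drop the adapter modules
merged_model = peft_model.merge_and_unload()

# "merged_model" is an illustrative output directory, not from the original post
merged_model.save_pretrained("merged_model")
AutoTokenizer.from_pretrained(adapter_path).save_pretrained("merged_model")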
