'MistralForCausalLM' object has no attribute 'merge_and_unload'


by Anonymous » 06 Feb 2025, 08:24

I fine-tuned (or rather continued pre-training) the OpenChat model (a Mistral 7B fine-tune) on my own data. This worked well and inference yields nice results. Now I want to merge the adapter weights into the original model so that I can quantize the model in a further step. The problem is that calling model.merge_and_unload() raises the error:

Code: Select all

 "AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload".
Is there a way to fix this, or another method to merge my adapter weights into the original model?

Training

Code: Select all

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "openchat/openchat-3.5-1210"

# 4-bit NF4 quantization config for QLoRA-style training
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0}
)

from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

from peft import LoraConfig, get_peft_model

# LoRA adapters on all attention and MLP projections
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)

data_train = ...

import transformers
from trl import SFTTrainer
from transformers import TrainingArguments

tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=data_train,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        #eval_steps=100,
        logging_dir="./logs",
        #max_steps=10,
        num_train_epochs=1,
        #evaluation_strategy="steps",
        logging_strategy="steps",
        learning_rate=2e-4,
        fp16=True,
        logging_steps=5,
        save_total_limit=3,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False

trainer.train()

# Since model is a PeftModel, this saves only the LoRA adapter weights
trainer.save_model("pretrained_model")
tokenizer.save_pretrained("pretrained_model")
Merging

Code: Select all

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)

model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_name = "pretrained_model"

# Read the adapter config to find the base model it was trained on
config = PeftConfig.from_pretrained(model_name)

# This loads only the plain base model, i.e. a MistralForCausalLM instance
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fails here: MistralForCausalLM has no merge_and_unload()
model.merge_and_unload()
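
From what I can tell, merge_and_unload() is a method of the PeftModel wrapper, not of the plain MistralForCausalLM, so the saved adapter probably has to be attached to the base model first. Here is a minimal sketch of what I mean, assuming the adapter saved by trainer.save_model("pretrained_model") above and a base model reloaded in half precision instead of 4-bit for the merge (the "merged_model" output path is just a placeholder):

Code: Select all

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Reload the base model without 4-bit quantization for the merge step
base_model = AutoModelForCausalLM.from_pretrained(
    "openchat/openchat-3.5-1210",
    torch_dtype=torch.float16,
    device_map="auto",
)

# Attach the saved LoRA adapter; the result is a PeftModel,
# which does expose merge_and_unload()
peft_model = PeftModel.from_pretrained(base_model, "pretrained_model")

# Fold the adapter weights into the base weights and drop the wrapper
merged_model = peft_model.merge_and_unload()

# Placeholder output directory for the merged full model
merged_model.save_pretrained("merged_model")
Is reloading the un-quantized base model for the merge the right approach here, or is there a better way?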
