by Anonymous » 06 Feb 2025, 08:24
I have fine-tuned (or rather further pre-trained) the OpenChat model (a Mistral 7B finetune) on my own data. This worked well and inference produces nice results. Now I would like to merge the adapter weights into the original model so that I can quantize the model in a further step. The problem is that calling model.merge_and_unload() raises the error:
Code: Select all
"AttributeError: 'MistralForCausalLM' object has no attribute 'merge_and_unload".
Is there a way to fix this, or another method to merge my adapter weights with the original model?
Training
Code: Select all
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})
from peft import prepare_model_for_kbit_training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
from peft import LoraConfig, get_peft_model
config = LoraConfig(
r=8,
lora_alpha=32,
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
data_train = ...  # my tokenized training dataset (omitted here)
import transformers
from trl import SFTTrainer
from transformers import TrainingArguments
tokenizer.pad_token = tokenizer.eos_token
trainer = transformers.Trainer(
model=model,
train_dataset=data_train,
args=transformers.TrainingArguments(
per_device_train_batch_size=2,
gradient_accumulation_steps=4,
warmup_steps=2,
#eval_steps=100,
logging_dir="./logs",
#max_steps=10,
num_train_epochs=1,
#evaluation_strategy="steps",
logging_strategy="steps",
learning_rate=2e-4,
fp16=True,
logging_steps=5,
save_total_limit=3,
output_dir="outputs",
optim="paged_adamw_8bit"
),
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()
trainer.save_model("pretrained_model")
tokenizer.save_pretrained("pretrained_model")
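After training, the "pretrained_model" directory contains only the LoRA adapter (plus the tokenizer files), not a full model. A quick way to double-check this is to list the directory and read the adapter config back; the exact weight file name (adapter_model.safetensors vs. adapter_model.bin) depends on the peft version:

Code: Select all
import os
from peft import PeftConfig

adapter_dir = "pretrained_model"
print(sorted(os.listdir(adapter_dir)))          # expect adapter_config.json, adapter_model.safetensors/.bin, tokenizer files
print(PeftConfig.from_pretrained(adapter_dir))  # shows the LoraConfig, including base_model_name_or_path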
Merging
Code: Select all
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import (
LoraConfig,
PeftConfig,
PeftModel,
get_peft_model,
prepare_model_for_kbit_training
)
model_id = "openchat/openchat-3.5-1210"
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16
)
model_name = "pretrained_model"
config = PeftConfig.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
config.base_model_name_or_path,
return_dict=True,
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.merge_and_unload()  # <-- this line raises the AttributeError quoted above
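As far as I understand the peft documentation, merge_and_unload() is a method of PeftModel, not of the plain MistralForCausalLM, so the adapter presumably has to be loaded on top of the base model first. Below is a minimal sketch of what I assume the intended flow looks like; the output path "merged_model" is just a placeholder, and the base model is loaded in bfloat16 rather than 4-bit because merging into quantized weights may not be supported depending on the peft version:

Code: Select all
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

adapter_dir = "pretrained_model"  # directory written by trainer.save_model(...)
config = PeftConfig.from_pretrained(adapter_dir)

# Load the base model in bf16 instead of 4-bit so the adapter can be merged in.
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the trained LoRA adapter, then fold its weights into the base weights
# and drop the PEFT wrapper.
model = PeftModel.from_pretrained(base_model, adapter_dir)
merged_model = model.merge_and_unload()

# Save the merged full-precision model for the later quantization step.
merged_model.save_pretrained("merged_model")
AutoTokenizer.from_pretrained(adapter_dir).save_pretrained("merged_model")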