RuntimeError with PyTorch when fine-tuning an LLM: "element 0 of tensors does not require grad"


Post by Guest »

I am trying to fine-tune a Llama model with LoRA, but I get the following error during training:

Code: Select all

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
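For context, PyTorch raises this error whenever .backward() is called on a loss whose computation graph contains no tensor with requires_grad=True. A minimal standalone reproduction of just the message (not my training code; the names here are made up for illustration):

Code: Select all

import torch

x = torch.randn(4, 4)     # requires_grad defaults to False
loss = (x * 2.0).sum()    # result has no grad_fn: nothing in this graph is trainable
loss.backward()           # RuntimeError: element 0 of tensors does not require grad ...

So somewhere in my setup the loss apparently ends up detached from the trainable LoRA parameters.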
Code

Here is my training setup:

Code: Select all

import os
import time
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from peft import get_peft_model, LoraConfig, TaskType
from safetensors.torch import save_file

def prepare_sample(example):
    """Prepare a single sample, returning None if invalid."""
    if not isinstance(example['original_text'], str) or not isinstance(example['answers'], str):
        return None
    if not example['original_text'].strip() or not example['answers'].strip():
        return None
    return f"Soru: {example['original_text'].strip()}\nCevap: {example['answers'].strip()}"

def tokenize_function(examples, tokenizer):
    """Tokenize with proper error handling and validation (FIXED VERSION)."""
    processed_texts = []
    for idx in range(len(examples['original_text'])):
        sample = prepare_sample({
            'original_text': examples['original_text'][idx],
            'answers': examples['answers'][idx]
        })
        if sample:
            processed_texts.append(sample)

    if not processed_texts:
        return {"input_ids": [], "attention_mask": []}

    tokenized = tokenizer(
        processed_texts,
        truncation=True,
        max_length=256,
        padding="max_length",
        return_tensors=None
    )

    # Remove manual label assignment
    return tokenized

def main():
    # Memory optimization
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.benchmark = True

    print("Loading dataset...")
    dataset_name = "hcsolakoglu/turkish-wikipedia-qa-4-million"
    data = load_dataset(dataset_name)

    print("Setting up model and tokenizer...")
    base_model_name = "Llama-3.2-1B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    tokenizer.pad_token = tokenizer.eos_token

    # Load model with configuration
    model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
    )

    # Disable model caching for gradient checkpointing
    model.config.use_cache = False

    print("Configuring LoRA...")
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=4,
        lora_alpha=8,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],
        bias="none",
        inference_mode=False
    )

    model = get_peft_model(model, peft_config)

    # Verify trainable parameters
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || "
        f"all params: {all_param} || "
        f"trainable%: {100 * trainable_params / all_param:.2f}"
    )

    print("Processing dataset...")
    tokenized_data = data.map(
        lambda x: tokenize_function(x, tokenizer),
        batched=True,
        batch_size=100,
        num_proc=4,
        remove_columns=data["train"].column_names,
        desc="Tokenizing dataset"
    )

    print("Filtering dataset...")
    tokenized_data = tokenized_data.filter(
        lambda x: len(x["input_ids"]) > 0,
        desc="Removing empty examples"
    )

    if len(tokenized_data["train"]) == 0:
        raise ValueError("No valid training examples found after processing!")

    print(f"Final dataset size: {len(tokenized_data['train'])} examples")

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
        pad_to_multiple_of=8
    )

    training_args = TrainingArguments(
        output_dir="./results",
        learning_rate=4e-4,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=4,
        max_steps=1500,
        lr_scheduler_type="cosine",
        warmup_ratio=0.01,
        weight_decay=0.01,
        logging_dir="./logs",
        save_strategy="steps",
        save_steps=500,
        save_total_limit=2,
        bf16=True,
        dataloader_num_workers=4,
        dataloader_pin_memory=False,
        optim="adamw_torch",
        logging_steps=10,
        report_to="none",
        gradient_checkpointing=True,
        remove_unused_columns=True
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_data["train"],
        data_collator=data_collator,
    )

    try:
        print("Starting training...")
        start_time = time.time()
        trainer.train()
        end_time = time.time()
        print(f"Training completed in {(end_time - start_time) / 3600:.2f} hours")

        output_dir = "./RealityLLMs"
        os.makedirs(output_dir, exist_ok=True)
        save_file(model.state_dict(), os.path.join(output_dir, "adapter_model.safetensors"))
        print(f"Model saved at: {output_dir}")

    except Exception as e:
        print(f"Training failed with error: {str(e)}")
        raise

if __name__ == "__main__":
    main()

I am using:

- transformers
- peft
- torch
- datasets

Expected behavior

The code should fine-tune the model properly using the LoRA adapters. I have already tried:

- setting requires_grad=True for the model parameters
- checking that the tensors are on the correct device
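
Roughly, those checks looked like this (a sketch, not my exact code; model, tokenized_data, and data_collator are the objects from the script above):

Code: Select all

# List everything that should receive gradients (the LoRA A/B matrices)
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.dtype, param.device)

# Inspect one collated batch: keys, shapes, and where the tensors live
batch = data_collator([tokenized_data["train"][0]])
for key, value in batch.items():
    print(key, tuple(value.shape), value.device)

This should confirm that the adapter weights are trainable and that the collator produces input_ids, attention_mask, and labels, so I do not see where the graph gets cut.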
Environment

- Python 3.10
- PyTorch 2.1.0
- transformers 4.36.0
- PEFT 0.7.0
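
For completeness, the versions above are what these imports report:

Code: Select all

import torch, transformers, peft, datasets

# Print the versions of all packages involved
print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
print("peft:", peft.__version__)
print("datasets:", datasets.__version__)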
Question

What causes this gradient computation error, and how can I fix it?
