import os
import shutil
import random
import torch
import torchvision.transforms as transforms
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.models.video as models
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
from PIL import Image
# ------------------------
# Datasets => Train, Test, Val
# ------------------------
source_dir = "new_dataset"
target_dir = "data"
for split in ["train", "test", "val"]:
os.makedirs(os.path.join(target_dir, split, "NonViolence"), exist_ok=True)
os.makedirs(os.path.join(target_dir, split, "Violence"), exist_ok=True)
train_ratio, val_ratio, test_ratio = 0.8, 0.1, 0.1
for category in ["NonViolence", "Violence"]:
category_path = os.path.join(source_dir, category)
files = os.listdir(category_path)
random.shuffle(files)
train_count = int(len(files) * train_ratio)
val_count = int(len(files) * val_ratio)
train_files = files[:train_count]
val_files = files[train_count:train_count + val_count]
test_files = files[train_count + val_count:]
for file_set, split in [(train_files, "train"), (val_files, "val"), (test_files, "test")]:
for file in file_set:
shutil.copy(os.path.join(category_path, file), os.path.join(target_dir, split, category, file))
total_train = len(os.listdir("data/train/Violence")) + len(os.listdir("data/train/NonViolence"))
total_test = len(os.listdir("data/test/Violence")) + len(os.listdir("data/test/NonViolence"))
total_val = len(os.listdir("data/val/Violence")) + len(os.listdir("data/val/NonViolence"))
print(f"Train: {total_train}")
print(f"Test: {total_test}")
print(f"Val: {total_val}")
class ViolenceDataset(Dataset):
def __init__(self, dataset_folder, clip_length=16, transform=None):
self.dataset_folder = dataset_folder
self.clip_length = clip_length
self.transform = transform if transform else transforms.Compose([
transforms.Resize((112, 112)),
transforms.ToTensor()
])
self.video_paths = []
self.labels = []
        for label, category in enumerate(sorted(os.listdir(dataset_folder))):  # sorted => NonViolence=0, Violence=1, regardless of filesystem order
folder_path = os.path.join(dataset_folder, category)
if os.path.isdir(folder_path):
for video_name in os.listdir(folder_path):
self.video_paths.append(os.path.join(folder_path, video_name))
self.labels.append(label)
def __len__(self):
return len(self.video_paths)
def __getitem__(self, idx):
video_path = self.video_paths[idx]
label = self.labels[idx]
frames = self.extract_frames(video_path)
frames = torch.stack([self.transform(frame) for frame in frames]) # (frames, C, H, W)
frames = frames.permute(1, 0, 2, 3) # (C, frames, H, W)
print(f"Dataset Output: {frames.shape}") # (C, frames, 112, 112)
return frames, torch.tensor(label, dtype=torch.long)
def extract_frames(self, video_path):
cap = cv2.VideoCapture(video_path)
frames = []
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Uniformly sample clip_length frame indices across the whole video
        selected_frames = np.linspace(0, frame_count - 1, self.clip_length, dtype=int)
for i in range(frame_count):
ret, frame = cap.read()
if not ret:
break
if i in selected_frames:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (112, 112))
frames.append(frame)
        cap.release()
        # Pad clips from videos shorter than clip_length by repeating the last
        # frame, so every sample stacks to the same shape when batched
        while frames and len(frames) < self.clip_length:
            frames.append(frames[-1])
        return [transforms.ToPILImage()(frame) for frame in frames]
dataset_folder = "data"
batch_size = 8
train_dataset = ViolenceDataset(os.path.join(dataset_folder, "train"))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
for clips, labels in train_loader:
print(f"Loader Video Shape: {clips.shape}") # (batch, 3, frames, 112, 112)
break
class ViolenceDetectionLSTM(nn.Module):
def __init__(self, hidden_size=256, num_layers=2):
super(ViolenceDetectionLSTM, self).__init__()
self.cnn = models.r3d_18(pretrained=True)
self.cnn.fc = nn.Identity()
self.lstm = nn.LSTM(input_size=512, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
self.fc = nn.Linear(hidden_size, 1)
def forward(self, x):
print("\n--- Forward Started ---")
print("Input Shape:", x.shape) # (batch, 16, 3, 112, 112)
# (batch, frames, 3, 112, 112)
x = x.permute(0, 2, 1, 3, 4) # (batch, frames, C, H, W)
print("Permute:", x.shape) # (batch, frames, 3, 112, 112)
cnn_features = []
for t in range(x.shape[1]):
frame = x[:, t, :, :, :] # (batch, 3, 112, 112)
cnn_out = self.cnn(frame) # (batch, 512)
            cnn_features.append(cnn_out.unsqueeze(1))  # (batch, 1, 512)
        # (batch, frames, 512) after concatenating along the time dimension
cnn_features = torch.cat(cnn_features, dim=1)
print("LSTM, CNN:", cnn_features.shape) # (batch, frames, 512)
lstm_out, _ = self.lstm(cnn_features)
lstm_out = lstm_out[:, -1, :]
output = self.fc(lstm_out)
print("Model Output:", output.shape) # (batch, 1)
print("--- Forward Finished ---\n")
return output
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ViolenceDetectionLSTM().to(device)
# ------------------------
# Training
# ------------------------
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()
num_epochs = 10
train_losses, val_losses = [], []
for epoch in range(num_epochs):
model.train()
train_loss = 0.0
for clips, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
clips, labels = clips.to(device), labels.float().unsqueeze(1).to(device)
# Debug: Input Check
print(f"Input: {clips.shape}") # (batch, frames, C, H, W)
optimizer.zero_grad()
outputs = model(clips)
# Debug: Output Check
print(f"Output: {outputs.shape}") # (batch, 1)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
train_loss += loss.item()
train_loss /= len(train_loader)
train_losses.append(train_loss)
print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}")
torch.save(model.state_dict(), "best_violence_model_lstm.pth")
print("Training complete! Best model saved.")
RuntimeError: Given groups=1, weight of size [64, 3, 3, 7, 7], expected input[1, 8, 3, 112, 112] to have 3 channels, but got 8 channels instead
I have not been able to solve this problem for two days, even with ChatGPT and other tools. There is a problem with the channels, but I cannot figure out what it is and I am confused.
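
For anyone hitting the same error: torchvision's r3d_18 is a 3D CNN. Its first layer is an nn.Conv3d whose weight is exactly the [64, 3, 3, 7, 7] from the traceback, so it expects each batch as one 5D clip tensor of shape (batch, 3, frames, H, W). The loop in forward() instead feeds it single frames of shape (8, 3, 112, 112); Conv3d reads that 4D tensor as one unbatched clip whose channel dimension is the batch size, which is exactly the "expected input[1, 8, 3, 112, 112] to have 3 channels, but got 8 channels" complaint. Below is a minimal sketch of two possible fixes (class names are mine, untested on this dataset): either feed the whole clip to r3d_18 and drop the LSTM, or keep the LSTM and compute per-frame features with a 2D backbone such as resnet18.

import torch
import torch.nn as nn
import torchvision.models as tv_models
import torchvision.models.video as video_models

# Option A: give the 3D CNN the whole clip. Its 3D convolutions already model
# time, so the LSTM becomes unnecessary.
class ViolenceDetection3D(nn.Module):
    def __init__(self):
        super().__init__()
        self.cnn = video_models.r3d_18(pretrained=True)
        self.cnn.fc = nn.Linear(512, 1)   # single logit for BCEWithLogitsLoss

    def forward(self, x):                 # x: (batch, 3, frames, 112, 112), as the DataLoader delivers it
        return self.cnn(x)                # (batch, 1); no permute, no frame loop

# Option B: keep the CNN+LSTM design, but extract per-frame features with a
# 2D backbone (resnet18 here), which really does take (batch, 3, H, W).
class ViolenceDetectionLSTM(nn.Module):
    def __init__(self, hidden_size=256, num_layers=2):
        super().__init__()
        backbone = tv_models.resnet18(pretrained=True)
        backbone.fc = nn.Identity()       # 512-dim feature vector per frame
        self.cnn = backbone
        self.lstm = nn.LSTM(input_size=512, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):                 # x: (batch, 3, frames, 112, 112)
        x = x.permute(0, 2, 1, 3, 4)      # (batch, frames, 3, 112, 112)
        b, t = x.shape[:2]
        frames = x.reshape(b * t, 3, 112, 112)    # fold time into the batch
        feats = self.cnn(frames).view(b, t, 512)  # (batch, frames, 512)
        lstm_out, _ = self.lstm(feats)
        return self.fc(lstm_out[:, -1, :])        # (batch, 1)

Either variant still returns (batch, 1) logits, so the existing training loop with BCEWithLogitsLoss and labels.float().unsqueeze(1) should work unchanged. On torchvision >= 0.13, pretrained=True is deprecated in favour of the weights= argument.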