My environment observations come in as (19, 19, 28), where (19, 19) is the size of the grid around Mario (9 cells up, 9 cells right, etc.) and 28 is 7 channels x 4 frames (stacked with VecFrameStack). The 7 channels are one-hot representations of each cell type, such as solid blocks, stompable enemies, etc.
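For reference, here is a minimal sketch of that per-step observation space (only the shape comes from the description above; the bounds, dtype, and names are assumptions, and the actual MarioGymEnv may define them differently):

Code:
# Hypothetical sketch of the per-step observation space described above.
# Bounds and dtype are assumptions; only the shape is taken from the post.
import numpy as np
import gym

GRID_SIZE = 19    # 19x19 grid centered on Mario (9 cells in each direction)
CELL_TYPES = 7    # one-hot channels per cell (solid block, stompable enemy, ...)

observation_space = gym.spaces.Box(
    low=0.0, high=1.0,
    shape=(GRID_SIZE, GRID_SIZE, CELL_TYPES),
    dtype=np.float32,
)
# VecFrameStack(n_stack=4) stacks along the last axis, giving (19, 19, 28).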
I have tried adjusting hyperparameters and reward functions, but that did not work, and I suspect the problem lies in the feature extraction.
Any help would be appreciated.
Training script:
Code:
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.vec_env import SubprocVecEnv, VecFrameStack, VecMonitor


def make_env(rank):
    def _init():
        env = MarioGymEnv(port=5555 + rank)
        env = ThrottleEnv(env, delay=0)
        env = SkipEnv(env, skip=2)  # custom environment wrapper to skip every other frame
        return env
    return _init


def main():
    num_cpu = 12
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    env = VecFrameStack(env, n_stack=4)
    env = VecMonitor(env)

    policy_kwargs = dict(
        features_extractor_class=Cnn,
    )

    model = PPO(
        'CnnPolicy',
        env,
        policy_kwargs=policy_kwargs,
        verbose=1,
        tensorboard_log='./board',
        learning_rate=1e-3,
        n_steps=256,
        batch_size=256,
    )

    TOTAL_TIMESTEPS = 5_000_000
    TB_LOG_NAME = 'PPO-CustomCNN-ScheduledLR'

    checkpoint_callback = CheckpointCallback(
        save_freq=max(10_000 // num_cpu, 1),
        save_path='./models/',
        name_prefix='marioAI'
    )

    try:
        model.learn(
            total_timesteps=TOTAL_TIMESTEPS,
            callback=checkpoint_callback,
            tb_log_name=TB_LOG_NAME
        )
        model.save('marioAI_final')
    except Exception as e:
        print(e)
        model.save('marioAI_error')


if __name__ == '__main__':
    main()
Code:
import gym
import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor


class Cnn(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 256):
        super().__init__(observation_space, features_dim)
        # Observations are channels-last (H, W, C), so the channel count is the last dim
        n_input_channels = observation_space.shape[2]
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),   # stride 2 downsamples
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),  # stride 2 downsamples
            nn.ReLU(),
        )
        # Infer the flattened feature size with a dummy forward pass
        with torch.no_grad():
            dummy_input = torch.zeros(
                (1, n_input_channels, observation_space.shape[0], observation_space.shape[1])
            )
            output = self.cnn(dummy_input)
            n_flattened_features = output.flatten(1).shape[1]
        self.linear_head = nn.Sequential(
            nn.Linear(n_flattened_features, features_dim),
            nn.ReLU()
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # Convert from channels-last (B, H, W, C) to channels-first (B, C, H, W)
        observations = observations.permute(0, 3, 1, 2)
        cnn_output = self.cnn(observations)
        flattened_features = torch.flatten(cnn_output, start_dim=1)
        features = self.linear_head(flattened_features)
        return features
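As a quick sanity check, the extractor can be run standalone on a dummy batch to confirm the output size matches features_dim. This is just a sketch assuming the (19, 19, 28) channels-last observations described above:

Code:
# Standalone check of the feature extractor (shapes taken from the post;
# the [0, 1] bounds are an assumption for the one-hot grid).
import gym
import numpy as np
import torch

space = gym.spaces.Box(low=0.0, high=1.0, shape=(19, 19, 28), dtype=np.float32)
extractor = Cnn(space, features_dim=256)

dummy_batch = torch.zeros((8, 19, 19, 28))   # batch of 8 channels-last observations
print(extractor(dummy_batch).shape)          # expected: torch.Size([8, 256])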