Python plays Mario

01/05/2023

Playing around with ChatGPT and Google Colab, I have this working script that trains a neural-network model to play Mario.

I’m really keen to play with this more — I just need more time in my days!

!pip uninstall -y gym
!pip install gym==0.23.1

import gym
import gym_super_mario_bros
from gym.wrappers import RecordVideo
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# define the custom wrapper
class NoopResetEnv(gym.Wrapper):
    """Randomise the starting state by taking 1..noop_max NOOP steps on reset.

    Mirrors the classic Atari-preprocessing wrapper: each reset performs a
    random number of no-op actions so episodes don't always begin from the
    exact same frame.
    """

    def __init__(self, env, noop_max=30):
        super().__init__(env)
        self.noop_max = noop_max
        # May be set externally to force a fixed no-op count instead of a random one.
        self.override_num_noops = None
        self.noop_action = 0
        # Action 0 must actually mean "do nothing" for this wrapper to make sense.
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def reset(self, **kwargs):
        """Reset the wrapped env, then step NOOP a random number of times."""
        self.env.reset(**kwargs)
        noops = self.override_num_noops
        if noops is None:
            noops = np.random.randint(1, self.noop_max + 1)
        assert noops > 0
        obs = None
        for _ in range(noops):
            obs, _, done, _ = self.env.step(self.noop_action)
            # If the no-ops ran the episode to completion, start over.
            if done:
                obs = self.env.reset(**kwargs)
        return obs

# Build the Mario environment, add random no-op resets, and record videos.
env = NoopResetEnv(gym_super_mario_bros.make('SuperMarioBros-v0'), noop_max=30)
# RecordVideo's second argument is a video *folder*, not a file name; the
# wrapper writes numbered .mp4 files inside it automatically. Passing
# './video.mp4' would create a directory with a misleading name.
env = RecordVideo(env, './video')

# define the agent
# Policy network: raw 240x256 RGB frame in, a softmax over the env's
# discrete actions out.
model = keras.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu',
                              input_shape=(240, 256, 3)))
model.add(keras.layers.MaxPooling2D(pool_size=2))
model.add(keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(units=128, activation='relu'))
model.add(keras.layers.Dense(units=env.action_space.n, activation='softmax'))

# define the functions to train the agent
def run_episode():
    """Play one full episode greedily with the current model.

    Returns the total (undiscounted) reward accumulated over the episode.
    Uses the module-level `env` and `model`.
    """
    # Model expects a batch axis, so wrap each frame as a batch of one.
    obs = np.expand_dims(env.reset(), axis=0)
    episode_reward = 0
    done = False
    while not done:
        # Greedy policy: pick the action with the highest predicted probability.
        action = np.argmax(model.predict(obs))
        obs, reward, done, info = env.step(action)
        obs = np.expand_dims(obs, axis=0)
        episode_reward += reward
    return episode_reward

# Run the agent for a fixed number of episodes, recording each episode's
# total reward and checkpointing the weights afterwards.
rewards = []
num_episodes = 10
for i in range(num_episodes):
    reward = run_episode()
    rewards.append(reward)
    print(f"Episode {i + 1}: Reward = {reward}")
    # NOTE(review): despite the original comment, no learning step happens
    # here — the weights saved below are identical every episode. A training
    # update (e.g. a policy-gradient step using `reward`) would need to be
    # added for the agent to actually improve.
    model.save_weights(f'model_weights_episode_{i+1}.h5')

env.close()

# Visualise how the per-episode reward evolved over the run.
episode_numbers = range(1, num_episodes + 1)
plt.plot(episode_numbers, rewards)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Mario-playing agent performance')
plt.show()