Python plays Mario
01/05/2023
Playing around with ChatGPT and Google Colab, I've got this script working: it sets up a neural-network agent and runs it through episodes of Super Mario Bros (the actual training update is still a placeholder I need to fill in — see the sketch at the end).
I’m really keen to play with this more, just need more time in my days!
!pip uninstall -y gym
# gym-super-mario-bros (and the nes-py emulator behind it) aren't preinstalled on Colab
!pip install gym-super-mario-bros nes-py
# pin gym to a version whose API matches the code below (reset/step return the old-style tuples)
!pip install gym==0.23.1
import gym
import gym_super_mario_bros
from gym.wrappers import RecordVideo
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
# custom wrapper: take a random number of no-op steps on reset so each episode
# starts from a slightly different frame (same idea as the standard Atari NoopReset wrapper)
class NoopResetEnv(gym.Wrapper):
    def __init__(self, env, noop_max=30):
        super().__init__(env)
        self.noop_max = noop_max
        self.override_num_noops = None
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        if self.override_num_noops is not None:
            noops = self.override_num_noops
        else:
            noops = np.random.randint(1, self.noop_max + 1)
        assert noops > 0
        obs = None
        for _ in range(noops):
            obs, _, done, _ = self.env.step(self.noop_action)
            if done:
                obs = self.env.reset(**kwargs)
        return obs
# build the environment with the custom wrapper and record video of each run
env = NoopResetEnv(gym_super_mario_bros.make('SuperMarioBros-v0'), noop_max=30)
# RecordVideo expects a folder to write recordings into, not a single .mp4 file
env = RecordVideo(env, './video')
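# Side note: the unwrapped Mario environment exposes the full NES controller as its
# action space, which is far more button combinations than the network really needs.
# nes-py ships a JoypadSpace wrapper and gym_super_mario_bros ships ready-made action
# lists; something like this (not wired into the script above, just a note to self)
# should cut the action space down to a handful of useful moves:
#   from nes_py.wrappers import JoypadSpace
#   from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
#   env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), SIMPLE_MOVEMENT)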
# define the agent: a small convnet that maps a raw 240x256 RGB frame to a
# softmax over the environment's actions
model = keras.Sequential([
    keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=(240, 256, 3)),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    keras.layers.MaxPooling2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dense(units=env.action_space.n, activation='softmax')
])
# run one episode with the current weights, always taking the highest-scoring action
def run_episode():
    state = env.reset()
    state = np.expand_dims(state, axis=0)
    total_reward = 0
    done = False
    while not done:
        action = np.argmax(model.predict(state, verbose=0))
        state, reward, done, info = env.step(action)
        state = np.expand_dims(state, axis=0)
        total_reward += reward
    return total_reward
# run the agent for a few episodes and save the model weights after each one
rewards = []
num_episodes = 10
for i in range(num_episodes):
    reward = run_episode()
    rewards.append(reward)
    print(f"Episode {i + 1}: Reward = {reward}")
    # TODO: update model weights based on reward (still a placeholder;
    # see the sketch at the end of the post for one way to fill it in)
    model.save_weights(f'model_weights_episode_{i+1}.h5')
env.close()
# plot the rewards obtained in each episode
plt.plot(range(1, num_episodes+1), rewards)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Mario-playing agent performance')
plt.show()
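The weight update is the bit I still need to write. As a note to future me, here's a minimal sketch of what a REINFORCE-style policy-gradient update could look like, reusing the model and env defined above. Everything specific in it (the run_training_episode name, the Adam optimiser and learning rate, the discount factor, the return normalisation, and sampling actions from the softmax instead of taking the argmax) is my own assumption rather than anything the script above already does.

import numpy as np
import tensorflow as tf

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

def run_training_episode(env, model, gamma=0.99):
    # play one episode, sampling actions from the policy so the agent explores;
    # keeps every frame in memory, which is fine for short test runs but a real
    # version would minibatch
    states, actions, rewards = [], [], []
    state = env.reset()
    done = False
    while not done:
        obs = np.expand_dims(state, axis=0).astype(np.float32) / 255.0
        probs = model(obs, training=False).numpy()[0]
        probs = probs.astype(np.float64)
        probs = probs / probs.sum()  # guard against float32 rounding
        action = np.random.choice(len(probs), p=probs)
        next_state, reward, done, info = env.step(action)
        states.append(state)
        actions.append(action)
        rewards.append(reward)
        state = next_state

    # discounted returns, normalised so the gradient scale stays reasonable
    returns = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    returns = (returns - returns.mean()) / (returns.std() + 1e-8)

    states_t = tf.convert_to_tensor(np.array(states, dtype=np.float32) / 255.0)
    actions_t = tf.convert_to_tensor(actions, dtype=tf.int32)
    returns_t = tf.convert_to_tensor(returns)

    # REINFORCE: push up the log-probability of actions followed by high
    # discounted returns, push down the rest
    with tf.GradientTape() as tape:
        probs = model(states_t, training=True)
        taken = tf.gather(probs, actions_t, axis=1, batch_dims=1)
        loss = -tf.reduce_mean(tf.math.log(taken + 1e-8) * returns_t)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return float(np.sum(rewards))

The idea would be to call run_training_episode(env, model) in the loop above in place of run_episode(), so the weights saved after each episode actually change.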