The problem I am facing is that when I am training my agent using PPO, the environment doesn't render using Pygame, but when I manually step through the environment using random actions, the rendering works fine. Here is my code for the environment,
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import mujoco
import glfw
import cv2
import pygame
xml = """
-->
<mujoco model="Humanoid">
<option timestep="0.005"/>
<visual>
<map force="0.1" zfar="30"/>
<rgba haze="0.15 0.25 0.35 1"/>
<global offwidth="2560" offheight="1440" elevation="-20" azimuth="120"/>
</visual>
<statistic center="0 0 0.7"/>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".3 .5 .7" rgb2="0 0 0" width="32" height="512"/>
<texture name="body" type="cube" builtin="flat" mark="cross" width="128" height="128" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01"/>
<material name="body" texture="body" texuniform="true" rgba="0.8 0.6 .4 1"/>
<texture name="grid" type="2d" builtin="checker" width="512" height="512" rgb1=".1 .2 .3" rgb2=".2 .3 .4"/>
<material name="grid" texture="grid" texrepeat="1 1" texuniform="true" reflectance=".2"/>
</asset>
<default>
<motor ctrlrange="-1 1" ctrllimited="true"/>
<default class="body">
<!-- geoms -->
<geom type="capsule" condim="1" friction=".7" solimp=".9 .99 .003" solref=".015 1" material="body"/>
<default class="thigh">
<geom size=".06"/>
</default>
<default class="shin">
<geom fromto="0 0 0 0 0 -.3" size=".049"/>
</default>
<default class="foot">
<geom size=".027"/>
<default class="foot1">
<geom fromto="-.07 -.01 0 .14 -.03 0"/>
</default>
<default class="foot2">
<geom fromto="-.07 .01 0 .14 .03 0"/>
</default>
</default>
<default class="arm_upper">
<geom size=".04"/>
</default>
<default class="arm_lower">
<geom size=".031"/>
</default>
<default class="hand">
<geom type="sphere" size=".04"/>
</default>
<!-- joints -->
<joint type="hinge" damping=".2" stiffness="1" armature=".01" limited="true" solimplimit="0 .99 .01"/>
<default class="joint_big">
<joint damping="5" stiffness="10"/>
<default class="hip_x">
<joint range="-30 10"/>
</default>
<default class="hip_z">
<joint range="-60 35"/>
</default>
<default class="hip_y">
<joint axis="0 1 0" range="-150 20"/>
</default>
<default class="joint_big_stiff">
<joint stiffness="20"/>
</default>
</default>
<default class="knee">
<joint pos="0 0 .02" axis="0 -1 0" range="-160 2"/>
</default>
<default class="ankle">
<joint range="-50 50"/>
<default class="ankle_y">
<joint pos="0 0 .08" axis="0 1 0" stiffness="6"/>
</default>
<default class="ankle_x">
<joint pos="0 0 .04" stiffness="3"/>
</default>
</default>
<default class="shoulder">
<joint range="-85 60"/>
</default>
<default class="elbow">
<joint range="-100 50" stiffness="0"/>
</default>
</default>
</default>
<worldbody>
<geom name="floor" size="0 0 .05" type="plane" material="grid" condim="3"/>
<light name="spotlight" mode="targetbodycom" target="torso" diffuse=".8 .8 .8" specular="0.3 0.3 0.3" pos="0 -6 4" cutoff="30"/>
<body name="torso" pos="0 0 1.282" childclass="body">
<light name="top" pos="0 0 2" mode="trackcom"/>
<camera name="back" pos="-3 0 1" xyaxes="0 -1 0 1 0 2" mode="trackcom"/>
<camera name="side" pos="0 -3 1" xyaxes="1 0 0 0 1 2" mode="trackcom"/>
<freejoint name="root"/>
<geom name="torso" fromto="0 -.08 0 0 .08 0" size=".08"/>
<geom name="waist_upper" fromto="-.01 -.06 -.12 -.01 .06 -.12" size=".06"/>
<body name="head" pos="0 0 .19">
<geom name="head" type="sphere" size=".09"/>
<camera name="egocentric" pos=".09 0 0" xyaxes="0 -1 0 .1 0 1" fovy="80"/>
</body>
<body name="waist_lower" pos="-.01 0 -.26">
<geom name="waist_lower" fromto="0 -.06 0 0 .06 0" size=".06"/>
<joint name="abdomen_z" pos="0 0 .065" axis="0 0 1" range="-45 45" class="joint_big_stiff"/>
<joint name="abdomen_y" pos="0 0 .065" axis="0 1 0" range="-75 30" class="joint_big"/>
<body name="pelvis" pos="0 0 -.165">
<joint name="abdomen_x" pos="0 0 .1" axis="1 0 0" range="-35 35" class="joint_big"/>
<geom name="butt" fromto="-.02 -.07 0 -.02 .07 0" size=".09"/>
<body name="thigh_right" pos="0 -.1 -.04">
<joint name="hip_x_right" axis="1 0 0" class="hip_x"/>
<joint name="hip_z_right" axis="0 0 1" class="hip_z"/>
<joint name="hip_y_right" class="hip_y"/>
<geom name="thigh_right" fromto="0 0 0 0 .01 -.44" class="thigh"/>
<body name="shin_right" pos="0 .01 -.4">
<joint name="knee_right" class="knee"/>
<geom name="shin_right" class="shin"/>
<body name="foot_right" pos="0 0 -.39">
<joint name="ankle_y_right" class="ankle_y"/>
<joint name="ankle_x_right" class="ankle_x" axis="1 0 .5"/>
<geom name="foot1_right" class="foot1"/>
<geom name="foot2_right" class="foot2"/>
</body>
</body>
</body>
<body name="thigh_left" pos="0 .1 -.04">
<joint name="hip_x_left" axis="-1 0 0" class="hip_x"/>
<joint name="hip_z_left" axis="0 0 -1" class="hip_z"/>
<joint name="hip_y_left" class="hip_y"/>
<geom name="thigh_left" fromto="0 0 0 0 -.01 -.44" class="thigh"/>
<body name="shin_left" pos="0 -.01 -.4">
<joint name="knee_left" class="knee"/>
<geom name="shin_left" fromto="0 0 0 0 0 -.3" class="shin"/>
<body name="foot_left" pos="0 0 -.39">
<joint name="ankle_y_left" class="ankle_y"/>
<joint name="ankle_x_left" class="ankle_x" axis="-1 0 -.5"/>
<geom name="foot1_left" class="foot1"/>
<geom name="foot2_left" class="foot2"/>
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -.17 .06">
<joint name="right_shoulder1" axis="2 1 1" class="shoulder"/>
<joint name="right_shoulder2" axis="0 -1 1" class="shoulder"/>
<geom name="right_upper_arm" fromto="0 0 0 .16 -.16 -.16" class="arm_upper"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint name="right_elbow" axis="0 -1 1" class="elbow"/>
<geom name="right_lower_arm" fromto=".01 .01 .01 .17 .17 .17" class="arm_lower"/>
<body name="right_hand" pos=".18 .18 .18">
<geom name="right_hand" zaxis="1 1 1" class="hand" size=".09"/>
</body>
</body>
</body>
<body name="left_upper_arm" pos="0 .17 .06">
<joint name="left_shoulder1" axis="-2 1 -1" class="shoulder"/>
<joint name="left_shoulder2" axis="0 -1 -1" class="shoulder"/>
<geom name="left_upper_arm" fromto="0 0 0 .16 .16 -.16" class="arm_upper"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint name="left_elbow" axis="0 -1 -1" class="elbow"/>
<geom name="left_lower_arm" fromto=".01 -.01 .01 .17 -.17 .17" class="arm_lower"/>
<body name="left_hand" pos=".18 -.18 .18">
<geom name="left_hand" zaxis="1 -1 1" class="hand" size=".09"/>
</body>
</body>
</body>
</body>
<geom name="ring_floor" type="box" size="3 3 0.1" pos="0 0 -0.05" rgba="0.5 0.5 0.5 1"/>
<!-- Front ropes -->
<geom name="front_rope1" type="capsule" fromto="-3 3 0.5 3 3 0.5" size="0.05" rgba="1 1 1 1"/>
<geom name="front_rope2" type="capsule" fromto="-3 3 1 3 3 1" size="0.05" rgba="1 1 1 1"/>
<geom name="front_rope3" type="capsule" fromto="-3 3 1.5 3 3 1.5" size="0.05" rgba="1 1 1 1"/>
<!-- Back ropes -->
<geom name="back_rope1" type="capsule" fromto="-3 -3 0.5 3 -3 0.5" size="0.05" rgba="1 1 1 1"/>
<geom name="back_rope2" type="capsule" fromto="-3 -3 1 3 -3 1" size="0.05" rgba="1 1 1 1"/>
<geom name="back_rope3" type="capsule" fromto="-3 -3 1.5 3 -3 1.5" size="0.05" rgba="1 1 1 1"/>
<!-- Left ropes -->
<geom name="left_rope1" type="capsule" fromto="-3 -3 0.5 -3 3 0.5" size="0.05" rgba="1 1 1 1"/>
<geom name="left_rope2" type="capsule" fromto="-3 -3 1 -3 3 1" size="0.05" rgba="1 1 1 1"/>
<geom name="left_rope3" type="capsule" fromto="-3 -3 1.5 -3 3 1.5" size="0.05" rgba="1 1 1 1"/>
<!-- Right ropes -->
<geom name="right_rope1" type="capsule" fromto="3 -3 0.5 3 3 0.5" size="0.05" rgba="1 1 1 1"/>
<geom name="right_rope2" type="capsule" fromto="3 -3 1 3 3 1" size="0.05" rgba="1 1 1 1"/>
<geom name="right_rope3" type="capsule" fromto="3 -3 1.5 3 3 1.5" size="0.05" rgba="1 1 1 1"/>
<geom name="corner1" type="cylinder" size="0.1 1" pos="3 3 1" rgba="0 0 0 1"/>
<geom name="corner2" type="cylinder" size="0.1 1" pos="-3 3 1" rgba="0 0 0 1"/>
<geom name="corner3" type="cylinder" size="0.1 1" pos="3 -3 1" rgba="0 0 0 1"/>
<geom name="corner4" type="cylinder" size="0.1 1" pos="-3 -3 1" rgba="0 0 0 1"/>
</worldbody>
<contact>
<exclude body1="waist_lower" body2="thigh_right"/>
<exclude body1="waist_lower" body2="thigh_left"/>
</contact>
<tendon>
<fixed name="hamstring_right" limited="true" range="-0.3 2">
<joint joint="hip_y_right" coef=".5"/>
<joint joint="knee_right" coef="-.5"/>
</fixed>
<fixed name="hamstring_left" limited="true" range="-0.3 2">
<joint joint="hip_y_left" coef=".5"/>
<joint joint="knee_left" coef="-.5"/>
</fixed>
</tendon>
<actuator>
<motor name="abdomen_y" gear="40" joint="abdomen_y"/>
<motor name="abdomen_z" gear="40" joint="abdomen_z"/>
<motor name="abdomen_x" gear="40" joint="abdomen_x"/>
<motor name="hip_x_right" gear="40" joint="hip_x_right"/>
<motor name="hip_z_right" gear="40" joint="hip_z_right"/>
<motor name="hip_y_right" gear="120" joint="hip_y_right"/>
<motor name="knee_right" gear="80" joint="knee_right"/>
<motor name="ankle_x_right" gear="20" joint="ankle_x_right"/>
<motor name="ankle_y_right" gear="20" joint="ankle_y_right"/>
<motor name="hip_x_left" gear="40" joint="hip_x_left"/>
<motor name="hip_z_left" gear="40" joint="hip_z_left"/>
<motor name="hip_y_left" gear="120" joint="hip_y_left"/>
<motor name="knee_left" gear="80" joint="knee_left"/>
<motor name="ankle_x_left" gear="20" joint="ankle_x_left"/>
<motor name="ankle_y_left" gear="20" joint="ankle_y_left"/>
<motor name="right_shoulder1" gear="20" joint="right_shoulder1"/>
<motor name="right_shoulder2" gear="20" joint="right_shoulder2"/>
<motor name="right_elbow" gear="40" joint="right_elbow"/>
<motor name="left_shoulder1" gear="20" joint="left_shoulder1"/>
<motor name="left_shoulder2" gear="20" joint="left_shoulder2"/>
<motor name="left_elbow" gear="40" joint="left_elbow"/>
</actuator>
<keyframe>
<!--
The values below are split into rows for readibility:
torso position
torso orientation
spinal
right leg
left leg
arms
-->
<key name="squat" qpos="0 0 0.596
0.988015 0 0.154359 0
0 0.4 0
-0.25 -0.5 -2.5 -2.65 -0.8 0.56
-0.25 -0.5 -2.5 -2.65 -0.8 0.56
0 0 0 0 0 0"/>
<key name="stand_on_left_leg" qpos="0 0 1.21948
0.971588 -0.179973 0.135318 -0.0729076
-0.0516 -0.202 0.23
-0.24 -0.007 -0.34 -1.76 -0.466 -0.0415
-0.08 -0.01 -0.37 -0.685 -0.35 -0.09
0.109 -0.067 -0.7 -0.05 0.12 0.16"/>
</keyframe>
</mujoco>
"""
target_stance = [0, 0, 1.2,
0.5, 0, 0, 0, # torso orientation
0, 0, 0, # spine
-0.5, 0.2, -1.5, -1.7, -0.5, 0.3, # right leg
0.5, -0.2, -1.5, -1.7, -0.5, -0.3, # left leg
0.1, -0.1, -0.7, -0.1, 0.2, 0.3] # arms
class BoxingEnv(gym.Env):
def __init__(self):
# Load the MuJoCo model from XML string
self.model = mujoco.MjModel.from_xml_string(xml)
self.sim = mujoco.MjData(self.model)
# Define action and observation spaces
num_actuators = self.model.nu
num_observations = self.model.nq # This is just an example; adjust as needed
self.action_space = spaces.Box(low=-1, high=1, shape=(num_actuators,), dtype=np.float32)
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_observations,), dtype=np.float32)
self.renderer = mujoco.Renderer(model = self.model, height=720, width=1366)
self.done = False
self.fallen = bool(self.sim.qpos[2] < 0.3)
self.timestep = 0
num_observations = len(self._get_observation())
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_observations,), dtype=np.float32)
pygame.init()
self.screen = pygame.display.set_mode((1366, 720)) # Adjust the resolution as needed
def step(self, action):
self.timestep += 1
# Apply the action to the environment
self.sim.ctrl[:] = action
mujoco.mj_step(self.model, self.sim)
# Get the observation, reward, done, and info
observation = self._get_observation()
reward = self._get_reward()
done = self._get_done()
self.done = done
trunctuated = False
info = {}
return observation, reward, done, trunctuated, info
def reset(self, seed = None, **kwargs):
# Set initial conditions
init_qpos = [0, 0, 1.2, 0, 0, 0, 0, ...]
init_qvel = [0, 0, 0, 0, 0, 0, 0, ...]
# Reset MuJoCo
self.sim = mujoco.MjData(self.model)
mujoco.mj_forward(self.model, self.sim)
# Get observation
obs = self._get_observation()
mujoco.mj_forward(self.model, self.sim)
reset_info = {} # This can be populated with any reset-specific info if needed
return obs, reset_info
def render(self, mode='human'):
self.renderer.update_scene(self.sim)
img = self.renderer.render()
pygame_img = pygame.surfarray.make_surface(np.transpose(img, (1, 0, 2)))
self.screen.blit(pygame_img, (0, 0))
pygame.display.flip()
if self.timestep >= 20 * 60: # 20 secs at 60 fps
obs = self.reset()
self.timestep = 0
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
exit()
def _get_observation(self):
# Joint positions
qpos = self.sim.qpos
# Joint velocities
qvel = self.sim.qvel
contact_forces = np.sum(np.sqrt(np.sum(np.square(np.array(self.sim.cfrc_ext)), axis=1)))
# Concatenate and return as a single observation vector
observation = np.concatenate([qpos, qvel, [contact_forces]])
return observation
def _get_reward(self):
stance_diff = np.linalg.norm(self.sim.qpos - target_stance)
if self.fallen:
return -10
else:
return 0
return -stance_diff
def _get_done(self):
if self.fallen:
return True
else:
return False
def sample_random_action(self):
# Get action bounds
action_low = self.action_space.low
action_high = self.action_space.high
# Sample random action as numpy array
action = np.random.uniform(low=action_low, high=action_high)
return action
class BoxingEnvWrapper(gym.Wrapper):
def __init__(self, env):
super().__init__(env)
def reset(self, seed=None, **kwargs):
obs = self.env.reset()
return obs
def step(self, action):
obs, reward, done, trunctuated, info = self.env.step(action)
# Calculate stance reward
stance_diff = np.linalg.norm(self.env.sim.qpos - target_stance)
reward = -stance_diff
return obs, reward, trunctuated, done, info
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
env = BoxingEnvWrapper(BoxingEnv())
from gymnasium.envs.registration import register
register(
id='BoxingEnv-v2',
entry_point='final_boxing:BoxingEnv',
)
from stable_baselines3.common.vec_env import VecEnvWrapper
env = DummyVecEnv([lambda: BoxingEnvWrapper(BoxingEnv())])
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)
I tried using other forms of rendering such as opencv but it doesnt even render at all unless I use pygame. I expect the code to render the mujoco environment while training so I can check in on the progress but the code just displays a black screen.
You are rendering in human mode. As your env is a mujocoEnv type, this rendering mode should raise a mujoco rendering window. If you want an image to use as source for your pygame object, you should render the mujocoEnv using rgb_array mode, which will return you the environment's camera image in RGB format.