How should I set up observation space in gym?

41 Views Asked by At

I wanted to set up a gym environment to work on my school RL project. The environment is structured as follows: an Action Space of 7, with 6 for 3D movement and 1 for placing a block at coordinates (x, y, z). A file with an array of block coordinates will be supplied for the objective. There are certain rules for placing the block. Should I include the objective array and the blocks placed previously in the observation space (both are arrays)? Or should I return the observation with all the blocks placed previously, along with the current position? If yes, how should I do it? My current code is as follows:

def __init__(self):
        """Initialize the environment: load the target structure, derive the
        bounded 3-D workspace from it, and declare the action/observation
        spaces.

        NOTE(review): the observation space declares 5 dimensions
        (x, y, z, is_placed_before, is_objective_block) — confirm that the
        observations actually constructed in step()/reset() match this Box.
        """

        self.objective = np.load('objective.npy') # Load the objective from file
        self.min_x, self.max_x, self.min_y, self.max_y, self.min_z, self.max_z= objective_functions.calculate_objective_space(self.objective) # add padding to the objective space
        self.model = CompositeBlock()  # container tracking the blocks placed so far
        self.action_space = spaces.Discrete(7)  # 7 discrete actions: 6 moves + 1 place-block
        # Box over [x, y, z, is_placed_before, is_objective_block];
        # the two trailing flags are binary (0 or 1).
        self.observation_space =  spaces.Box(
                                    low=np.array([self.min_x, self.min_y, self.min_z,0,0]),
                                    high=np.array([self.max_x, self.max_y, self.max_z,1,1]),
                                    dtype=np.int16
                                    )
        self.reset()  # initialize cursor, model, and first observation

def step(self, action):
        """Advance the environment by one action.

        Actions 0-5 move the cursor one cell along +y, -y, -x, +x, +z, -z
        respectively, clipped to the workspace bounds; action 6 attempts to
        place a block at the cursor position.

        Args:
            action: integer in [0, 6] drawn from the Discrete(7) action space.

        Returns:
            observation (np.ndarray): [x, y, z, is_placed_before,
                is_objective_block] — matches the declared 5-dim Box space
                (the original returned only the 3-element position).
            reward (float): +1 for placing a block on an objective cell,
                -0.1 for a valid but non-objective placement, 0 otherwise.
            done (bool): True once every objective cell has a placed block.
            info (dict): empty.
        """
        reward = 0

        # Movement table: action -> (axis index, delta, lower bound, upper bound).
        # Replaces six copy-pasted if/elif branches with one lookup.
        moves = {
            0: (1, +1, self.min_y, self.max_y),
            1: (1, -1, self.min_y, self.max_y),
            2: (0, -1, self.min_x, self.max_x),
            3: (0, +1, self.min_x, self.max_x),
            4: (2, +1, self.min_z, self.max_z),
            5: (2, -1, self.min_z, self.max_z),
        }
        if action in moves:
            axis, delta, lo, hi = moves[action]
            # Clip so the cursor never leaves the (padded) objective space.
            self.cur_pos[axis] = np.clip(self.cur_pos[axis] + delta, lo, hi)

        # Is the cursor currently on one of the target (objective) cells?
        is_objective = bool(any(np.array_equal(self.cur_pos, arr) for arr in self.objective))

        if action == 6:  # Place block
            if building_rules(self.cur_pos, self.model.blocks):  # placement must satisfy the building rules
                self.model.add_block(self.cur_pos)
                # Reward objective cells, lightly penalize stray placements.
                reward = 1 if is_objective else -0.1

        # Episode ends when every objective cell has a placed block.
        done = set(map(tuple, self.objective)).issubset(set(map(tuple, self.model.blocks)))

        # Build the 5-element observation the observation_space declares:
        # [x, y, z, is_placed_before, is_objective_block].
        is_placed = bool(any(np.array_equal(self.cur_pos, arr) for arr in self.model.blocks))
        self.observation = np.array([*self.cur_pos, int(is_placed), int(is_objective)])
        return self.observation, reward, done, {}
    
    def _get_info(self):
        pass
    
    def reset(self, seed=None):
        """Reset the episode: clear all placed blocks and move the cursor to the origin.

        Args:
            seed: optional RNG seed, forwarded to gym.Env.reset to seed
                self.np_random.

        Returns:
            np.ndarray: initial observation [x, y, z, is_placed_before,
                is_objective_block], matching the declared 5-dim Box space
                (the original returned only the 3-element position).
        """
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Fresh model (no blocks placed) and cursor at the origin (0, 0, 0).
        self.model = CompositeBlock()
        self.cur_pos = np.array([0, 0, 0])

        # Nothing has been placed yet, so is_placed_before is 0; the origin
        # itself may or may not be an objective cell.
        is_objective = int(any(np.array_equal(self.cur_pos, arr) for arr in self.objective))
        self.observation = np.array([*self.cur_pos, 0, is_objective])
        return self.observation

    def render(self):
        """Visualize the environment (optional for gym).

        Rendering is not implemented; this is a no-op returning None.
        """
    

`

I will need to train a model afterwards; what would be the best option for simplicity? Currently, my observation is an array with 5 elements consisting of the current position's x, y, and z coordinates, is_placed_before (0 or 1), and is_objective_block (0 or 1). I feel like the agent does not know where the objective blocks are. I wonder how to fit the objective blocks into the observation.

0

There are 0 best solutions below