Commit 76427e68 authored by hezhiqiang01

update continuous

parent 90258d12
@@ -22,14 +22,15 @@ The original MAPPO code wraps the environment in an overly complex way; this project directly ...
## Usage
- (removed) The environment part is an empty implementation; the file `light_mappo/envs/env_core.py` contains the implementation of the environment part: [Code](https://github.com/tinyzqh/light_mappo/blob/main/envs/env_wrappers.py)
- (added) The environment part is an empty implementation; the file `light_mappo/envs/env_core.py` contains the implementation of the environment part: [Code](https://github.com/tinyzqh/light_mappo/blob/main/envs/env_core.py)
```python
import numpy as np


class EnvCore(object):  # previously `class Env(object):`
    """
    # Agents in the environment
    """

    def __init__(self):  # previously `def __init__(self, i):`
        self.agent_num = 2   # number of agents (small planes), set to two here
        self.obs_dim = 14    # dimension of each agent's observation
        self.action_dim = 5  # dimension of each agent's action, assumed to be five here
```
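Since the environment part is deliberately left as an empty stub, the idea is to replace the random observations and rewards with your own task while keeping the same interface. Below is a minimal sketch of what a filled-in core could look like; the point-mass dynamics and the class name `MyEnvCore` are made up for illustration and are not part of this commit:

```python
import numpy as np


class MyEnvCore(object):
    """Hypothetical filled-in version of the EnvCore stub (illustrative only)."""

    def __init__(self):
        self.agent_num = 2    # two agents, as in the stub
        self.obs_dim = 14     # per-agent observation dimension
        self.action_dim = 5   # per-agent action dimension

    def reset(self):
        # return one observation vector of shape (obs_dim,) per agent
        self.state = np.zeros((self.agent_num, self.obs_dim))
        return [self.state[i].copy() for i in range(self.agent_num)]

    def step(self, actions):
        # actions: a list with one vector of shape (action_dim,) per agent
        obs, rewards, dones, infos = [], [], [], []
        for i in range(self.agent_num):
            # toy dynamics: nudge the first action_dim state entries toward the action
            self.state[i, : self.action_dim] += 0.1 * np.asarray(actions[i])
            obs.append(self.state[i].copy())
            rewards.append([-float(np.linalg.norm(self.state[i]))])  # 1-element list per agent
            dones.append(False)
            infos.append({})
        return [obs, rewards, dones, infos]
```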
The continuous-action wrapper updated by this commit, the `ContinuousActionEnv` class:
```python
import gym
from gym import spaces
import numpy as np
from envs.env_core import EnvCore


class ContinuousActionEnv(object):
    """Wrapper for continuous-action environments."""

    def __init__(self):
        self.env = EnvCore()
        self.num_agent = self.env.agent_num

        self.signal_obs_dim = self.env.obs_dim
        self.signal_action_dim = self.env.action_dim

        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False

        self.movable = True

        # configure spaces
        self.action_space = []
        self.observation_space = []
        self.share_observation_space = []

        share_obs_dim = 0
        total_action_space = []
        for agent in range(self.num_agent):
            # physical action space
            u_action_space = spaces.Box(
                low=-np.inf, high=+np.inf, shape=(self.signal_action_dim,), dtype=np.float32
            )

            if self.movable:
                total_action_space.append(u_action_space)

            # total action space
            self.action_space.append(total_action_space[0])

            # observation space
            share_obs_dim += self.signal_obs_dim
            self.observation_space.append(
                spaces.Box(
                    low=-np.inf, high=+np.inf, shape=(self.signal_obs_dim,), dtype=np.float32
                )
            )  # [-inf, inf]

        self.share_observation_space = [
            spaces.Box(low=-np.inf, high=+np.inf, shape=(share_obs_dim,), dtype=np.float32)
            for _ in range(self.num_agent)
        ]

    def step(self, actions):
        """
        Assumed shape of the input actions:
        # actions shape = (5, 2, 5)
        # 5 parallel environment threads, each containing 2 agents, and each agent's action is a 5-dimensional vector
        """
        results = self.env.step(actions)
        obs, rews, dones, infos = results
        return np.stack(obs), np.stack(rews), np.stack(dones), infos

    def reset(self):
        obs = self.env.reset()
        return np.stack(obs)

    def close(self):
        pass

    def render(self, mode="rgb_array"):
        pass

    def seed(self, seed):
        pass
```
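As a sanity check (not part of the commit), a single wrapper instance can be driven directly with random continuous actions. Note that the `(5, 2, 5)` shape mentioned in the docstring refers to 5 parallel environment threads; one unwrapped instance only sees the (agent, action) dimensions. The import path below is an assumption based on the class name:

```python
import numpy as np
from envs.env_continuous import ContinuousActionEnv  # assumed module path

env = ContinuousActionEnv()
obs = env.reset()                                  # stacked observations, shape (2, 14)

# one continuous action vector per agent: shape (num_agent, action_dim) = (2, 5)
actions = np.stack([space.sample() for space in env.action_space])

obs, rewards, dones, infos = env.step(actions)
print(obs.shape, rewards.shape, dones.shape)       # (2, 14) (2, 1) (2,)
```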
The environment stub itself, `light_mappo/envs/env_core.py`, as referenced in the README above:

```python
import numpy as np


class EnvCore(object):
    """
    # Agents in the environment
    """

    def __init__(self):
        self.agent_num = 2   # number of agents (small planes), set to two here
        self.obs_dim = 14    # dimension of each agent's observation
        self.action_dim = 5  # dimension of each agent's action, assumed to be five here

    def reset(self):
        """
        # With self.agent_num set to 2 agents, the return value is a list whose entries are observations of shape = (self.obs_dim, )
        """
        sub_agent_obs = []
        for i in range(self.agent_num):
            sub_obs = np.random.random(size=(14,))
            sub_agent_obs.append(sub_obs)
        return sub_agent_obs

    def step(self, actions):
        """
        # With self.agent_num set to 2 agents, actions is a 2-element list whose entries are actions of shape = (self.action_dim, )
        # Under the default parameters the input is a list with two elements; since the action dimension is 5, each element has shape = (5, )
        """
        sub_agent_obs = []
        sub_agent_reward = []
        sub_agent_done = []
        sub_agent_info = []
        for i in range(self.agent_num):
            sub_agent_obs.append(np.random.random(size=(14,)))
            sub_agent_reward.append([np.random.rand()])
            sub_agent_done.append(False)
            sub_agent_info.append({})
        return [sub_agent_obs, sub_agent_reward, sub_agent_done, sub_agent_info]
```
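Calling the stub directly shows the raw (un-stacked) list structure that the wrappers above convert into arrays. Again just a sketch against the class shown here:

```python
import numpy as np
from envs.env_core import EnvCore  # path as referenced in the README above

env = EnvCore()
obs = env.reset()
print(len(obs), obs[0].shape)       # 2 agents, each observation of shape (14,)

# the stub ignores the action values, so zero vectors are enough here
actions = [np.zeros(env.action_dim) for _ in range(env.agent_num)]
obs, rewards, dones, infos = env.step(actions)
print(rewards)                      # one 1-element list per agent, e.g. [[0.42], [0.17]]
print(dones, infos)                 # [False, False] [{}, {}]
```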
@@ -31,9 +31,8 @@ class DiscreteActionEnv(object):

The hunk in the discrete-action wrapper removes one of the duplicated `for agent in range(self.num_agent):` loop headers (9 lines become 8); the displayed region is:

```python
        self.share_observation_space = []
        share_obs_dim = 0
        for agent in range(self.num_agent):
        total_action_space = []
        for agent in range(self.num_agent):
            # physical action space
            u_action_space = spaces.Discrete(self.signal_action_dim)  # 5 discrete actions
```
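For the discrete wrapper, the `discrete_action_input = False` convention seen in the continuous wrapper above suggests that each agent's action arrives as a one-hot vector rather than an integer index. Assuming `DiscreteActionEnv` exposes the same per-agent `action_space` list, a sketch of sampling such actions looks like this:

```python
import numpy as np


def sample_one_hot_actions(env):
    """Sample one random one-hot action per agent from Discrete action spaces."""
    actions = []
    for space in env.action_space:       # each entry is spaces.Discrete(action_dim)
        idx = space.sample()             # integer index in [0, space.n)
        one_hot = np.zeros(space.n, dtype=np.float32)
        one_hot[idx] = 1.0
        actions.append(one_hot)
    return np.stack(actions)             # shape (num_agent, action_dim)
```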