Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
结
结合Transformer与多智能体强化学习的多无人机编码缓存传输方法
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
牛辰龙
结合Transformer与多智能体强化学习的多无人机编码缓存传输方法
Commits
76427e68
Commit
76427e68
authored
Dec 01, 2022
by
hezhiqiang01
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update continuous
parent
90258d12
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
111 additions
and
5 deletions
+111
-5
README.md
README.md
+4
-3
envs/env_continuous.py
envs/env_continuous.py
+68
-0
envs/env_core.py
envs/env_core.py
+38
-0
envs/env_discrete.py
envs/env_discrete.py
+1
-2
No files found.
README.md
View file @
76427e68
...
...
@@ -22,14 +22,15 @@ MAPPO原版代码对于环境的封装过于复杂,本项目直接将环境封
## 用法
-
环境部分是一个空的实现,文件
`light_mappo/envs/env_core.py`
里面环境部分的实现:
[
Code
](
https://github.com/tinyzqh/light_mappo/blob/main/envs/env_
wrappers
.py
)
-
环境部分是一个空的实现,文件
`light_mappo/envs/env_core.py`
里面环境部分的实现:
[
Code
](
https://github.com/tinyzqh/light_mappo/blob/main/envs/env_
core
.py
)
```
python
class
Env
(
object
):
import
numpy
as
np
class
EnvCore
(
object
):
"""
# 环境中的智能体
"""
def
__init__
(
self
,
i
):
def
__init__
(
self
):
self
.
agent_num
=
2
# 设置智能体(小飞机)的个数,这里设置为两个
self
.
obs_dim
=
14
# 设置智能体的观测纬度
self
.
action_dim
=
5
# 设置智能体的动作纬度,这里假定为一个五个纬度的
...
...
envs/env_continuous.py
0 → 100644
View file @
76427e68
import
gym
from
gym
import
spaces
import
numpy
as
np
from
envs.env_core
import
EnvCore
class ContinuousActionEnv(object):
    """Wrapper exposing EnvCore as a continuous-action multi-agent env.

    Builds one unbounded Box action space and one unbounded Box observation
    space per agent, plus a shared observation space whose dimension is the
    concatenation of every agent's observation.
    """

    def __init__(self):
        self.env = EnvCore()
        self.num_agent = self.env.agent_num
        self.signal_obs_dim = self.env.obs_dim
        self.signal_action_dim = self.env.action_dim

        # if true, action is a number 0...N, otherwise action is a one-hot
        # N-dimensional vector (kept for interface parity with the discrete
        # wrapper; unused here)
        self.discrete_action_input = False
        self.movable = True

        # configure spaces
        self.action_space = []
        self.observation_space = []
        self.share_observation_space = []
        share_obs_dim = 0
        for _ in range(self.num_agent):
            # Fresh per-agent list each iteration. The original initialized
            # this once outside the loop and always appended element [0],
            # which only worked because every agent's space is identical;
            # this matches the fix applied to env_discrete.py in this commit.
            total_action_space = []

            # physical action space: unbounded continuous vector
            u_action_space = spaces.Box(
                low=-np.inf,
                high=+np.inf,
                shape=(self.signal_action_dim,),
                dtype=np.float32,
            )
            if self.movable:
                total_action_space.append(u_action_space)

            # total action space
            self.action_space.append(total_action_space[0])

            # observation space: [-inf, inf] per dimension
            share_obs_dim += self.signal_obs_dim
            self.observation_space.append(
                spaces.Box(
                    low=-np.inf,
                    high=+np.inf,
                    shape=(self.signal_obs_dim,),
                    dtype=np.float32,
                )
            )

        # Shared observation space: one Box per agent, each spanning the
        # concatenation of all agents' observations.
        self.share_observation_space = [
            spaces.Box(
                low=-np.inf,
                high=+np.inf,
                shape=(share_obs_dim,),
                dtype=np.float32,
            )
            for _ in range(self.num_agent)
        ]

    def step(self, actions):
        """Advance the wrapped env one step.

        Assumed actions layout (per rollout thread): (num_agent, action_dim);
        across e.g. 5 parallel threads with 2 agents and 5-dim actions the
        batch is shaped (5, 2, 5).

        Returns (obs, rewards, dones) as stacked numpy arrays, plus the
        per-agent info list untouched.
        """
        obs, rews, dones, infos = self.env.step(actions)
        return np.stack(obs), np.stack(rews), np.stack(dones), infos

    def reset(self):
        """Reset the wrapped env and return stacked per-agent observations."""
        return np.stack(self.env.reset())

    def close(self):
        """No resources to release for this toy env."""
        pass

    def render(self, mode="rgb_array"):
        """Rendering is not implemented for this env."""
        pass

    def seed(self, seed):
        """Seeding is not implemented for this env."""
        pass
\ No newline at end of file
envs/env_core.py
0 → 100644
View file @
76427e68
import
numpy
as
np
class EnvCore(object):
    """Minimal multi-agent environment stub.

    Agents in the environment: returns random observations and rewards so
    the surrounding training loop can be exercised end-to-end.
    """

    def __init__(self):
        self.agent_num = 2   # number of agents (small UAVs); set to two here
        self.obs_dim = 14    # per-agent observation dimension
        self.action_dim = 5  # per-agent action dimension; assumed 5 here

    def reset(self):
        """Return the initial observations.

        With agent_num set to 2, returns a list with one entry per agent,
        each a numpy array shaped (self.obs_dim,).

        Fix: the original hard-coded size=(14,); using self.obs_dim keeps
        the returned shape consistent when obs_dim is changed.
        """
        return [
            np.random.random(size=(self.obs_dim,))
            for _ in range(self.agent_num)
        ]

    def step(self, actions):
        """Advance the environment by one step.

        actions: with agent_num set to 2, a 2-element list whose entries are
        each shaped (self.action_dim,) — one-hot by default, so (5,) with
        the default settings. (Unused in this stub.)

        Returns [obs, rewards, dones, infos], each a list with one entry per
        agent: obs shaped (self.obs_dim,), reward a 1-element list, done a
        bool, info a dict.
        """
        sub_agent_obs = []
        sub_agent_reward = []
        sub_agent_done = []
        sub_agent_info = []
        for _ in range(self.agent_num):
            # Fix: hard-coded (14,) replaced with self.obs_dim so step()
            # agrees with reset() and with the declared obs_dim.
            sub_agent_obs.append(np.random.random(size=(self.obs_dim,)))
            sub_agent_reward.append([np.random.rand()])
            sub_agent_done.append(False)
            sub_agent_info.append({})
        return [sub_agent_obs, sub_agent_reward, sub_agent_done, sub_agent_info]
\ No newline at end of file
envs/env_discrete.py
View file @
76427e68
...
...
@@ -31,9 +31,8 @@ class DiscreteActionEnv(object):
self
.
share_observation_space
=
[]
share_obs_dim
=
0
total_action_space
=
[]
for
agent
in
range
(
self
.
num_agent
):
total_action_space
=
[]
# physical action space
u_action_space
=
spaces
.
Discrete
(
self
.
signal_action_dim
)
# 5个离散的动作
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment