Commit 4bc6d041 authored by hzq

[dev] update

parent 76427e68
...@@ -66,6 +66,7 @@ class EnvCore(object):
You only need to write this part of the code to plug seamlessly into MAPPO. On top of env_core.py, two separate files, env_discrete.py and env_continuous.py, were split out to wrap the discrete and continuous action spaces. The `elif self.continuous_action:` branch in algorithms/utils/act.py is also there to handle the continuous action space, and so is the part of runner/shared/env_runner.py marked `# TODO adapt this to the form your own environment needs`.
+In train.py, switch the demo environment by commenting out either the continuous or the discrete environment.
## Related Efforts
......
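For orientation, the env_core.py interface that the README paragraph above refers to looks roughly like the sketch below. It is a minimal sketch only: the attribute names (`agent_num`, `obs_dim`, `action_dim`) and the `reset()`/`step()` return shapes are assumptions inferred from the description, not a verbatim copy of the repository file.

```python
import numpy as np


class EnvCore(object):
    """Minimal multi-agent environment core (sketch; adapt to your own task)."""

    def __init__(self):
        self.agent_num = 2    # number of agents (assumed attribute name)
        self.obs_dim = 14     # per-agent observation dimension (assumed)
        self.action_dim = 5   # per-agent action dimension (assumed)

    def reset(self):
        # One observation vector per agent.
        return [np.random.random(self.obs_dim) for _ in range(self.agent_num)]

    def step(self, actions):
        # actions: one action per agent. Return per-agent obs, rewards, dones, infos.
        obs = [np.random.random(self.obs_dim) for _ in range(self.agent_num)]
        rewards = [[0.0] for _ in range(self.agent_num)]
        dones = [False for _ in range(self.agent_num)]
        infos = [{} for _ in range(self.agent_num)]
        return [obs, rewards, dones, infos]
```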
...@@ -82,7 +82,7 @@ class DiscreteActionEnv(object):
def seed(self, seed):
pass
-class MultiDiscrete(gym.Space):
+class MultiDiscrete():
""" """
- The multi-discrete action space consists of a series of discrete action spaces with different parameters - The multi-discrete action space consists of a series of discrete action spaces with different parameters
- It can be adapted to both a Discrete action space or a continuous (Box) action space - It can be adapted to both a Discrete action space or a continuous (Box) action space
......
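For context on the hunk above: dropping the `gym.Space` base class makes the wrapper independent of the installed gym version. The sketch below shows what a gym-free multi-discrete space minimally needs; the constructor format (a list of [min, max] pairs, one per sub-action) follows the docstring, while the member names (`low`, `high`, `num_discrete_space`, `sample`) are illustrative assumptions.

```python
import numpy as np


class MultiDiscrete:
    """Sketch of a gym-free multi-discrete space: one [min, max] range per sub-action."""

    def __init__(self, array_of_param_array):
        self.low = np.array([param[0] for param in array_of_param_array])
        self.high = np.array([param[1] for param in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]

    def sample(self):
        # Draw one random integer per sub-action, each from its own range.
        return [int(np.random.randint(lo, hi + 1)) for lo, hi in zip(self.low, self.high)]


# Hypothetical usage: a 5-way movement action plus a 2-way communication action.
space = MultiDiscrete([[0, 4], [0, 1]])
print(space.sample())  # e.g. [3, 0]
```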
...@@ -133,7 +133,9 @@ class EnvRunner(Runner):
elif self.envs.action_space[agent_id].__class__.__name__ == 'Discrete':
action_env = np.squeeze(np.eye(self.envs.action_space[agent_id].n)[action], 1)
else:
-raise NotImplementedError
+# TODO adapt this to the form your own environment needs
+action_env = actions
+# raise NotImplementedError
actions.append(action)
temp_actions_env.append(action_env)
......
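The hunk above only changes the fallback branch: `Discrete` actions are still one-hot encoded via `np.eye`, while any other action space (for example a continuous Box) is now forwarded as-is instead of raising `NotImplementedError`. A small sketch of the two cases (shapes are illustrative, assuming a single environment and a single agent):

```python
import numpy as np

n = 5                                        # size of a Discrete action space
action = np.array([[2]])                     # sampled action index, shape (1, 1)
one_hot = np.squeeze(np.eye(n)[action], 1)   # -> [[0., 0., 1., 0., 0.]], shape (1, 5)

# For a custom/continuous space the new else-branch skips the one-hot encoding
# and forwards the raw network output unchanged.
continuous_action = np.array([[0.3, -1.2]])
action_env = continuous_action
```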
...@@ -5,18 +5,11 @@
# @File : env_runner.py
"""
"""
# @Time : 2021/7/1 7:04 下午
# @Author : hezhiqiang01
# @Email : hezhiqiang01@baidu.com
# @File : huaru_runner.py
"""
import time
import numpy as np
import torch
from runner.shared.base_runner import Runner
-import imageio
+# import imageio
def _t2n(x):
...@@ -269,5 +262,5 @@ class EnvRunner(Runner):
print("average episode rewards is: " + str(np.mean(np.sum(np.array(episode_rewards), axis=0))))
-if self.all_args.save_gifs:
-imageio.mimsave(str(self.gif_dir) + '/render.gif', all_frames, duration=self.all_args.ifi)
+# if self.all_args.save_gifs:
+# imageio.mimsave(str(self.gif_dir) + '/render.gif', all_frames, duration=self.all_args.ifi)
...@@ -22,10 +22,11 @@ from envs.env_wrappers import SubprocVecEnv, DummyVecEnv
def make_train_env(all_args):
def get_env_fn(rank):
def init_env():
-# from envs.env_continuous import ContinuousActionEnv
-# env = ContinuousActionEnv()
-from envs.env_discrete import DiscreteActionEnv
-env = DiscreteActionEnv()
+# TODO Note: choose continuous or discrete here by commenting out the two lines above or the two lines below.
+from envs.env_continuous import ContinuousActionEnv
+env = ContinuousActionEnv()
+# from envs.env_discrete import DiscreteActionEnv
+# env = DiscreteActionEnv()
env.seed(all_args.seed + rank * 1000)
return env
return init_env
...@@ -35,8 +36,11 @@ def make_train_env(all_args):
def make_eval_env(all_args):
def get_env_fn(rank):
def init_env():
-from envs.env_discrete import DiscreteActionEnv
-env = DiscreteActionEnv()
+# TODO Note: choose continuous or discrete here by commenting out the two lines above or the two lines below.
+from envs.env_continuous import ContinuousActionEnv
+env = ContinuousActionEnv()
+# from envs.env_discrete import DiscreteActionEnv
+# env = DiscreteActionEnv()
env.seed(all_args.seed + rank * 1000)
return env
return init_env
......
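If you want the discrete demo environment instead, the `init_env` closure inside `make_train_env`/`make_eval_env` after the comment swap the TODO describes would look roughly like this fragment (it relies on `all_args` and `rank` from the enclosing functions shown in the diff):

```python
def init_env():
    # TODO Note: choose continuous or discrete here by commenting out one pair of lines.
    # from envs.env_continuous import ContinuousActionEnv
    # env = ContinuousActionEnv()
    from envs.env_discrete import DiscreteActionEnv
    env = DiscreteActionEnv()
    env.seed(all_args.seed + rank * 1000)
    return env
```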