Commit 4bc6d041 authored by hzq

[dev] update

parent 76427e68
......@@ -66,6 +66,7 @@ class EnvCore(object):
You only need to implement this part of the code to plug seamlessly into MAPPO. On top of env_core.py, two separate files, env_discrete.py and env_continuous.py, are provided to wrap the discrete and continuous action spaces. In algorithms/utils/act.py, the elif self.continuous_action: branch likewise handles the continuous action space, as does the # TODO section in runner/shared/env_runner.py (adapt it to whatever form your own environment needs).
In train.py, comment out either the continuous environment or the discrete environment to switch between the two demo setups.
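For orientation, here is a minimal sketch of the kind of EnvCore interface that env_discrete.py and env_continuous.py wrap; the attribute and method names below (agent_num, obs_dim, action_dim, reset, step) and the return shapes are illustrative assumptions, not necessarily the repository's exact API:

```python
import numpy as np


class EnvCore(object):
    """Hypothetical sketch of the per-agent environment core that the
    discrete/continuous wrappers build on. Names and dimensions below
    are illustrative assumptions."""

    def __init__(self):
        self.agent_num = 2    # number of agents (assumed)
        self.obs_dim = 14     # per-agent observation size (assumed)
        self.action_dim = 5   # per-agent action size (assumed)

    def reset(self):
        # One observation vector per agent.
        return [np.random.random(self.obs_dim) for _ in range(self.agent_num)]

    def step(self, actions):
        # actions: one action vector per agent; return per-agent
        # observations, rewards, dones and info dicts.
        obs = [np.random.random(self.obs_dim) for _ in range(self.agent_num)]
        rewards = [[0.0] for _ in range(self.agent_num)]
        dones = [False for _ in range(self.agent_num)]
        infos = [{} for _ in range(self.agent_num)]
        return obs, rewards, dones, infos
```

Under this assumption, the wrappers only need to translate between this list-per-agent convention and the gym-style action spaces that MAPPO's runner expects.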
## Related Efforts
......
......@@ -82,7 +82,7 @@ class DiscreteActionEnv(object):
def seed(self, seed):
pass
class MultiDiscrete(gym.Space):
class MultiDiscrete():
"""
- The multi-discrete action space consists of a series of discrete action spaces with different parameters
- It can be adapted to both a Discrete action space or a continuous (Box) action space
......
......@@ -133,7 +133,9 @@ class EnvRunner(Runner):
elif self.envs.action_space[agent_id].__class__.__name__ == 'Discrete':
action_env = np.squeeze(np.eye(self.envs.action_space[agent_id].n)[action], 1)
else:
raise NotImplementedError
# TODO adapt this to whatever form your own environment needs
action_env = actions
# raise NotImplementedError
actions.append(action)
temp_actions_env.append(action_env)
......
......@@ -5,18 +5,11 @@
# @File : env_runner.py
"""
"""
# @Time : 2021/7/1 7:04 PM
# @Author : hezhiqiang01
# @Email : hezhiqiang01@baidu.com
# @File : huaru_runner.py
"""
import time
import numpy as np
import torch
from runner.shared.base_runner import Runner
import imageio
# import imageio
def _t2n(x):
......@@ -269,5 +262,5 @@ class EnvRunner(Runner):
print("average episode rewards is: " + str(np.mean(np.sum(np.array(episode_rewards), axis=0))))
if self.all_args.save_gifs:
imageio.mimsave(str(self.gif_dir) + '/render.gif', all_frames, duration=self.all_args.ifi)
# if self.all_args.save_gifs:
# imageio.mimsave(str(self.gif_dir) + '/render.gif', all_frames, duration=self.all_args.ifi)
......@@ -22,10 +22,11 @@ from envs.env_wrappers import SubprocVecEnv, DummyVecEnv
def make_train_env(all_args):
def get_env_fn(rank):
def init_env():
# from envs.env_continuous import ContinuousActionEnv
# env = ContinuousActionEnv()
from envs.env_discrete import DiscreteActionEnv
env = DiscreteActionEnv()
# TODO Note: choose continuous or discrete here by commenting out one pair of import/env lines and uncommenting the other.
from envs.env_continuous import ContinuousActionEnv
env = ContinuousActionEnv()
# from envs.env_discrete import DiscreteActionEnv
# env = DiscreteActionEnv()
env.seed(all_args.seed + rank * 1000)
return env
return init_env
......@@ -35,8 +36,11 @@ def make_train_env(all_args):
def make_eval_env(all_args):
def get_env_fn(rank):
def init_env():
from envs.env_discrete import DiscreteActionEnv
env = DiscreteActionEnv()
# TODO Note: choose continuous or discrete here by commenting out one pair of import/env lines and uncommenting the other.
from envs.env_continuous import ContinuousActionEnv
env = ContinuousActionEnv()
# from envs.env_discrete import DiscreteActionEnv
# env = DiscreteActionEnv()
env.seed(all_args.seed + rank * 1000)
return env
return init_env
......