Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
结
结合Transformer与多智能体强化学习的多无人机编码缓存传输方法
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
牛辰龙
结合Transformer与多智能体强化学习的多无人机编码缓存传输方法
Commits
76427e68
Commit
76427e68
authored
Dec 01, 2022
by
hezhiqiang01
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update continuous
parent
90258d12
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
111 additions
and
5 deletions
+111
-5
README.md
README.md
+4
-3
envs/env_continuous.py
envs/env_continuous.py
+68
-0
envs/env_core.py
envs/env_core.py
+38
-0
envs/env_discrete.py
envs/env_discrete.py
+1
-2
No files found.
README.md
View file @
76427e68
...
...
@@ -22,14 +22,15 @@ MAPPO原版代码对于环境的封装过于复杂,本项目直接将环境封
## 用法
-
环境部分是一个空的实现,文件
`light_mappo/envs/env_core.py`
里面环境部分的实现:
[
Code
](
https://github.com/tinyzqh/light_mappo/blob/main/envs/env_
wrappers
.py
)
-
环境部分是一个空的实现,文件
`light_mappo/envs/env_core.py`
里面环境部分的实现:
[
Code
](
https://github.com/tinyzqh/light_mappo/blob/main/envs/env_
core
.py
)
```
python
class
Env
(
object
):
import
numpy
as
np
class
EnvCore
(
object
):
"""
# 环境中的智能体
"""
def
__init__
(
self
,
i
):
def
__init__
(
self
):
self
.
agent_num
=
2
# 设置智能体(小飞机)的个数,这里设置为两个
self
.
obs_dim
=
14
# 设置智能体的观测纬度
self
.
action_dim
=
5
# 设置智能体的动作纬度,这里假定为一个五个纬度的
...
...
envs/env_continuous.py
0 → 100644
View file @
76427e68
import
gym
from
gym
import
spaces
import
numpy
as
np
from
envs.env_core
import
EnvCore
class ContinuousActionEnv(object):
    """Wrapper exposing EnvCore as a continuous-action multi-agent env.

    Builds one unbounded Box action space and one unbounded Box observation
    space per agent, plus a shared observation space whose dimension is the
    concatenation of every agent's observation.
    """

    def __init__(self):
        self.env = EnvCore()
        self.num_agent = self.env.agent_num
        self.signal_obs_dim = self.env.obs_dim
        self.signal_action_dim = self.env.action_dim

        # if true, action is a number 0...N, otherwise action is a one-hot
        # N-dimensional vector (kept for interface parity with the discrete
        # wrapper; unused here)
        self.discrete_action_input = False
        self.movable = True

        # configure spaces
        self.action_space = []
        self.observation_space = []
        self.share_observation_space = []
        share_obs_dim = 0
        for _ in range(self.num_agent):
            # Fresh per-agent list each iteration. The original initialized
            # this once outside the loop and always appended element [0],
            # which only worked because every agent's space is identical;
            # this matches the fix applied to env_discrete.py in this commit.
            total_action_space = []

            # physical action space: unbounded continuous vector
            u_action_space = spaces.Box(
                low=-np.inf,
                high=+np.inf,
                shape=(self.signal_action_dim,),
                dtype=np.float32,
            )
            if self.movable:
                total_action_space.append(u_action_space)

            # total action space
            self.action_space.append(total_action_space[0])

            # observation space: [-inf, inf] per dimension
            share_obs_dim += self.signal_obs_dim
            self.observation_space.append(
                spaces.Box(
                    low=-np.inf,
                    high=+np.inf,
                    shape=(self.signal_obs_dim,),
                    dtype=np.float32,
                )
            )

        # Shared observation space: one Box per agent, each spanning the
        # concatenation of all agents' observations.
        self.share_observation_space = [
            spaces.Box(
                low=-np.inf,
                high=+np.inf,
                shape=(share_obs_dim,),
                dtype=np.float32,
            )
            for _ in range(self.num_agent)
        ]

    def step(self, actions):
        """Advance the wrapped env one step.

        Assumed actions layout (per rollout thread): (num_agent, action_dim);
        across e.g. 5 parallel threads with 2 agents and 5-dim actions the
        batch is shaped (5, 2, 5).

        Returns (obs, rewards, dones) as stacked numpy arrays, plus the
        per-agent info list untouched.
        """
        obs, rews, dones, infos = self.env.step(actions)
        return np.stack(obs), np.stack(rews), np.stack(dones), infos

    def reset(self):
        """Reset the wrapped env and return stacked per-agent observations."""
        return np.stack(self.env.reset())

    def close(self):
        """No resources to release for this toy env."""
        pass

    def render(self, mode="rgb_array"):
        """Rendering is not implemented for this env."""
        pass

    def seed(self, seed):
        """Seeding is not implemented for this env."""
        pass
\ No newline at end of file
envs/env_core.py
0 → 100644
View file @
76427e68
import
numpy
as
np
class EnvCore(object):
    """Minimal multi-agent environment stub.

    Agents in the environment: returns random observations and rewards so
    the surrounding training loop can be exercised end-to-end.
    """

    def __init__(self):
        self.agent_num = 2   # number of agents (small UAVs); set to two here
        self.obs_dim = 14    # per-agent observation dimension
        self.action_dim = 5  # per-agent action dimension; assumed 5 here

    def reset(self):
        """Return the initial observations.

        With agent_num set to 2, returns a list with one entry per agent,
        each a numpy array shaped (self.obs_dim,).

        Fix: the original hard-coded size=(14,); using self.obs_dim keeps
        the returned shape consistent when obs_dim is changed.
        """
        return [
            np.random.random(size=(self.obs_dim,))
            for _ in range(self.agent_num)
        ]

    def step(self, actions):
        """Advance the environment by one step.

        actions: with agent_num set to 2, a 2-element list whose entries are
        each shaped (self.action_dim,) — one-hot by default, so (5,) with
        the default settings. (Unused in this stub.)

        Returns [obs, rewards, dones, infos], each a list with one entry per
        agent: obs shaped (self.obs_dim,), reward a 1-element list, done a
        bool, info a dict.
        """
        sub_agent_obs = []
        sub_agent_reward = []
        sub_agent_done = []
        sub_agent_info = []
        for _ in range(self.agent_num):
            # Fix: hard-coded (14,) replaced with self.obs_dim so step()
            # agrees with reset() and with the declared obs_dim.
            sub_agent_obs.append(np.random.random(size=(self.obs_dim,)))
            sub_agent_reward.append([np.random.rand()])
            sub_agent_done.append(False)
            sub_agent_info.append({})
        return [sub_agent_obs, sub_agent_reward, sub_agent_done, sub_agent_info]
\ No newline at end of file
envs/env_discrete.py
View file @
76427e68
...
...
@@ -31,9 +31,8 @@ class DiscreteActionEnv(object):
self
.
share_observation_space
=
[]
share_obs_dim
=
0
total_action_space
=
[]
for
agent
in
range
(
self
.
num_agent
):
total_action_space
=
[]
# physical action space
u_action_space
=
spaces
.
Discrete
(
self
.
signal_action_dim
)
# 5个离散的动作
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment