Commit a562d9db authored by 张逸鸣

111

parent ea508a3a
......@@ -9,7 +9,7 @@ import torch
import torch.nn as nn
from algorithms.utils.util import init, check
from algorithms.utils.cnn import CNNBase
from algorithms.utils.mlp import MLPBase
from algorithms.utils.mlp import MLPBase, MLPBaseWithTrans, MLPBaseGPT2
from algorithms.utils.rnn import RNNLayer
from algorithms.utils.act import ACTLayer
from algorithms.utils.popart import PopArt
......@@ -37,7 +37,7 @@ class R_Actor(nn.Module):
self.tpdv = dict(dtype=torch.float32, device=device)
obs_shape = get_shape_from_obs_space(obs_space)
base = CNNBase if len(obs_shape) == 3 else MLPBase
base = CNNBase if len(obs_shape) == 3 else MLPBaseGPT2
self.base = base(args, obs_shape)
if self._use_naive_recurrent_policy or self._use_recurrent_policy:
......@@ -134,7 +134,9 @@ class R_Critic(nn.Module):
init_method = [nn.init.xavier_uniform_, nn.init.orthogonal_][self._use_orthogonal]
cent_obs_shape = get_shape_from_obs_space(cent_obs_space)
base = CNNBase if len(cent_obs_shape) == 3 else MLPBase
base = CNNBase if len(cent_obs_shape) == 3 else MLPBaseGPT2
print("cent_obs_shape:" )
print(cent_obs_shape)
self.base = base(args, cent_obs_shape)
if self._use_naive_recurrent_policy or self._use_recurrent_policy:
......
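Both the actor and the critic above now hard-code MLPBaseGPT2 as the backbone for vector observations. A minimal sketch of making that choice configurable instead of hard-coded, assuming the three MLP variants imported above share MLPBase's (args, obs_shape) constructor; the mlp_base_type flag is hypothetical and not part of this commit:

from algorithms.utils.cnn import CNNBase
from algorithms.utils.mlp import MLPBase, MLPBaseWithTrans, MLPBaseGPT2

def build_base(args, obs_shape):
    # Image observations keep the CNN backbone; vector observations pick an MLP variant.
    if len(obs_shape) == 3:
        return CNNBase(args, obs_shape)
    variants = {"mlp": MLPBase, "trans": MLPBaseWithTrans, "gpt2": MLPBaseGPT2}
    # Defaulting to "gpt2" reproduces the hard-coded choice made in this commit.
    return variants[getattr(args, "mlp_base_type", "gpt2")](args, obs_shape)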
......@@ -15,9 +15,13 @@ class ACTLayer(nn.Module):
self.mixed_action = False
self.multi_discrete = False
self.continuous_action = False
self.tanh = nn.Tanh()
print(action_space)
if action_space.__class__.__name__ == "Discrete":
action_dim = action_space.n
print(action_dim)
self.action_out = Categorical(inputs_dim, action_dim, use_orthogonal, gain)
elif action_space.__class__.__name__ == "Box":
self.continuous_action = True
......@@ -30,10 +34,12 @@ class ACTLayer(nn.Module):
self.multi_discrete = True
action_dims = action_space.high - action_space.low + 1
self.action_outs = []
print(action_dims)
for action_dim in action_dims:
self.action_outs.append(Categorical(inputs_dim, action_dim, use_orthogonal, gain))
self.action_outs = nn.ModuleList(self.action_outs)
else: # discrete + continuous
print("mixed_action")
self.mixed_action = True
continous_dim = action_space[0].shape[0]
discrete_dim = action_space[1].n
......@@ -82,6 +88,8 @@ class ACTLayer(nn.Module):
action_logit = self.action_out(x)
actions = action_logit.mode() if deterministic else action_logit.sample()
action_log_probs = action_logit.log_probs(actions)
#actions = torch.sigmoid(actions)
actions = self.tanh(actions)
# actions.append(action.float())
# action_log_probs.append(action_log_prob)
# actions = torch.cat(actions, -1)
......
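The new `actions = self.tanh(actions)` bounds sampled continuous actions to (-1, 1), but the stored action_log_probs still describe the unsquashed sample. A sketch of the standard tanh change-of-variables correction, offered as an assumption about what a complete fix would look like rather than as part of this commit:

import torch

def squash_with_logprob(action, action_log_prob, eps=1e-6):
    # u = tanh(a); log pi(u) = log pi(a) - sum_i log(1 - tanh(a_i)^2)
    squashed = torch.tanh(action)
    correction = torch.log(1.0 - squashed.pow(2) + eps).sum(-1, keepdim=True)
    return squashed, action_log_prob - correction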
......@@ -166,11 +166,11 @@ def get_config():
default="check",
help="an identifier to distinguish different experiment.",
)
parser.add_argument("--seed", type=int, default=1, help="Random seed for numpy/torch")
parser.add_argument("--seed", type=int, default=10, help="Random seed for numpy/torch")
parser.add_argument(
"--cuda",
action="store_false",
default=True,
default=False,
help="by default True, will use GPU to train; or else will use CPU;",
)
parser.add_argument(
......@@ -182,13 +182,13 @@ def get_config():
parser.add_argument(
"--n_training_threads",
type=int,
default=2,
default=10,
help="Number of torch threads for training",
)
parser.add_argument(
"--n_rollout_threads",
type=int,
default=5,
default=1,
help="Number of parallel envs for training rollouts",
)
parser.add_argument(
......@@ -206,7 +206,7 @@ def get_config():
parser.add_argument(
"--num_env_steps",
type=int,
default=10e6,
default=2*10e6,
help="Number of environment steps to train (default: 10e6)",
)
parser.add_argument(
......@@ -256,13 +256,13 @@ def get_config():
parser.add_argument(
"--hidden_size",
type=int,
default=64,
default=128,
help="Dimension of hidden layers for actor/critic networks",
)
parser.add_argument(
"--layer_N",
type=int,
default=1,
default=3,
help="Number of layers for actor/critic networks",
)
parser.add_argument("--use_ReLU", action="store_false", default=True, help="Whether to use ReLU")
......@@ -314,11 +314,11 @@ def get_config():
)
# optimizer parameters
parser.add_argument("--lr", type=float, default=5e-4, help="learning rate (default: 5e-4)")
parser.add_argument("--lr", type=float, default=1e-5, help="learning rate (default: 5e-4)")
parser.add_argument(
"--critic_lr",
type=float,
default=5e-4,
default=1e-4,
help="critic learning rate (default: 5e-4)",
)
parser.add_argument(
......
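The defaults above are edited in place; note that 10e6 is 1e7, so 2*10e6 means 2e7 environment steps, and --cuda is registered with action="store_false", so passing the flag disables the GPU. A sketch of reproducing the same run configuration at parse time instead of editing config.py, assuming get_config() returns the parser built above:

from config import get_config  # module name assumed from the file being edited

parser = get_config()
all_args = parser.parse_args([
    "--seed", "10",
    "--n_training_threads", "10",
    "--n_rollout_threads", "1",
    "--num_env_steps", "20000000",   # 2 * 10e6
    "--hidden_size", "128",
    "--layer_N", "3",
    "--lr", "1e-5",
    "--critic_lr", "1e-4",
    "--cuda",                        # store_false: passing the flag turns CUDA off
])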
import math
import random
import numpy as np
class AerialVehicle(object):
def __init__(self, CustomerNum, TotalContentNum, ContentSize, Hight,
EnvA, EnvB, Frequency, Bandwidth, TransmitPower, SpeedOfLight,
AvgLOS, AvgNLOS, ConstrainLOS, Noise, MaxPlaceX, MinPlaceX,
MaxPlaceY, MinPlaceY):
self.CustomerNum = CustomerNum # number of users in the area
self.TotalContentNum = TotalContentNum # total number of content items
self.ContentSize = ContentSize # size of a single content item
self.K = 0 # number of slices per content item
self.PlaceX = 0 # horizontal x coordinate of the UAV
self.PlaceY = 0 # horizontal y coordinate of the UAV
self.PlaceH = Hight # UAV flight altitude H
self.EnvA = EnvA # environment parameter a
self.EnvB = EnvB # environment parameter b
self.Frequency = Frequency # UAV carrier frequency
self.SpeedOfLight = SpeedOfLight # speed of light
self.AvgLOS = AvgLOS # average excess path loss for LOS links
self.AvgNLOS = AvgNLOS # average excess path loss for NLOS links
self.ConstrainLOS = ConstrainLOS # minimum required LOS probability
self.Bandwidth = Bandwidth # total UAV bandwidth
self.TransmitPower = TransmitPower # total UAV transmit power
self.Noise = Noise # Gaussian white noise power
self.ServiceNum = 0 # number of users served in this round
self.ServiceList = [] # users served in this round
self.MaxPlaceX = MaxPlaceX # service area boundaries
self.MinPlaceX = MinPlaceX
self.MaxPlaceY = MaxPlaceY
self.MinPlaceY = MinPlaceY
self.TotalContentNum = TotalContentNum # duplicate of the assignment above
self.ServiceRadius = self.getRadiusOfUAV() # UAV service radius
print("RadiusOfUAV " + str(self.ServiceRadius))
self.CachedContentList = [0 for _ in range(TotalContentNum)] # UAV cache indicator list F
def cacheContent(self, i):
self.CachedContentList[i] = 1
def clearCacheContent(self):
self.CachedContentList = [0 for _ in range(self.TotalContentNum)]
def whetherCacheContent(self, i):
return self.CachedContentList[i] == 1
def getCacheList(self):
return self.CachedContentList
def setK(self, K):
self.K = K
def getPlaceX(self):
return self.PlaceX
def getPlaceY(self):
return self.PlaceY
# Move the UAV to a new position
def moveTo(self,x, y):
self.PlaceX = x
self.PlaceY = y
def moveToByDist(self, dist, direction):
x = self.PlaceX + dist * math.cos(direction)
y = self.PlaceY + dist * math.sin(direction)
if self.isBeyond(x, y):
return 5
self.PlaceX = x
self.PlaceY = y
return self.InPlace()
def isBeyond(self, x, y):
if x > self.MaxPlaceX or x < self.MinPlaceX or y > self.MaxPlaceY or y < self.MinPlaceY:
return True
return False
# Keep the UAV inside the allowed flight area (returns a penalty)
def InPlace(self):
punish = 0
if self.PlaceX > self.MaxPlaceX:
# self.PlaceX = self.MaxPlaceX
punish+=3
if self.PlaceX > self.MaxPlaceX - 5:
punish += 1
if self.PlaceX < self.MinPlaceX:
# self.PlaceX = self.MinPlaceX
punish += 3
if self.PlaceX < self.MinPlaceX + 5:
punish += 1
if self.PlaceY > self.MaxPlaceY:
# self.PlaceY = self.MaxPlaceY
punish += 3
if self.PlaceY > self.MaxPlaceY - 5:
punish += 1
if self.PlaceY < self.MinPlaceY:
# self.PlaceY = self.MinPlaceY
punish += 3
if self.PlaceY < self.MinPlaceY + 5:
punish += 1
return punish
# Compute the horizontal distance to (x, y)
def getDist(self,x, y):
return math.sqrt(math.pow(self.PlaceX - x, 2) + math.pow(self.PlaceY - y, 2))
# Compute the LOS probability for a user at (x, y)
def getPossOfLos(self, x, y):
var1 = 180 / math.pi * math.atan(self.PlaceH / self.getDist(x, y))
var2 = - self.EnvB * (var1 - self.EnvA)
var3 = 1 + self.EnvA * math.exp(var2)
return 1 / var3
# Compute the LOS probability given a horizontal distance
def getPossOfLosByDist(self, dist):
var1 = 180 / math.pi * math.atan(self.PlaceH / dist)
var2 = - self.EnvB * (var1 - self.EnvA)
var3 = 1 + self.EnvA * math.exp(var2)
return 1 / var3
# Compute the common (free-space) part of the path loss
def getNormalLoss(self, x, y):
var1 = math.log10(4 * math.pi * self.Frequency * self.getDist(x, y) / self.SpeedOfLight)
if var1 <= 0:
return 0
return 20 * var1
# Compute the LOS path loss
def getLOS(self, x, y):
return self.getNormalLoss(x, y) + self.AvgLOS
# Compute the NLOS path loss
def getNLOS(self, x, y):
return self.getNormalLoss(x, y) + self.AvgNLOS
# Compute the average path loss (LOS/NLOS mixture)
def getAvgLOS(self, x, y):
return self.getLOS(x, y) * self.getPossOfLos(x, y) + self.getNLOS(x, y) * (1 - self.getPossOfLos(x, y))
# Compute the UAV service radius
def getRadiusOfUAV(self):
var1 = math.log10((1 - self.ConstrainLOS) / (self.EnvA * self.ConstrainLOS))
var2 = math.tan(self.EnvA - (1 / self.EnvB) * var1)
return self.PlaceH / var2
# Allocate UAV transmit power
def allocTransmitPower(self):
return self.TransmitPower
# Allocate UAV bandwidth (split evenly across served users)
def allocBandWidth(self):
return self.Bandwidth / self.ServiceNum
# Check whether a user at (x, y) can be served by this UAV
def tryGetService(self, x, y, requestIndex, UserIndex):
print("tryGetService")
if self.getDist(x, y) > self.ServiceRadius:
# horizontal distance exceeds the service radius
return False
elif not self.whetherCacheContent(requestIndex):
# this UAV has not cached the requested content
print("nonononono")
return False
else:
self.ServiceNum = self.ServiceNum + 1
self.ServiceList.append(UserIndex)
return True
def addService(self, UserIndex):
self.ServiceNum = self.ServiceNum + 1
self.ServiceList.append(UserIndex)
# Compute the downlink transmission rate
def getTransSpeed(self, x, y):
var1 = self.allocTransmitPower() / (self.Noise * np.power(10, self.getAvgLOS(x, y) / 10))
return self.allocBandWidth() * math.log2(var1 + 1) / (1024 * 1024 * 8)
# Compute the size of one slice
def getSizeOFSlice(self):
return self.ContentSize / self.K
# Check whether a slice can be fetched from this UAV in time
def tryGetContent(self, x, y, time, UserIndex, transSpeedBaseLine, requestIndex):
if self.CachedContentList[requestIndex] != 1:
return -1
if UserIndex not in self.ServiceList:
return -1
speed = self.getTransSpeed(x, y)
if speed < transSpeedBaseLine:
return False
if (self.getSizeOFSlice() / speed) > time:
return False
return True
def clearServiceList(self):
self.ServiceNum = 0
return self.ServiceList.clear()
####################################leader
def setLeader(self, Role):
self.Role = Role
def isLeader(self):
return self.Role
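getPossOfLos above implements the common air-to-ground model P_LOS = 1 / (1 + a * exp(-b * (theta - a))), with theta the elevation angle in degrees, and getAvgLOS mixes the LOS and NLOS path losses by that probability. A usage sketch of the class follows; every numeric value is an illustrative assumption, not taken from the commit:

uav = AerialVehicle(
    CustomerNum=5, TotalContentNum=10, ContentSize=50, Hight=100,
    EnvA=9.61, EnvB=0.16, Frequency=2e9, Bandwidth=1e6, TransmitPower=1.0,
    SpeedOfLight=3e8, AvgLOS=1.0, AvgNLOS=20.0, ConstrainLOS=0.8, Noise=1e-13,
    MaxPlaceX=100, MinPlaceX=-100, MaxPlaceY=100, MinPlaceY=-100,
)
uav.moveTo(0, 0)
uav.setK(4)              # each content item is split into 4 slices
uav.cacheContent(3)      # mark content item 3 as cached
if uav.tryGetService(30, 40, requestIndex=3, UserIndex=0):
    # allocBandWidth divides the total bandwidth by ServiceNum, so at least one
    # user must be registered before calling getTransSpeed
    print(uav.getTransSpeed(30, 40))   # downlink rate, converted to MB/s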
import random
from numpy import random as r
class UserForUAV(object):
def __init__(self, UserId, TotalContentNum, FalvorNum, K):
self.UserId = UserId # user index
self.TotalContentNum = TotalContentNum # total number of content items
self.FalvorNum = FalvorNum # number of content items this user favours
self.PlaceX = 0 # user horizontal x position
self.PlaceY = 0 # user horizontal y position
self.K = K
self.RequestIndex = 0
self.UAVList = []
if FalvorNum > TotalContentNum:
self.FalvorNum = TotalContentNum
self.FalvorList = random.sample(range(1, 9), FalvorNum) # content items this user favours (indices drawn from the fixed range 1..8)
# Simulate a user request following a Zipf distribution
def genRequestIndex(self):
x = r.zipf(a=2, size=1)[0]
x = x - 1
if x >= len(self.FalvorList):
self.RequestIndex = self.FalvorList[len(self.FalvorList)-1]
else:
self.RequestIndex = self.FalvorList[x]
return self.RequestIndex
def getRequestIndex(self):
return self.RequestIndex
# Move the user to a new position
def moveTo(self,x, y):
self.PlaceX = x
self.PlaceY = y
def setUAVList(self, UAVList):
self.UAVList = UAVList
def tryGetservice(self):
for i in range(0, len(self.UAVList)):
self.UAVList[i].tryGetService(self.PlaceX, self.PlaceY, self.RequestIndex, self.UserId)
def tryGetCache(self, transSpeedBaseLine, time):
vehiclesAbleToTrans = 0
self.genRequestIndex()
for i in range(0, len(self.UAVList)):
if self.UAVList[i].tryGetContent(self.PlaceX, self.PlaceY, time, self.UserId, transSpeedBaseLine, self.RequestIndex):
vehiclesAbleToTrans = vehiclesAbleToTrans + 1
return vehiclesAbleToTrans, self.RequestIndex
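A matching usage sketch for UserForUAV, reusing the uav object from the AerialVehicle sketch above; the numbers are illustrative assumptions. Note that tryGetCache regenerates the request internally before probing the UAV list:

user = UserForUAV(UserId=0, TotalContentNum=10, FalvorNum=5, K=4)
user.moveTo(30, 40)
user.setUAVList([uav])
user.genRequestIndex()     # Zipf(a=2) draw, clamped to the user's favourite items
user.tryGetservice()       # register with every UAV that covers the user and caches the item
reachable, request = user.tryGetCache(transSpeedBaseLine=0.5, time=2.0)
print(reachable, request)  # UAVs able to deliver one slice within the time budget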
......@@ -2,6 +2,7 @@ import gym
from gym import spaces
import numpy as np
from envs.env_core import EnvCore
from envs.env_discrete import MultiDiscrete
class ContinuousActionEnv(object):
......@@ -12,10 +13,11 @@ class ContinuousActionEnv(object):
def __init__(self):
self.env = EnvCore()
self.num_agent = self.env.agent_num
self.num_agent = self.env.AerialVehiclesNum + 2
self.signal_obs_dim = self.env.obs_dim
self.signal_action_dim = self.env.action_dim
print( self.num_agent)
# if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
self.discrete_action_input = False
......@@ -29,7 +31,8 @@ class ContinuousActionEnv(object):
share_obs_dim = 0
total_action_space = []
for agent in range(self.num_agent):
for agent in range(self.env.AerialVehiclesNum):
# physical action space
u_action_space = spaces.Box(
low=-np.inf,
......@@ -54,7 +57,60 @@ class ContinuousActionEnv(object):
dtype=np.float32,
)
) # [-inf,inf]
print('section1')
mu_action = []
for i in range(self.env.CustomerNum * self.env.AerialVehiclesNum):
mu_action.append([0, 1])
u_action_space = MultiDiscrete(mu_action)
if self.movable:
total_action_space.append(u_action_space)
# total action space
self.action_space.append(u_action_space)
# observation space
share_obs_dim += self.env.getDimension2()
self.observation_space.append(
spaces.Box(
low=-np.inf,
high=+np.inf,
shape=(self.env.getDimension2(),),
dtype=np.float32,
)
)
print('section2')
mu_action = []
mu_action.append([0, self.env.ContentNum - 1])
u_action_space = spaces.Discrete(self.env.ContentNum)
if self.movable:
total_action_space.append(u_action_space)
# total action space
self.action_space.append(u_action_space)
# observation space
share_obs_dim += (self.env.CacheNum + self.env.CustomerNum)
self.observation_space.append(
spaces.Box(
low=-np.inf,
high=+np.inf,
shape=(self.env.CacheNum + self.env.CustomerNum,),
dtype=np.float32,
)
)
print("ons")
print(share_obs_dim)
self.share_observation_space = [
spaces.Box(
low=-np.inf, high=+np.inf, shape=(share_obs_dim,), dtype=np.float32
......@@ -62,6 +118,7 @@ class ContinuousActionEnv(object):
for _ in range(self.num_agent)
]
def step(self, actions):
"""
Assumed dimensions of the input actions:
......@@ -75,11 +132,11 @@ class ContinuousActionEnv(object):
results = self.env.step(actions)
obs, rews, dones, infos = results
return np.stack(obs), np.stack(rews), np.stack(dones), infos
return obs, np.stack(rews), np.stack(dones), infos
def reset(self):
obs = self.env.reset()
return np.stack(obs)
return obs
def close(self):
pass
......
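The hunk above gives the first AerialVehiclesNum agents a Box movement action, one agent a MultiDiscrete action with a [0, 1] range per (customer, UAV) pair, and one agent a Discrete caching action. A sketch, offered as an assumption about the intent, of decoding the flat MultiDiscrete action back into a customer-by-UAV association matrix:

import numpy as np

def decode_association(flat_action, customer_num, uav_num):
    # One {0, 1} entry per (customer, UAV) pair, in the same order the
    # MultiDiscrete ranges were appended above.
    return np.asarray(flat_action, dtype=np.int64).reshape(customer_num, uav_num)

# e.g. decode_association(action, env.CustomerNum, env.AerialVehiclesNum)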
......@@ -47,7 +47,7 @@ class DummyVecEnv():
def reset(self):
obs = [env.reset() for env in self.envs] # [env_num, agent_num, obs_dim]
return np.array(obs)
return np.array(obs, dtype=object)
def close(self):
for env in self.envs:
......
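reset() now builds the observation batch with dtype=object because the agents no longer share one observation dimension, and a rectangular float array cannot hold ragged per-agent vectors. A minimal demonstration of the difference:

import numpy as np

ragged = [[np.zeros(6), np.zeros(6), np.zeros(4)]]   # one env, three agents, mixed dims
obs = np.array(ragged, dtype=object)                  # shape (1, 3); elements stay ragged
# np.array(ragged) without dtype=object raises a ValueError on recent NumPy versions
print(obs.shape)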
......@@ -26,7 +26,6 @@ class EnvRunner(Runner):
if self.use_linear_lr_decay:
for agent_id in range(self.num_agents):
self.trainer[agent_id].policy.lr_decay(episode, episodes)
for step in range(self.episode_length):
# Sample actions
(
......@@ -63,15 +62,17 @@ class EnvRunner(Runner):
# post process
total_num_steps = (episode + 1) * self.episode_length * self.n_rollout_threads
self.envs.reset()
# save model
if episode % self.save_interval == 0 or episode == episodes - 1:
self.save()
#if episode % self.save_interval == 0 or episode == episodes - 1:
# self.save()
# log information
if episode % self.log_interval == 0:
end = time.time()
print(
"\n Scenario {} Algo {} Exp {} updates {}/{} episodes, total num timesteps {}/{}, FPS {}.\n".format(
"\n Scenario2 {} Algo {} Exp {} updates {}/{} episodes, total num timesteps {}/{}, FPS {}.\n".format(
self.all_args.scenario_name,
self.algorithm_name,
self.experiment_name,
......@@ -111,6 +112,7 @@ class EnvRunner(Runner):
share_obs.append(list(chain(*o)))
share_obs = np.array(share_obs) # shape = [env_num, agent_num * obs_dim]
for agent_id in range(self.num_agents):
if not self.use_centralized_V:
share_obs = np.array(list(obs[:, agent_id]))
......@@ -137,6 +139,7 @@ class EnvRunner(Runner):
self.buffer[agent_id].rnn_states_critic[step],
self.buffer[agent_id].masks[step],
)
# [agents, envs, dim]
values.append(_t2n(value))
action = _t2n(action)
......@@ -171,8 +174,8 @@ class EnvRunner(Runner):
actions_env.append(one_hot_action_env)
values = np.array(values).transpose(1, 0, 2)
actions = np.array(actions).transpose(1, 0, 2)
action_log_probs = np.array(action_log_probs).transpose(1, 0, 2)
# actions = np.array(actions).transpose(1, 0, 2)
# action_log_probs = np.array(action_log_probs).transpose(1, 0, 2)
rnn_states = np.array(rnn_states).transpose(1, 0, 2, 3)
rnn_states_critic = np.array(rnn_states_critic).transpose(1, 0, 2, 3)
......@@ -223,8 +226,8 @@ class EnvRunner(Runner):
np.array(list(obs[:, agent_id])),
rnn_states[:, agent_id],
rnn_states_critic[:, agent_id],
actions[:, agent_id],
action_log_probs[:, agent_id],
actions[agent_id],
action_log_probs[agent_id],
values[:, agent_id],
rewards[:, agent_id],
masks[:, agent_id],
......
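With the transposes of actions and action_log_probs commented out above, those arrays stay in [agent, env, dim] order, so the insert step indexes actions[agent_id] instead of actions[:, agent_id]. A small shape check of the two equivalent slicings, with illustrative sizes:

import numpy as np

actions = np.zeros((3, 4, 2))                              # [agents, envs, dim]
assert actions[1].shape == (4, 2)                          # per-agent slice, no transpose
assert actions.transpose(1, 0, 2)[:, 1].shape == (4, 2)    # same slice after transposing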
......@@ -77,7 +77,7 @@ class EnvRunner(Runner):
if episode % self.log_interval == 0:
end = time.time()
print(
"\n Scenario {} Algo {} Exp {} updates {}/{} episodes, total num timesteps {}/{}, FPS {}.\n".format(
"\n Sce2nario {} Algo {} Exp {} updates {}/{} episodes, total num timesteps {}/{}, FPS {}.\n".format(
self.all_args.scenario_name,
self.algorithm_name,
self.experiment_name,
......@@ -88,17 +88,7 @@ class EnvRunner(Runner):
int(total_num_steps / (end - start)),
)
)
# if self.env_name == "MPE":
# env_infos = {}
# for agent_id in range(self.num_agents):
# idv_rews = []
# for info in infos:
# if 'individual_reward' in info[agent_id].keys():
# idv_rews.append(info[agent_id]['individual_reward'])
# agent_k = 'agent%i/individual_rewards' % agent_id
# env_infos[agent_k] = idv_rews
print('11111')
train_infos["average_episode_rewards"] = np.mean(self.buffer.rewards) * self.episode_length
print("average episode rewards is {}".format(train_infos["average_episode_rewards"]))
self.log_train(train_infos, total_num_steps)
......
......@@ -67,9 +67,9 @@ def make_eval_env(all_args):
def parse_args(args, parser):
parser.add_argument("--scenario_name", type=str, default="MyEnv", help="Which scenario to run on")
parser.add_argument("--scenario_name", type=str, default="MPE", help="Which scenario to run on")
parser.add_argument("--num_landmarks", type=int, default=3)
parser.add_argument("--num_agents", type=int, default=2, help="number of players")
parser.add_argument("--num_agents", type=int, default=9, help="number of players")
all_args = parser.parse_known_args(args)[0]
......