Commit 40758424 authored by 张逸鸣

111

parent a562d9db
...@@ -4,6 +4,7 @@
# @Email : hezhiqiang01@baidu.com
# @File : rMAPPOPolicy.py
"""
+import time
import torch
from algorithms.algorithm.r_actor_critic import R_Actor, R_Critic
...@@ -71,12 +72,16 @@ class RMAPPOPolicy:
        :return rnn_states_actor: (torch.Tensor) updated actor network RNN states.
        :return rnn_states_critic: (torch.Tensor) updated critic network RNN states.
        """
+        start_time = time.time()
        actions, action_log_probs, rnn_states_actor = self.actor(obs,
                                                                 rnn_states_actor,
                                                                 masks,
                                                                 available_actions,
                                                                 deterministic)
+        end_time = time.time()
+        time_diff = end_time - start_time  # elapsed time in seconds
+        # print(f"Code execution took: {time_diff:.30f} s")
        values, rnn_states_critic = self.critic(cent_obs, rnn_states_critic, masks)
        return values, actions, action_log_probs, rnn_states_actor, rnn_states_critic
...
...@@ -4,6 +4,7 @@
# @Email : hezhiqiang01@baidu.com
# @File : r_actor_critic.py
"""
+import time
import torch
import torch.nn as nn
...@@ -37,7 +38,7 @@ class R_Actor(nn.Module):
        self.tpdv = dict(dtype=torch.float32, device=device)
        obs_shape = get_shape_from_obs_space(obs_space)
-        base = CNNBase if len(obs_shape) == 3 else MLPBaseGPT2
+        base = CNNBase if len(obs_shape) == 3 else MLPBase
        self.base = base(args, obs_shape)
        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
...@@ -61,17 +62,23 @@ class R_Actor(nn.Module):
        :return action_log_probs: (torch.Tensor) log probabilities of taken actions.
        :return rnn_states: (torch.Tensor) updated RNN hidden states.
        """
+        start_time = time.time()
        obs = check(obs).to(**self.tpdv)
        rnn_states = check(rnn_states).to(**self.tpdv)
        masks = check(masks).to(**self.tpdv)
        if available_actions is not None:
+            print("22222222222")
            available_actions = check(available_actions).to(**self.tpdv)
        actor_features = self.base(obs)
        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
+            print("11111111")
            actor_features, rnn_states = self.rnn(actor_features, rnn_states, masks)
+        end_time = time.time()
+        time_diff = end_time - start_time  # elapsed time in seconds
+        # print(f"Code execution took: {time_diff:.30f} s")
        actions, action_log_probs = self.act(actor_features, available_actions, deterministic)
        return actions, action_log_probs, rnn_states
...@@ -172,3 +179,4 @@ class R_Critic(nn.Module):
        values = self.v_out(critic_features)
        return values, rnn_states
+import time
from .distributions import Bernoulli, Categorical, DiagGaussian
import torch
import torch.nn as nn
...@@ -57,7 +59,9 @@ class ACTLayer(nn.Module):
        :return actions: (torch.Tensor) actions to take.
        :return action_log_probs: (torch.Tensor) log probabilities of taken actions.
        """
+        start_time = time.time()
        if self.mixed_action:
            actions = []
            action_log_probs = []
            for action_out in self.action_outs:
...@@ -71,6 +75,7 @@ class ACTLayer(nn.Module):
            action_log_probs = torch.sum(torch.cat(action_log_probs, -1), -1, keepdim=True)
        elif self.multi_discrete:
            actions = []
            action_log_probs = []
            for action_out in self.action_outs:
...@@ -83,6 +88,7 @@ class ACTLayer(nn.Module):
            actions = torch.cat(actions, -1)
            action_log_probs = torch.cat(action_log_probs, -1)
        elif self.continuous_action:
            # actions = []
            # action_log_probs = []
            action_logit = self.action_out(x)
...@@ -95,10 +101,13 @@ class ACTLayer(nn.Module):
            # actions = torch.cat(actions, -1)
            # action_log_probs = torch.sum(torch.cat(action_log_probs, -1), -1, keepdim=True)
        else:
            action_logits = self.action_out(x, available_actions)
            actions = action_logits.mode() if deterministic else action_logits.sample()
            action_log_probs = action_logits.log_probs(actions)
+        end_time = time.time()
+        time_diff = end_time - start_time  # elapsed time in seconds
        return actions, action_log_probs
    def get_probs(self, x, available_actions=None):
...
...@@ -10,7 +10,7 @@ from envs.env_core import EnvCore
from .util import init, get_clones
from transformers import GPT2Config, GPT2Model
"""MLP modules."""
+from transformers import EncodecModel
Feature = 4
class MLPLayer(nn.Module):
...@@ -105,8 +105,8 @@ class MLPBaseGPT2(nn.Module):
        self.feature_norm = nn.LayerNorm(obs_dim)
        config = GPT2Config(n_embd=128,
-                           n_layer=2,
-                           n_head=4)
+                           n_layer=6,
+                           n_head=8)
        self.mlp = new_mlp_gpt2(obs_dim,
                                128,
                                1,
...@@ -369,7 +369,6 @@ def get_attn_pad_mask(seq_q, seq_k):
    return torch.tensor(data3)
class new_mlp_gpt2(nn.Module):
    def __init__(self, input_dim, embd_dim, output_dim, config):
        super(new_mlp_gpt2, self).__init__()
...
...@@ -73,8 +73,7 @@ class AerialVehicle(object):
        x = self.PlaceX + dist * math.cos(direction)
        y = self.PlaceY + dist * math.sin(direction)
-        if self.isBeyond(x, y):
-            return 5
        self.PlaceX = x
        self.PlaceY = y
        return self.InPlace()
...@@ -88,22 +87,22 @@ class AerialVehicle(object):
    def InPlace(self):
        punish = 0
        if self.PlaceX > self.MaxPlaceX:
-            # self.PlaceX = self.MaxPlaceX
+            self.PlaceX = self.MaxPlaceX
            punish += 3
        if self.PlaceX > self.MaxPlaceX - 5:
            punish += 1
        if self.PlaceX < self.MinPlaceX:
-            # self.PlaceX = self.MinPlaceX
+            self.PlaceX = self.MinPlaceX
            punish += 3
        if self.PlaceX < self.MinPlaceX + 5:
            punish += 1
        if self.PlaceY > self.MaxPlaceY:
-            # self.PlaceY = self.MaxPlaceY
+            self.PlaceY = self.MaxPlaceY
            punish += 3
        if self.PlaceY > self.MaxPlaceY - 5:
            punish += 1
        if self.PlaceY < self.MinPlaceY:
-            # self.PlaceY = self.MinPlaceY
+            self.PlaceY = self.MinPlaceY
            punish += 3
        if self.PlaceY < self.MinPlaceY + 5:
            punish += 1
...@@ -192,17 +191,24 @@ class AerialVehicle(object):
    # Whether a content slice can be fetched from this UAV
    def tryGetContent(self, x, y, time, UserIndex, transSpeedBaseLine, requestIndex):
        if self.CachedContentList[requestIndex] != 1:
-            return -1
+            return False, -1
        if UserIndex not in self.ServiceList:
-            return -1
+            return False, -1
        speed = self.getTransSpeed(x, y)
        if speed < transSpeedBaseLine:
-            return False
+            return False, (self.getSizeOFSlice() / speed)
        if (self.getSizeOFSlice() / speed) > time:
-            return False
+            return False, (self.getSizeOFSlice() / speed)
-        return True
+        return True, (self.getSizeOFSlice() / speed)
+    def tryGetContent2(self, x, y):
+        speed = self.getTransSpeed(x, y)
+        print(speed)
+        print((self.getSizeOFSlice() / speed))
+        return (self.getSizeOFSlice() / speed)
    def clearServiceList(self):
        self.ServiceNum = 0
...@@ -210,6 +216,27 @@ class AerialVehicle(object):
+    def cal_bs_communication_delay(self, num):
+        bs_transmit_power = 2
+        noise_equivalent_power = np.power(0.1, 13)
+        bs_bandwidth = 60 * np.power(10, 6)
+        average_path_loss = self.getAvgLOS(-2000, -2000)
+        signal_noise_ratio = bs_transmit_power / (noise_equivalent_power * np.power(10, average_path_loss / 10))
+        transmission_speed = (bs_bandwidth / num) * math.log2(1 + signal_noise_ratio) / (1024 * 1024 * 8)
+        return self.getSizeOFSlice() / transmission_speed
+        # transmission_speed = (self.bs_bandwidth / self.uav_num) * math.log2(
+        #     1 + self.unit_distance_channel_gain / distance * self.bs_transmit_power / self.uav_num / self.noise_equivalent_power)
+        # return self.file_size / transmission_speed
####################################leader
...@@ -221,7 +248,12 @@ class AerialVehicle(object):
+a = AerialVehicle(20, 50, 256, 200, 11.9, 0.13, 2e9, 40e6, 2, 3e8, 6, 20, 0.02, 1e-13, 500, 500, 0, 0)
+a.ServiceNum = 6
+a.PlaceX = 100
+a.PlaceY = 100
+a.K = 1
+a.tryGetContent2(0, 0)
import random
from numpy import random as r
+userperf = [0.7, 0.2, 0.03, 0.03, 0.04]
+typeNum = 5
+type1 = [0.7, 0.2, 0.03, 0.03, 0.04]
+type2 = [0.2, 0.7, 0.03, 0.03, 0.04]
+type3 = [0.03, 0.2, 0.7, 0.03, 0.04]
+type4 = [0.03, 0.2, 0.03, 0.7, 0.04]
+type5 = [0.04, 0.2, 0.03, 0.03, 0.7]
class UserForUAV(object):
    def __init__(self, UserId, TotalContentNum, FalvorNum, K):
...@@ -14,7 +22,9 @@ class UserForUAV(object):
        self.K = K
        self.RequestIndex = 0
-        self.UAVList = []
+        self.Service = []
+        self.eachNum = TotalContentNum / typeNum
        if FalvorNum > TotalContentNum:
            self.FalvorNum = TotalContentNum
...@@ -31,6 +41,17 @@ class UserForUAV(object):
            self.RequestIndex = self.FalvorList[x]
        return self.RequestIndex
+    # Simulate user requests following a Zipf distribution
+    def genRequestIndex(self):
+        x = r.zipf(a=2, size=1)[0]
+        x = x - 1
+        if x >= len(self.FalvorList):
+            self.RequestIndex = self.FalvorList[len(self.FalvorList) - 1]
+        else:
+            self.RequestIndex = self.FalvorList[x]
+        return self.RequestIndex
    def getRequestIndex(self):
        return self.RequestIndex
...@@ -47,15 +68,58 @@ class UserForUAV(object):
        for i in range(0, len(self.UAVList)):
            self.UAVList[i].tryGetService(self.PlaceX, self.PlaceY, self.RequestIndex, self.UserId)
-    def tryGetCache(self, transSpeedBaseLine, time):
+    def addUAV(self, index):
+        self.Service.append(index)
+    def clearService(self):
+        self.Service.clear()
+    def chooseService(self):
+        dict = {}
+        for i in range(0, len(self.Service)):
+            dist = self.UAVList[self.Service[i]].getDist(self.PlaceX, self.PlaceY)
+            dict[self.Service[i]] = dist
+        a = sorted(dict.items(), key=lambda x: x[1], reverse=False)
+        realService = []
+        for key in a[:self.K]:
+            realService.append(key[0])
+        self.Service = realService
+    def getService(self):
+        return self.Service
+    def tryGetCache(self, transSpeedBaseLine, limitedTime):
        vehiclesAbleToTrans = 0
        self.genRequestIndex()
+        get = 0
+        time = 0
+        miss = 0
+        time = []
        for i in range(0, len(self.UAVList)):
-            if self.UAVList[i].tryGetContent(self.PlaceX, self.PlaceY, time, self.UserId, transSpeedBaseLine, self.RequestIndex):
+            canGet, tmp = self.UAVList[i].tryGetContent(self.PlaceX, self.PlaceY, limitedTime, self.UserId, transSpeedBaseLine, self.RequestIndex)
+            if canGet:
                vehiclesAbleToTrans = vehiclesAbleToTrans + 1
-        return vehiclesAbleToTrans, self.RequestIndex
+            if tmp != -1:
+                # print("uav")
+                # print(tmp)
+                time.append(tmp)
+                get = get + 1
+        if self.K > get:
+            miss = self.K - get
+        if get == 0:
+            return vehiclesAbleToTrans, 0, miss, self.RequestIndex
+        time.sort()
+        if get >= self.K:
+            return vehiclesAbleToTrans, time[self.K - 1], miss, self.RequestIndex
+        return vehiclesAbleToTrans, 0, miss, self.RequestIndex
+ss = []
+for i in range(0, 100):
+    ss.append(r.zipf(1.5, 10)[0])
+print(r.zipf(1.6, 100))
...@@ -9,12 +9,12 @@ from numpy import random
import torch
from envs.UserForUAV import UserForUAV
-CustomerNum = 30
+CustomerNum = 40
-AerialVehiclesNum = 7
+AerialVehiclesNum = 5
TotalContentNum = 30
MaxPlaceX = 500
MaxPlaceY = 500
-Move = MaxPlaceX / 100
+Move = 15
ContentSize = 256
CacheLimit = 3600
Hight = 200
...@@ -31,7 +31,7 @@ Noise = 1e-13
DownSize = 0
FalvorNum = 2
-K = 4
+K = 3
transSpeedBaseLine = 0.5
timeLImit = 20
...@@ -94,6 +94,7 @@ class EnvCore(object):
        self.bags = 0
        self.slices = 0
+        self.time = 0
        self.allBags = []
        self.allSlices = []
...@@ -124,6 +125,9 @@ class EnvCore(object):
        print(self.dimension4)
        print(self.dimension1*5 + self.dimension2 + self.dimension3 + self.dimension4)
+        self.trace = []
+        self.clearTrace()
...@@ -163,6 +167,7 @@ class EnvCore(object):
        punish = []
        for i in range(0, len(self.AerialVehicles)):
            act = actions[i]
            dist = act[0] * self.Move
...@@ -176,25 +181,62 @@ class EnvCore(object):
        totalReward = 0
        num = 0
        cacheMit = 0
+        totalMiss = 0
+        totalTime = 0
+        missList = []
+        times = []
        for i in range(0, len(self.Customers)):
-            reward, requestIndex = self.Customers[i].tryGetCache(transSpeedBaseLine, timeLImit)
+            reward, time, miss, requestIndex = self.Customers[i].tryGetCache(transSpeedBaseLine, timeLImit)
-            reward = reward
+            times.append(time)
            totalReward += reward
+            missList.append(miss)
+            totalMiss = totalMiss + miss
+            if reward >= self.k:
+                num += 1
            if requestIndex in self.CacheContent:
                cacheMit += 1
                self.CacheDict[requestIndex] = self.CacheDict[requestIndex] + 1
-            if reward >= self.k:
-                num += 1
+        for i in range(0, len(self.Customers)):
+            avg = []
+            for j in range(0, missList[i]):
+                # print("bs")
+                # print(self.AerialVehicles[i].cal_bs_communication_delay(totalMiss))
+                avg.append(self.AerialVehicles[j].cal_bs_communication_delay(totalMiss))
+            if missList[i] != 0:
+                avg.sort()
+                times[i] = avg[len(avg) - 1]
+        for i in range(0, len(self.Customers)):
+            totalTime += times[i]
+        totalTime = totalTime / 200
        totalReward = totalReward * 1 / self.k
-        self.nowStep += 1
+        totalReward = totalReward + num * 50 + totalTime
+        self.appendTrace()
+        totalReward = totalReward
+        self.nowStep += 1
+        self.time += totalTime
        self.bags += num
        self.slices += totalReward
        # print(self.k)
        if self.nowStep % 200 == 0:
-            print(self.bags)
+            print('bags: ' + str(self.bags))
-            print(self.slices)
+            print('totalReward: ' + str(self.slices))
-            print(self.CacheContent)
+            print('time: ' + str(self.time))
+            # print(times)
+            # print(missList)
+            if self.epi > 600:
+                self.plotTrace()
            self.allBags.append(self.bags)
            self.allSlices.append(self.slices)
...@@ -211,20 +253,25 @@ class EnvCore(object):
            print(self.allBags)
            print(self.allSlices)
+            print(times)
+            print(missList)
            if self.epi > 500:
                self.DownSize = 0
            if self.epi > 550:
                self.DownSize = 0
+            self.clearTrace()
            self.bags = 0
            self.slices = 0
+            self.time = 0
-        totalReward = totalReward + num*50
        for i in range(0, len(self.AerialVehicles)):
            sub_agent_reward.append([totalReward - punish[i]*10])
...@@ -248,7 +295,8 @@ class EnvCore(object):
    def addService(self, act):
        self.AllocReward = 0
-        reward = [0 for _ in range(TotalContentNum)]
+        reward = [0 for _ in range(len(self.Customers))]
        for i in range(0, len(self.Customers)):
            for j in range(0, len(self.AerialVehicles)):
...@@ -262,6 +310,35 @@ class EnvCore(object):
            if reward[i] >= self.k:
                self.AllocReward = self.AllocReward + 1
+    def addService22(self, act):
+        self.AllocReward = 0
+        reward = [0 for _ in range(TotalContentNum)]
+        act = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0]
+        for i in range(0, len(self.Customers)):
+            for j in range(0, len(self.AerialVehicles)):
+                num = i * len(self.AerialVehicles) + j
+                if act[num] == 1:
+                    # print('yres')
+                    self.AerialVehicles[j].addService(i)
+                    reward[i] = reward[i] + 1
+        for i in range(0, len(reward)):
+            if reward[i] >= self.k:
+                self.AllocReward = self.AllocReward + 1
+    def clearTrace(self):
+        self.trace.clear()
+        for i in range(0, len(self.AerialVehicles)):
+            self.trace.append([])
+    def appendTrace(self):
+        for i in range(0, len(self.AerialVehicles)):
+            tmp = []
+            tmp.append(self.AerialVehicles[i].PlaceX)
+            tmp.append(self.AerialVehicles[i].PlaceY)
+            self.trace[i].append(tmp)
    def RecommendReward(self):
        if self.Recommend < 10:
            return 10
...@@ -441,13 +518,15 @@ class EnvCore(object):
    def resetServiceList(self):
        for i in range(0, len(self.AerialVehicles)):
            self.AerialVehicles[i].clearServiceList()
+        for i in range(0, len(self.Customers)):
+            self.Customers[i].clearService()
    def resetCustomerPosition(self):
        xDelta = self.MaxPlaceX - self.MinPlaceX
        yDelta = self.MaxPlaceY - self.MinPlaceY
-        xx = [342.67990918, 476.6966731, 1.97413316, 256.09613169, 406.31048083,
+        xx = [342.67990918, 476.6966731, 201.97413316, 256.09613169, 406.31048083,
              306.26303341, 360.87765872, 145.93803409, 458.88706126, 357.2878917,
-              271.27218401, 71.0850238, 186.67038003, 337.06680753, 220.91658721,
+              271.27218401, 171.0850238, 186.67038003, 337.06680753, 220.91658721,
              217.00699667, 308.88348923, 256.56912128, 325.19859097, 300.5194767]
        yy = [402.61159842, 260.8235762, 454.3244404, 159.61804449, 45.22967464,
              150.35002832, 56.99218093, 414.34066315, 23.44815969, 313.14357416,
...@@ -461,6 +540,32 @@ class EnvCore(object):
        for i in range(0, len(self.Customers)):
            self.Customers[i].moveTo(xx[i], yy[i])
+    def plotTrace(self):
+        color = ['red', 'green', 'blue', 'purple', 'yellow']
+        lable = ['UAV1', 'UAV2', 'UAV3', 'UAV4', 'UAV5']
+        for i in range(len(self.Customers)):
+            plt.plot(self.Customers[i].PlaceX, self.Customers[i].PlaceY, 'o', color='black')
+            plt.text(self.Customers[i].PlaceX, self.Customers[i].PlaceY, str(i), fontsize=10, ha='right', va='bottom')
+        for i in range(len(self.trace)):
+            for j in range(len(self.trace[i])):
+                plt.plot(self.trace[i][j][0], self.trace[i][j][1], ',', color=color[i])
+            # connect consecutive trace points
+            for j in range(len(self.trace[i]) - 1):
+                start = (self.trace[i][j][0], self.trace[i][j + 1][0])
+                end = (self.trace[i][j][1], self.trace[i][j + 1][1])
+                plt.plot(start, end, color=color[i])
+        for i in range(len(self.trace)):
+            plt.plot(self.trace[i][0][0], self.trace[i][0][1], '*', color='orange')
+            plt.plot(self.trace[i][len(self.trace[i]) - 1][0], self.trace[i][len(self.trace[i]) - 1][1], '*', color='pink')
+        plt.legend()
+        plt.show()
    def newUAV(self):
        return AerialVehicle(CustomerNum, TotalContentNum, ContentSize, Hight,
                             EnvA, EnvB, Frequency, Bandwidth, TransmitPower, SpeedOfLight,
...
This source diff could not be displayed because it is too large. You can view the blob instead.
import torch
from transformers import GPT2Config, GPT2Model
from fvcore.nn import FlopCountAnalysis

n_embd = 256

# Build the GPT-2 configuration
config = GPT2Config(
    n_embd=n_embd,    # embedding dimension
    n_layer=4,        # number of transformer layers
    n_head=4,         # number of attention heads
    n_positions=256,  # maximum supported sequence length
    vocab_size=0      # disable the token embedding table (inputs_embeds is used instead)
)

# Build the GPT-2 model
model = GPT2Model(config)

# Create input data (batch_size=1, sequence_length=256, hidden_size=n_embd)
input_data = torch.randn(1, 256, n_embd)  # sequence length 256, embedding dimension n_embd

# Count parameters manually
def count_parameters(model):
    """Count the number of trainable parameters."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

params = count_parameters(model)
print(f"Parameters: {params:,} ({params / 1e6:.2f} M)")

# Compute FLOPs with fvcore
# Wrap the model so fvcore can handle the keyword-argument input
class GPT2Wrapper(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, inputs_embeds):
        return self.model(inputs_embeds=inputs_embeds).last_hidden_state

wrapped_model = GPT2Wrapper(model)

# Run the fvcore FLOP analysis
flop_analyzer = FlopCountAnalysis(wrapped_model, input_data)
flops = flop_analyzer.total()
print(f"\nTotal FLOPs: {flops:,}")
print(f"FLOPs (millions): {flops / 1e6:.2f} M")
print(f"FLOPs (billions): {flops / 1e9:.2f} G")

# Theoretical cross-check
def calculate_gpt2_flops(batch_size, seq_len, n_embd, n_layer, n_head):
    """Estimate the FLOPs of a GPT-2 model analytically."""
    d_head = n_embd // n_head  # dimension of each attention head
    # FLOPs per layer
    flops_per_layer = 0
    # Attention block
    # 1. Q, K, V projections
    flops_per_layer += 3 * batch_size * seq_len * n_embd * n_embd * 2  # multiply-accumulate
    # 2. Attention scores (Q·K^T)
    flops_per_layer += batch_size * n_head * seq_len * d_head * seq_len * 2
    # 3. Attention output (softmax·V)
    flops_per_layer += batch_size * n_head * seq_len * seq_len * d_head * 2
    # 4. Output projection
    flops_per_layer += batch_size * seq_len * n_embd * n_embd * 2
    # Feed-forward block
    ffn_dim = 4 * n_embd  # the usual 4x expansion
    # 5. First FFN layer
    flops_per_layer += batch_size * seq_len * n_embd * ffn_dim * 2
    # 6. Second FFN layer
    flops_per_layer += batch_size * seq_len * ffn_dim * n_embd * 2
    # Total over all layers
    total_layer_flops = n_layer * flops_per_layer
    return total_layer_flops
# Theoretical FLOPs, evaluated with the same settings as the config above
batch_size = 1
seq_len = 256
n_layer = 4
n_head = 4
theoretical_flops = calculate_gpt2_flops(batch_size, seq_len, n_embd, n_layer, n_head)
print(f"\nTheoretical total FLOPs: {theoretical_flops:,}")
print(f"FLOPs (billions): {theoretical_flops / 1e9:.2f} G")
print(f"Difference from measured value: {(flops - theoretical_flops) / flops * 100:.2f}%")
\ No newline at end of file
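As a rough cross-check on the parameter count printed by the script above: a GPT-2 block contributes about 12 * n_embd^2 weights per layer (4 * n_embd^2 for the Q/K/V/output projections plus 8 * n_embd^2 for the 4x feed-forward network), and the position embedding adds n_positions * n_embd. A minimal sketch under these assumptions, ignoring biases and layer norms, so the estimate is slightly below the exact count:

# Rough analytic parameter estimate for the GPT-2 stack configured above
n_embd, n_layer, n_positions = 256, 4, 256
attn_params = 4 * n_embd ** 2                 # Q, K, V and output projections
mlp_params = 8 * n_embd ** 2                  # two linear layers with 4x expansion
approx_params = n_layer * (attn_params + mlp_params) + n_positions * n_embd
print(f"approx params: {approx_params / 1e6:.2f} M")  # compare with count_parameters(model)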
import torch
import torch.nn as nn
import time
from thop import profile  # requires: pip install thop

class Actor(nn.Module):
    """Example actor network (fully connected)."""
    def __init__(self, state_dim=24, action_dim=4):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, action_dim),
            nn.Tanh()
        )

    def forward(self, state):
        return self.net(state)

# Create the model and test data
device = torch.device("cpu")
model = Actor().to(device)
dummy_input = torch.randn(1, 24).to(device)  # batch size 1, state dimension 24

# ===== FLOPs and parameter count =====
flops, params = profile(model, inputs=(dummy_input,))
print(f"Parameters: {params / 1e6:.2f} M")
print(f"FLOPs: {flops / 1e6:.5f} M")

# ===== Inference latency =====
warmup_steps = 100  # warm-up iterations
test_steps = 1000   # timed iterations

# Warm-up (lets CUDA finish initialisation when a GPU is used)
for _ in range(warmup_steps):
    _ = model(dummy_input)

# Timed measurement
start_time = time.time()
for _ in range(test_steps):
    with torch.no_grad():  # disable gradient tracking
        _ = model(dummy_input)
# Synchronise when running on a GPU
if device.type == "cuda":
    torch.cuda.synchronize()
end_time = time.time()

avg_latency = (end_time - start_time) * 1000 / test_steps  # milliseconds per forward pass
print(f"Average inference latency: {avg_latency:.4f} ms")

# ===== Detailed per-layer statistics =====
print("\nPer-layer details:")
model_summary = profile(model, inputs=(dummy_input,), verbose=True)
\ No newline at end of file
import sys
import time
import numpy as np
import torch
import torch.nn as nn
from transformers import GPT2Config, GPT2Model
import psutil  # used to monitor CPU utilisation
from ptflops import get_model_complexity_info

# Fix the random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Custom MLP + GPT-2 actor model
class Actor(nn.Module):
    def __init__(self, obs_dim, embd_dim, output_dim, config):
        super(Actor, self).__init__()
        self.gpt = GPT2Model(config)
        self.fc1 = nn.Linear(obs_dim, embd_dim)
        self.fc2 = nn.Linear(embd_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        x = self.activation(self.fc1(x))
        gpt_out = self.gpt(inputs_embeds=x.unsqueeze(1)).last_hidden_state
        return self.fc2(gpt_out.squeeze(1))

def generate_neural_net_input(num_drones=5, num_users=20):
    """
    Generate the 54-dimensional network input, consisting of:
    - x, y coordinates of 5 drones (first 10 entries)
    - x, y coordinates of 20 users (middle 40 entries)
    - 4 inter-drone distances (last 4 entries)

    Args:
        num_drones: number of drones (default 5)
        num_users: number of users (default 20)

    Returns:
        54-dimensional numpy array
    """
    # 1. Drone positions (uniformly random in a 100x100 area)
    drone_positions = np.random.uniform(0, 100, (num_drones, 2))
    # 2. User positions (uniformly random in the same area)
    user_positions = np.random.uniform(0, 100, (num_users, 2))
    # 3. Inter-drone distances (four specific pairs: 0-1, 0-2, 1-3, 2-4)
    distances = [
        np.linalg.norm(drone_positions[0] - drone_positions[1]),  # d0-1
        np.linalg.norm(drone_positions[0] - drone_positions[2]),  # d0-2
        np.linalg.norm(drone_positions[1] - drone_positions[3]),  # d1-3
        np.linalg.norm(drone_positions[2] - drone_positions[4])   # d2-4
    ]
    # 4. Concatenate everything into a 54-dimensional array:
    #    first 10 entries: flattened drone coordinates (5 drones x 2 coords)
    #    middle 40 entries: flattened user coordinates (20 users x 2 coords)
    #    last 4 entries: distances
    input_array = np.concatenate([
        drone_positions.flatten(),
        user_positions.flatten(),
        distances
    ])
    return input_array

def print_input_description(input_array):
    """Print a human-readable description of the input."""
    # Drone positions
    print("Drone positions (5 drones):")
    for i in range(0, 10, 2):
        drone_idx = i // 2
        x, y = input_array[i], input_array[i + 1]
        print(f"  drone {drone_idx}: x={x:.2f}, y={y:.2f}")
    # User positions (partial listing)
    print("\nUser positions (first 5):")
    for i in range(10, 20, 2):  # only the first 5 users
        user_idx = (i - 10) // 2
        x, y = input_array[i], input_array[i + 1]
        print(f"  user {user_idx}: x={x:.2f}, y={y:.2f}")
    # Inter-drone distances
    print("\nInter-drone distances:")
    dist_labels = ["drones 0-1", "drones 0-2", "drones 1-3", "drones 2-4"]
    for i, label in enumerate(dist_labels):
        print(f"  {label}: {input_array[50 + i]:.2f}")
# Configuration
obs_dim = 54  # 2*5 + 20*2 + 4
batch_size = 32
embd_dim = 256
config = GPT2Config(n_embd=embd_dim, n_layer=2, n_head=4)

# Create the model on CPU
device = torch.device("cpu")
print(f"Using device: {device}")
model = Actor(obs_dim, embd_dim, 2, config).to(device)
model.load_state_dict(torch.load('actor2-4-54.pt', map_location='cpu'))

print(f"Testing a network input on: {device}")
input = generate_neural_net_input(5, 20)
print_input_description(input)
ss = []
ss.append(input)
input = torch.tensor(input, dtype=torch.float32)
out = model(torch.tensor(ss, dtype=torch.float32))
print(out)
print(f"  drone move: x={out[0][0]:.2f}, y={out[0][1]:.2f}")
# Generate test data
def generate_batch(batch_size):
    return torch.randn(batch_size, obs_dim).to(device)

# Warm-up runs
print("Running warm-up passes...")
with torch.no_grad():
    for _ in range(10):
        data = generate_batch(batch_size)
        _ = model(data)

# Track CPU utilisation
cpu_usages = []  # peak CPU utilisation of each test

# Measure forward-pass time for different batch sizes
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128]
print("\nStarting performance test...")
print(f"{'batch size':<10} | {'avg time (ms)':<12} | {'min time (ms)':<12} | {'max time (ms)':<12} | {'std dev':<10} | {'CPU peak (%)':<10}")
print("-" * 90)

results = {}
for bs in batch_sizes:
    execution_times = []
    n_runs = 100
    cpu_usage_peak = 0
    # Monitor this process's CPU utilisation
    p = psutil.Process()
    for i in range(n_runs):
        data = generate_batch(bs)
        # Start timing
        start_time = time.perf_counter_ns()
        with torch.no_grad():
            output = model(data)
        # Stop timing
        end_time = time.perf_counter_ns()
        elapsed_ms = (end_time - start_time) / 1_000_000  # milliseconds
        execution_times.append(elapsed_ms)
        # Track the peak CPU utilisation
        current_cpu = p.cpu_percent(interval=None)
        if current_cpu > cpu_usage_peak:
            cpu_usage_peak = current_cpu
    # Store the peak CPU utilisation for this batch size
    cpu_usages.append(cpu_usage_peak)
    # Summary statistics
    avg_time = np.mean(execution_times)
    min_time = np.min(execution_times)
    max_time = np.max(execution_times)
    std_dev = np.std(execution_times)
    results[bs] = {
        'avg': avg_time,
        'min': min_time,
        'max': max_time,
        'std': std_dev,
        'cpu_peak': cpu_usage_peak
    }
    print(f"{bs:<10} | {avg_time:.6f} ms | {min_time:.6f} ms | {max_time:.6f} ms | {std_dev:.6f} | {cpu_usage_peak:.1f}%")
bs32_times = []
data = generate_batch(32)
for _ in range(100):
    start_time = time.perf_counter_ns()
    with torch.no_grad():
        output = model(data)
    end_time = time.perf_counter_ns()
    bs32_times.append((end_time - start_time) / 1_000_000)

# Final summary
print("\nTest summary:")
print(f"{'batch size':<10} | {'avg time (ms)':<12} | {'CPU peak (%)':<10}")
print("-" * 50)
for bs in batch_sizes:
    print(f"{bs:<10} | {results[bs]['avg']:.6f} ms | {results[bs]['cpu_peak']:.1f}%")

# Model information
print("\nModel information:")
print(f"Total parameters: {sum(p.numel() for p in model.parameters()) / 1e6:.2f} M")
print("\nPeak system resource usage during the test:")
print(f"Peak CPU utilisation: {max(cpu_usages):.1f}%")
print(f"Peak memory utilisation: {psutil.virtual_memory().percent}%")
print(f"Current memory usage: {psutil.virtual_memory().used / 1024 ** 3:.2f} GB / {psutil.virtual_memory().total / 1024 ** 3:.2f} GB")
# Corrected FLOPs computation (using ptflops)
print("\nComputing model FLOPs with ptflops:")

# ptflops needs a module instance, so wrap the model in a thin nn.Module
class ModelWrapper(nn.Module):
    def __init__(self, model):
        super(ModelWrapper, self).__init__()
        self.model = model

    def forward(self, x):
        return self.model(x)

# Instantiate the wrapper
model_wrapper = ModelWrapper(model)

# Input shape (obs_dim,) -- a single sample
input_shape = (obs_dim,)
macs, params = get_model_complexity_info(
    model_wrapper,
    input_shape,
    as_strings=False,
    print_per_layer_stat=False,
    verbose=False
)

# Estimate FLOPs from MACs (FLOPs is roughly 2 * MACs)
total_flops = 2 * macs  # approximation; the exact value may differ slightly
print(total_flops)

# Results
print(f"Model parameters: {params / 1e6:.2f} M")
print(f"MACs (multiply-accumulate operations): {macs / 1e6:.2f} MMACs")
print(f"Estimated FLOPs: {total_flops / 1e6:.2f} MFLOPs (single precision, single sample)")

# Total FLOPs for batch size 128, matching the largest timed batch above
batch_size_for_flops = 128
total_flops_batch = total_flops * batch_size_for_flops  # FLOPs scale linearly with the batch size
print(f"\nEstimated total FLOPs at batch size {batch_size_for_flops}: {total_flops_batch / 1e9:.2f} GFLOPs")

# Achieved throughput
avg_time_sec = results[128]['avg'] / 1000  # convert to seconds
gflops_per_forward = total_flops_batch / 1e9
gflops_per_sec = gflops_per_forward / avg_time_sec
print(f"Forward-pass compute throughput: {gflops_per_sec:.2f} GFLOPS")

# Theoretical CPU peak performance
cpu_count = psutil.cpu_count(logical=False)  # physical core count
if cpu_freq := psutil.cpu_freq():
    cpu_freq_ghz = cpu_freq.current / 1000  # current CPU frequency in GHz
else:
    # Fallback
    import platform
    if platform.system() == 'Darwin':  # macOS
        import subprocess
        output = subprocess.check_output(["sysctl", "-n", "hw.cpufrequency"]).decode().strip()
        cpu_freq_ghz = float(output) / 1000000000
    else:
        cpu_freq_ghz = 3.0  # assume 3.0 GHz by default

# Assume 8 floating-point operations per core per cycle (AVX vectorisation)
per_cycle_flops = 8
theoretical_gflops = cpu_count * cpu_freq_ghz * per_cycle_flops
print(f"Theoretical CPU peak: {theoretical_gflops:.1f} GFLOPS (single precision)")

# Achieved utilisation
utilization = gflops_per_sec / theoretical_gflops * 100
print(f"CPU utilisation: {utilization:.1f}%")
...@@ -143,11 +143,12 @@ class Runner(object):
        return train_infos

    def save(self):
+        print("save")
        for agent_id in range(self.num_agents):
            policy_actor = self.trainer[agent_id].policy.actor
            torch.save(
-                policy_actor.state_dict(),
+                policy_actor,
-                str(self.save_dir) + "/actor_agent" + str(agent_id) + ".pt",
+                str(self.save_dir) + "/zym_actor_agent" + str(agent_id) + ".pt",
            )
            policy_critic = self.trainer[agent_id].policy.critic
            torch.save(
...
...@@ -24,11 +24,12 @@ class EnvRunner(Runner):
        super(EnvRunner, self).__init__(config)

    def run(self):
+        print("wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww")
        self.warmup()
+        print("time.time()")
        start = time.time()
        episodes = int(self.num_env_steps) // self.episode_length // self.n_rollout_threads
+        self.save()
        for episode in range(episodes):
            if self.use_linear_lr_decay:
                self.trainer.policy.lr_decay(episode, episodes)
...@@ -68,9 +69,10 @@ class EnvRunner(Runner):
            # post process
            total_num_steps = (episode + 1) * self.episode_length * self.n_rollout_threads
+            self.save()
            # save model
            if episode % self.save_interval == 0 or episode == episodes - 1:
+                print("saave")
                self.save()

            # log information
...
from flask import Flask, request, jsonify
import numpy as np
from train.newnetwork import step

app = Flask(__name__)

def getinfo(data_array):
    """
    Placeholder getinfo implementation -- replace it with the real algorithm in production.
    It returns fixed example data; substitute your own computation when deploying.
    """
    # Example return data -- replace with the real algorithm output
    array = step(5, 3, data_array)
    print(array)
    uav1 = []
    uav1.append(data_array[0] + array[0])
    uav1.append(data_array[1] + array[1])
    uav2 = []
    uav2.append(data_array[3] + array[2])
    uav2.append(data_array[4] + array[3])
    uav3 = []
    uav3.append(data_array[6] + array[4])
    uav3.append(data_array[7] + array[5])
    res = []
    res.append(uav1)
    res.append(uav2)
    res.append(uav3)
    return {
        "user_target_com_rate": [],
        "device_target_pos": res,
        "user_target_pos_auc": [],
        "flight_types": [60, 60, 60],
        "target_deployment": []
    }

@app.route('/process', methods=['POST'])
def process_data():
    try:
        # Parse the JSON input
        input_data = request.json
        # Validate the payload structure
        if 'data' not in input_data:
            return jsonify({"error": "Missing 'data' field"}), 400
        data = input_data['data']
        # Extract and process the deviceState field
        device_state = data.get('deviceState', [])
        extracted_device = []
        for item in device_state:
            # Keep the first three values of each entry, dropping invalid ones
            valid_values = [x for x in item[:3] if x is not None and isinstance(x, (int, float))]
            extracted_device.extend(valid_values)
        # Extract and process the userPos field
        user_pos = data.get('userPos', [])
        extracted_user = []
        for pos in user_pos:
            # Drop invalid coordinates
            if len(pos) >= 2 and all(isinstance(x, (int, float)) for x in pos[:2]):
                extracted_user.extend(pos[:2])
        # Concatenate everything into one flat array
        combined_array = extracted_device + extracted_user
        print(combined_array)
        # Call getinfo (mocked here)
        result = getinfo(combined_array)
        # Build the response
        response = {
            "user_target_com_rate": result.get("user_target_com_rate", []),
            "device_target_pos": result.get("device_target_pos", []),
            "user_target_pos_auc": result.get("user_target_pos_auc", []),
            "flight_types": result.get("flight_types", []),
            "target_deployment": result.get("target_deployment", [])
        }
        return jsonify(response)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
\ No newline at end of file
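For reference, a minimal client sketch for the /process endpoint above, assuming the server runs locally on port 5000. The 'data', 'deviceState', and 'userPos' keys match the handler; the coordinates are placeholder values, sized so that three values per device and two per user yield the 3*5 + 2*3 = 21 inputs that step(5, 3, ...) expects.

import requests  # assumed HTTP client; any equivalent works

payload = {
    "data": {
        # five devices: only the first three values of each entry are used
        "deviceState": [[10.0, 20.0, 100.0], [30.0, 40.0, 100.0], [50.0, 60.0, 100.0],
                        [70.0, 80.0, 100.0], [90.0, 15.0, 100.0]],
        # three users: (x, y) pairs
        "userPos": [[5.0, 5.0], [25.0, 35.0], [45.0, 55.0]],
    }
}
resp = requests.post("http://127.0.0.1:5000/process", json=payload)
print(resp.json())  # expected keys: device_target_pos, flight_types, ...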
import torch
import torch.nn as nn
import math

class HybridCNN(nn.Module):
    def __init__(self, n, user):
        super(HybridCNN, self).__init__()
        self.n = n
        self.user = user
        # Input handling: (batch, 3n + 2*user) -> (batch, 1, 3n + 2*user)
        self.conv1 = nn.Conv1d(1, 64, kernel_size=3, padding=1)
        # Feature extraction
        self.fc = nn.Linear(64 * (3 * n + user * 2), 64)
        # Output heads
        self.cont_head = nn.Linear(64, 2 * n)        # continuous outputs
        self.binary_head = nn.Linear(64, user * n)   # binary outputs
        # Pre-built scaling template
        self.register_buffer('scale_template',
                             torch.Tensor([(20 if i % 2 else 2 * math.pi) for i in range(2 * n)]))

    def forward(self, x):
        # Input validation
        expected_dim = 3 * self.n + self.user * 2
        if x.size(-1) != expected_dim:
            raise ValueError(f"Expected input dimension {expected_dim}, got {x.size(-1)}")
        # Input handling: (batch, D) -> (batch, 1, D)
        x = x.unsqueeze(1)
        # Feature extraction
        x = torch.relu(self.conv1(x))  # (batch, 64, D)
        x = x.view(x.size(0), -1)      # (batch, 64*D)
        x = torch.relu(self.fc(x))     # (batch, 64)
        # Continuous outputs
        cont_output = torch.sigmoid(self.cont_head(x)) * self.scale_template
        # Binary outputs (straight-through estimator trick during training)
        bin_logits = self.binary_head(x)
        bin_output = (bin_logits > 0).float()
        if self.training:
            bin_output = bin_output + bin_logits - bin_logits.detach()
        return torch.cat([cont_output, bin_output], dim=1)

def step(n, user, array):
    # Initialise the network
    net = HybridCNN(n=n, user=user)
    # print(f"Network structure (n={n}, user={user}):\n{net}")
    test_input = [array]
    test_input = torch.tensor(test_input)
    # print(f"\nTest input shape: {test_input.shape}")
    # Forward pass
    output = net(test_input)
    return output
    # print(test_input)
    # print(output)
    # Check the output
    # print(f"\nOutput shape: {output.shape} (2n + user*n = {2 * n} + {user * n} = {2 * n + user * n})")

# Validation helper
def validate_output(output, n, user):
    # Continuous part
    cont = output[:, :2 * n]
    even_check = torch.all((cont[:, ::2] >= 0) & (cont[:, ::2] <= 2 * math.pi))
    odd_check = torch.all((cont[:, 1::2] >= 0) & (cont[:, 1::2] <= 20))
    # Binary part
    bin_part = output[:, 2 * n:]
    binary_check = torch.all(torch.isin(bin_part, torch.tensor([0.0, 1.0])))
    return even_check and odd_check and binary_check
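A minimal usage sketch for the step and validate_output helpers above, assuming it runs in the same module; the zero-filled input is a placeholder sized for HybridCNN(n=5, user=3), which expects 3*5 + 2*3 = 21 features and returns 2*5 + 3*5 = 25 outputs.

n, user = 5, 3                        # placeholder sizes
obs = [0.0] * (3 * n + 2 * user)      # 21 input features
out = step(n, user, obs)              # forward pass through a freshly initialised HybridCNN
print(out.shape)                      # torch.Size([1, 25]): 2*n continuous + user*n binary values
print(validate_output(out, n, user))  # range check on both output parts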
import torch
import torch.nn as nn
import math

class HybridCNN(nn.Module):
    def __init__(self, n, user):
        super(HybridCNN, self).__init__()
        self.n = n
        self.user = user
        input_dim = 3 * n + 2 * user  # input dimension
        output_dim = 2 * n            # output dimension
        # Input handling: (batch, input_dim) -> (batch, 1, input_dim)
        self.conv1 = nn.Conv1d(1, 64, kernel_size=3, padding=1)
        # Feature extraction
        self.fc1 = nn.Linear(64 * input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        # Output layer: 2*n outputs
        self.output_head = nn.Linear(64, output_dim)
        # Scaling factor that keeps outputs within [-10, 10]
        self.scale_factor = 10.0

    def forward(self, x):
        # Input validation
        expected_dim = 3 * self.n + 2 * self.user
        if x.size(-1) != expected_dim:
            raise ValueError(f"Expected input dimension {expected_dim}, got {x.size(-1)}")
        # Input handling: (batch, D) -> (batch, 1, D)
        x = x.unsqueeze(1)
        # Convolution
        x = torch.relu(self.conv1(x))  # (batch, 64, D)
        # Flatten
        x = x.view(x.size(0), -1)      # (batch, 64*D)
        # Fully connected layers
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Output layer: tanh restricts values to [-1, 1]
        output = torch.tanh(self.output_head(x))
        # Scale the outputs to [-10, 10]
        output = output * self.scale_factor
        return output

def step(n, user, array):
    # Check the input length
    expected_length = 3 * n + 2 * user
    if len(array) != expected_length:
        raise ValueError(f"Expected input array of length {expected_length}, got {len(array)}")
    # Initialise the network
    net = HybridCNN(n=n, user=user)
    # Convert to a tensor
    test_input = torch.tensor([array], dtype=torch.float32)
    # Forward pass
    output = net(test_input)
    # Return as a flat list
    return output.detach().numpy().flatten().tolist()
\ No newline at end of file
import socket
import json
import random
import time

def send_array(host='127.0.0.1', port=12345):
    data = [-1.3105, 0.5705, -0.2628, 2.6665, 0.8016, -0.8752, -0.2601, 0.5445, 2.0032, 0.4143, -0.6441, -2.2407, -0.5950, 0.4144, 0.0528, 0.1691, -0.9757, 1.5469, 0.3840, 0.2890, 0.2386]
    print("Array to send:", data)
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((host, port))
            # Serialise the array to a JSON string and encode it
            data = json.dumps(data).encode('utf-8')
            s.sendall(data)
            print(f"Sent successfully to {host}:{port}")
            # Set a 10-second receive timeout
            s.settimeout(10)
            s.shutdown(socket.SHUT_WR)
            # Receive the response
            response = b""
            while True:
                data = s.recv(1024)
                if not data:
                    break
                response += data
            print(f"Received response: {response.decode('utf-8')}")
    except Exception as e:
        print(f"Connection error: {str(e)}")

if __name__ == '__main__':
    send_array()  # use the default host and port
\ No newline at end of file
import socket
import random
import json
from threading import Thread
from network import step

def handle_client(conn, addr):
    """Handle one client connection."""
    try:
        # Receive the client's data
        received_data = bytearray()
        while True:
            chunk = conn.recv(1024)
            if not chunk:
                break
            received_data.extend(chunk)
        info = json.loads(received_data.decode('utf-8'))
        array = step(5, 3, info)
        array = array.tolist()
        array = array[0]
        # Serialise the result and send it back
        response = json.dumps(array).encode('utf-8')
        conn.sendall(response)
        print(f"Sent to {addr}: {response}")
    except Exception as e:
        print(f"Client {addr} error: {str(e)}")
    finally:
        conn.close()

def tensor_to_array(tensor):
    """Safely convert a tensor to a NumPy array."""
    if tensor.requires_grad:
        tensor = tensor.detach()
    if tensor.device.type != 'cpu':
        tensor = tensor.cpu()
    return tensor.numpy()

def start_server(host='0.0.0.0', port=12345):
    """Start the TCP server."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind((host, port))
        s.listen()
        print(f"Server started, listening on {host}:{port}")
        # Keep accepting new connections
        while True:
            conn, addr = s.accept()
            print(f"Connection from {addr}")
            Thread(target=handle_client, args=(conn, addr)).start()

if __name__ == '__main__':
    start_server()
\ No newline at end of file
...@@ -69,13 +69,14 @@ def make_eval_env(all_args):
def parse_args(args, parser):
    parser.add_argument("--scenario_name", type=str, default="MPE", help="Which scenario to run on")
    parser.add_argument("--num_landmarks", type=int, default=3)
-    parser.add_argument("--num_agents", type=int, default=9, help="number of players")
+    parser.add_argument("--num_agents", type=int, default=7, help="number of players")
    all_args = parser.parse_known_args(args)[0]
    return all_args

def main(args):
    parser = get_config()
    all_args = parse_args(args, parser)
...@@ -152,7 +153,7 @@ def main(args):
    envs = make_train_env(all_args)
    eval_envs = make_eval_env(all_args) if all_args.use_eval else None
    num_agents = all_args.num_agents
+    print(" config = ")
    config = {
        "all_args": all_args,
        "envs": envs,
...@@ -169,6 +170,8 @@ def main(args):
        from runner.separated.env_runner import EnvRunner as Runner

    runner = Runner(config)
+    print("runner.run()")
+    runner.save()
    runner.run()

    # post process
...