Commit 40758424 authored by 张逸鸣

111

parent a562d9db
...@@ -4,6 +4,7 @@
# @Email : hezhiqiang01@baidu.com
# @File : rMAPPOPolicy.py
"""
+import time
import torch
from algorithms.algorithm.r_actor_critic import R_Actor, R_Critic
...@@ -71,12 +72,16 @@ class RMAPPOPolicy:
        :return rnn_states_actor: (torch.Tensor) updated actor network RNN states.
        :return rnn_states_critic: (torch.Tensor) updated critic network RNN states.
        """
+        start_time = time.time()
        actions, action_log_probs, rnn_states_actor = self.actor(obs,
                                                                 rnn_states_actor,
                                                                 masks,
                                                                 available_actions,
                                                                 deterministic)
+        end_time = time.time()
+        time_diff = end_time - start_time  # elapsed time in seconds
+        # print(f"Code execution took: {time_diff:.30f} s")
        values, rnn_states_critic = self.critic(cent_obs, rnn_states_critic, masks)
        return values, actions, action_log_probs, rnn_states_actor, rnn_states_critic
...
...@@ -4,6 +4,7 @@
# @Email : hezhiqiang01@baidu.com
# @File : r_actor_critic.py
"""
+import time
import torch
import torch.nn as nn
...@@ -37,7 +38,7 @@ class R_Actor(nn.Module):
        self.tpdv = dict(dtype=torch.float32, device=device)
        obs_shape = get_shape_from_obs_space(obs_space)
-        base = CNNBase if len(obs_shape) == 3 else MLPBaseGPT2
+        base = CNNBase if len(obs_shape) == 3 else MLPBase
        self.base = base(args, obs_shape)
        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
...@@ -61,17 +62,23 @@ class R_Actor(nn.Module):
        :return action_log_probs: (torch.Tensor) log probabilities of taken actions.
        :return rnn_states: (torch.Tensor) updated RNN hidden states.
        """
+        start_time = time.time()
        obs = check(obs).to(**self.tpdv)
        rnn_states = check(rnn_states).to(**self.tpdv)
        masks = check(masks).to(**self.tpdv)
        if available_actions is not None:
+            print("22222222222")
            available_actions = check(available_actions).to(**self.tpdv)
        actor_features = self.base(obs)
        if self._use_naive_recurrent_policy or self._use_recurrent_policy:
+            print("11111111")
            actor_features, rnn_states = self.rnn(actor_features, rnn_states, masks)
+        end_time = time.time()
+        time_diff = end_time - start_time  # elapsed time in seconds
+        # print(f"Code execution took: {time_diff:.30f} s")
        actions, action_log_probs = self.act(actor_features, available_actions, deterministic)
        return actions, action_log_probs, rnn_states
...@@ -172,3 +179,4 @@ class R_Critic(nn.Module):
        values = self.v_out(critic_features)
        return values, rnn_states
+import time
from .distributions import Bernoulli, Categorical, DiagGaussian
import torch
import torch.nn as nn
...@@ -57,7 +59,9 @@ class ACTLayer(nn.Module):
        :return actions: (torch.Tensor) actions to take.
        :return action_log_probs: (torch.Tensor) log probabilities of taken actions.
        """
+        start_time = time.time()
        if self.mixed_action:
            actions = []
            action_log_probs = []
            for action_out in self.action_outs:
...@@ -71,6 +75,7 @@ class ACTLayer(nn.Module):
            action_log_probs = torch.sum(torch.cat(action_log_probs, -1), -1, keepdim=True)
        elif self.multi_discrete:
            actions = []
            action_log_probs = []
            for action_out in self.action_outs:
...@@ -83,6 +88,7 @@ class ACTLayer(nn.Module):
            actions = torch.cat(actions, -1)
            action_log_probs = torch.cat(action_log_probs, -1)
        elif self.continuous_action:
            # actions = []
            # action_log_probs = []
            action_logit = self.action_out(x)
...@@ -95,10 +101,13 @@ class ACTLayer(nn.Module):
            # actions = torch.cat(actions, -1)
            # action_log_probs = torch.sum(torch.cat(action_log_probs, -1), -1, keepdim=True)
        else:
            action_logits = self.action_out(x, available_actions)
            actions = action_logits.mode() if deterministic else action_logits.sample()
            action_log_probs = action_logits.log_probs(actions)
+        end_time = time.time()
+        time_diff = end_time - start_time  # elapsed time in seconds
        return actions, action_log_probs
    def get_probs(self, x, available_actions=None):
...
...@@ -10,7 +10,7 @@ from envs.env_core import EnvCore
from .util import init, get_clones
from transformers import GPT2Config, GPT2Model
"""MLP modules."""
+from transformers import EncodecModel
Feature = 4
class MLPLayer(nn.Module):
...@@ -105,8 +105,8 @@ class MLPBaseGPT2(nn.Module):
        self.feature_norm = nn.LayerNorm(obs_dim)
        config = GPT2Config(n_embd=128,
-                           n_layer=2,
-                           n_head=4)
+                           n_layer=6,
+                           n_head=8)
        self.mlp = new_mlp_gpt2(obs_dim,
                                128,
                                1,
...@@ -369,7 +369,6 @@ def get_attn_pad_mask(seq_q, seq_k):
    return torch.tensor(data3)
class new_mlp_gpt2(nn.Module):
    def __init__(self, input_dim, embd_dim, output_dim, config):
        super(new_mlp_gpt2, self).__init__()
...
...@@ -73,8 +73,7 @@ class AerialVehicle(object):
        x = self.PlaceX + dist * math.cos(direction)
        y = self.PlaceY + dist * math.sin(direction)
-        if self.isBeyond(x, y):
-            return 5
        self.PlaceX = x
        self.PlaceY = y
        return self.InPlace()
...@@ -88,22 +87,22 @@ class AerialVehicle(object):
    def InPlace(self):
        punish = 0
        if self.PlaceX > self.MaxPlaceX:
-            # self.PlaceX = self.MaxPlaceX
+            self.PlaceX = self.MaxPlaceX
            punish += 3
        if self.PlaceX > self.MaxPlaceX - 5:
            punish += 1
        if self.PlaceX < self.MinPlaceX:
-            # self.PlaceX = self.MinPlaceX
+            self.PlaceX = self.MinPlaceX
            punish += 3
        if self.PlaceX < self.MinPlaceX + 5:
            punish += 1
        if self.PlaceY > self.MaxPlaceY:
-            # self.PlaceY = self.MaxPlaceY
+            self.PlaceY = self.MaxPlaceY
            punish += 3
        if self.PlaceY > self.MaxPlaceY - 5:
            punish += 1
        if self.PlaceY < self.MinPlaceY:
-            # self.PlaceY = self.MinPlaceY
+            self.PlaceY = self.MinPlaceY
            punish += 3
        if self.PlaceY < self.MinPlaceY + 5:
            punish += 1
...@@ -192,17 +191,24 @@ class AerialVehicle(object):
    # Whether a content slice can be fetched from this UAV
    def tryGetContent(self, x, y, time, UserIndex, transSpeedBaseLine, requestIndex):
        if self.CachedContentList[requestIndex] != 1:
-            return -1
+            return False, -1
        if UserIndex not in self.ServiceList:
-            return -1
+            return False, -1
        speed = self.getTransSpeed(x, y)
        if speed < transSpeedBaseLine:
-            return False
+            return False, (self.getSizeOFSlice() / speed)
        if (self.getSizeOFSlice() / speed) > time:
-            return False
+            return False, (self.getSizeOFSlice() / speed)
-        return True
+        return True, (self.getSizeOFSlice() / speed)
+    def tryGetContent2(self, x, y):
+        speed = self.getTransSpeed(x, y)
+        print(speed)
+        print((self.getSizeOFSlice() / speed))
+        return (self.getSizeOFSlice() / speed)
    def clearServiceList(self):
        self.ServiceNum = 0
...@@ -210,6 +216,27 @@ class AerialVehicle(object):
+    def cal_bs_communication_delay(self, num):
+        bs_transmit_power = 2
+        noise_equivalent_power = np.power(0.1, 13)
+        bs_bandwidth = 60 * np.power(10, 6)
+        average_path_loss = self.getAvgLOS(-2000, -2000)
+        signal_noise_ratio = bs_transmit_power / (noise_equivalent_power * np.power(10, average_path_loss / 10))
+        transmission_speed = (bs_bandwidth / num) * math.log2(1 + signal_noise_ratio) / (1024 * 1024 * 8)
+        return self.getSizeOFSlice() / transmission_speed
+        # transmission_speed = (self.bs_bandwidth / self.uav_num) * math.log2(
+        #     1 + self.unit_distance_channel_gain / distance * self.bs_transmit_power / self.uav_num / self.noise_equivalent_power)
+        # return self.file_size / transmission_speed
####################################leader
...@@ -221,7 +248,12 @@ class AerialVehicle(object):
+a = AerialVehicle(20, 50, 256, 200, 11.9, 0.13, 2e9, 40e6, 2, 3e8, 6, 20, 0.02, 1e-13, 500, 500, 0, 0)
+a.ServiceNum = 6
+a.PlaceX = 100
+a.PlaceY = 100
+a.K = 1
+a.tryGetContent2(0, 0)
import random
from numpy import random as r
+userperf = [0.7, 0.2, 0.03, 0.03, 0.04]
+typeNum = 5
+type1 = [0.7, 0.2, 0.03, 0.03, 0.04]
+type2 = [0.2, 0.7, 0.03, 0.03, 0.04]
+type3 = [0.03, 0.2, 0.7, 0.03, 0.04]
+type4 = [0.03, 0.2, 0.03, 0.7, 0.04]
+type5 = [0.04, 0.2, 0.03, 0.03, 0.7]
class UserForUAV(object):
    def __init__(self, UserId, TotalContentNum, FalvorNum, K):
...@@ -14,7 +22,9 @@ class UserForUAV(object):
        self.K = K
        self.RequestIndex = 0
-        self.UAVList = []
+        self.Service = []
+        self.eachNum = TotalContentNum / typeNum
        if FalvorNum > TotalContentNum:
            self.FalvorNum = TotalContentNum
...@@ -31,6 +41,17 @@ class UserForUAV(object):
            self.RequestIndex = self.FalvorList[x]
        return self.RequestIndex
+    # Simulate user requests following a Zipf distribution
+    def genRequestIndex(self):
+        x = r.zipf(a=2, size=1)[0]
+        x = x - 1
+        if x >= len(self.FalvorList):
+            self.RequestIndex = self.FalvorList[len(self.FalvorList) - 1]
+        else:
+            self.RequestIndex = self.FalvorList[x]
+        return self.RequestIndex
    def getRequestIndex(self):
        return self.RequestIndex
...@@ -47,15 +68,58 @@ class UserForUAV(object):
        for i in range(0, len(self.UAVList)):
            self.UAVList[i].tryGetService(self.PlaceX, self.PlaceY, self.RequestIndex, self.UserId)
-    def tryGetCache(self, transSpeedBaseLine, time):
+    def addUAV(self, index):
+        self.Service.append(index)
+    def clearService(self):
+        self.Service.clear()
+    def chooseService(self):
+        dict = {}
+        for i in range(0, len(self.Service)):
+            dist = self.UAVList[self.Service[i]].getDist(self.PlaceX, self.PlaceY)
+            dict[self.Service[i]] = dist
+        a = sorted(dict.items(), key=lambda x: x[1], reverse=False)
+        realService = []
+        for key in a[:self.K]:
+            realService.append(key[0])
+        self.Service = realService
+    def getService(self):
+        return self.Service
+    def tryGetCache(self, transSpeedBaseLine, limitedTime):
        vehiclesAbleToTrans = 0
        self.genRequestIndex()
+        get = 0
+        time = 0
+        miss = 0
+        time = []
        for i in range(0, len(self.UAVList)):
-            if self.UAVList[i].tryGetContent(self.PlaceX, self.PlaceY, time, self.UserId, transSpeedBaseLine, self.RequestIndex):
+            canGet, tmp = self.UAVList[i].tryGetContent(self.PlaceX, self.PlaceY, limitedTime, self.UserId, transSpeedBaseLine, self.RequestIndex)
+            if canGet:
                vehiclesAbleToTrans = vehiclesAbleToTrans + 1
-        return vehiclesAbleToTrans, self.RequestIndex
+            if tmp != -1:
+                # print("uav")
+                # print(tmp)
+                time.append(tmp)
+                get = get + 1
+        if self.K > get:
+            miss = self.K - get
+        if get == 0:
+            return vehiclesAbleToTrans, 0, miss, self.RequestIndex
+        time.sort()
+        if get >= self.K:
+            return vehiclesAbleToTrans, time[self.K - 1], miss, self.RequestIndex
+        return vehiclesAbleToTrans, 0, miss, self.RequestIndex
+ss = []
+for i in range(0, 100):
+    ss.append(r.zipf(1.5, 10)[0])
+print(r.zipf(1.6, 100))
...@@ -9,12 +9,12 @@ from numpy import random
import torch
from envs.UserForUAV import UserForUAV
-CustomerNum = 30
+CustomerNum = 40
-AerialVehiclesNum = 7
+AerialVehiclesNum = 5
TotalContentNum = 30
MaxPlaceX = 500
MaxPlaceY = 500
-Move = MaxPlaceX / 100
+Move = 15
ContentSize = 256
CacheLimit = 3600
Hight = 200
...@@ -31,7 +31,7 @@ Noise = 1e-13
DownSize = 0
FalvorNum = 2
-K = 4
+K = 3
transSpeedBaseLine = 0.5
timeLImit = 20
...@@ -94,6 +94,7 @@ class EnvCore(object):
        self.bags = 0
        self.slices = 0
+        self.time = 0
        self.allBags = []
        self.allSlices = []
...@@ -124,6 +125,9 @@ class EnvCore(object):
        print(self.dimension4)
        print(self.dimension1*5 + self.dimension2 + self.dimension3 + self.dimension4)
+        self.trace = []
+        self.clearTrace()
...@@ -163,6 +167,7 @@ class EnvCore(object):
        punish = []
        for i in range(0, len(self.AerialVehicles)):
            act = actions[i]
            dist = act[0] * self.Move
...@@ -176,25 +181,62 @@ class EnvCore(object):
        totalReward = 0
        num = 0
        cacheMit = 0
+        totalMiss = 0
+        totalTime = 0
+        missList = []
+        times = []
        for i in range(0, len(self.Customers)):
-            reward, requestIndex = self.Customers[i].tryGetCache(transSpeedBaseLine, timeLImit)
+            reward, time, miss, requestIndex = self.Customers[i].tryGetCache(transSpeedBaseLine, timeLImit)
-            reward = reward
+            times.append(time)
            totalReward += reward
+            missList.append(miss)
+            totalMiss = totalMiss + miss
+            if reward >= self.k:
+                num += 1
            if requestIndex in self.CacheContent:
                cacheMit += 1
                self.CacheDict[requestIndex] = self.CacheDict[requestIndex] + 1
-            if reward >= self.k:
-                num += 1
+        for i in range(0, len(self.Customers)):
+            avg = []
+            for j in range(0, missList[i]):
+                # print("bs")
+                # print(self.AerialVehicles[i].cal_bs_communication_delay(totalMiss))
+                avg.append(self.AerialVehicles[j].cal_bs_communication_delay(totalMiss))
+            if missList[i] != 0:
+                avg.sort()
+                times[i] = avg[len(avg) - 1]
+        for i in range(0, len(self.Customers)):
+            totalTime += times[i]
+        totalTime = totalTime / 200
        totalReward = totalReward * 1 / self.k
-        self.nowStep += 1
+        totalReward = totalReward + num * 50 + totalTime
+        self.appendTrace()
+        totalReward = totalReward
+        self.nowStep += 1
+        self.time += totalTime
        self.bags += num
        self.slices += totalReward
        # print(self.k)
        if self.nowStep % 200 == 0:
-            print(self.bags)
+            print('bags: ' + str(self.bags))
-            print(self.slices)
+            print('totalReward: ' + str(self.slices))
-            print(self.CacheContent)
+            print('time: ' + str(self.time))
+            # print(times)
+            # print(missList)
+            if self.epi > 600:
+                self.plotTrace()
            self.allBags.append(self.bags)
            self.allSlices.append(self.slices)
...@@ -211,20 +253,25 @@ class EnvCore(object):
            print(self.allBags)
            print(self.allSlices)
+            print(times)
+            print(missList)
            if self.epi > 500:
                self.DownSize = 0
            if self.epi > 550:
                self.DownSize = 0
+            self.clearTrace()
            self.bags = 0
            self.slices = 0
+            self.time = 0
-        totalReward = totalReward + num*50
        for i in range(0, len(self.AerialVehicles)):
            sub_agent_reward.append([totalReward - punish[i]*10])
...@@ -248,7 +295,8 @@ class EnvCore(object):
    def addService(self, act):
        self.AllocReward = 0
-        reward = [0 for _ in range(TotalContentNum)]
+        reward = [0 for _ in range(len(self.Customers))]
        for i in range(0, len(self.Customers)):
            for j in range(0, len(self.AerialVehicles)):
...@@ -262,6 +310,35 @@ class EnvCore(object):
            if reward[i] >= self.k:
                self.AllocReward = self.AllocReward + 1
+    def addService22(self, act):
+        self.AllocReward = 0
+        reward = [0 for _ in range(TotalContentNum)]
+        act = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0]
+        for i in range(0, len(self.Customers)):
+            for j in range(0, len(self.AerialVehicles)):
+                num = i * len(self.AerialVehicles) + j
+                if act[num] == 1:
+                    # print('yres')
+                    self.AerialVehicles[j].addService(i)
+                    reward[i] = reward[i] + 1
+        for i in range(0, len(reward)):
+            if reward[i] >= self.k:
+                self.AllocReward = self.AllocReward + 1
+    def clearTrace(self):
+        self.trace.clear()
+        for i in range(0, len(self.AerialVehicles)):
+            self.trace.append([])
+    def appendTrace(self):
+        for i in range(0, len(self.AerialVehicles)):
+            tmp = []
+            tmp.append(self.AerialVehicles[i].PlaceX)
+            tmp.append(self.AerialVehicles[i].PlaceY)
+            self.trace[i].append(tmp)
    def RecommendReward(self):
        if self.Recommend < 10:
            return 10
...@@ -441,13 +518,15 @@ class EnvCore(object):
    def resetServiceList(self):
        for i in range(0, len(self.AerialVehicles)):
            self.AerialVehicles[i].clearServiceList()
+        for i in range(0, len(self.Customers)):
+            self.Customers[i].clearService()
    def resetCustomerPosition(self):
        xDelta = self.MaxPlaceX - self.MinPlaceX
        yDelta = self.MaxPlaceY - self.MinPlaceY
-        xx = [342.67990918, 476.6966731, 1.97413316, 256.09613169, 406.31048083,
+        xx = [342.67990918, 476.6966731, 201.97413316, 256.09613169, 406.31048083,
              306.26303341, 360.87765872, 145.93803409, 458.88706126, 357.2878917,
-              271.27218401, 71.0850238, 186.67038003, 337.06680753, 220.91658721,
+              271.27218401, 171.0850238, 186.67038003, 337.06680753, 220.91658721,
              217.00699667, 308.88348923, 256.56912128, 325.19859097, 300.5194767]
        yy = [402.61159842, 260.8235762, 454.3244404, 159.61804449, 45.22967464,
              150.35002832, 56.99218093, 414.34066315, 23.44815969, 313.14357416,
...@@ -461,6 +540,32 @@ class EnvCore(object):
        for i in range(0, len(self.Customers)):
            self.Customers[i].moveTo(xx[i], yy[i])
+    def plotTrace(self):
+        color = ['red', 'green', 'blue', 'purple', 'yellow']
+        lable = ['UAV1', 'UAV2', 'UAV3', 'UAV4', 'UAV5']
+        for i in range(len(self.Customers)):
+            plt.plot(self.Customers[i].PlaceX, self.Customers[i].PlaceY, 'o', color='black')
+            plt.text(self.Customers[i].PlaceX, self.Customers[i].PlaceY, str(i), fontsize=10, ha='right', va='bottom')
+        for i in range(len(self.trace)):
+            for j in range(len(self.trace[i])):
+                plt.plot(self.trace[i][j][0], self.trace[i][j][1], ',', color=color[i])
+            # connect consecutive trace points
+            for j in range(len(self.trace[i]) - 1):
+                start = (self.trace[i][j][0], self.trace[i][j + 1][0])
+                end = (self.trace[i][j][1], self.trace[i][j + 1][1])
+                plt.plot(start, end, color=color[i])
+        for i in range(len(self.trace)):
+            plt.plot(self.trace[i][0][0], self.trace[i][0][1], '*', color='orange')
+            plt.plot(self.trace[i][len(self.trace[i]) - 1][0], self.trace[i][len(self.trace[i]) - 1][1], '*', color='pink')
+        plt.legend()
+        plt.show()
    def newUAV(self):
        return AerialVehicle(CustomerNum, TotalContentNum, ContentSize, Hight,
                             EnvA, EnvB, Frequency, Bandwidth, TransmitPower, SpeedOfLight,
...
This source diff could not be displayed because it is too large. You can view the blob instead.
import torch
from transformers import GPT2Config, GPT2Model
from fvcore.nn import FlopCountAnalysis

n_embd = 256

# Build the GPT-2 configuration
config = GPT2Config(
    n_embd=n_embd,    # embedding dimension
    n_layer=4,        # number of transformer layers
    n_head=4,         # number of attention heads
    n_positions=256,  # maximum supported sequence length
    vocab_size=0      # disable the token embedding table (inputs_embeds is used instead)
)

# Build the GPT-2 model
model = GPT2Model(config)

# Create input data (batch_size=1, sequence_length=256, hidden_size=n_embd)
input_data = torch.randn(1, 256, n_embd)  # sequence length 256, embedding dimension n_embd

# Count parameters manually
def count_parameters(model):
    """Count the number of trainable parameters."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

params = count_parameters(model)
print(f"Parameters: {params:,} ({params / 1e6:.2f} M)")

# Compute FLOPs with fvcore
# Wrap the model so fvcore can handle the keyword-argument input
class GPT2Wrapper(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, inputs_embeds):
        return self.model(inputs_embeds=inputs_embeds).last_hidden_state

wrapped_model = GPT2Wrapper(model)

# Run the fvcore FLOP analysis
flop_analyzer = FlopCountAnalysis(wrapped_model, input_data)
flops = flop_analyzer.total()
print(f"\nTotal FLOPs: {flops:,}")
print(f"FLOPs (millions): {flops / 1e6:.2f} M")
print(f"FLOPs (billions): {flops / 1e9:.2f} G")

# Theoretical cross-check
def calculate_gpt2_flops(batch_size, seq_len, n_embd, n_layer, n_head):
    """Estimate the FLOPs of a GPT-2 model analytically."""
    d_head = n_embd // n_head  # dimension of each attention head
    # FLOPs per layer
    flops_per_layer = 0
    # Attention block
    # 1. Q, K, V projections
    flops_per_layer += 3 * batch_size * seq_len * n_embd * n_embd * 2  # multiply-accumulate
    # 2. Attention scores (Q·K^T)
    flops_per_layer += batch_size * n_head * seq_len * d_head * seq_len * 2
    # 3. Attention output (softmax·V)
    flops_per_layer += batch_size * n_head * seq_len * seq_len * d_head * 2
    # 4. Output projection
    flops_per_layer += batch_size * seq_len * n_embd * n_embd * 2
    # Feed-forward block
    ffn_dim = 4 * n_embd  # the usual 4x expansion
    # 5. First FFN layer
    flops_per_layer += batch_size * seq_len * n_embd * ffn_dim * 2
    # 6. Second FFN layer
    flops_per_layer += batch_size * seq_len * ffn_dim * n_embd * 2
    # Total over all layers
    total_layer_flops = n_layer * flops_per_layer
    return total_layer_flops
# Theoretical FLOPs, evaluated with the same settings as the config above
batch_size = 1
seq_len = 256
n_layer = 4
n_head = 4
theoretical_flops = calculate_gpt2_flops(batch_size, seq_len, n_embd, n_layer, n_head)
print(f"\nTheoretical total FLOPs: {theoretical_flops:,}")
print(f"FLOPs (billions): {theoretical_flops / 1e9:.2f} G")
print(f"Difference from measured value: {(flops - theoretical_flops) / flops * 100:.2f}%")
\ No newline at end of file
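As a rough cross-check on the parameter count printed by the script above: a GPT-2 block contributes about 12 * n_embd^2 weights per layer (4 * n_embd^2 for the Q/K/V/output projections plus 8 * n_embd^2 for the 4x feed-forward network), and the position embedding adds n_positions * n_embd. A minimal sketch under these assumptions, ignoring biases and layer norms, so the estimate is slightly below the exact count:

# Rough analytic parameter estimate for the GPT-2 stack configured above
n_embd, n_layer, n_positions = 256, 4, 256
attn_params = 4 * n_embd ** 2                 # Q, K, V and output projections
mlp_params = 8 * n_embd ** 2                  # two linear layers with 4x expansion
approx_params = n_layer * (attn_params + mlp_params) + n_positions * n_embd
print(f"approx params: {approx_params / 1e6:.2f} M")  # compare with count_parameters(model)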
import torch
import torch.nn as nn
import time
from thop import profile  # requires: pip install thop

class Actor(nn.Module):
    """Example actor network (fully connected)."""
    def __init__(self, state_dim=24, action_dim=4):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, action_dim),
            nn.Tanh()
        )

    def forward(self, state):
        return self.net(state)

# Create the model and test data
device = torch.device("cpu")
model = Actor().to(device)
dummy_input = torch.randn(1, 24).to(device)  # batch size 1, state dimension 24

# ===== FLOPs and parameter count =====
flops, params = profile(model, inputs=(dummy_input,))
print(f"Parameters: {params / 1e6:.2f} M")
print(f"FLOPs: {flops / 1e6:.5f} M")

# ===== Inference latency =====
warmup_steps = 100  # warm-up iterations
test_steps = 1000   # timed iterations

# Warm-up (lets CUDA finish initialisation when a GPU is used)
for _ in range(warmup_steps):
    _ = model(dummy_input)

# Timed measurement
start_time = time.time()
for _ in range(test_steps):
    with torch.no_grad():  # disable gradient tracking
        _ = model(dummy_input)
# Synchronise when running on a GPU
if device.type == "cuda":
    torch.cuda.synchronize()
end_time = time.time()

avg_latency = (end_time - start_time) * 1000 / test_steps  # milliseconds per forward pass
print(f"Average inference latency: {avg_latency:.4f} ms")

# ===== Detailed per-layer statistics =====
print("\nPer-layer details:")
model_summary = profile(model, inputs=(dummy_input,), verbose=True)
\ No newline at end of file
import sys
import time
import numpy as np
import torch
import torch.nn as nn
from transformers import GPT2Config, GPT2Model
import psutil  # used to monitor CPU utilisation
from ptflops import get_model_complexity_info

# Fix the random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Custom MLP + GPT-2 actor model
class Actor(nn.Module):
    def __init__(self, obs_dim, embd_dim, output_dim, config):
        super(Actor, self).__init__()
        self.gpt = GPT2Model(config)
        self.fc1 = nn.Linear(obs_dim, embd_dim)
        self.fc2 = nn.Linear(embd_dim, output_dim)
        self.activation = nn.ReLU()

    def forward(self, x):
        x = self.activation(self.fc1(x))
        gpt_out = self.gpt(inputs_embeds=x.unsqueeze(1)).last_hidden_state
        return self.fc2(gpt_out.squeeze(1))

def generate_neural_net_input(num_drones=5, num_users=20):
    """
    Generate the 54-dimensional network input, consisting of:
    - x, y coordinates of 5 drones (first 10 entries)
    - x, y coordinates of 20 users (middle 40 entries)
    - 4 inter-drone distances (last 4 entries)

    Args:
        num_drones: number of drones (default 5)
        num_users: number of users (default 20)

    Returns:
        54-dimensional numpy array
    """
    # 1. Drone positions (uniformly random in a 100x100 area)
    drone_positions = np.random.uniform(0, 100, (num_drones, 2))
    # 2. User positions (uniformly random in the same area)
    user_positions = np.random.uniform(0, 100, (num_users, 2))
    # 3. Inter-drone distances (four specific pairs: 0-1, 0-2, 1-3, 2-4)
    distances = [
        np.linalg.norm(drone_positions[0] - drone_positions[1]),  # d0-1
        np.linalg.norm(drone_positions[0] - drone_positions[2]),  # d0-2
        np.linalg.norm(drone_positions[1] - drone_positions[3]),  # d1-3
        np.linalg.norm(drone_positions[2] - drone_positions[4])   # d2-4
    ]
    # 4. Concatenate everything into a 54-dimensional array:
    #    first 10 entries: flattened drone coordinates (5 drones x 2 coords)
    #    middle 40 entries: flattened user coordinates (20 users x 2 coords)
    #    last 4 entries: distances
    input_array = np.concatenate([
        drone_positions.flatten(),
        user_positions.flatten(),
        distances
    ])
    return input_array

def print_input_description(input_array):
    """Print a human-readable description of the input."""
    # Drone positions
    print("Drone positions (5 drones):")
    for i in range(0, 10, 2):
        drone_idx = i // 2
        x, y = input_array[i], input_array[i + 1]
        print(f"  drone {drone_idx}: x={x:.2f}, y={y:.2f}")
    # User positions (partial listing)
    print("\nUser positions (first 5):")
    for i in range(10, 20, 2):  # only the first 5 users
        user_idx = (i - 10) // 2
        x, y = input_array[i], input_array[i + 1]
        print(f"  user {user_idx}: x={x:.2f}, y={y:.2f}")
    # Inter-drone distances
    print("\nInter-drone distances:")
    dist_labels = ["drones 0-1", "drones 0-2", "drones 1-3", "drones 2-4"]
    for i, label in enumerate(dist_labels):
        print(f"  {label}: {input_array[50 + i]:.2f}")
# Configuration
obs_dim = 54  # 2*5 + 20*2 + 4
batch_size = 32
embd_dim = 256
config = GPT2Config(n_embd=embd_dim, n_layer=2, n_head=4)

# Create the model on CPU
device = torch.device("cpu")
print(f"Using device: {device}")
model = Actor(obs_dim, embd_dim, 2, config).to(device)
model.load_state_dict(torch.load('actor2-4-54.pt', map_location='cpu'))

print(f"Testing a network input on: {device}")
input = generate_neural_net_input(5, 20)
print_input_description(input)
ss = []
ss.append(input)
input = torch.tensor(input, dtype=torch.float32)
out = model(torch.tensor(ss, dtype=torch.float32))
print(out)
print(f"  drone move: x={out[0][0]:.2f}, y={out[0][1]:.2f}")
# Generate test data
def generate_batch(batch_size):
    return torch.randn(batch_size, obs_dim).to(device)

# Warm-up runs
print("Running warm-up passes...")
with torch.no_grad():
    for _ in range(10):
        data = generate_batch(batch_size)
        _ = model(data)

# Track CPU utilisation
cpu_usages = []  # peak CPU utilisation of each test

# Measure forward-pass time for different batch sizes
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128]
print("\nStarting performance test...")
print(f"{'batch size':<10} | {'avg time (ms)':<12} | {'min time (ms)':<12} | {'max time (ms)':<12} | {'std dev':<10} | {'CPU peak (%)':<10}")
print("-" * 90)

results = {}
for bs in batch_sizes:
    execution_times = []
    n_runs = 100
    cpu_usage_peak = 0
    # Monitor this process's CPU utilisation
    p = psutil.Process()
    for i in range(n_runs):
        data = generate_batch(bs)
        # Start timing
        start_time = time.perf_counter_ns()
        with torch.no_grad():
            output = model(data)
        # Stop timing
        end_time = time.perf_counter_ns()
        elapsed_ms = (end_time - start_time) / 1_000_000  # milliseconds
        execution_times.append(elapsed_ms)
        # Track the peak CPU utilisation
        current_cpu = p.cpu_percent(interval=None)
        if current_cpu > cpu_usage_peak:
            cpu_usage_peak = current_cpu
    # Store the peak CPU utilisation for this batch size
    cpu_usages.append(cpu_usage_peak)
    # Summary statistics
    avg_time = np.mean(execution_times)
    min_time = np.min(execution_times)
    max_time = np.max(execution_times)
    std_dev = np.std(execution_times)
    results[bs] = {
        'avg': avg_time,
        'min': min_time,
        'max': max_time,
        'std': std_dev,
        'cpu_peak': cpu_usage_peak
    }
    print(f"{bs:<10} | {avg_time:.6f} ms | {min_time:.6f} ms | {max_time:.6f} ms | {std_dev:.6f} | {cpu_usage_peak:.1f}%")
bs32_times = []
data = generate_batch(32)
for _ in range(100):
    start_time = time.perf_counter_ns()
    with torch.no_grad():
        output = model(data)
    end_time = time.perf_counter_ns()
    bs32_times.append((end_time - start_time) / 1_000_000)

# Final summary
print("\nTest summary:")
print(f"{'batch size':<10} | {'avg time (ms)':<12} | {'CPU peak (%)':<10}")
print("-" * 50)
for bs in batch_sizes:
    print(f"{bs:<10} | {results[bs]['avg']:.6f} ms | {results[bs]['cpu_peak']:.1f}%")

# Model information
print("\nModel information:")
print(f"Total parameters: {sum(p.numel() for p in model.parameters()) / 1e6:.2f} M")
print("\nPeak system resource usage during the test:")
print(f"Peak CPU utilisation: {max(cpu_usages):.1f}%")
print(f"Peak memory utilisation: {psutil.virtual_memory().percent}%")
print(f"Current memory usage: {psutil.virtual_memory().used / 1024 ** 3:.2f} GB / {psutil.virtual_memory().total / 1024 ** 3:.2f} GB")
# Corrected FLOPs computation (using ptflops)
print("\nComputing model FLOPs with ptflops:")

# ptflops needs a module instance, so wrap the model in a thin nn.Module
class ModelWrapper(nn.Module):
    def __init__(self, model):
        super(ModelWrapper, self).__init__()
        self.model = model

    def forward(self, x):
        return self.model(x)

# Instantiate the wrapper
model_wrapper = ModelWrapper(model)

# Input shape (obs_dim,) -- a single sample
input_shape = (obs_dim,)
macs, params = get_model_complexity_info(
    model_wrapper,
    input_shape,
    as_strings=False,
    print_per_layer_stat=False,
    verbose=False
)

# Estimate FLOPs from MACs (FLOPs is roughly 2 * MACs)
total_flops = 2 * macs  # approximation; the exact value may differ slightly
print(total_flops)

# Results
print(f"Model parameters: {params / 1e6:.2f} M")
print(f"MACs (multiply-accumulate operations): {macs / 1e6:.2f} MMACs")
print(f"Estimated FLOPs: {total_flops / 1e6:.2f} MFLOPs (single precision, single sample)")

# Total FLOPs for batch size 128, matching the largest timed batch above
batch_size_for_flops = 128
total_flops_batch = total_flops * batch_size_for_flops  # FLOPs scale linearly with the batch size
print(f"\nEstimated total FLOPs at batch size {batch_size_for_flops}: {total_flops_batch / 1e9:.2f} GFLOPs")

# Achieved throughput
avg_time_sec = results[128]['avg'] / 1000  # convert to seconds
gflops_per_forward = total_flops_batch / 1e9
gflops_per_sec = gflops_per_forward / avg_time_sec
print(f"Forward-pass compute throughput: {gflops_per_sec:.2f} GFLOPS")

# Theoretical CPU peak performance
cpu_count = psutil.cpu_count(logical=False)  # physical core count
if cpu_freq := psutil.cpu_freq():
    cpu_freq_ghz = cpu_freq.current / 1000  # current CPU frequency in GHz
else:
    # Fallback
    import platform
    if platform.system() == 'Darwin':  # macOS
        import subprocess
        output = subprocess.check_output(["sysctl", "-n", "hw.cpufrequency"]).decode().strip()
        cpu_freq_ghz = float(output) / 1000000000
    else:
        cpu_freq_ghz = 3.0  # assume 3.0 GHz by default

# Assume 8 floating-point operations per core per cycle (AVX vectorisation)
per_cycle_flops = 8
theoretical_gflops = cpu_count * cpu_freq_ghz * per_cycle_flops
print(f"Theoretical CPU peak: {theoretical_gflops:.1f} GFLOPS (single precision)")

# Achieved utilisation
utilization = gflops_per_sec / theoretical_gflops * 100
print(f"CPU utilisation: {utilization:.1f}%")
...@@ -143,11 +143,12 @@ class Runner(object):
        return train_infos

    def save(self):
+        print("save")
        for agent_id in range(self.num_agents):
            policy_actor = self.trainer[agent_id].policy.actor
            torch.save(
-                policy_actor.state_dict(),
+                policy_actor,
-                str(self.save_dir) + "/actor_agent" + str(agent_id) + ".pt",
+                str(self.save_dir) + "/zym_actor_agent" + str(agent_id) + ".pt",
            )
            policy_critic = self.trainer[agent_id].policy.critic
            torch.save(
...
...@@ -24,11 +24,12 @@ class EnvRunner(Runner):
        super(EnvRunner, self).__init__(config)

    def run(self):
+        print("wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww")
        self.warmup()
+        print("time.time()")
        start = time.time()
        episodes = int(self.num_env_steps) // self.episode_length // self.n_rollout_threads
+        self.save()
        for episode in range(episodes):
            if self.use_linear_lr_decay:
                self.trainer.policy.lr_decay(episode, episodes)
...@@ -68,9 +69,10 @@ class EnvRunner(Runner):
            # post process
            total_num_steps = (episode + 1) * self.episode_length * self.n_rollout_threads
+            self.save()
            # save model
            if episode % self.save_interval == 0 or episode == episodes - 1:
+                print("saave")
                self.save()

            # log information
...
from flask import Flask, request, jsonify
import numpy as np
from train.newnetwork import step

app = Flask(__name__)

def getinfo(data_array):
    """
    Placeholder getinfo implementation -- replace it with the real algorithm in production.
    It returns fixed example data; substitute your own computation when deploying.
    """
    # Example return data -- replace with the real algorithm output
    array = step(5, 3, data_array)
    print(array)
    uav1 = []
    uav1.append(data_array[0] + array[0])
    uav1.append(data_array[1] + array[1])
    uav2 = []
    uav2.append(data_array[3] + array[2])
    uav2.append(data_array[4] + array[3])
    uav3 = []
    uav3.append(data_array[6] + array[4])
    uav3.append(data_array[7] + array[5])
    res = []
    res.append(uav1)
    res.append(uav2)
    res.append(uav3)
    return {
        "user_target_com_rate": [],
        "device_target_pos": res,
        "user_target_pos_auc": [],
        "flight_types": [60, 60, 60],
        "target_deployment": []
    }

@app.route('/process', methods=['POST'])
def process_data():
    try:
        # Parse the JSON input
        input_data = request.json
        # Validate the payload structure
        if 'data' not in input_data:
            return jsonify({"error": "Missing 'data' field"}), 400
        data = input_data['data']
        # Extract and process the deviceState field
        device_state = data.get('deviceState', [])
        extracted_device = []
        for item in device_state:
            # Keep the first three values of each entry, dropping invalid ones
            valid_values = [x for x in item[:3] if x is not None and isinstance(x, (int, float))]
            extracted_device.extend(valid_values)
        # Extract and process the userPos field
        user_pos = data.get('userPos', [])
        extracted_user = []
        for pos in user_pos:
            # Drop invalid coordinates
            if len(pos) >= 2 and all(isinstance(x, (int, float)) for x in pos[:2]):
                extracted_user.extend(pos[:2])
        # Concatenate everything into one flat array
        combined_array = extracted_device + extracted_user
        print(combined_array)
        # Call getinfo (mocked here)
        result = getinfo(combined_array)
        # Build the response
        response = {
            "user_target_com_rate": result.get("user_target_com_rate", []),
            "device_target_pos": result.get("device_target_pos", []),
            "user_target_pos_auc": result.get("user_target_pos_auc", []),
            "flight_types": result.get("flight_types", []),
            "target_deployment": result.get("target_deployment", [])
        }
        return jsonify(response)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
\ No newline at end of file
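For reference, a minimal client sketch for the /process endpoint above, assuming the server runs locally on port 5000. The 'data', 'deviceState', and 'userPos' keys match the handler; the coordinates are placeholder values, sized so that three values per device and two per user yield the 3*5 + 2*3 = 21 inputs that step(5, 3, ...) expects.

import requests  # assumed HTTP client; any equivalent works

payload = {
    "data": {
        # five devices: only the first three values of each entry are used
        "deviceState": [[10.0, 20.0, 100.0], [30.0, 40.0, 100.0], [50.0, 60.0, 100.0],
                        [70.0, 80.0, 100.0], [90.0, 15.0, 100.0]],
        # three users: (x, y) pairs
        "userPos": [[5.0, 5.0], [25.0, 35.0], [45.0, 55.0]],
    }
}
resp = requests.post("http://127.0.0.1:5000/process", json=payload)
print(resp.json())  # expected keys: device_target_pos, flight_types, ...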
import torch
import torch.nn as nn
import math

class HybridCNN(nn.Module):
    def __init__(self, n, user):
        super(HybridCNN, self).__init__()
        self.n = n
        self.user = user
        # Input handling: (batch, 3n + 2*user) -> (batch, 1, 3n + 2*user)
        self.conv1 = nn.Conv1d(1, 64, kernel_size=3, padding=1)
        # Feature extraction
        self.fc = nn.Linear(64 * (3 * n + user * 2), 64)
        # Output heads
        self.cont_head = nn.Linear(64, 2 * n)        # continuous outputs
        self.binary_head = nn.Linear(64, user * n)   # binary outputs
        # Pre-built scaling template
        self.register_buffer('scale_template',
                             torch.Tensor([(20 if i % 2 else 2 * math.pi) for i in range(2 * n)]))

    def forward(self, x):
        # Input validation
        expected_dim = 3 * self.n + self.user * 2
        if x.size(-1) != expected_dim:
            raise ValueError(f"Expected input dimension {expected_dim}, got {x.size(-1)}")
        # Input handling: (batch, D) -> (batch, 1, D)
        x = x.unsqueeze(1)
        # Feature extraction
        x = torch.relu(self.conv1(x))  # (batch, 64, D)
        x = x.view(x.size(0), -1)      # (batch, 64*D)
        x = torch.relu(self.fc(x))     # (batch, 64)
        # Continuous outputs
        cont_output = torch.sigmoid(self.cont_head(x)) * self.scale_template
        # Binary outputs (straight-through estimator trick during training)
        bin_logits = self.binary_head(x)
        bin_output = (bin_logits > 0).float()
        if self.training:
            bin_output = bin_output + bin_logits - bin_logits.detach()
        return torch.cat([cont_output, bin_output], dim=1)

def step(n, user, array):
    # Initialise the network
    net = HybridCNN(n=n, user=user)
    # print(f"Network structure (n={n}, user={user}):\n{net}")
    test_input = [array]
    test_input = torch.tensor(test_input)
    # print(f"\nTest input shape: {test_input.shape}")
    # Forward pass
    output = net(test_input)
    return output
    # print(test_input)
    # print(output)
    # Check the output
    # print(f"\nOutput shape: {output.shape} (2n + user*n = {2 * n} + {user * n} = {2 * n + user * n})")

# Validation helper
def validate_output(output, n, user):
    # Continuous part
    cont = output[:, :2 * n]
    even_check = torch.all((cont[:, ::2] >= 0) & (cont[:, ::2] <= 2 * math.pi))
    odd_check = torch.all((cont[:, 1::2] >= 0) & (cont[:, 1::2] <= 20))
    # Binary part
    bin_part = output[:, 2 * n:]
    binary_check = torch.all(torch.isin(bin_part, torch.tensor([0.0, 1.0])))
    return even_check and odd_check and binary_check
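A minimal usage sketch for the step and validate_output helpers above, assuming it runs in the same module; the zero-filled input is a placeholder sized for HybridCNN(n=5, user=3), which expects 3*5 + 2*3 = 21 features and returns 2*5 + 3*5 = 25 outputs.

n, user = 5, 3                        # placeholder sizes
obs = [0.0] * (3 * n + 2 * user)      # 21 input features
out = step(n, user, obs)              # forward pass through a freshly initialised HybridCNN
print(out.shape)                      # torch.Size([1, 25]): 2*n continuous + user*n binary values
print(validate_output(out, n, user))  # range check on both output parts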
import torch
import torch.nn as nn
import math

class HybridCNN(nn.Module):
    def __init__(self, n, user):
        super(HybridCNN, self).__init__()
        self.n = n
        self.user = user
        input_dim = 3 * n + 2 * user  # input dimension
        output_dim = 2 * n            # output dimension
        # Input handling: (batch, input_dim) -> (batch, 1, input_dim)
        self.conv1 = nn.Conv1d(1, 64, kernel_size=3, padding=1)
        # Feature extraction
        self.fc1 = nn.Linear(64 * input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        # Output layer: 2*n outputs
        self.output_head = nn.Linear(64, output_dim)
        # Scaling factor that keeps outputs within [-10, 10]
        self.scale_factor = 10.0

    def forward(self, x):
        # Input validation
        expected_dim = 3 * self.n + 2 * self.user
        if x.size(-1) != expected_dim:
            raise ValueError(f"Expected input dimension {expected_dim}, got {x.size(-1)}")
        # Input handling: (batch, D) -> (batch, 1, D)
        x = x.unsqueeze(1)
        # Convolution
        x = torch.relu(self.conv1(x))  # (batch, 64, D)
        # Flatten
        x = x.view(x.size(0), -1)      # (batch, 64*D)
        # Fully connected layers
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Output layer: tanh restricts values to [-1, 1]
        output = torch.tanh(self.output_head(x))
        # Scale the outputs to [-10, 10]
        output = output * self.scale_factor
        return output

def step(n, user, array):
    # Check the input length
    expected_length = 3 * n + 2 * user
    if len(array) != expected_length:
        raise ValueError(f"Expected input array of length {expected_length}, got {len(array)}")
    # Initialise the network
    net = HybridCNN(n=n, user=user)
    # Convert to a tensor
    test_input = torch.tensor([array], dtype=torch.float32)
    # Forward pass
    output = net(test_input)
    # Return as a flat list
    return output.detach().numpy().flatten().tolist()
\ No newline at end of file
import socket
import json
import random
import time

def send_array(host='127.0.0.1', port=12345):
    data = [-1.3105, 0.5705, -0.2628, 2.6665, 0.8016, -0.8752, -0.2601, 0.5445, 2.0032, 0.4143, -0.6441, -2.2407, -0.5950, 0.4144, 0.0528, 0.1691, -0.9757, 1.5469, 0.3840, 0.2890, 0.2386]
    print("Array to send:", data)
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((host, port))
            # Serialise the array to a JSON string and encode it
            data = json.dumps(data).encode('utf-8')
            s.sendall(data)
            print(f"Sent successfully to {host}:{port}")
            # Set a 10-second receive timeout
            s.settimeout(10)
            s.shutdown(socket.SHUT_WR)
            # Receive the response
            response = b""
            while True:
                data = s.recv(1024)
                if not data:
                    break
                response += data
            print(f"Received response: {response.decode('utf-8')}")
    except Exception as e:
        print(f"Connection error: {str(e)}")

if __name__ == '__main__':
    send_array()  # use the default host and port
\ No newline at end of file
import socket
import random
import json
from threading import Thread
from network import step

def handle_client(conn, addr):
    """Handle one client connection."""
    try:
        # Receive the client's data
        received_data = bytearray()
        while True:
            chunk = conn.recv(1024)
            if not chunk:
                break
            received_data.extend(chunk)
        info = json.loads(received_data.decode('utf-8'))
        array = step(5, 3, info)
        array = array.tolist()
        array = array[0]
        # Serialise the result and send it back
        response = json.dumps(array).encode('utf-8')
        conn.sendall(response)
        print(f"Sent to {addr}: {response}")
    except Exception as e:
        print(f"Client {addr} error: {str(e)}")
    finally:
        conn.close()

def tensor_to_array(tensor):
    """Safely convert a tensor to a NumPy array."""
    if tensor.requires_grad:
        tensor = tensor.detach()
    if tensor.device.type != 'cpu':
        tensor = tensor.cpu()
    return tensor.numpy()

def start_server(host='0.0.0.0', port=12345):
    """Start the TCP server."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind((host, port))
        s.listen()
        print(f"Server started, listening on {host}:{port}")
        # Keep accepting new connections
        while True:
            conn, addr = s.accept()
            print(f"Connection from {addr}")
            Thread(target=handle_client, args=(conn, addr)).start()

if __name__ == '__main__':
    start_server()
\ No newline at end of file
...@@ -69,13 +69,14 @@ def make_eval_env(all_args):
def parse_args(args, parser):
    parser.add_argument("--scenario_name", type=str, default="MPE", help="Which scenario to run on")
    parser.add_argument("--num_landmarks", type=int, default=3)
-    parser.add_argument("--num_agents", type=int, default=9, help="number of players")
+    parser.add_argument("--num_agents", type=int, default=7, help="number of players")
    all_args = parser.parse_known_args(args)[0]
    return all_args

def main(args):
    parser = get_config()
    all_args = parse_args(args, parser)
...@@ -152,7 +153,7 @@ def main(args):
    envs = make_train_env(all_args)
    eval_envs = make_eval_env(all_args) if all_args.use_eval else None
    num_agents = all_args.num_agents
+    print(" config = ")
    config = {
        "all_args": all_args,
        "envs": envs,
...@@ -169,6 +170,8 @@ def main(args):
        from runner.separated.env_runner import EnvRunner as Runner

    runner = Runner(config)
+    print("runner.run()")
+    runner.save()
    runner.run()

    # post process
...