#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
from tqdm import tqdm
import tensorflow as tf
import os


folder_path = "/home/lxm/1vs1/total"  # replace with the path to your data folder
# List every entry in the folder; only the .csv files are loaded below.
file_list = os.listdir(folder_path)

# Collect the per-file frames and concatenate once after the loop: calling
# pd.concat inside the loop copies everything accumulated so far on every
# iteration, which makes loading quadratic in the number of files.
frames = []
for file_name in tqdm(file_list, desc="Processing files"):
    if not file_name.endswith(".csv"):
        continue
    file_path = os.path.join(folder_path, file_name)
    # Parse the CSV file into a DataFrame.
    df = pd.read_csv(file_path)
    # Keep every column except the first one.
    data = df.iloc[:, 1:]
    frames.append(data)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when the folder contains no CSV files.
data_all = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()


from sklearn.preprocessing import MinMaxScaler
# Min-max scale every column of the merged data. fit_transform learns the
# per-column scaling parameters and applies them to the same array in one call.
scaler = MinMaxScaler()
dataset = scaler.fit_transform(data_all.values)



import numpy as np

def trajectory_split(data,window,offset):
    """Cut ``data`` into fixed-length windows that stay within one id.

    The last column of ``data`` is treated as an id. A window is kept only
    when the id of its first row equals the id of its last row (rows in
    between are not checked).

    Args:
        data: 2-D array; rows are samples, the last column is the id.
        window: number of rows per window.
        offset: step between the start rows of consecutive windows.

    Returns:
        A pair ``(child_trajectory, tr1)`` of numpy arrays:
        ``child_trajectory`` holds every kept window without the id column;
        ``tr1`` holds, with the id column included, the kept windows whose
        id equals 0. Either array has shape ``(0,)`` when nothing is kept.
    """
    child_trajectory = []
    tr1 = []
    # NOTE(review): this upper bound skips windows starting at
    # len(data)-window-1 and len(data)-window even though they would fit;
    # kept as-is so existing results do not change.
    for start in range(0, len(data) - window - 1, offset):
        stop = start + window
        # Read from the ``data`` argument (the previous version read the
        # module-level ``dataset`` and ignored what the caller passed in).
        first_id = data[start, -1]
        last_id = data[stop - 1, -1]
        if first_id != last_id:
            continue
        if first_id == 0:
            tr1.append(data[start:stop, :])
        child_trajectory.append(data[start:stop, :-1])
    return np.array(child_trajectory), np.array(tr1)

# Cut the scaled data into 300-row windows, stepping 300 rows between window
# starts, and report the shape of the resulting window array.
trajectorys, tr1 = trajectory_split(data=dataset, window=300, offset=300)
print(trajectorys.shape)


# In[8]:


def cal_fd(l1,l2):
    """Return the mean absolute difference between two trajectories.

    The absolute element-wise difference is averaged over the rows first
    (giving one value per column) and those per-column means are then
    averaged. Both inputs are expected to be 2-D arrays of compatible shape.
    """
    # One mean per column, so the length of this array equals the column count.
    column_means = np.abs(l1 - l2).mean(axis=0)
    return sum(column_means) / len(column_means)

# In[10]:


def cal_fd_full(trajectorys):
    """Build the symmetric pairwise distance matrix of ``trajectorys``.

    Entry ``[i][j]`` is ``cal_fd(trajectorys[i], trajectorys[j])``; the
    diagonal is zero. Each unordered pair is evaluated once and written to
    both mirrored cells.
    """
    size = len(trajectorys)
    fd_full = np.zeros((size, size))
    for row in range(size):
        for col in range(row + 1, size):
            distance = cal_fd(trajectorys[row], trajectorys[col])
            fd_full[row][col] = distance
            fd_full[col][row] = distance
    return fd_full

def get_center(cluster):
    """Pick the representative trajectory of ``cluster``.

    For clusters of more than two members this is the member whose summed
    distance to all other members is smallest. Clusters with one or two
    members simply return their first member.
    """
    if len(cluster) > 2:
        total_distance = cal_fd_full(cluster).sum(axis=0)
        return cluster[np.argmin(total_distance)]
    return cluster[0]

def get_all_center(clusters):
    """Return the list of ``get_center`` results, one per cluster, in order."""
    return [get_center(members) for members in clusters]
def cal_ward(n1,n2,center1,center2):
    """Return the Ward-style merge cost between two clusters.

    ``n1`` and ``n2`` are the numbers of trajectories in the two clusters;
    ``center1`` and ``center2`` are their centre trajectories. The cost is
    the centre-to-centre distance weighted by ``n1 * n2 / (n1 + n2)``.
    """
    center_distance = cal_fd(center1, center2)
    return n1 * n2 * center_distance / (n1 + n2)
def cal_all_ward(clusters,centers):
    """Return the matrix of merge costs between every pair of clusters.

    Only the strict upper triangle (``i < j``) carries real costs; the
    diagonal and the lower triangle are ``inf`` so that an argmin over the
    whole matrix always lands on a distinct pair when one exists.
    """
    size = len(clusters)
    ward_arr_full = np.full((size, size), float('inf'))
    for first in range(size):
        for second in range(first + 1, size):
            ward_arr_full[first][second] = cal_ward(
                len(clusters[first]),
                len(clusters[second]),
                centers[first],
                centers[second],
            )
    return ward_arr_full



# In[ ]:


def clustering(trajectorys,k):
    """Agglomeratively merge trajectories until at most ``k`` clusters remain.

    Every trajectory starts as its own cluster. On each pass the pair of
    clusters with the smallest merge cost is joined. The initial cluster
    count and the change in merge cost on every pass are printed.

    Returns:
        ``(clusters, dis_arr, d_arr)``: the clusters as lists of
        trajectories, the merge cost of every pass, and the difference
        between each merge cost and the previous one (zero on the first
        pass).
    """
    clusters = [[trajectory] for trajectory in trajectorys]
    initial_count = len(clusters)
    print(initial_count)
    merge_costs = []
    cost_deltas = []
    previous_cost = None
    while len(clusters) > k:
        # Compute the centre trajectory of every cluster. Before any merge
        # each cluster is a single trajectory, which is its own centre.
        if len(clusters) == initial_count:
            centers = list(trajectorys)
        else:
            centers = get_all_center(clusters)
        # Compute the merge cost between every pair of clusters.
        ward_all = cal_all_ward(clusters, centers)
        # Merge the cheapest pair, reducing the number of clusters by one.
        keep, absorb = np.unravel_index(np.argmin(ward_all), ward_all.shape)
        cost = ward_all[keep][absorb]
        if previous_cost is None:
            # First pass: compare the cost with itself.
            previous_cost = cost
        delta = cost - previous_cost
        print(delta)
        merge_costs.append(cost)
        cost_deltas.append(delta)
        previous_cost = cost
        clusters[keep].extend(clusters[absorb])
        clusters.pop(absorb)
    return clusters, merge_costs, cost_deltas


# Merge trajectories until no more than 6599 clusters remain, then report
# the size of each resulting cluster.
clusters, dis_arr, d_arr = clustering(trajectorys, 6599)
for cluster in clusters:
    # Print this cluster's own size; the previous code printed len(clusters),
    # i.e. the same total cluster count once per cluster.
    print(len(cluster))


# In[ ]:




