#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
from tqdm import tqdm
import os
import numpy as np
# Split the CSV files of one folder into a training frame and a test frame.
folder_path = "/home/lxm/1vs1/total"  # replace with the path of your data folder
# Columns kept from every file, in the order used by the rest of the script.
feature_columns = ['tar_x', 'tar_y', 'tar_z', 'tarEli', 'position_change', 'v', 'v_change', 'r',
                   'eli_change', 'h_change', 'id']
# NOTE(review): os.listdir returns entries in arbitrary order, so which files
# land in the training set depends on the filesystem. The order is left
# untouched here so that existing results stay reproducible on this machine.
file_list = os.listdir(folder_path)
train_frames = []
test_frames = []
count = 0  # number of CSV files encountered so far
for file_name in tqdm(file_list, desc="Processing files"):
    if not file_name.endswith(".csv"):
        continue
    count += 1
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path)
    # Drop the first column, then select the feature columns by name.
    data = df.iloc[:, 1:]
    data1 = pd.DataFrame(data, columns=feature_columns)
    # The first four CSV files form the training set, all later ones the test set.
    if count <= 4:
        train_frames.append(data1)
    else:
        test_frames.append(data1)
# Concatenate once per split; concatenating inside the loop re-copies the
# accumulated frame for every file. An empty split stays an empty DataFrame.
data_train = pd.concat(train_frames, axis=0, ignore_index=True) if train_frames else pd.DataFrame()
data_test = pd.concat(test_frames, axis=0, ignore_index=True) if test_frames else pd.DataFrame()
data_train, data_test


# In[28]:


from sklearn.preprocessing import MinMaxScaler
# Raw NumPy arrays of the two splits.
train = data_train.to_numpy()
test = data_test.to_numpy()
# Learn the min-max scaling from the training split only, then apply the very
# same scaling to both splits so no test statistics leak into the fit.
scaler = MinMaxScaler()
data_tr = scaler.fit(train).transform(train)
data_te = scaler.transform(test)
data_tr, data_te


# In[33]:


# Stack the scaled training rows on top of the scaled test rows
# (the unscaled variant would stack `train` and `test` instead).
combined_array = np.concatenate((data_tr, data_te), axis=0)
# Keep only the first three columns as clustering input.
dataset1 = combined_array[:, 0:3]
dataset1.shape


# In[34]:


import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics import calinski_harabasz_score
from sklearn.metrics import davies_bouldin_score
def k_means_cluster(k, data):
    """Cluster ``data`` into ``k`` groups with k-means.

    The Davies-Bouldin index of the resulting partition and the cluster
    centres are printed as a side effect.

    Args:
        k: Number of clusters to fit.
        data: Samples to cluster, passed unchanged to ``KMeans.fit``.

    Returns:
        A ``(centers, labels)`` tuple: the fitted cluster centres and the
        cluster label assigned to each sample.
    """
    # Fixed random_state keeps the clustering reproducible between runs.
    model = KMeans(n_clusters=k, random_state=42).fit(data)

    assignments = model.labels_
    centroids = model.cluster_centers_

    score = davies_bouldin_score(data, assignments)
    print("Davies-Bouldin Index for Clustering 1:", score)
    print(centroids)
    return centroids, assignments


    # Visualize the clustering result (disabled; kept for reference only).
#     fig = plt.figure()
#     ax = fig.add_subplot(111, projection='3d')
#     ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=labels,cmap='viridis', s=2, marker='.')
#     # ax.scatter(centers[:, 0], centers[:, 1], centers[:, 2], c='red', marker='X', s=200, label='Cluster Centers')
#     ax.set_title('K-Means Clustering of 3D Trajectory Points')
#     ax.set_xlabel('X-axis')
#     ax.set_ylabel('Y-axis')
#     ax.set_zlabel('Z-axis')
#     plt.legend()
#     plt.show()

# Two-cluster k-means over the first three columns of the stacked data.
centers, labels = k_means_cluster(k=2, data=dataset1)
centers


# In[39]:


len(labels)
# Append the cluster label of every row as an extra trailing column.
new_col = labels[:, np.newaxis]
dataset2 = np.hstack((combined_array, new_col))
# Swap the last two columns so the cluster label becomes second-to-last and
# the column that was last before (the 'id' column of the loaded frames)
# is last again; create_dataset reads the ids from the last column.
dataset3 = np.copy(dataset2)
dataset3[:, [-2, -1]] = dataset2[:, [-1, -2]]
# Split the stacked array back into its training and test parts. The boundary
# is the number of training rows stacked first in combined_array, derived from
# data_tr instead of a hard-coded row count so it stays right when the data changes.
n_train = data_tr.shape[0]
train_new = dataset3[:n_train, :]
test_new = dataset3[n_train:, :]
train_new.shape, test_new.shape


# In[41]:


def create_dataset(dataset, window, offset, predict_length):
    """Cut a 2-D array into sliding-window (input, target) samples.

    Each sample takes ``window`` consecutive rows (all columns) as input and
    the first three columns of the following ``predict_length`` rows as
    target. A sample is kept only when the last-column value (the id) is the
    same at the first input row, the last input row and the last target row,
    so windows that straddle two different ids are skipped.

    Args:
        dataset: 2-D array whose last column holds the id of each row.
        window: Number of input rows per sample.
        offset: Step between the starts of consecutive candidate windows.
        predict_length: Number of target rows per sample.

    Returns:
        A ``(dataX, dataY)`` tuple of arrays stacking the kept inputs and
        targets. Both are empty arrays when no window qualifies.
    """
    inputs, targets = [], []
    horizon = window + predict_length
    # The last usable start is len(dataset) - horizon: its final target row is
    # the final row of the dataset. The "+ 1" makes that start inclusive, so a
    # window that fits exactly is no longer dropped.
    for start in range(0, len(dataset) - horizon + 1, offset):
        split = start + window
        stop = start + horizon
        first_id = dataset[start, -1]
        # Only the three boundary rows are compared, not every row in between.
        if first_id == dataset[split - 1, -1] and first_id == dataset[stop - 1, -1]:
            inputs.append(dataset[start:split, :])
            targets.append(dataset[split:stop, 0:3])
    return np.array(inputs), np.array(targets)


# In[44]:


# Build sliding-window samples from the label-augmented splits: 200 input rows,
# a step of 100 rows between windows, and 100 target rows per sample.
# (Variant without the cluster-label column: pass data_tr / data_te instead.)
train_X, train_Y = create_dataset(train_new, window=200, offset=100, predict_length=100)
test_X, test_Y = create_dataset(test_new, window=200, offset=100, predict_length=100)
train_X.shape, train_Y.shape, test_X.shape, test_Y.shape


# In[ ]:

import pandas as pd
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import LSTM
import tensorflow as tf
from matplotlib import pyplot
import numpy as np
import random
import time

# Wall-clock timer covering model construction and training.
start_time = time.time()

# Network definition: 200 input frames of 12 features each are mapped to
# 100 predicted frames of 3 values each.
model = Sequential([
    # Recurrent encoder with 70 units over the 200-step input window.
    LSTM(70, input_shape=(200, 12)),
    # Flat output of 100 * 3 = 300 values.
    Dense(300, kernel_initializer='normal', activation='relu'),
    # Reshape the flat output into [100, 3].
    tf.keras.layers.Reshape((100, 3)),
])
# Mean absolute error loss with the Adam optimizer.
model.compile(optimizer='adam', loss='mean_absolute_error')

# Fit the network; the test windows serve as validation data and the sample
# order is kept as-is (no shuffling).
history = model.fit(
    train_X,
    train_Y,
    epochs=300,
    batch_size=72,
    validation_data=(test_X, test_Y),
    verbose=2,
    shuffle=False,
)

end_time = time.time()
print("time:", end_time - start_time)

# Plot the training and validation loss curves.
for history_key, legend_label in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=legend_label)
pyplot.legend()
pyplot.show()

# Run the trained model on the test windows.
yhat = model.predict(test_X)

# Persist the trained model in HDF5 format.
model.save('/home/lxm/model/lstm_model_feature2.h5')
# tf.keras.backend.clear_session()

