#!/usr/bin/env python
# coding: utf-8

import os
import time

import pandas as pd
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import LSTM
import tensorflow as tf
from matplotlib import pyplot
import numpy as np
import random

folder_path = "/home/lxm/1vs1/total"  # replace with the path to your data folder
# List the folder once. The listing is sorted so that the order in which files
# are concatenated is reproducible: os.listdir() returns entries in arbitrary
# order.
file_list = sorted(os.listdir(folder_path))

# Collect the per-file frames and concatenate once at the end. Calling
# pd.concat inside the loop copies every previously accumulated row again on
# each iteration.
frames = []
for file_name in tqdm(file_list, desc="Processing files"):
    if not file_name.endswith(".csv"):
        continue
    file_path = os.path.join(folder_path, file_name)
    # Parse the CSV file into a DataFrame.
    df = pd.read_csv(file_path)
    # Keep every column except the first one.
    data = df.iloc[:, 1:]
    frames.append(data)

# Merge all files row-wise with a fresh 0..n-1 index. When the folder holds no
# CSV file the result is an empty DataFrame, as with the incremental concat.
data_all = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()



from sklearn.preprocessing import MinMaxScaler

# Convert the merged table to a float32 matrix so that all data is float.
value = data_all.values.astype('float32')

# Normalize features: rescale each column to the [0, 1] range.
# NOTE(review): the scaler is fitted on every row before the train/test split
# is made, and every column is scaled, including the last one that
# create_dataset compares for equality -- confirm both are intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(value)


def create_dataset(dataset, window, offset, predict_length):
    """Slice a 2-D array into (input window, prediction target) pairs.

    A sample starting at row ``i`` uses rows ``[i, i + window)`` as input and
    rows ``[i + window, i + window + predict_length)`` as target. Only columns
    1, 2 and 3 are copied into the samples. The last column is treated as an
    identifier: a sample is kept only when its first input row, its last input
    row and its last target row all carry the same identifier value, so that a
    sample does not straddle two different identifiers.

    Args:
        dataset: 2-D numpy array indexed as ``dataset[row, column]``.
        window: number of rows in each input sequence.
        offset: step between the start rows of consecutive samples.
        predict_length: number of rows in each target sequence.

    Returns:
        Tuple ``(dataX, dataY)`` of numpy arrays built from the kept samples;
        with at least one sample their shapes are ``(n, window, 3)`` and
        ``(n, predict_length, 3)``.
    """
    dataX, dataY = [], []
    # Last start row whose target still fits inside the array. The bound is
    # inclusive so the sample that ends exactly on the final row is kept
    # (a strict "<" comparison against len(dataset) used to drop it).
    last_start = len(dataset) - window - predict_length
    for start in range(0, last_start + 1, offset):
        split = start + window
        stop = split + predict_length
        sample_id = dataset[start, -1]
        # Skip samples whose input end or target end has a different identifier.
        if dataset[split - 1, -1] != sample_id or dataset[stop - 1, -1] != sample_id:
            continue
        dataX.append(dataset[start:split, 1:4])
        dataY.append(dataset[split:stop, 1:4])
    return np.array(dataX), np.array(dataY)


# Build the samples: 200 input rows predict the next 100 rows, and a new
# sample starts every 100 rows.
dataX, dataY = create_dataset(scaled, window=200, offset=100, predict_length=100)



# Randomly send about 80% of the samples to the training set and the remaining
# samples to the test set; p is the training fraction.
def split_dataset(dataX, dataY, p=0.8, seed=None):
    """Randomly partition paired samples into a training and a test set.

    One random number in [0, 1) is drawn per sample; the sample goes to the
    training set when the draw is ``<= p`` and to the test set otherwise. The
    realised training fraction is therefore only approximately ``p``.

    Args:
        dataX: sequence of input samples.
        dataY: sequence of targets, indexed in parallel with ``dataX``.
        p: probability that a sample is assigned to the training set.
        seed: when ``None`` (default) the global ``random`` module state is
            used, exactly as before. Any other value seeds a private
            generator so the split is reproducible and the global state is
            left untouched.

    Returns:
        Tuple ``(train_X, train_Y, test_X, test_Y)`` of numpy arrays.
    """
    rng = random if seed is None else random.Random(seed)
    train_X, train_Y = [], []
    test_X, test_Y = [], []
    for i, sample in enumerate(dataX):
        if rng.random() <= p:
            train_X.append(sample)
            train_Y.append(dataY[i])
        else:
            test_X.append(sample)
            test_Y.append(dataY[i])
    return np.array(train_X), np.array(train_Y), np.array(test_X), np.array(test_Y)
            
# Random split of the samples into training and test arrays (training fraction 0.8).
train_X, train_Y, test_X, test_Y = split_dataset(dataX, dataY, p=0.8)




start_time = time.time()
# Network definition: an LSTM reads the input window and a dense layer emits
# the whole prediction horizon at once.
# Layer sizes are taken from the training arrays instead of being hard-coded,
# so they stay in sync with the window / predict_length passed to
# create_dataset (currently 200 input frames -> 100 predicted frames, 3
# features each).
if train_X.ndim != 3 or train_Y.ndim != 3:
    raise ValueError(
        "training data must be 3-D (samples, steps, features); got shapes "
        f"{train_X.shape} and {train_Y.shape} -- is the training set empty?"
    )
input_shape = tuple(train_X.shape[1:])   # (input steps, features)
output_shape = tuple(train_Y.shape[1:])  # (predicted steps, features)

model = Sequential()
model.add(LSTM(50, input_shape=input_shape))  # LSTM layer with 50 units
# Flat output holding every predicted value: predicted steps * features.
model.add(Dense(int(np.prod(output_shape)), kernel_initializer='normal', activation='relu'))

# Reshape the flat output to (predicted steps, features).
model.add(tf.keras.layers.Reshape(output_shape))
model.compile(loss='mean_squared_error', optimizer='adam')  # MSE loss, Adam optimizer

# Fit the network; the test split is also used as validation data.
history = model.fit(train_X, train_Y, epochs=200, batch_size=72, validation_data=(test_X, test_Y), verbose=2,
                    shuffle=False)
end_time = time.time()
# Elapsed time covers model construction and training.
print("time:",end_time-start_time)
# Plot the training and validation loss history.
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()



# Run the trained model on the test inputs.
yhat = model.predict(test_X)


model.save('/home/lxm/model/lstm_model_compare.h5')





