美文网首页
2018-06-03 PYTHON code

2018-06-03 PYTHON code

作者: jfdlagbja | 来源:发表于2018-06-03 20:24 被阅读0次
    # python 2.7 ##
    
    import matplotlib.pyplot as plt
    import pandas as pd
    import numpy as np
    # import datetime
    # import lightgbm as lgb
    # from math import radians, cos, sin, asin, sqrt
    import os
    cwd = os.getcwd()

    # File locations used by the script.
    path_train = cwd + "/data/dm/train.csv"  # training data
    # NOTE(review): unlike path_train this path is not prefixed with cwd -
    # confirm whether an absolute path is intended.
    path_test = "/data/dm/test.csv"  # test data
    path_result_out = "model/pro_result.csv"  # prediction output

    ori_data = pd.read_csv(path_train)

    # Order the rows by terminal and, within a terminal, by time.
    data = ori_data.sort_values(by=['TERMINALNO', 'TIME'])

    # Convert TIME to whole minutes (divide by 60, then truncate to int).
    data['TIME'] = (data['TIME'] / 60).astype('int')

    # NEW_TRIPID starts as a copy of TRIP_ID because some of the original
    # ids are wrong; it is meant to be overwritten later.
    data['NEW_TRIPID'] = data['TRIP_ID']

    # Fix the column order so positional (iloc) access is predictable:
    # 0 = TERMINALNO, 1 = TIME, 2 = NEW_TRIPID, 3..8 = the measured signals.
    columnsTitles = [
        'TERMINALNO', 'TIME', 'NEW_TRIPID',
        'LONGITUDE', 'LATITUDE', 'DIRECTION', 'HEIGHT', 'SPEED', 'CALLSTATE',
        'Y', 'TRIP_ID',
    ]
    data = data.reindex(columns=columnsTitles)

    # Turn off matplotlib's interactive mode.
    plt.ioff()
    
    
    # def plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Term):
    #     N_trips = len(Y4)
    #     f, axs = plt.subplots(6, N_trips, figsize=(N_trips / 6 * 6, 6))
    #     f.subplots_adjust(hspace=.5, wspace=.5)
    #     axs = axs.ravel()
    #
    #     for j in range(0, N_trips):  # iterate on trips
    #         axs[(1 - 1) * N_trips + j].plot(X[j], Y1[j])
    #         axs[(2 - 1) * N_trips + j].plot(X[j], Y2[j])
    #         axs[(3 - 1) * N_trips + j].plot(X[j], Y3[j])
    #         axs[(4 - 1) * N_trips + j].plot(X[j], Y4[j])
    #         axs[(5 - 1) * N_trips + j].plot(X[j], Y5[j])
    #         axs[(6 - 1) * N_trips + j].plot(X[j], Y6[j])
    #     f.savefig(str(Term) + "test.png")
    
    
    # plot
    def plt_trips(x, y1, y2, y3, y4, y5, y6, Tmp_Term):
        """Plot six signals against ``x`` in stacked subplots and save a PNG.

        The figure is written to ``str(Tmp_Term) + "test.png"`` in the current
        working directory and is always closed afterwards, so calling this
        function repeatedly does not accumulate open figures.

        Args:
            x: Sequence of x-axis values shared by all six subplots.
            y1, y2, y3, y4, y5, y6: Sequences plotted against ``x``; they are
                labelled, in order, Longitude, Latitude, Direction, Height,
                Speed and Call_state.
            Tmp_Term: Value whose string form prefixes the output file name.

        Returns:
            None.
        """
        y_label = ['Longitude', 'Latitude', 'Direction', 'Height', 'Speed', 'Call_state']
        series = [y1, y2, y3, y4, y5, y6]

        # The width grows with the number of samples. The 1-inch floor keeps
        # the figure size positive when ``x`` is empty or very short.
        width = max(0.05 * len(x), 1.0)
        fig, axs = plt.subplots(6, 1, figsize=(width, 8))
        try:
            fig.subplots_adjust(hspace=.5, wspace=.5)
            for ax, label, values in zip(axs.ravel(), y_label, series):
                ax.plot(x, values)
                ax.set_xlabel('time')
                ax.set_ylabel(label)
                ax.set_title('Time and ' + label)
            fig.savefig(str(Tmp_Term) + "test.png")
        finally:
            # Release the figure even if plotting or saving failed.
            plt.close(fig)
    
    
    # def div2Trip(data):
    #     Curr_Term = data.iloc[0, 0]
    #     Curr_Time = data.iloc[0, 1]
    #     Curr_Trip = data.iloc[0, 2]
    #     # Create 1d list to store a single trip
    #     x, y1, y2, y3, y4, y5, y6 = ([] for i in range(7))
    #     # Create a variable-sized 2d list for a terminal
    #     X, Y1, Y2, Y3, Y4, Y5, Y6 = ([] for i in range(7))
    #
    #     for i in range(0, 100):
    #         Tmp_Term = data.iloc[i, 0]
    #         Tmp_Time = data.iloc[i, 1]
    #         Tmp_Trip = data.iloc[i, 2]
    #         if Tmp_Term == Curr_Term:
    #             if Tmp_Trip == Curr_Trip:
    #                 # print('aaa')
    #                 x.append(data.iloc[i, 1]-Curr_Time)
    #                 y1.append(data.iloc[i, 3])
    #                 y2.append(data.iloc[i, 4])
    #                 y3.append(data.iloc[i, 5])
    #                 y4.append(data.iloc[i, 6])
    #                 y5.append(data.iloc[i, 7])
    #                 y6.append(data.iloc[i, 8])
    #             else:
    #                 Curr_Time = data.iloc[i, 1]
    #                 Curr_Trip = data.iloc[i, 2]
    #                 X += [x]
    #                 Y1 += [y1]
    #                 Y2 += [y2]
    #                 Y3 += [y3]
    #                 Y4 += [y4]
    #                 Y5 += [y5]
    #                 Y6 += [y6]
    #                 print('bbb')
    #                 x = []
    #                 y1, y2, y3, y4, y5, y6 = ([] for i in range(6))
    #                 x.append(data.iloc[i, 1]-Curr_Time)
    #                 y1.append(data.iloc[i, 3])
    #                 y2.append(data.iloc[i, 4])
    #                 y3.append(data.iloc[i, 5])
    #                 y4.append(data.iloc[i, 6])
    #                 y5.append(data.iloc[i, 7])
    #                 y6.append(data.iloc[i, 8])
    #         else:
    #             print ('ddd')
    #             Curr_Term = data.iloc[i, 0]
    #             plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Curr_Term - 1)
    #             # continue
    
    
    def comb2trips(data, max_gap=5):
        """Renumber trips per terminal in place, splitting on time gaps.

        Rows are processed in their current order and columns are addressed by
        position: column 0 is read as the terminal id, column 1 as the time
        and column 2 is overwritten with the new trip index. The index starts
        at 1 for every run of consecutive rows with the same terminal id and
        is incremented whenever the time difference to the previous row is
        greater than ``max_gap``.

        A progress line is printed for every detected trip boundary
        (``'Find a new trip <row>'``) and terminal change
        (``'Curr_Term <id>'``).

        Args:
            data: DataFrame with at least three columns, modified in place.
            max_gap: Largest time difference, in the units of column 1, that
                still counts as the same trip. The default ``5`` is the value
                that was previously hard-coded.

        Returns:
            None. An empty ``data`` is left untouched.
        """
        if len(data) == 0:
            return

        # Pull the two input columns out once instead of doing a per-cell
        # ``iloc`` lookup on every row.
        terms = data.iloc[:, 0].values
        times = data.iloc[:, 1].values

        curr_term = terms[0]
        curr_trip = 1
        trip_ids = [curr_trip]  # the first row always opens trip 1

        for i in range(1, len(terms)):
            if terms[i] == curr_term:
                if (times[i] - times[i - 1]) > max_gap:
                    curr_trip += 1
                    print('Find a new trip ' + str(i))
            else:
                # A new terminal restarts the numbering; the gap to the
                # previous terminal's last row is not considered.
                curr_term = terms[i]
                curr_trip = 1
                print('Curr_Term ' + str(curr_term))
            trip_ids.append(curr_trip)

        # Write the whole column once rather than cell by cell.
        data.iloc[:, 2] = trip_ids
    
    
    # Renumber the trips; comb2trips writes into column 2 (NEW_TRIPID) in place.
    comb2trips(data)

    # Save the rearranged frame to disk and continue with the reloaded copy.
    data.to_pickle('rearranged_data')
    df = pd.read_pickle('rearranged_data')

    # Only ``df`` is used from here on; drop the other two references.
    del data, ori_data

    # Summary statistics of the trip ids and of the target column.
    # NOTE(review): the results are not stored or printed, so these calls only
    # show output in an interactive session.
    df['NEW_TRIPID'].describe()
    df['TRIP_ID'].describe()
    df['Y'].describe()
    
    
    def features_append(features, series):
        """Append ``series`` to ``features`` in place and return the list.

        Previously ``series`` was ignored, so the list handed back to the
        caller never grew.

        Args:
            features: List collecting the values seen so far.
            series: Value to add to ``features``.

        Returns:
            The same ``features`` list object, now ending with ``series``.
        """
        features.append(series)
        return features
    
    
    def define_test(data):
        """Create the empty result table sized by the largest value in column 0.

        Args:
            data: DataFrame whose first column (by position) holds the ids
                that determine the number of result rows.

        Returns:
            A DataFrame with columns ``'Id'`` and ``'Pred'`` and index
            ``range(n)``, where ``n`` is the maximum of column 0 converted to
            ``int``. All cells are empty (NaN). An empty ``data`` yields a
            table with zero rows instead of raising.
        """
        # Series.max() avoids iterating the column in Python; int() keeps
        # range() working when the column is stored as float.
        length = int(data.iloc[:, 0].max()) if len(data) else 0
        return pd.DataFrame(index=range(length), columns=['Id', 'Pred'])
    
    
    def pred_ratio(features):
        """Return the prediction for one group of features.

        Placeholder implementation: ``features`` is not inspected and the
        result is always ``0``.
        """
        placeholder_prediction = 0
        return placeholder_prediction
    
    
    def pred_gen(df, result):
        """Fill ``result`` with one prediction row per terminal found in ``df``.

        ``df`` is scanned in row order, with columns addressed by position:
        column 0 is read as the terminal id and column 4 is the per-row value
        passed to ``features_append``. Each run of consecutive rows sharing a
        terminal id forms one group. When a group ends, row ``term - 1`` of
        ``result`` receives the terminal id in column 0 and
        ``pred_ratio(features)`` in column 1.

        Compared with the earlier version, the first row of every group is now
        part of that group's features, and the first group is taken from the
        data instead of being assumed to have id 1.

        Args:
            df: DataFrame with at least five columns. Rows of one terminal
                should be adjacent; a terminal that reappears later overwrites
                its earlier result row.
            result: DataFrame written in place. It needs at least two columns
                and a row at position ``term - 1`` for every terminal id.

        Returns:
            None. An empty ``df`` leaves ``result`` untouched and prints
            nothing.
        """
        # Read both columns once instead of doing a per-cell ``iloc`` lookup.
        terms = df.iloc[:, 0].values
        values = df.iloc[:, 4].values
        if len(terms) == 0:
            return

        curr_term = terms[0]
        features = []
        for tmp_term, value in zip(terms, values):
            if tmp_term != curr_term:
                # The previous terminal is complete: store it and start anew.
                print(tmp_term)
                result.iloc[curr_term - 1, 0] = curr_term
                result.iloc[curr_term - 1, 1] = pred_ratio(features)
                curr_term = tmp_term
                features = []
            # Every row, including the first of a new terminal, contributes.
            features = features_append(features, value)

        # The last terminal has no following row to trigger the store above.
        print('end of final term: ')
        print(curr_term)
        result.iloc[curr_term - 1, 0] = curr_term
        result.iloc[curr_term - 1, 1] = pred_ratio(features)
    
    
    # Allocate the empty Id/Pred table, then let pred_gen fill it in place
    # (pred_gen writes into ``result`` by row position).
    result = define_test(df)
    pred_gen(df, result)
    
    

    相关文章

      网友评论

          本文标题:2018-06-03 PYTHON code

          本文链接:https://www.haomeiwen.com/subject/ympjsftx.html