WeiDQ
/
DTI-CDF

 
			
							import sys
sys.path.append('/home/chujunyi/MFDF/3_my_code')
import os
os.chdir('/home/chujunyi/MFDF/1_original_data/E')
from Graph_utils import *
from functions import *
from SNF import *
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import auc,roc_auc_score,roc_curve,precision_recall_curve
from sklearn.preprocessing import MaxAbsScaler
from  sklearn import linear_model
from sklearn.ensemble import RandomForestClassifier
import gcforest
from gcforest.gcforest import GCForest
import xgboost
from functools import reduce
import glob
from scipy import interp
import csv
import time
import random
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix

from collections import Counter
from sklearn.metrics import precision_recall_fscore_support

np.set_printoptions(threshold = np.inf)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

with open('/home/chujunyi/MFDF/1_original_data/E/E_combine_origin_new_interactions.txt','r') as f:
    E_ddr_new_pair = f.readlines()
E_ddr_new_pair = [item.replace('\n','').split('\t') for item in E_ddr_new_pair]
E_ddr_new_pair = [item[0] + ',' + item[1] for item in E_ddr_new_pair]
#-------------------------

R_all_train_test = "e_admat_dgc_mat_2_line.txt"
(D,T,DT_signature,aAllPossiblePairs,dDs,dTs,diDs,diTs) = get_All_D_T_thier_Labels_Signatures(R_all_train_test)

DT_feature_pair_list = []
for index in diDs.values():
    for column in diTs.values():
        DT_feature_pair_list.append([index,column]) 
#-------------------------

def calc_metrics(y_results,DT_feature_pair_list,E_ddr_new_pair):
    all_fp, all_fn = [],[]
    all_tp, all_tn = [],[]
    y_choose_all = pd.DataFrame()
    for mode in range(len(y_results)):
        for seed in range(len(y_results[mode])):
            for fold in range(len(y_results[mode][seed][0])): #[0] = test, [1] = train            
                a = pd.DataFrame()
                a['test_idx_fold'] = y_results[mode][seed][0][fold][0]
                a['y_predprob_test_0'] = y_results[mode][seed][0][fold][3]
                a['y_predprob_test_1'] = y_results[mode][seed][0][fold][4]
                a['y_pred_test'] = y_results[mode][seed][0][fold][1]
                a['y_true_test'] = y_results[mode][seed][0][fold][2]
                tn, fp, fn, tp = confusion_matrix(a['y_true_test'], a['y_pred_test'],labels = [0,1]).ravel()
                all_fp.append(fp)
                all_fn.append(fn)
                all_tp.append(tp)
                all_tn.append(tn)
                y_choose = a[a['y_pred_test'] == 0][a['y_true_test'] == 1]
                y_choose_all = pd.concat([y_choose_all, y_choose])
        mean_fp = np.mean(all_fp) * 10
        mean_fn = np.mean(all_fn) * 10           
        mean_tp = np.mean(all_tp) * 10
        mean_tn = np.mean(all_tn) * 10

        micro_precision = mean_tp / (mean_tp + mean_fp)
        micro_recall = mean_tp/ (mean_tp + mean_fn)
        micro_fscore = 2 * micro_precision * micro_recall / (micro_precision + micro_recall)
        
        print('mean_fp = {}, mean_fn = {}, mean_tp = {}, mean_tn = {}'.format(mean_fp, mean_fn, mean_tp, mean_tn))
        print('micro_precision = {}, micro_recall = {}, micro_fscore = {}'.format(micro_precision, micro_recall, micro_fscore))
        
    y_choose_all.drop_duplicates(subset='test_idx_fold', keep='first', inplace=True) # (16, 5)
    y_choose_all['test_idx_fold'] = y_choose_all['test_idx_fold'].astype('int')

    y_choose_idx = [item for item in y_choose_all['test_idx_fold']]
    DTI_CDF_new_pair = [DT_feature_pair_list[i] for i in y_choose_idx]
    y_choose_all['new_pair'] = DTI_CDF_new_pair

    DTI_CDF_new_pair = [item[0] + ',' + item[1] for item in DTI_CDF_new_pair]
    
    ddr_intersection_dti = list(set(DTI_CDF_new_pair).intersection(set(E_ddr_new_pair)))
    print('DTI_CDF_new_pair: ', DTI_CDF_new_pair)
    print('ddr_intersection_dti = ', ddr_intersection_dti)
    
    return DTI_CDF_new_pair, y_choose_all, ddr_intersection_dti
	
def get_toy_config(rf_tree = 1, rf_max_depth = 1, rf_tree_2 = 1, rf_max_depth_2 = 1,
                   xgb_tree = 1, xgb_max_depth = 1, min_child_weight = 1, lr = 1,
                   xgb_tree_2 = 1, xgb_max_depth_2 = 1, min_child_weight_2 = 1, lr_2 = 1,
                   layer = 'rf1'):
    
    if layer == 'rf1' or layer == 'xgb1': 
        max_layers = 1
        n_folds = 1
    else: 
        max_layers, n_folds = 5, 10

    config = {}
    ca_config = {}
    ca_config["random_state"] = 1231
    ca_config["look_indexs_cycle"] = None
    ca_config["max_layers"] = max_layers
    ca_config["early_stopping_rounds"] = 1
    ca_config["n_classes"] = 2
    ca_config["estimators"] = []
        
    rf = {"n_folds": n_folds, "type": "RandomForestClassifier",
          "n_estimators": rf_tree, "max_depth": rf_max_depth, 'criterion' : 'entropy', 'bootstrap' : True,
          "n_jobs": -1, "criterion" : 'entropy', "class_weight":"balanced"}
    
    rf_2 = {"n_folds": n_folds, "type": "RandomForestClassifier",
          "n_estimators": rf_tree_2, "max_depth": rf_max_depth_2, 'criterion' : 'entropy', 'bootstrap' : True,
          "n_jobs": -1, "criterion" : 'entropy', "class_weight":"balanced"}
    
    xgb = {"n_folds": n_folds, "type": "XGBClassifier",'base_score':0.5, 'booster':'gbtree', 'colsample_bylevel':1,
           'colsample_bytree':1, 'eval_metric':'auc', 'gamma':0, 'learning_rate':lr,
           'max_delta_step':0, 'max_depth':xgb_max_depth, 'min_child_weight':min_child_weight, 'missing':None,
           'n_estimators':xgb_tree, 'n_jobs':-1, 'nthread':-1,
           'objective':'binary:logistic', 'random_state':1231, 'reg_alpha':0,
           'reg_lambda':1, 'scale_pos_weight':1, 'seed':1231, 'silent':True,
           'subsample':1}

    xgb_2 = {"n_folds": n_folds, "type": "XGBClassifier",'base_score':0.5, 'booster':'gbtree', 'colsample_bylevel':1,
           'colsample_bytree':1, 'eval_metric':'auc', 'gamma':0, 'learning_rate':lr_2,
           'max_delta_step':0, 'max_depth':xgb_max_depth_2, 'min_child_weight':min_child_weight_2, 'missing':None,
           'n_estimators':xgb_tree_2, 'n_jobs':-1, 'nthread':-1,
           'objective':'binary:logistic', 'random_state':1231, 'reg_alpha':0,
           'reg_lambda':1, 'scale_pos_weight':1, 'seed':1231, 'silent':True,
           'subsample':1}     
        
    if layer == 'rf1':
        ca_config["estimators"].append(rf)
        
    elif layer == 'xgb1':
        ca_config["estimators"].append(xgb)
        
    elif layer == 'rf1xgb1':
        ca_config["estimators"].append(rf)
        ca_config["estimators"].append(xgb)
        
    elif layer == 'rf1xgb1_2':
        ca_config["estimators"].append(rf_2)
        ca_config["estimators"].append(xgb_2)
    
    elif layer == 'xgb2':
        ca_config["estimators"].append(xgb)
        ca_config["estimators"].append(xgb_2)
    
    elif layer == 'rf2':
        ca_config["estimators"].append(rf)
        ca_config["estimators"].append(rf_2) 
    
    elif layer == 'rf2xgb1':
        ca_config["estimators"].append(rf)
        ca_config["estimators"].append(rf_2) 
        ca_config["estimators"].append(xgb_2)
    
    elif layer == 'rf1xgb2':
        ca_config["estimators"].append(xgb)
        ca_config["estimators"].append(xgb_2)
        ca_config["estimators"].append(rf_2)
    
    elif layer == 'rf2xgb2':
        ca_config["estimators"].append(rf)
        ca_config["estimators"].append(rf_2) 
        ca_config["estimators"].append(xgb)
        ca_config["estimators"].append(xgb_2)     
        
    config["cascade"] = ca_config
    return config
	
def run_classification(mode, seed, X_train_10_fold, X_test_10_fold, y_train_10_fold, y_test_10_fold,test_idx_10_fold, train_idx_10_fold, layer):   
        
    if mode == 'p':
        xgb_param = {}
        xgb_param[1231] = {'xgb_max_depth':3, 'min_child_weight':0.001, 'lr':0.1, 'xgb_tree':800,
                           'xgb_max_depth_2':9, 'min_child_weight_2':0.1, 'lr_2':0.1, 'xgb_tree_2':400}

        xgb_param[8367] = {'xgb_max_depth':7, 'min_child_weight':0.001, 'lr':0.1, 'xgb_tree':100,
                           'xgb_max_depth_2':9, 'min_child_weight_2':0.1, 'lr_2':0.1, 'xgb_tree_2':800}

        xgb_param[22]   = {'xgb_max_depth':20, 'min_child_weight':0.1, 'lr':0.1, 'xgb_tree':200,
                           'xgb_max_depth_2':7, 'min_child_weight_2':0.01, 'lr_2':0.1, 'xgb_tree_2':800}

        xgb_param[1812] = {'xgb_max_depth':20, 'min_child_weight':0.1, 'lr':0.1, 'xgb_tree':200,
                           'xgb_max_depth_2':9, 'min_child_weight_2':0.001, 'lr_2':0.1, 'xgb_tree_2':800}

        xgb_param[4659] = {'xgb_max_depth':9, 'min_child_weight':0.1, 'lr':0.01, 'xgb_tree':800,
                           'xgb_max_depth_2':20, 'min_child_weight_2':0.001, 'lr_2':0.1, 'xgb_tree_2':400}
        
        rf_param = {}
        rf_param[1231] = {'rf_tree':100, 'rf_max_depth':3, 'rf_tree_2': 800, 'rf_max_depth_2': 20}
        rf_param[8367] = {'rf_tree':100, 'rf_max_depth':3, 'rf_tree_2': 100, 'rf_max_depth_2': 30}                   
        rf_param[22]   = {'rf_tree':100, 'rf_max_depth':3, 'rf_tree_2': 400, 'rf_max_depth_2': None}                   
        rf_param[1812] = {'rf_tree':100, 'rf_max_depth':3, 'rf_tree_2': 600, 'rf_max_depth_2': 20}                   
        rf_param[4659] = {'rf_tree':100, 'rf_max_depth':3, 'rf_tree_2': 600, 'rf_max_depth_2': None}
                         
        
    elif mode == 'D':    
        xgb_param = {}
        xgb_param[1231] = {'xgb_max_depth':9, 'min_child_weight':0.001, 'lr':0.1, 'xgb_tree':200,
                           'xgb_max_depth_2':7, 'min_child_weight_2':0.1, 'lr_2':0.1, 'xgb_tree_2':400}

        xgb_param[8367] = {'xgb_max_depth':5, 'min_child_weight':0.01, 'lr':0.1, 'xgb_tree':400,
                           'xgb_max_depth_2':9, 'min_child_weight_2':0.1, 'lr_2':0.1, 'xgb_tree_2':400}

        xgb_param[22]   = {'xgb_max_depth':9, 'min_child_weight':0.001, 'lr':0.1, 'xgb_tree':400,
                           'xgb_max_depth_2':20, 'min_child_weight_2':0.01, 'lr_2':0.1, 'xgb_tree_2':600}

        xgb_param[1812] = {'xgb_max_depth':5, 'min_child_weight':0.001, 'lr':0.1, 'xgb_tree':400,
                           'xgb_max_depth_2':20, 'min_child_weight_2':1, 'lr_2':0.1, 'xgb_tree_2':600}

        xgb_param[4659] = {'xgb_max_depth':5, 'min_child_weight':0.01, 'lr':0.1, 'xgb_tree':800,
                           'xgb_max_depth_2':9, 'min_child_weight_2':0.001, 'lr_2':0.1, 'xgb_tree_2':600}
        
        rf_param = {}
        rf_param[1231] = {'rf_tree':600, 'rf_max_depth':3, 'rf_tree_2': 600, 'rf_max_depth_2': 20}
        rf_param[8367] = {'rf_tree':300, 'rf_max_depth':3, 'rf_tree_2': 300, 'rf_max_depth_2': 20}                   
        rf_param[22]   = {'rf_tree':200, 'rf_max_depth':3, 'rf_tree_2': 800, 'rf_max_depth_2': 20}                   
        rf_param[1812] = {'rf_tree':300, 'rf_max_depth':3, 'rf_tree_2': 600, 'rf_max_depth_2': 20}                   
        rf_param[4659] = {'rf_tree':200, 'rf_max_depth':3, 'rf_tree_2': 600, 'rf_max_depth_2': 20}

    elif mode == 'T':    
        xgb_param = {}
        xgb_param[1231] = {'xgb_max_depth':7, 'min_child_weight':0.01, 'lr':0.1, 'xgb_tree':800,
                           'xgb_max_depth_2':20, 'min_child_weight_2':0.001, 'lr_2':0.1, 'xgb_tree_2':400}

        xgb_param[8367] = {'xgb_max_depth':7, 'min_child_weight':0.001, 'lr':0.1, 'xgb_tree':600,
                           'xgb_max_depth_2':20, 'min_child_weight_2':0.01, 'lr_2':0.1, 'xgb_tree_2':800}

        xgb_param[22]   = {'xgb_max_depth':9, 'min_child_weight':0.1, 'lr':0.1, 'xgb_tree':800,
                           'xgb_max_depth_2':20, 'min_child_weight_2':0.01, 'lr_2':0.1, 'xgb_tree_2':400}

        xgb_param[1812] = {'xgb_max_depth':9, 'min_child_weight':0.001, 'lr':0.1, 'xgb_tree':200,
                           'xgb_max_depth_2':20, 'min_child_weight_2':0.1, 'lr_2':0.1, 'xgb_tree_2':200}

        xgb_param[4659] = {'xgb_max_depth':7, 'min_child_weight':0.1, 'lr':0.1, 'xgb_tree':600,
                           'xgb_max_depth_2':9, 'min_child_weight_2':0.01, 'lr_2':0.1, 'xgb_tree_2':800}
        
        rf_param = {}
        rf_param[1231] = {'rf_tree':600, 'rf_max_depth':3, 'rf_tree_2': 300, 'rf_max_depth_2': 20}
        rf_param[8367] = {'rf_tree':600, 'rf_max_depth':3, 'rf_tree_2': 300, 'rf_max_depth_2': 30}                   
        rf_param[22]   = {'rf_tree':100, 'rf_max_depth':3, 'rf_tree_2': 600, 'rf_max_depth_2': 30}                   
        rf_param[1812] = {'rf_tree':600, 'rf_max_depth':3, 'rf_tree_2': 200, 'rf_max_depth_2': None}                   
        rf_param[4659] = {'rf_tree':400, 'rf_max_depth':3, 'rf_tree_2': 800, 'rf_max_depth_2': 30}

    xgb_tree, xgb_max_depth = xgb_param[seed]['xgb_tree'], xgb_param[seed]['xgb_max_depth']
    min_child_weight, lr =  xgb_param[seed]['min_child_weight'], xgb_param[seed]['lr']
    xgb_tree_2, xgb_max_depth_2 = xgb_param[seed]['xgb_tree_2'], xgb_param[seed]['xgb_max_depth_2']
    min_child_weight_2, lr_2 =  xgb_param[seed]['min_child_weight_2'], xgb_param[seed]['lr_2']
    
    rf_tree, rf_max_depth, rf_tree_2, rf_max_depth_2 = rf_param[seed]['rf_tree'], rf_param[seed]['rf_max_depth'], rf_param[seed]['rf_tree_2'], rf_param[seed]['rf_max_depth_2']
    
    result,test_true_predict_compare = run_classification_configuration(X_train_10_fold, X_test_10_fold, 
                                                                        y_train_10_fold, y_test_10_fold,
                                                                        test_idx_10_fold, train_idx_10_fold,
                                                                        rf_tree, rf_max_depth, 
                                                                        rf_tree_2, rf_max_depth_2,
                                                                        xgb_tree, xgb_max_depth, min_child_weight, lr, 
                                                                        xgb_tree_2, xgb_max_depth_2, min_child_weight_2, lr_2, 
                                                                        layer, mode, seed)
        
    print('************results : one_mode + one_seed + one_layer + one_parameter************')  
    print('Avg_AUPR_training = {:.4},Avg_AUPR_testing = {:.4},Avg_AUC_training = {:.4},Avg_AUC_testing = {:.4}'.format(
            result[0],result[1],result[3],result[4]))
    print('folds_AUPR_testing:', result[2])
    print('folds_AUPR_training:', result[6])
    print('folds_AUC_testing:', result[5])
    print('folds_AUC_training:',result[7])
    print('precision_testing = {}, recall_testing = {}, fscore_testing = {}'.format(result[8][0], result[8][1], result[8][2]))
    print('precision_training = {}, recall_training = {}, fscore_training = {}'.format(result[9][0], result[9][1], result[9][2]))
    #print('G_mean = ', result[0][12])
    print('************************************')  
    return result, test_true_predict_compare

def gain_results(seeds, mode_list, layer, only_PathCS_feature = False):

    aupr_list, auc_list = [], []
    precision_list, recall_list, fscore_list = [], [], []
    test_true_predict_compare_10cv_seeds_modes = []
    recall_25_list, recall_50_list, recall_100_list, recall_200_list, recall_400_list = [], [], [], [], []
#     recall_25_list, recall_50_list = [], []
    G_mean_list = []
    for mode in mode_list:
        
        trails_AUPRs, trails_AUCs = [], []
        trails_precisions, trails_recalls, trails_fscores = [], [], []
        trails_recall_25, trails_recall_50, trails_recall_100, trails_recall_200, trails_recall_400 = [], [], [], [], []
#         trails_recall_25, trails_recall_50 = [], []
        trials_G_means = []
        seeds_results = []
        test_true_predict_compare_10cv_seeds = []
        for seed in seeds:
            print ("---------GENERATE_FOLD_{}_{}-----------------------------------------------".format(mode, seed))

            filename = '/home/chujunyi/MFDF/3_my_code/E/data/E_folddata_X-Y_S' + str(mode) + '_seed' + str(seed) + '.npz'
            folddata_XY = np.load(filename)

            X_train_10_fold, X_test_10_fold  = folddata_XY['X_train_10_fold'],folddata_XY['X_test_10_fold']
            y_train_10_fold, y_test_10_fold = folddata_XY['y_train_10_fold'],folddata_XY['y_test_10_fold']
            train_idx_10_fold, test_idx_10_fold = folddata_XY['train_idx_10_fold'],folddata_XY['test_idx_10_fold']
            
            if only_PathCS_feature == True:
                if mode == 'p':
                    X_train_10_fold = map(lambda x : x[:,:12], X_train_10_fold)
                    X_test_10_fold = map(lambda x : x[:,:12], X_test_10_fold)
                else:
                    X_train_10_fold = map(lambda x : x[:,:10], X_train_10_fold)
                    X_test_10_fold = map(lambda x : x[:,:10], X_test_10_fold)
                
            print ('-------------------------------------------------THIS SEED FINISHED----------------------------------')

            results,test_true_predict_compare_10cv = run_classification(mode, seed, X_train_10_fold, X_test_10_fold, y_train_10_fold, 
                                                                        y_test_10_fold,test_idx_10_fold, train_idx_10_fold, layer)
            
            seeds_results.append(results)
            trails_AUPRs.extend(results[2]) # 5z
            trails_AUCs.extend(results[5]) # 50
            trails_precisions.append(results[8][0]) # 5
            trails_recalls.append(results[8][1]) # 5
            trails_fscores.append(results[8][2]) # 5
            test_true_predict_compare_10cv_seeds.append(test_true_predict_compare_10cv)
            trails_recall_25.append(results[10])
            trails_recall_50.append(results[11])
            trails_recall_100.append(results[12])
            trails_recall_200.append(results[13])
            trails_recall_400.append(results[14])
            trials_G_means.extend(results[12])###15
            
            aupr, roc_auc = np.mean(trails_AUPRs), np.mean(trails_AUCs)
            precision, recall, fscore = np.mean(trails_precisions), np.mean(trails_recalls), np.mean(trails_fscores)
#             recall_25, recall_50 = np.mean(trails_recall_25), np.mean(trails_recall_50)
            recall_25, recall_50, recall_100, recall_200, recall_400 = np.mean(trails_recall_25), np.mean(trails_recall_50), np.mean(trails_recall_100), np.mean(trails_recall_200), np.mean(trails_recall_400)
            G_mean = np.mean(trials_G_means)
            
            print( "################Results###################" )
            print('model_architecture:',layer)
            print( "Mode: %s" % mode )
            print( "Average: AUPR: %s" % aupr ) 
            print( "Average: AUC: %s" % roc_auc )
            print( "Average: precision = {}, recall = {}, fscore = {} ".format(precision, recall, fscore))
            print( "Average: G_mean = ", G_mean)
            
        for result_ in seeds_results:
            print('seed_results: ')
            print('Avg_AUPR_training:',result_[0])
            print('Avg_AUPR:',result_[1])
            print('folds_AUPR:',result_[2])
            print('Avg_AUC_training:',result_[3])
            print('Avg_AUC:',result_[4])
            print('folds_AUC:',result_[5])
            print('precision_testing = {}, recall_testing = {}, fscore_testing = {}'.format(result_[8][0], result_[8][1], result_[8][2]))
            print('precision_training = {}, recall_training = {}, fscore_training = {}'.format(result_[9][0],result_[9][1], result_[9][2]))
            print('G_mean = ', result_[12])
            print('')
            print( "###########################################")
            
        
        aupr_list.append(aupr) 
        auc_list.append(roc_auc)
        precision_list.append(precision)
        recall_list.append(recall)
        fscore_list.append(fscore)
        recall_25_list.append(recall_25)
        recall_50_list.append(recall_50)
        recall_100_list.append(recall_100)
        recall_200_list.append(recall_200)
        recall_400_list.append(recall_400)
        G_mean_list.append(G_mean)
        test_true_predict_compare_10cv_seeds_modes.append(test_true_predict_compare_10cv_seeds)
        print( "################Results###################" )
        print('model_architecture:',layer)
        print( "Mode: %s" % mode_list )
        print( "Average AUPR: %s" % aupr_list ) 
        print( "Average AUC: %s" % auc_list )
        print( "Average precision = {} ".format(precision_list))
        print( "Average recall = {} ".format(recall_list))
        print( "Average f1score = {} ".format(fscore_list))
        print( "Average recall_25 = {} ".format(recall_25_list))
        print( "Average recall_50 = {} ".format(recall_50_list))
        print( "Average recall_100 = {} ".format(recall_100_list))
        print( "Average recall_200 = {} ".format(recall_200_list))
        print( "Average recall_400 = {} ".format(recall_400_list))
        print( "Average G_mean = ,", G_mean_list)
        print( "###########################################")
    
    return test_true_predict_compare_10cv_seeds_modes
	
def run_classification_configuration(X_train_10_fold, X_test_10_fold, y_train_10_fold, y_test_10_fold,test_idx_10_fold, train_idx_10_fold,
                                     rf_tree, rf_max_depth, rf_tree_2, rf_max_depth_2,
                                     xgb_tree, xgb_max_depth, min_child_weight, lr, xgb_tree_2, xgb_max_depth_2, min_child_weight_2, lr_2,
                                     layer, mode, seed):
    
    folds_AUC_testing, folds_AUPR_testing = [], []
    folds_AUC_training, folds_AUPR_training= [], []
    folds_metrics3_training, folds_metrics3_testing = [], []
    test_true_predict_compare, train_true_predict_compare = [], []
    folds_recall_25, folds_recall_50, folds_recall_100, folds_recall_200, folds_recall_400 = [], [], [], [], []
    folds_G_mean = []
    i = 0
    for X_train, X_test, y_train, y_test, test_idx_fold, train_idx_fold in zip(X_train_10_fold, X_test_10_fold, y_train_10_fold, y_test_10_fold, test_idx_10_fold, train_idx_10_fold):
        
        config = get_toy_config(rf_tree, rf_max_depth, rf_tree_2, rf_max_depth_2,
                                xgb_tree, xgb_max_depth, min_child_weight, lr, xgb_tree_2, xgb_max_depth_2, min_child_weight_2, lr_2,
                                layer)
        gc = GCForest(config)
        print(config)
        X_train_enc = gc.fit_transform(X_train, y_train)

        y_pred_train = gc.predict(X_train)
        y_predprob_train = gc.predict_proba(X_train)
        y_pred_test = gc.predict(X_test)
        y_predprob_test = gc.predict_proba(X_test)
        
        temp = pd.DataFrame([y_test, y_predprob_test[:,1],y_pred_test]).T.sort_values(by = 1, ascending = False)
        recall_25 = precision_recall_fscore_support(temp.iloc[:25,:][0], temp.iloc[:25,:][2], pos_label = 1, average = 'binary')[1]
        recall_50 = precision_recall_fscore_support(temp.iloc[:50,:][0], temp.iloc[:50,:][2], pos_label = 1, average = 'binary')[1]
        recall_100 = precision_recall_fscore_support(temp.iloc[:100,:][0], temp.iloc[:100,:][2], pos_label = 1, average = 'binary')[1]
        recall_200 = precision_recall_fscore_support(temp.iloc[:200,:][0], temp.iloc[:200,:][2], pos_label = 1, average = 'binary')[1]
        recall_400 = precision_recall_fscore_support(temp.iloc[:400,:][0], temp.iloc[:400,:][2], pos_label = 1, average = 'binary')[1]
        
        test_true_predict_compare.append([test_idx_fold, y_pred_test, y_test, y_predprob_test[:,0], y_predprob_test[:,1]]) #10-cv
        train_true_predict_compare.append([train_idx_fold, y_pred_train, y_train, y_predprob_train[:,0], y_predprob_train[:,1]]) #10-cv

#         with open(r'E:\11_MFDF_code\3_my_code\9_major_revised\NR\code\12\model\NR_PathCS_' + layer + '_S' + str(mode) + '_seed' + str(seed) + '_fold' + str(i) + '.pkl', "wb") as f1:
#             pickle.dump(gc, f1, pickle.HIGHEST_PROTOCOL)
                
        precision_training, recall_training, _ = precision_recall_curve(y_train, y_predprob_train[:,1], pos_label=1)
        precision_testing, recall_testing, _ =   precision_recall_curve(y_test, y_predprob_test[:,1], pos_label=1)    
        
        AUPR_training, AUPR_testing = auc(recall_training,precision_training), auc(recall_testing, precision_testing)
        AUC_training, AUC_testing = roc_auc_score(y_train, y_predprob_train[:,1]), roc_auc_score(y_test, y_predprob_test[:,1])
        
        metrics3_testing = precision_recall_fscore_support(y_test, y_pred_test, pos_label = 1, average = 'binary')[:3]
        metrics3_training = precision_recall_fscore_support(y_train, y_pred_train, pos_label = 1, average = 'binary')[:3]
                
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred_test, labels = [0,1]).ravel()
        specificity = float(tn) / float(tn + fp)
        recall = metrics3_testing[1]
        G_mean = np.sqrt(recall * specificity)

        folds_AUC_testing.append(AUC_testing)
        folds_AUPR_testing.append(AUPR_testing)
        folds_metrics3_testing.append(metrics3_testing)
        folds_AUC_training.append(AUC_training)
        folds_AUPR_training.append(AUPR_training)
        folds_metrics3_training.append(metrics3_training)
        folds_G_mean.append(G_mean)
        folds_recall_25.append(recall_25)
        folds_recall_50.append(recall_50)
        folds_recall_100.append(recall_100)
        folds_recall_200.append(recall_200)
        folds_recall_400.append(recall_400)
        i += 1
    Avg_AUPR_training = np.mean(folds_AUPR_training)
    Avg_AUPR_testing = np.mean(folds_AUPR_testing)
    Avg_AUC_training = np.mean(folds_AUC_training)
    Avg_AUC_testing = np.mean(folds_AUC_testing) 
    Avg_metrics3_training = np.mean(folds_metrics3_training, axis = 0)
    Avg_metrics3_testing = np.mean(folds_metrics3_testing, axis = 0)
    Avg_G_mean = np.mean(folds_G_mean)
    
    return [Avg_AUPR_training, Avg_AUPR_testing, folds_AUPR_testing,  #012
            Avg_AUC_training, Avg_AUC_testing, folds_AUC_testing, #345
            folds_AUPR_training, folds_AUC_training, #67
            Avg_metrics3_testing, Avg_metrics3_training, #89
            folds_recall_25, folds_recall_50, folds_recall_100, folds_recall_200, folds_recall_400, folds_G_mean], [test_true_predict_compare, train_true_predict_compare] #


mode_list = ["p", "D", "T"]
seeds = [1231, 8367, 22, 1812, 4659]
model_architecture = ['rf1', 'xgb1', 'rf2', 'xgb2', 'rf2xgb2', 'rf1xgb2', 'rf2xgb1', 'rf1xgb1_2','rf1xgb1'] # DNN time


for layer in model_architecture:
    print('model_architecture:',layer)
    time_begin_cpu = time.clock()
    time_begin_wall = time.time()
    rf1_PathCS_test_true_predict_compare_10cv_seeds_modes = gain_results(seeds,mode_list,layer, only_PathCS_feature = True)
    DTI_CDF_new_pair, y_choose_all, ddr_intersection_dti = calc_metrics(rf1_PathCS_test_true_predict_compare_10cv_seeds_modes,DT_feature_pair_list,E_ddr_new_pair)
    time_end_cpu = time.clock()
    time_end_wall = time.time()
    print('CPU Time = {}, Wall Time  = {}'.format(time_end_cpu - time_begin_cpu, time_end_wall - time_begin_wall))