haibojin001
/
NNIF

 
			
							from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import gdown
import os
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.linear_model import LogisticRegressionCV
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score


def load_svhn():
    """Load the SVHN arrays, downloading any that are missing from the cache.

    The four ``.npy`` files are stored under ``~/.keras/datasets/svhn``. Every
    file that is not already present there is fetched from Google Drive with
    ``gdown`` before anything is loaded.

    :return: ``(X_train, y_train), (X_test, y_test)`` as returned by ``np.load``
    """
    datadir = os.path.join(os.path.expanduser('~'), '.keras', 'datasets', 'svhn')
    try:
        os.makedirs(datadir)
    except OSError:
        # The directory already existing (possibly created concurrently by
        # another process) is fine; any other failure is a real error.
        if not os.path.isdir(datadir):
            raise

    # (file name, Google Drive file id), in the order the arrays are returned.
    sources = (
        ('X_train.npy', '1G1_onGVI9OKRN9ANMS2kKX5_OkN2Pqjd'),
        ('y_train.npy', '1ijYnRSTB7S2zctjUax2ycW-TgK7Ux2cG'),
        ('X_test.npy', '1TVhS8ns7fPrtUdLZ2nYUsGtRUQT9yaKC'),
        ('y_test.npy', '1ySH19ynJmXLsAfjec0mHfdptduRGCgb2'),
    )
    paths = [os.path.join(datadir, name) for name, _ in sources]

    # Fetch everything that is missing first, then load, so a failed download
    # surfaces before any array is read.
    for path, (_, drive_id) in zip(paths, sources):
        if not os.path.exists(path):
            gdown.download('https://drive.google.com/uc?id=' + drive_id, path, quiet=False)

    X_train, y_train, X_test, y_test = [np.load(path) for path in paths]
    return (X_train, y_train), (X_test, y_test)

def one_hot(indices, depth):
    """Build a one-hot matrix from an array of integer indices.

    :param indices: numpy array of integer positions, one per output row
    :param depth: the length of each one-hot vector (number of columns)
    :return: float array of shape ``(indices.shape[0], depth)`` with a single
        1 per row at the column given by ``indices``
    """
    n_rows = indices.shape[0]
    encoded = np.zeros((n_rows, depth))
    row_ids = np.arange(n_rows)
    # Pair each row with its own index to set exactly one entry per row.
    encoded[row_ids, indices] = 1
    return encoded


# LID (local intrinsic dimensionality) estimate for each point of a query batch
def mle_batch(data, batch, k):
    """Maximum-likelihood LID estimate for every point in ``batch``.

    For each row of ``batch`` the distances to all rows of ``data`` are sorted,
    the smallest one is skipped, and the next ``k`` distances ``r_1 <= ... <= r_k``
    are combined as ``-k / sum(log(r_i / r_k))``.

    :param data: reference points, 2-D array-like (rows are points)
    :param batch: query points, 2-D array-like with the same number of columns
    :param k: number of neighbours to use; clamped to ``len(data) - 1``
    :return: 1-D array holding one estimate per row of ``batch``
    :raises ValueError: if fewer than one neighbour is available after clamping
    """
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)

    k = min(k, len(data) - 1)
    if k < 1:
        raise ValueError(
            'mle_batch needs k >= 1 and at least two reference points '
            '(got k=%r after clamping to len(data) - 1)' % (k,))

    # Column 0 of the sorted distances is dropped: presumably it is the query's
    # zero distance to itself when ``batch`` is drawn from ``data`` -- verify
    # against callers.
    dists = np.sort(cdist(batch, data), axis=1)[:, 1:k + 1]
    # Normalise every row by its k-th (largest kept) neighbour distance.
    log_ratios = np.log(dists / dists[:, -1:])
    return -k / np.sum(log_ratios, axis=1)

def train_lr(X, y):
    """Fit a cross-validated logistic regression classifier.

    :param X: the data samples
    :param y: the labels
    :return: the fitted ``LogisticRegressionCV`` estimator (3-fold CV,
        up to 20000 iterations, all available cores)
    """
    classifier = LogisticRegressionCV(cv=3, max_iter=20000, n_jobs=-1)
    classifier.fit(X, y)
    return classifier

def compute_roc(y_true, y_pred, plot=False):
    """Compute the ROC curve and its AUC, optionally plotting the curve.

    :param y_true: ground truth labels
    :param y_pred: predicted scores, passed to ``roc_curve`` / ``roc_auc_score``
    :param plot: when true, draw the curve with matplotlib and call ``plt.show()``
    :return: ``(fpr, tpr, auc_score)``
    """
    fpr, tpr, _thresholds = roc_curve(y_true, y_pred)
    auc_score = roc_auc_score(y_true, y_pred)
    result = (fpr, tpr, auc_score)

    if not plot:
        return result

    legend_text = 'ROC (AUC = %0.4f)' % auc_score
    plt.figure(figsize=(7, 6))
    plt.plot(fpr, tpr, color='blue', label=legend_text)
    plt.legend(loc='lower right')
    plt.title("ROC Curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()

    return result