|
- """
- This code does three things one after the other:
- 1) Quickly evaluate the model accuracy performance on a specified dataset (train/validation/test).
- 2) Attack the specified dataset subset with a specified attack (only on the first call to this script with the attack).
- 3) For each sample in the 'set' subset (val/test), calculate and save the Influence Functions scores I_up_loss
- """
-
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- from __future__ import unicode_literals
-
- import matplotlib
-
- # Force matplotlib to not use any Xwindows backend.
- # import platform
- # if platform.system() == 'Linux':
- matplotlib.use('Agg')
-
- import logging
- import numpy as np
- import tensorflow as tf
- import os
- import imageio
- from tqdm import tqdm
- import darkon
- from cleverhans.attacks import FastGradientMethod, DeepFool, SaliencyMapMethod, CarliniWagnerL2, MadryEtAl, ElasticNetMethod
- from tensorflow.python.platform import flags
- from cleverhans.loss import CrossEntropy, WeightDecay, WeightedSum
- from NNIF_adv_defense.models.darkon_resnet34_model import DarkonReplica
- from cleverhans.utils import AccuracyReport, set_log_level
- from NNIF_adv_defense.tools.utils import one_hot
- from sklearn.neighbors import NearestNeighbors
- import matplotlib.pyplot as plt
- from NNIF_adv_defense.datasets.influence_feeder import MyFeederValTest
- import pickle
- from cleverhans.utils import random_targets
- from cleverhans.evaluation import batch_eval
- import time
-
FLAGS = flags.FLAGS

# Command-line flags selecting the dataset, the evaluated subset, the attack and the checkpoint location.
flags.DEFINE_integer('batch_size', 125, 'Size of evaluating batches')
flags.DEFINE_string('dataset', 'cifar10', 'datasset: cifar10/100 or svhn')
flags.DEFINE_string('set', 'val', 'val or test set to evaluate')
flags.DEFINE_string('attack', 'deepfool', 'adversarial attack: deepfool, jsma, cw, cw_nnif')
flags.DEFINE_string('checkpoint_dir', '', 'Checkpoint dir, the path to the saved model architecture and weights')


# TODO: remove
flags.DEFINE_string('mode', 'null', 'to bypass pycharm bug')
flags.DEFINE_string('port', 'null', 'to bypass pycharm bug')

# Fail fast on an unknown subset: the scoring stage indexes info[FLAGS.set], which only holds the
# 'val' and 'test' keys, so a typo would otherwise surface as a KeyError after all the heavy work.
if FLAGS.set not in ('val', 'test'):
    raise ValueError("Unsupported --set value {!r}: expected 'val' or 'test'".format(FLAGS.set))

if FLAGS.set == 'val':
    test_val_set = True  # evaluating on the validation set
    WORKSPACE = 'influence_workspace_validation'
    USE_TRAIN_MINI = False  # use all the training set examples in evaluation
else:
    test_val_set = False  # evaluating on the test set
    WORKSPACE = 'influence_workspace_test_mini'
    USE_TRAIN_MINI = True  # use only a random subset of the training set

TARGETED = FLAGS.attack != 'deepfool'  # we use targeted attacks everywhere except deepfool
-
# Human-readable class names, indexed by integer label, for every supported dataset.
_CIFAR10_NAMES = (
    'airplane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

_CIFAR100_NAMES = (
    'apple', 'aquarium_fish', 'baby', 'bear', 'beaver',
    'bed', 'bee', 'beetle', 'bicycle', 'bottle',
    'bowl', 'boy', 'bridge', 'bus', 'butterfly',
    'camel', 'can', 'castle', 'caterpillar', 'cattle',
    'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach',
    'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
    'dolphin', 'elephant', 'flatfish', 'forest', 'fox',
    'girl', 'hamster', 'house', 'kangaroo', 'keyboard',
    'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard',
    'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
    'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid',
    'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree',
    'plain', 'plate', 'poppy', 'porcupine', 'possum',
    'rabbit', 'raccoon', 'ray', 'road', 'rocket',
    'rose', 'sea', 'seal', 'shark', 'shrew',
    'skunk', 'skyscraper', 'snail', 'snake', 'spider',
    'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table',
    'tank', 'telephone', 'television', 'tiger', 'tractor',
    'train', 'trout', 'tulip', 'turtle', 'wardrobe',
    'whale', 'willow_tree', 'wolf', 'woman', 'worm',
)

_SVHN_NAMES = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

_classes = {
    'cifar10': _CIFAR10_NAMES,
    'cifar100': _CIFAR100_NAMES,
    'svhn': _SVHN_NAMES,
}
-
# Variable-scope names of the authors' Resnet34 graphs, one per dataset. Anyone who wants to load the
# published network parameters (and possibly reuse the published scores.npy outputs, which take a long
# time to recompute) must keep these exact strings; otherwise any scope name works.
ARCH_NAME = {
    'cifar10': 'model1',
    'cifar100': 'model_cifar_100',
    'svhn': 'model_svhn',
}

# Coefficient of the weight-decay term in the training loss.
weight_decay = 0.0004

# Label-smoothing value passed to the cross-entropy loss, per dataset.
LABEL_SMOOTHING = {
    'cifar10': 0.1,
    'cifar100': 0.01,
    'svhn': 0.1,
}

# Keeps track of (and returns) the key accuracies.
report = AccuracyReport()

# One fixed seed drives both the numpy generator and the TF graph-level seed, for reproducibility.
superseed = 123456789
rand_gen = np.random.RandomState(superseed)
tf.set_random_seed(superseed)

# Show debug-level log output.
set_log_level(logging.DEBUG)

# TF session used for everything below.
config_args = {'allow_soft_placement': True}
sess = tf.Session(config=tf.ConfigProto(**config_args))
-
# Directory holding the training records: a user-specified --checkpoint_dir wins over the default.
model_dir = FLAGS.checkpoint_dir if FLAGS.checkpoint_dir != '' else os.path.join(FLAGS.dataset, 'trained_model')

workspace_dir = os.path.join(model_dir, WORKSPACE)
attack_dir = os.path.join(model_dir, FLAGS.attack)
if TARGETED:
    attack_dir += '_targeted'

# The attack directory must exist before anything is cached in it.
if not os.path.exists(attack_dir):
    os.makedirs(attack_dir)

val_indices = np.load(os.path.join(model_dir, 'val_indices.npy'))

# When only a subset of the training set is used, its indices are drawn once and cached on disk.
mini_train_inds = None
if USE_TRAIN_MINI:
    train_mini_indices_path = os.path.join(model_dir, 'train_mini_indices.npy')
    if os.path.exists(train_mini_indices_path):
        print('loading train mini indices from {}'.format(train_mini_indices_path))
    else:
        print('Creating train mini indices for the test set...')
        # a temporary feeder is needed to fetch all the training (not validation) sample indices
        tmp_feeder = MyFeederValTest(dataset=FLAGS.dataset, rand_gen=rand_gen, as_one_hot=True,
                                     val_inds=val_indices, test_val_set=True, mini_train_inds=None)
        train_inds = tmp_feeder.train_inds
        # collecting features for 5000 random training samples (instead of 49k) due to time complexity
        random_inds = rand_gen.choice(train_inds, 5000, replace=False)
        random_inds.sort()
        np.save(train_mini_indices_path, random_inds)
        del tmp_feeder
    mini_train_inds = np.load(train_mini_indices_path)

feeder = MyFeederValTest(dataset=FLAGS.dataset, rand_gen=rand_gen, as_one_hot=True,
                         val_inds=val_indices, test_val_set=test_val_set, mini_train_inds=mini_train_inds)

# Fetch the images and one-hot labels of every subset.
X_train, y_train = feeder.train_indices(range(feeder.get_train_size()))
X_val, y_val = feeder.val_indices(range(feeder.get_val_size()))
X_test, y_test = feeder.test_data, feeder.test_label  # getting the real test set

# Integer class ids obtained from the one-hot labels.
y_train_sparse, y_val_sparse, y_test_sparse = (
    one_hot_labels.argmax(axis=-1).astype(np.int32) for one_hot_labels in (y_train, y_val, y_test)
)
-
if TARGETED:
    # Targeted attacks need adversarial target labels for the val and test sets. They are drawn once
    # and cached in the attack directory so that every later run reuses the same targets.
    val_targets_file = os.path.join(attack_dir, 'y_val_targets.npy')
    test_targets_file = os.path.join(attack_dir, 'y_test_targets.npy')
    if os.path.isfile(val_targets_file):
        y_val_targets = np.load(val_targets_file)
        y_test_targets = np.load(test_targets_file)
    else:
        y_val_targets = random_targets(y_val_sparse, feeder.num_classes)
        y_test_targets = random_targets(y_test_sparse, feeder.num_classes)
        # a target must never coincide with the true label
        assert (y_val_targets.argmax(axis=1) != y_val_sparse).all()
        assert (y_test_targets.argmax(axis=1) != y_test_sparse).all()
        np.save(val_targets_file, y_val_targets)
        np.save(test_targets_file, y_test_targets)
-
# Image geometry and number of classes, read off the test arrays.
img_rows, img_cols, nchannels = X_test.shape[1:4]
nb_classes = y_test.shape[1]

# Input placeholders: a batch of images and their one-hot labels.
x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels), name='x')
y = tf.placeholder(tf.float32, shape=(None, nb_classes), name='y')

# Build the network under the dataset-specific scope and expose the tensors used below.
model = DarkonReplica(
    scope=ARCH_NAME[FLAGS.dataset],
    nb_classes=feeder.num_classes,
    n=5,
    input_shape=[32, 32, 3],
)
preds = model.get_predicted_class(x)
logits = model.get_logits(x)
embeddings = model.get_embeddings(x)

# Label-smoothed cross entropy, plus the same loss combined with the weighted weight-decay term.
loss = CrossEntropy(model, smoothing=LABEL_SMOOTHING[FLAGS.dataset])
regu_losses = WeightDecay(model)
full_loss = WeightedSum(model, [(1.0, loss), (weight_decay, regu_losses)])

# Restore the weights from the checkpoint in the model directory.
saver = tf.train.Saver()
checkpoint_path = os.path.join(model_dir, 'best_model.ckpt')
saver.restore(sess, checkpoint_path)
-
def _predict_or_load(preds_file, features_file, images, labels):
    """Return (predicted classes, embeddings) for `images`, cached on disk.

    When `preds_file` already exists, both arrays are loaded from `preds_file` and `features_file`.
    Otherwise the network is evaluated in batches of FLAGS.batch_size, the predictions are cast to
    int32, and both arrays are saved to the two files before being returned.
    """
    if os.path.isfile(preds_file):
        return np.load(preds_file), np.load(features_file)

    predicted, features = batch_eval(sess, [x, y], [preds, embeddings], [images, labels], FLAGS.batch_size)
    predicted = predicted.astype(np.int32)
    np.save(preds_file, predicted)
    np.save(features_file, features)
    return predicted, features


# predict labels from trainset (the mini subset keeps its own cache files)
if USE_TRAIN_MINI:
    train_preds_file = os.path.join(model_dir, 'x_train_mini_preds.npy')
    train_features_file = os.path.join(model_dir, 'x_train_mini_features.npy')
else:
    train_preds_file = os.path.join(model_dir, 'x_train_preds.npy')
    train_features_file = os.path.join(model_dir, 'x_train_features.npy')
x_train_preds, x_train_features = _predict_or_load(train_preds_file, train_features_file, X_train, y_train)

# predict labels from validation set
x_val_preds, x_val_features = _predict_or_load(
    os.path.join(model_dir, 'x_val_preds.npy'),
    os.path.join(model_dir, 'x_val_features.npy'),
    X_val, y_val)

# predict labels from test set
x_test_preds, x_test_features = _predict_or_load(
    os.path.join(model_dir, 'x_test_preds.npy'),
    os.path.join(model_dir, 'x_test_features.npy'),
    X_test, y_test)
-
def _adv_files(subset):
    """Return the cache paths (adversarial images, predictions, features) of `subset` ('val'/'test')."""
    return (os.path.join(attack_dir, 'X_{}_adv.npy'.format(subset)),
            os.path.join(attack_dir, 'x_{}_preds_adv.npy'.format(subset)),
            os.path.join(attack_dir, 'x_{}_features_adv.npy'.format(subset)))


def _attack_and_save(subset, images, labels, targets):
    """Attack `images`, save the three result arrays of `subset` to attack_dir and return them.

    `targets` holds the one-hot adversarial target labels and is only fed when TARGETED is set.
    Returns (adversarial images, int32 predicted classes on them, embeddings of them).
    """
    tf_inputs = [x, y]
    numpy_inputs = [images, labels]
    if TARGETED:
        tf_inputs.append(y_adv)
        numpy_inputs.append(targets)

    adv_images, adv_preds, adv_features = batch_eval(
        sess, tf_inputs, [adv_x, preds_adv, embeddings_adv], numpy_inputs, FLAGS.batch_size)
    adv_preds = adv_preds.astype(np.int32)
    for path, array in zip(_adv_files(subset), (adv_images, adv_preds, adv_features)):
        np.save(path, array)
    return adv_images, adv_preds, adv_features


# initialize adversarial examples if necessary. Each subset is cached on its own, so a run that was
# interrupted between the val attack and the test attack resumes instead of failing on a missing file.
subsets_to_attack = [subset for subset in ('val', 'test')
                     if not all(os.path.exists(path) for path in _adv_files(subset))]

if subsets_to_attack:
    y_adv = tf.placeholder(tf.float32, shape=(None, nb_classes), name='y_adv')

    # Initialize the adversarial attack object and graph
    deepfool_params = {
        'clip_min': 0.0,
        'clip_max': 1.0
    }
    jsma_params = {
        'clip_min': 0.0,
        'clip_max': 1.0,
        'theta': 1.0,
        'gamma': 0.1,
    }
    cw_params = {
        'clip_min': 0.0,
        'clip_max': 1.0,
        # Kept equal to the batch size batch_eval feeds, so the attack and its input batches agree.
        # NOTE: you might need to reduce --batch_size if your GPU has low memory.
        'batch_size': FLAGS.batch_size,
        'confidence': 0.8,
        'learning_rate': 0.01,
        'initial_const': 0.1
    }
    fgsm_params = {
        'clip_min': 0.0,
        'clip_max': 1.0,
        'eps': 0.1
    }
    pgd_params = {
        'clip_min': 0.0,
        'clip_max': 1.0,
        'eps': 0.02,
        'eps_iter': 0.002,
        'ord': np.inf
    }
    ead_params = {
        'clip_min': 0.0,
        'clip_max': 1.0,
        'batch_size': FLAGS.batch_size,  # same coupling with batch_eval as for cw
        'confidence': 0.8,
        'learning_rate': 0.01,
        'initial_const': 0.1,
        'decision_rule': 'L1'
    }
    if TARGETED:
        # every attack but deepfool is run targeted
        for targeted_params in (jsma_params, cw_params, fgsm_params, pgd_params, ead_params):
            targeted_params['y_target'] = y_adv

    supported_attacks = {
        'deepfool': (DeepFool, deepfool_params),
        'jsma': (SaliencyMapMethod, jsma_params),
        'cw': (CarliniWagnerL2, cw_params),
        'fgsm': (FastGradientMethod, fgsm_params),
        'pgd': (MadryEtAl, pgd_params),
        'ead': (ElasticNetMethod, ead_params),
    }
    if FLAGS.attack not in supported_attacks:
        raise AssertionError('Attack {} is not supported'.format(FLAGS.attack))
    attack_class, attack_params = supported_attacks[FLAGS.attack]

    attack = attack_class(model, sess=sess)
    adv_x = attack.generate(x, **attack_params)
    preds_adv = model.get_predicted_class(adv_x)
    logits_adv = model.get_logits(adv_x)
    embeddings_adv = model.get_embeddings(adv_x)

adv_results = {}
for subset, subset_images, subset_labels in (('val', X_val, y_val), ('test', X_test, y_test)):
    if subset in subsets_to_attack:
        if TARGETED:
            subset_targets = y_val_targets if subset == 'val' else y_test_targets
        else:
            subset_targets = None
        adv_results[subset] = _attack_and_save(subset, subset_images, subset_labels, subset_targets)
    else:
        adv_results[subset] = tuple(np.load(path) for path in _adv_files(subset))

X_val_adv, x_val_preds_adv, x_val_features_adv = adv_results['val']
X_test_adv, x_test_preds_adv, x_test_features_adv = adv_results['test']
-
# Accuracy on the clean subsets and on the adversarial val/test images.
train_acc = np.mean(y_train_sparse == x_train_preds)
val_acc = np.mean(y_val_sparse == x_val_preds)
test_acc = np.mean(y_test_sparse == x_test_preds)
val_adv_acc = np.mean(y_val_sparse == x_val_preds_adv)
test_adv_acc = np.mean(y_test_sparse == x_test_preds_adv)
print('train set acc: {}\nvalidation set acc: {}\ntest set acc: {}'.format(train_acc, val_acc, test_acc))
print('adversarial ({}) validation set acc: {}\nadversarial ({}) test set acc: {}'.format(
    FLAGS.attack, val_adv_acc, FLAGS.attack, test_adv_acc))


def _subset_info(global_inds, labels_sparse, clean_preds, adv_preds):
    """Build the per-sample record of one subset, keyed by the sample's position in that subset.

    Each record stores the sample's global index, whether the network classified the clean image
    correctly ('net_succ') and whether the attack changed the predicted class ('attack_succ').
    """
    records = {}
    for position, global_ind in enumerate(global_inds):
        records[position] = {
            'global_index': global_ind,
            'net_succ': clean_preds[position] == labels_sparse[position],
            'attack_succ': clean_preds[position] != adv_preds[position],
        }
    return records


# Which samples did the network classify correctly, and for which did the attack change the class?
info = {
    'val': _subset_info(feeder.val_inds, y_val_sparse, x_val_preds, x_val_preds_adv),
    'test': _subset_info(feeder.test_inds, y_test_sparse, x_test_preds, x_test_preds_adv),
}

# Attack rate: fraction of the correctly classified samples whose prediction the attack changed.
val_net_succ_indices = [ind for ind, record in info['val'].items() if record['net_succ']]
val_net_succ_attack_succ_indices = [ind for ind in val_net_succ_indices if info['val'][ind]['attack_succ']]
test_net_succ_indices = [ind for ind, record in info['test'].items() if record['net_succ']]
test_net_succ_attack_succ_indices = [ind for ind in test_net_succ_indices if info['test'][ind]['attack_succ']]
val_attack_rate = len(val_net_succ_attack_succ_indices) / len(val_net_succ_indices)
test_attack_rate = len(test_net_succ_attack_succ_indices) / len(test_net_succ_indices)
print('adversarial ({}) validation attack rate: {}\nadversarial ({}) test attack rate: {}'.format(
    FLAGS.attack, val_attack_rate, FLAGS.attack, test_attack_rate))

# Persist the records on the first run; on later runs verify that nothing has changed since.
info_file = os.path.join(attack_dir, 'info.pkl')
if os.path.isfile(info_file):
    print('loading info as pickle from {}'.format(info_file))
    with open(info_file, 'rb') as handle:
        info_old = pickle.load(handle)
    assert info == info_old
else:
    print('saving info as pickle to {}'.format(info_file))
    with open(info_file, 'wb') as handle:
        pickle.dump(info, handle, protocol=pickle.HIGHEST_PROTOCOL)
-
# Fit a nearest-neighbour index on the training embeddings. n_neighbors equals the training-set
# size, so every query returns a full ordering of the training samples.
knn = NearestNeighbors(n_neighbors=feeder.get_train_size(), p=2, n_jobs=20)
knn.fit(x_train_features)

# Query with the embeddings of the evaluated subset, clean and adversarial.
if test_val_set:
    print('predicting knn for all val set')
    features, features_adv = x_val_features, x_val_features_adv
else:
    print('predicting knn for all test set')
    features, features_adv = x_test_features, x_test_features_adv

print('predicting knn dist/indices for normal image')
all_neighbor_dists, all_neighbor_indices = knn.kneighbors(features, return_distance=True)
print('predicting knn dist/indices for adv image')
all_neighbor_dists_adv, all_neighbor_indices_adv = knn.kneighbors(features_adv, return_distance=True)
-
def _feeder_with_overrides(val_images, val_preds, test_images, test_preds):
    """Create a feeder whose val/test images and labels are replaced by the given arrays.

    The labels are the one-hot encoding of the given predicted classes, cast to float32.
    """
    custom_feeder = MyFeederValTest(dataset=FLAGS.dataset, rand_gen=rand_gen, as_one_hot=True,
                                    val_inds=feeder.val_inds, test_val_set=test_val_set,
                                    mini_train_inds=mini_train_inds)
    custom_feeder.val_origin_data = val_images
    custom_feeder.val_data = val_images
    custom_feeder.val_label = one_hot(val_preds, feeder.num_classes).astype(np.float32)
    custom_feeder.test_origin_data = test_images
    custom_feeder.test_data = test_images
    custom_feeder.test_label = one_hot(test_preds, feeder.num_classes).astype(np.float32)
    return custom_feeder


def _build_inspector(workspace, data_feeder):
    """Create a darkon Influence inspector over `data_feeder`, working in `workspace`."""
    return darkon.Influence(
        workspace=workspace,
        feeder=data_feeder,
        loss_op_train=full_loss.fprop(x=x, y=y),
        loss_op_test=loss.fprop(x=x, y=y),
        x_placeholder=x,
        y_placeholder=y)


# pred feeder: natural images, labelled with the network's own predictions
pred_feeder = _feeder_with_overrides(X_val, x_val_preds, X_test, x_test_preds)

# adv feeder: adversarial images, labelled with the network's predictions on them
adv_feeder = _feeder_with_overrides(X_val_adv, x_val_preds_adv, X_test_adv, x_test_preds_adv)

# now finding the influence
for any_feeder in (feeder, pred_feeder, adv_feeder):
    any_feeder.reset()

inspector_pred = _build_inspector(os.path.join(workspace_dir, 'pred'), pred_feeder)
inspector_adv = _build_inspector(os.path.join(workspace_dir, 'adv', FLAGS.attack), adv_feeder)

# some optimizations for the darkon influence function implementations
testset_batch_size = 100
train_batch_size = 200
if USE_TRAIN_MINI:
    train_iterations = 25   # 5k (25x200)
    recursion_depth = 5     # 5k (5x5x200)
else:
    train_iterations = 245  # 49k (245x200)
    recursion_depth = 49    # 49k (5x49x200)
approx_params = {
    'scale': 200,
    'num_repeats': 5,
    'recursion_depth': recursion_depth,
    'recursion_batch_size': 200
}

# Positions inside the evaluated subset, and the matching global indices.
sub_relevant_indices = list(info[FLAGS.set])
relevant_indices = [info[FLAGS.set][position]['global_index'] for position in sub_relevant_indices]
-
# calculate knn_ranks
def find_ranks(sub_index, sorted_influence_indices, adversarial=False):
    """Locate the given training indices inside the nearest-neighbour ordering of one sample.

    Reads the module-level kNN results: all_neighbor_indices / all_neighbor_dists, or their *_adv
    counterparts when `adversarial` is true.

    :param sub_index: row of the kNN arrays, i.e. the sample's position in the evaluated subset.
    :param sorted_influence_indices: training indices to look up.
    :param adversarial: use the kNN results of the adversarial images instead of the natural ones.
    :return: (ranks, dists). ranks[k] (int32) is the first position of sorted_influence_indices[k]
        in the sample's neighbour row and dists[k] (float32) is the distance stored at that position.
    :raises IndexError: if a requested index does not appear in the neighbour row.
    """
    print('Finding ranks for sub_index={} (adversarial={})'.format(sub_index, adversarial))
    if adversarial:
        ni, nd = all_neighbor_indices_adv, all_neighbor_dists_adv
    else:
        ni, nd = all_neighbor_indices, all_neighbor_dists

    neighbor_row = np.asarray(ni[sub_index])
    wanted = np.asarray(sorted_influence_indices, dtype=np.intp)

    # Sort the row once and binary-search every requested index, instead of scanning the whole row
    # per index. The stable sort plus the left-sided search yield the first occurrence of a value.
    order = np.argsort(neighbor_row, kind='mergesort')
    sorted_row = neighbor_row[order]
    slots = np.searchsorted(sorted_row, wanted)
    if wanted.size:
        if sorted_row.size == 0:
            raise IndexError('cannot look up training indices in an empty neighbor row')
        hits = sorted_row[np.minimum(slots, sorted_row.size - 1)] == wanted
        if not hits.all():
            raise IndexError('training index {} is not among the neighbors of sub_index={}'
                             .format(wanted[~hits][0], sub_index))

    ranks = order[slots].astype(np.int32)
    dists = np.asarray(nd[sub_index, ranks]).astype(np.float32)
    return ranks, dists
-
-
def _save_train_grid(train_indices, titles, out_path):
    """Save a 5x10 grid of training images, one titled panel per entry of `train_indices`."""
    fig, axes = plt.subplots(5, 10, figsize=(30, 10))
    for panel, train_idx, title in zip(axes.ravel(), train_indices, titles):
        panel.set_axis_off()
        panel.imshow(X_train[train_idx])
        panel.set_title(title)
    fig.savefig(out_path, dpi=350)
    plt.close(fig)


def _print_and_count_in_knn(header, train_indices, scores, nearest, data_feeder):
    """Print the score of every index in `train_indices`; return how many of them are in `nearest`."""
    print(header)
    count = 0
    for train_idx in train_indices:
        print('[{}] {}'.format(data_feeder.get_global_index('train', train_idx), scores[train_idx]))
        if train_idx in nearest:
            count += 1
    return count


class_names = _classes[FLAGS.dataset]

# For every sample of the evaluated subset, compute the influence scores of the training samples on
# the natural image ('pred') and on its adversarial counterpart ('adv'), then save scores and plots.
for i, sub_index in enumerate(tqdm(sub_relevant_indices)):
    global_index = feeder.val_inds[sub_index] if test_val_set else feeder.test_inds[sub_index]
    assert global_index == relevant_indices[i]

    _, real_label = feeder.test_indices(sub_index)
    real_label = np.argmax(real_label)

    pred_label = x_val_preds[sub_index] if test_val_set else x_test_preds[sub_index]

    _, adv_label = adv_feeder.test_indices(sub_index)
    adv_label = np.argmax(adv_label)

    # sanity checks: the labels fetched here must agree with the records stored in info
    sample_info = info[FLAGS.set][sub_index]
    failure_msg = 'failed for i={}, sub_index={}, global_index={}'.format(i, sub_index, global_index)
    if sample_info['attack_succ']:
        assert pred_label != adv_label, failure_msg
    if sample_info['net_succ']:
        assert pred_label == real_label, failure_msg

    progress_str = 'sample {}/{}: calculating scores for {} index {} (sub={}).\n' \
                   'real label: {}, adv label: {}, pred label: {}. net_succ={}, attack_succ={}' \
        .format(i + 1, len(sub_relevant_indices), FLAGS.set, global_index, sub_index,
                class_names[real_label], class_names[adv_label], class_names[pred_label],
                sample_info['net_succ'], sample_info['attack_succ'])
    logging.info(progress_str)
    print(progress_str)

    for case in ('pred', 'adv'):
        if case == 'pred':
            insp, feed, ni = inspector_pred, pred_feeder, all_neighbor_indices
        else:
            insp, feed, ni = inspector_adv, adv_feeder, all_neighbor_indices_adv

        # creating the relevant index folders
        out_dir = os.path.join(model_dir, FLAGS.set, FLAGS.set + '_index_{}'.format(global_index), case)
        if case == 'adv':
            out_dir = os.path.join(out_dir, FLAGS.attack)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # an existing scores file marks this sample/case as already processed
        if os.path.exists(os.path.join(out_dir, 'scores.npy')):
            print('calcaulation for global index {} was already done. Leaving it'.format(global_index))
            continue

        start_time = time.time()
        scores = insp.upweighting_influence_batch(
            sess=sess,
            test_indices=[sub_index],
            test_batch_size=testset_batch_size,
            approx_params=approx_params,
            train_batch_size=train_batch_size,
            train_iterations=train_iterations)
        print('ihvp + scores calculation time: {} secs. global_index: {} (sub: {}), case: {}'
              .format(time.time() - start_time, global_index, sub_index, case))

        np.save(os.path.join(out_dir, 'scores.npy'), scores)

        # Just plotting and extra information. Not mandatory to go over it, but useful for
        # visualization and debugging.
        print('saving image to {}'.format(os.path.join(out_dir, 'image.npy/png')))
        image, _ = feed.test_indices(sub_index)
        imageio.imwrite(os.path.join(out_dir, 'image.png'), image)
        np.save(os.path.join(out_dir, 'image.npy'), image)

        # lowest scores are reported as harmful, highest as helpful (strongest first)
        sorted_indices = np.argsort(scores)
        harmful = sorted_indices[:50]
        helpful = sorted_indices[-50:][::-1]
        nearest = ni[sub_index, 0:50]

        # have some figures
        cnt_harmful_in_knn = _print_and_count_in_knn('\nHarmful:', harmful, scores, nearest, feed)
        harmful_summary_str = '{}: {} out of {} harmful images are in the {}-NN\n'.format(
            case, cnt_harmful_in_knn, len(harmful), 50)
        print(harmful_summary_str)

        cnt_helpful_in_knn = _print_and_count_in_knn('\nHelpful:', helpful, scores, nearest, feed)
        helpful_summary_str = '{}: {} out of {} helpful images are in the {}-NN\n'.format(
            case, cnt_helpful_in_knn, len(helpful), 50)
        print(helpful_summary_str)

        _save_train_grid(
            nearest,
            ['[{}]: {}'.format(feed.get_global_index('train', idx), class_names[y_train_sparse[idx]])
             for idx in nearest],
            os.path.join(out_dir, 'nearest_neighbors.png'))

        helpful_ranks, helpful_dists = find_ranks(sub_index, sorted_indices[-1000:][::-1], case == 'adv')
        harmful_ranks, harmful_dists = find_ranks(sub_index, sorted_indices[:1000], case == 'adv')

        print('saving knn ranks and dists to {}'.format(out_dir))
        np.save(os.path.join(out_dir, 'helpful_ranks.npy'), helpful_ranks)
        np.save(os.path.join(out_dir, 'helpful_dists.npy'), helpful_dists)
        np.save(os.path.join(out_dir, 'harmful_ranks.npy'), harmful_ranks)
        np.save(os.path.join(out_dir, 'harmful_dists.npy'), harmful_dists)

        # `helpful`/`harmful` are the first 50 entries of the index lists passed to find_ranks, so the
        # neighbour position shown in each title is the matching leading entry of the ranks arrays.
        _save_train_grid(
            helpful,
            ['[{}]: {} #nn:{}'.format(feed.get_global_index('train', idx), class_names[y_train_sparse[idx]], rank)
             for idx, rank in zip(helpful, helpful_ranks)],
            os.path.join(out_dir, 'helpful.png'))

        _save_train_grid(
            harmful,
            ['[{}]: {} #nn:{}'.format(feed.get_global_index('train', idx), class_names[y_train_sparse[idx]], rank)
             for idx, rank in zip(harmful, harmful_ranks)],
            os.path.join(out_dir, 'harmful.png'))

        # The summary reports, for this case, the mean nearest-neighbour rank and the mean distance
        # of the (up to 1000) most helpful and most harmful training samples.
        with open(os.path.join(out_dir, 'summary.txt'), 'w') as f:
            f.write(harmful_summary_str)
            f.write(helpful_summary_str)
            f.write('label ({} -> {}). pred: {}. {} \nhelpful/harmful_rank mean: {}/{}\nhelpful/harmful_dist mean: {}/{}'
                    .format(class_names[real_label], class_names[adv_label],
                            class_names[pred_label], case, helpful_ranks.mean(), harmful_ranks.mean(),
                            helpful_dists.mean(), harmful_dists.mean()))
|