"""Functions for building the face recognition network.
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# pylint: disable=missing-docstring
from future import absolute_import
from future import division
from future import print_function
import math
import os
import random
import re
from subprocess import Popen, PIPE
import numpy as np
import tensorflow as tf
from scipy import interpolate
from scipy import misc
from six import iteritems
from sklearn.model_selection import KFold
from tensorflow.python.platform import gfile
from tensorflow.python.training import training
def triplet_loss(anchor, positive, negative, alpha):
    """Compute the triplet loss according to the FaceNet paper.

    Args:
      anchor: the embeddings for the anchor images.
      positive: the embeddings for the positive images.
      negative: the embeddings for the negative images.
      alpha: margin added to (positive distance - negative distance) before
        the result is clamped at zero.

    Returns:
      the triplet loss as a float tensor: the mean over axis 0 of
      max(pos_dist - neg_dist + alpha, 0).
    """
    with tf.variable_scope('triplet_loss'):
        # Squared distances anchor->positive and anchor->negative, summed over axis 1.
        anchor_to_positive = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
        anchor_to_negative = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)

        margin_violation = tf.add(tf.subtract(anchor_to_positive, anchor_to_negative), alpha)
        # Only triplets that violate the margin contribute to the loss.
        hinge = tf.maximum(margin_violation, 0.0)
        return tf.reduce_mean(hinge, 0)
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf)

    Args:
      features: feature tensor; its second dimension sets the width of the
        per-class center table.
      label: class indices for the rows of `features`; flattened to 1-D here.
      alfa: the gathered centers are moved by (1 - alfa) * (center - feature).
      nrof_classes: number of rows in the non-trainable 'centers' variable.

    Returns:
      A tuple (loss, centers): the mean squared difference between `features`
      and the centers gathered for their labels, and the result of the
      scatter_sub update of the 'centers' variable.
    """
    feature_dim = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, feature_dim], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    flat_label = tf.reshape(label, [-1])
    batch_centers = tf.gather(centers, flat_label)
    center_delta = (1 - alfa) * (batch_centers - features)
    centers = tf.scatter_sub(centers, flat_label, center_delta)
    # The loss op depends on the center update so that evaluating it also applies the update.
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - batch_centers))
    return loss, centers
def get_image_paths_and_labels(dataset):
    """Flatten a sequence of classes into parallel lists of paths and labels.

    Args:
      dataset: sequence of objects exposing an `image_paths` list.

    Returns:
      A tuple (image_paths_flat, labels_flat). Every path is labelled with the
      index of its class within `dataset`.
    """
    image_paths_flat = []
    labels_flat = []
    for class_index, image_class in enumerate(dataset):
        class_paths = image_class.image_paths
        image_paths_flat.extend(class_paths)
        labels_flat.extend([class_index] * len(class_paths))
    return image_paths_flat, labels_flat
def shuffle_examples(image_paths, labels):
    """Shuffle image paths and labels together, keeping each pair aligned.

    Args:
      image_paths: iterable of image paths.
      labels: iterable of labels, paired positionally with `image_paths`.

    Returns:
      A tuple (image_paths_shuff, labels_shuff) of two tuples in the same
      random order. Two empty tuples are returned for empty input.
    """
    paired = list(zip(image_paths, labels))
    if not paired:
        # zip(*[]) yields nothing, so the two-way unpacking below would fail.
        return (), ()
    random.shuffle(paired)
    image_paths_shuff, labels_shuff = zip(*paired)
    return image_paths_shuff, labels_shuff
def random_rotate_image(image):
    """Rotate an image by a random angle using bicubic interpolation.

    The angle is drawn with np.random.uniform(low=-10.0, high=10.0) and passed
    to misc.imrotate.

    Args:
      image: the image to rotate.

    Returns:
      The rotated image as returned by misc.imrotate.
    """
    rotation_angle = np.random.uniform(low=-10.0, high=10.0)
    rotated = misc.imrotate(image, rotation_angle, 'bicubic')
    return rotated
# 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip
# Preprocessing control flags. Each flag occupies its own bit, so several
# flags can be combined into a single integer control value.
RANDOM_ROTATE = 1 << 0
RANDOM_CROP = 1 << 1
RANDOM_FLIP = 1 << 2
FIXED_STANDARDIZATION = 1 << 3
FLIP = 1 << 4
def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder):
    """Build the graph that loads, preprocesses and batches images from a queue.

    For each preprocessing thread one (filenames, label, control) element is
    dequeued. Every file is read and decoded to 3 channels, then the bits of
    control[0] choose the processing steps:
      RANDOM_ROTATE: random rotation via random_rotate_image, else unchanged.
      RANDOM_CROP: random crop, else resize with crop or pad to image_size.
      RANDOM_FLIP: random left/right flip, else unchanged.
      FIXED_STANDARDIZATION: (image - 127.5) / 128.0, else per-image standardization.
      FLIP: unconditional left/right flip, else unchanged.

    Args:
      input_queue: queue whose dequeue() yields (filenames, label, control).
      image_size: (height, width) tuple; (3,) is appended for the channels.
      nrof_preprocess_threads: number of dequeue/preprocess branches to build.
      batch_size_placeholder: batch size handed to tf.train.batch_join.

    Returns:
      A tuple (image_batch, label_batch) produced by tf.train.batch_join.
    """
    image_shape = image_size + (3,)
    per_thread_examples = []
    for _ in range(nrof_preprocess_threads):
        filenames, label, control = input_queue.dequeue()
        processed_images = []
        for filename in tf.unstack(filenames):
            raw_bytes = tf.read_file(filename)
            image = tf.image.decode_image(raw_bytes, 3)
            image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE),
                            lambda: tf.py_func(random_rotate_image, [image], tf.uint8),
                            lambda: tf.identity(image))
            image = tf.cond(get_control_flag(control[0], RANDOM_CROP),
                            lambda: tf.random_crop(image, image_shape),
                            lambda: tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1]))
            image = tf.cond(get_control_flag(control[0], RANDOM_FLIP),
                            lambda: tf.image.random_flip_left_right(image),
                            lambda: tf.identity(image))
            image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION),
                            lambda: (tf.cast(image, tf.float32) - 127.5) / 128.0,
                            lambda: tf.image.per_image_standardization(image))
            image = tf.cond(get_control_flag(control[0], FLIP),
                            lambda: tf.image.flip_left_right(image),
                            lambda: tf.identity(image))
            # pylint: disable=no-member
            image.set_shape(image_shape)
            processed_images.append(image)
        per_thread_examples.append([processed_images, label])

    image_batch, label_batch = tf.train.batch_join(
        per_thread_examples,
        batch_size=batch_size_placeholder,
        shapes=[image_shape, ()],
        enqueue_many=True,
        capacity=4 * nrof_preprocess_threads * 100,
        allow_smaller_final_batch=True)
    return image_batch, label_batch
def get_control_flag(control, field):
    """Return a boolean tensor that is true when flag `field` is set in `control`.

    Computes floor(control / field) mod 2 == 1.

    Args:
      control: integer control value (tensor).
      field: the flag to test; presumably one of the power-of-two constants
        such as RANDOM_ROTATE -- verify against callers.
    """
    shifted = tf.floor_div(control, field)
    lowest_bit = tf.mod(shifted, 2)
    return tf.equal(lowest_bit, 1)
def _add_loss_summaries(total_loss):
    """Add summaries for losses.

    Creates an exponential moving average (decay 0.9) over every tensor in the
    'losses' collection plus `total_loss`, and attaches two scalar summaries
    to each of them.

    Args:
      total_loss: Total loss from loss().

    Returns:
      loss_averages_op: op for generating moving averages of losses.
    """
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    tracked_losses = tf.get_collection('losses') + [total_loss]
    loss_averages_op = loss_averages.apply(tracked_losses)

    for loss_tensor in tracked_losses:
        summary_name = loss_tensor.op.name
        # The unsmoothed value is tagged ' (raw)'; the moving average keeps
        # the original loss name.
        tf.summary.scalar(summary_name + ' (raw)', loss_tensor)
        tf.summary.scalar(summary_name, loss_averages.average(loss_tensor))

    return loss_averages_op
def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars,
          log_histograms=True):
    """Build the training op for the given loss.

    Args:
      total_loss: the loss tensor to minimize.
      global_step: step variable passed to apply_gradients and to the
        variable moving average.
      optimizer: one of 'ADAGRAD', 'ADADELTA', 'ADAM', 'RMSPROP' or 'MOM'.
      learning_rate: learning rate handed to the chosen optimizer.
      moving_average_decay: decay of the moving average of trainable variables.
      update_gradient_vars: variables for which gradients are computed.
      log_histograms: when true, add histogram summaries for the trainable
        variables and for their gradients.

    Returns:
      train_op: a no-op that depends on the gradient update and on the
      moving-average update of the trainable variables.

    Raises:
      ValueError: if `optimizer` is not one of the supported names.
    """
    # Moving averages of all losses and the associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Gradients are computed only after the loss averages have been updated.
    with tf.control_dependencies([loss_averages_op]):
        if optimizer == 'ADAGRAD':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif optimizer == 'ADADELTA':
            opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
        elif optimizer == 'ADAM':
            opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
        elif optimizer == 'RMSPROP':
            opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
        elif optimizer == 'MOM':
            opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
        else:
            raise ValueError('Invalid optimization algorithm')

        gradients = opt.compute_gradients(total_loss, update_gradient_vars)

    apply_gradient_op = opt.apply_gradients(gradients, global_step=global_step)

    if log_histograms:
        # Histograms for the trainable variables first, then for the gradients.
        for variable in tf.trainable_variables():
            tf.summary.histogram(variable.op.name, variable)
        for gradient, variable in gradients:
            if gradient is None:
                continue
            tf.summary.histogram(variable.op.name + '/gradients', gradient)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
def prewhiten(x):
    """Normalize an array to zero mean and (at most) unit standard deviation.

    Args:
      x: numpy array; mean and standard deviation are taken over all elements.

    Returns:
      (x - mean) scaled by 1 / max(std, 1 / sqrt(x.size)).
    """
    mean = np.mean(x)
    std = np.std(x)
    # Lower bound on the deviation so that a constant array does not divide by zero.
    min_std = 1.0 / np.sqrt(x.size)
    scale = 1 / np.maximum(std, min_std)
    return np.multiply(np.subtract(x, mean), scale)
def crop(image, random_crop, image_size):
    """Crop a window out of an image that is wider than `image_size`.

    Only image.shape[1] is compared with `image_size`, and the same centre
    is used on both axes, so a square input is presumably expected --
    verify against callers. The window spans 2 * (image_size // 2) pixels
    per axis.

    Args:
      image: array indexed as [row, column, channel].
      random_crop: when true, the window is shifted by a random offset on
        each axis; otherwise it is centred.
      image_size: target size in pixels.

    Returns:
      The cropped window, or `image` itself when image.shape[1] <= image_size.
    """
    if image.shape[1] <= image_size:
        return image

    half_input = int(image.shape[1] // 2)
    half_output = int(image_size // 2)
    if random_crop:
        max_shift = half_input - half_output
        h = np.random.randint(-max_shift, max_shift + 1)
        v = np.random.randint(-max_shift, max_shift + 1)
    else:
        h, v = 0, 0

    top = half_input - half_output + v
    bottom = half_input + half_output + v
    left = half_input - half_output + h
    right = half_input + half_output + h
    return image[top:bottom, left:right, :]
def flip(image, random_flip):
    """Optionally mirror an image left/right with probability one half.

    Args:
      image: numpy array to flip.
      random_flip: when false the image is returned untouched and no random
        number is drawn.

    Returns:
      Either `image` itself or np.fliplr(image).
    """
    if not random_flip:
        return image
    if np.random.choice([True, False]):
        return np.fliplr(image)
    return image
def to_rgb(img):
    """Expand a 2-D image to three identical channels.

    Args:
      img: 2-D array (unpacking img.shape into two values fails otherwise).

    Returns:
      A uint8 array of shape (w, h, 3) with `img` copied into every channel.
    """
    w, h = img.shape
    rgb = np.empty((w, h, 3), dtype=np.uint8)
    # Broadcast the single plane across the channel axis.
    rgb[:, :, :] = img[:, :, np.newaxis]
    return rgb
def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True):
    """Read images from disk into one array, applying the optional processing.

    Each image is read with misc.imread, expanded to three channels if it is
    2-D, optionally prewhitened, then cropped and flipped.

    Args:
      image_paths: sequence of image file paths.
      do_random_crop: forwarded to crop().
      do_random_flip: forwarded to flip().
      image_size: side length of the square output images.
      do_prewhiten: when true, prewhiten() is applied to each image.

    Returns:
      Array of shape (len(image_paths), image_size, image_size, 3).
    """
    images = np.zeros((len(image_paths), image_size, image_size, 3))
    for index, image_path in enumerate(image_paths):
        img = misc.imread(image_path)
        if img.ndim == 2:
            img = to_rgb(img)
        if do_prewhiten:
            img = prewhiten(img)
        img = crop(img, do_random_crop, image_size)
        img = flip(img, do_random_flip)
        images[index, :, :, :] = img
    return images
def get_label_batch(label_data, batch_size, batch_index):
    """Return one batch of labels, wrapping around at the end of the data.

    The batch starts at (batch_index * batch_size) % nrof_examples. When it
    runs past the last example it continues from the first one, so the result
    always holds exactly `batch_size` entries along the first axis.

    Args:
      label_data: array of labels; examples are indexed along axis 0.
      batch_size: number of labels to return.
      batch_index: index of the batch to extract.

    Returns:
      The selected labels as an int64 array.
    """
    nrof_examples = np.size(label_data, 0)
    start = batch_index * batch_size % nrof_examples
    # Modular indices give the wrap-around directly and keep 1-D labels 1-D.
    indices = np.arange(start, start + batch_size) % nrof_examples
    batch = np.take(label_data, indices, axis=0)
    return batch.astype(np.int64)
def get_batch(image_data, batch_size, batch_index):
    """Return one batch of images, wrapping around at the end of the data.

    The batch starts at (batch_index * batch_size) % nrof_examples. When it
    runs past the last example it continues from the first one, so the result
    always holds exactly `batch_size` examples.

    Args:
      image_data: array of images; examples are indexed along axis 0.
      batch_size: number of examples to return.
      batch_index: index of the batch to extract.

    Returns:
      The selected examples as a float32 array.
    """
    nrof_examples = np.size(image_data, 0)
    start = batch_index * batch_size % nrof_examples
    # Modular indices select the tail of the data followed by its head.
    indices = np.arange(start, start + batch_size) % nrof_examples
    batch = np.take(image_data, indices, axis=0)
    return batch.astype(np.float32)
def get_triplet_batch(triplets, batch_index, batch_size):
    """Assemble a batch from anchor, positive and negative image arrays.

    Args:
      triplets: a 3-tuple (anchors, positives, negatives) of image arrays.
      batch_index: index of the batch to extract from each array.
      batch_size: total batch size; each of the three parts contributes
        int(batch_size / 3) examples.

    Returns:
      The three sub-batches stacked vertically in the order anchor,
      positive, negative.
    """
    ax, px, nx = triplets
    per_role = int(batch_size / 3)
    parts = [get_batch(role_data, per_role, batch_index) for role_data in (ax, px, nx)]
    return np.vstack(parts)
def get_learning_rate_from_file(filename, epoch):
    """Look up the learning rate for an epoch in a schedule file.

    Every non-empty line has the form '<epoch>: <learning rate>'; text after
    '#' is ignored. The value '-' stands for a learning rate of -1. Lines are
    read in order and the value of the last line whose epoch is <= `epoch`
    is returned, including when `epoch` lies beyond the last line.

    Args:
      filename: path of the schedule file.
      epoch: the epoch to look up.

    Returns:
      The learning rate that applies to `epoch`.

    Raises:
      ValueError: if a line cannot be parsed, or if no line applies to `epoch`.
    """
    learning_rate = None
    with open(filename, 'r') as f:
        for raw_line in f:
            # Drop comments and surrounding whitespace; skip blank lines.
            entry = raw_line.split('#', 1)[0].strip()
            if not entry:
                continue
            par = entry.split(':')
            start_epoch = int(par[0])
            value = par[1].strip()
            lr = -1 if value == '-' else float(value)
            if start_epoch > epoch:
                break
            learning_rate = lr
    if learning_rate is None:
        raise ValueError('No learning rate defined for epoch %s in "%s"' % (epoch, filename))
    return learning_rate
class ImageClass():
    """Stores the paths to images for a given class."""

    def __init__(self, name, image_paths):
        # name: the class name; image_paths: list of image paths for the class.
        self.name = name
        self.image_paths = image_paths

    def __str__(self):
        # Rendered as '<name>, <count> images'.
        return self.name + ', %d images' % len(self.image_paths)

    def __len__(self):
        # The length of a class is its number of image paths.
        return len(self.image_paths)
def get_dataset(path, has_class_directories=True):
    """Build a dataset from a directory holding one sub-directory per class.

    Args:
      path: dataset root directory; '~' is expanded.
      has_class_directories: accepted but not used by this function.

    Returns:
      A list of ImageClass objects, one per sub-directory, sorted by
      directory name.
    """
    path_exp = os.path.expanduser(path)
    classes = sorted(entry for entry in os.listdir(path_exp)
                     if os.path.isdir(os.path.join(path_exp, entry)))
    dataset = []
    for class_name in classes:
        facedir = os.path.join(path_exp, class_name)
        dataset.append(ImageClass(class_name, get_image_paths(facedir)))
    return dataset
def get_image_paths(facedir):
    """List the entries of a directory as paths joined with that directory.

    Args:
      facedir: directory to list.

    Returns:
      A list with one path per directory entry, or an empty list when
      `facedir` is not a directory.
    """
    if not os.path.isdir(facedir):
        return []
    return [os.path.join(facedir, img) for img in os.listdir(facedir)]
def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode):
    """Split a dataset into a training set and a test set.

    Args:
      dataset: list of classes (ImageClass objects in 'SPLIT_IMAGES' mode).
      split_ratio: fraction of the classes/images assigned to the test set.
      min_nrof_images_per_class: in 'SPLIT_IMAGES' mode, a class is kept only
        if its training part has at least this many images.
      mode: 'SPLIT_CLASSES' assigns whole classes to one of the two sets;
        'SPLIT_IMAGES' splits the images of each class between both sets.

    Returns:
      A tuple (train_set, test_set).

    Raises:
      ValueError: if `mode` is not one of the two supported values.
    """
    if mode == 'SPLIT_CLASSES':
        nrof_classes = len(dataset)
        class_indices = np.arange(nrof_classes)
        np.random.shuffle(class_indices)
        split = int(round(nrof_classes * (1 - split_ratio)))
        train_set = [dataset[i] for i in class_indices[0:split]]
        # Slice to the end: a stop index of -1 would drop the last class.
        test_set = [dataset[i] for i in class_indices[split:]]
    elif mode == 'SPLIT_IMAGES':
        train_set = []
        test_set = []
        for cls in dataset:
            paths = cls.image_paths
            # NOTE: shuffles the class's own image_paths list in place.
            np.random.shuffle(paths)
            nrof_images_in_class = len(paths)
            split = int(math.floor(nrof_images_in_class * (1 - split_ratio)))
            if split == nrof_images_in_class:
                # Keep at least one image for the test part.
                split = nrof_images_in_class - 1
            if split >= min_nrof_images_per_class and nrof_images_in_class - split >= 1:
                train_set.append(ImageClass(cls.name, paths[:split]))
                test_set.append(ImageClass(cls.name, paths[split:]))
    else:
        raise ValueError('Invalid train/test split mode "%s"' % mode)
    return train_set, test_set
def load_model(model, input_map=None):
    """Load a model into the default graph.

    `model` is either a protobuf file with a frozen graph, or a model
    directory containing a metagraph and a checkpoint file.

    Args:
      model: path to the frozen graph file or to the model directory; '~' is
        expanded.
      input_map: forwarded to the graph import call.
    """
    model_exp = os.path.expanduser(model)

    if os.path.isfile(model_exp):
        # Frozen graph: parse the GraphDef and import it without a name prefix.
        print('Model filename: %s' % model_exp)
        with gfile.FastGFile(model_exp, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, input_map=input_map, name='')
        return

    # Model directory: import the metagraph, then restore the checkpoint
    # into the default session.
    print('Model directory: %s' % model_exp)
    meta_file, ckpt_file = get_model_filenames(model_exp)

    print('Metagraph file: %s' % meta_file)
    print('Checkpoint file: %s' % ckpt_file)

    meta_path = os.path.join(model_exp, meta_file)
    saver = tf.train.import_meta_graph(meta_path, input_map=input_map)
    saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
def get_model_filenames(model_dir):
    """Find the metagraph file and the checkpoint file of a model directory.

    The checkpoint is taken from the directory's checkpoint state when one is
    available. Otherwise the file names are scanned for the pattern
    'model-<name>.ckpt-<step>' and the one with the highest step is used.

    Args:
      model_dir: directory that contains the model files.

    Returns:
      A tuple (meta_file, ckpt_file) of file names relative to `model_dir`.

    Raises:
      ValueError: if the directory does not contain exactly one '.meta' file,
        or if no checkpoint file can be found.
    """
    files = os.listdir(model_dir)
    meta_files = [s for s in files if s.endswith('.meta')]
    if len(meta_files) == 0:
        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
    elif len(meta_files) > 1:
        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
    meta_file = meta_files[0]

    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
        return meta_file, ckpt_file

    # No checkpoint state: fall back to the checkpoint with the highest step.
    ckpt_file = None
    max_step = -1
    for f in files:
        step_match = re.match(r'(^model-[\w\- ]+\.ckpt-(\d+))', f)
        if step_match is None:
            continue
        step = int(step_match.group(2))
        if step > max_step:
            max_step = step
            ckpt_file = step_match.group(1)
    if ckpt_file is None:
        raise ValueError('No checkpoint file found in the model directory (%s)' % model_dir)
    return meta_file, ckpt_file
def distance(embeddings1, embeddings2, distance_metric=0):
    """Compute the row-wise distance between two sets of embeddings.

    Args:
      embeddings1: 2-D array with one embedding per row.
      embeddings2: 2-D array with one embedding per row, paired with
        `embeddings1` row by row.
      distance_metric: 0 for Euclidean distance, 1 for a distance based on
        cosine similarity (arccos(similarity) / pi).

    Returns:
      1-D array with one distance per row.

    Raises:
      ValueError: if `distance_metric` is neither 0 nor 1.
    """
    if distance_metric == 0:
        # Euclidean distance
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sqrt(np.sum(np.square(diff), 1))
    elif distance_metric == 1:
        # Distance based on cosine similarity
        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1)
        norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1)
        # Rounding can push the similarity just outside [-1, 1], where arccos is NaN.
        similarity = np.clip(dot / norm, -1.0, 1.0)
        dist = np.arccos(similarity) / math.pi
    else:
        raise ValueError('Undefined distance metric %s' % distance_metric)
    return dist
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0,
                  subtract_mean=False):
    """Compute ROC points and per-fold accuracy with K-fold cross validation.

    For each fold, the threshold with the best accuracy on the training split
    is selected and used to measure the accuracy on the test split. TPR and
    FPR are measured on the test split for every threshold.

    Args:
      thresholds: sequence of distance thresholds to evaluate.
      embeddings1: 2-D array of embeddings, one row per pair.
      embeddings2: 2-D array of embeddings, same shape as `embeddings1`.
      actual_issame: array of ground-truth "same identity" flags per pair.
      nrof_folds: number of cross-validation folds.
      distance_metric: forwarded to distance().
      subtract_mean: when true, the mean of the fold's training embeddings is
        subtracted from all embeddings before distances are computed.

    Returns:
      A tuple (tpr, fpr, accuracy): TPR and FPR per threshold averaged over
      the folds, and the accuracy of each fold.
    """
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros(nrof_folds)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(np.arange(nrof_pairs))):
        if subtract_mean:
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        dist = distance(embeddings1 - mean, embeddings2 - mean, distance_metric)

        # Find the best threshold for the fold
        acc_train = np.array([
            calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])[2]
            for threshold in thresholds])
        best_threshold = thresholds[np.argmax(acc_train)]

        test_dist = dist[test_set]
        test_issame = actual_issame[test_set]
        for threshold_idx, threshold in enumerate(thresholds):
            fold_tpr, fold_fpr, _, _ = calculate_accuracy(threshold, test_dist, test_issame)
            tprs[fold_idx, threshold_idx] = fold_tpr
            fprs[fold_idx, threshold_idx] = fold_fpr
        accuracy[fold_idx] = calculate_accuracy(best_threshold, test_dist, test_issame)[2]

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy
def calculate_accuracy(threshold, dist, actual_issame):
    """Evaluate a distance threshold against ground-truth pair labels.

    A pair is predicted "same" when its distance is below `threshold`.

    Args:
      threshold: distance threshold.
      dist: array of pair distances.
      actual_issame: boolean array of ground-truth "same" flags.

    Returns:
      A tuple (tpr, fpr, acc, index): true positive rate, false positive rate
      (each 0 when its denominator is zero), accuracy rounded to 8 decimals,
      and the first-axis indices of the misclassified pairs.
    """
    predicted_same = np.less(dist, threshold)
    predicted_diff = np.logical_not(predicted_same)
    actual_diff = np.logical_not(actual_issame)

    tp = np.sum(np.logical_and(predicted_same, actual_issame))
    fp = np.sum(np.logical_and(predicted_same, actual_diff))
    tn = np.sum(np.logical_and(predicted_diff, actual_diff))
    fn = np.sum(np.logical_and(predicted_diff, actual_issame))

    positives = tp + fn
    negatives = fp + tn
    if positives == 0:
        tpr = 0
    else:
        tpr = float(tp) / float(positives)
    if negatives == 0:
        fpr = 0
    else:
        fpr = float(fp) / float(negatives)
    acc = np.round(float(tp + tn) / dist.size, 8)

    # Pairs where prediction and ground truth disagree.
    mismatched = np.logical_xor(predicted_same, actual_issame)
    return tpr, fpr, acc, np.nonzero(mismatched == 1)[0]
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0,
                  subtract_mean=False):
    """Estimate the validation rate at a target false accept rate.

    For each fold, the threshold giving FAR = far_target on the training
    split is found by linear interpolation over `thresholds` (0.0 is used
    when no threshold reaches the target), then VAL and FAR are measured on
    the test split.

    Args:
      thresholds: sequence of distance thresholds to evaluate.
      embeddings1: 2-D array of embeddings, one row per pair.
      embeddings2: 2-D array of embeddings, same shape as `embeddings1`.
      actual_issame: array of ground-truth "same identity" flags per pair.
      far_target: the false accept rate to operate at.
      nrof_folds: number of cross-validation folds.
      distance_metric: forwarded to distance().
      subtract_mean: when true, the mean of the fold's training embeddings is
        subtracted from all embeddings before distances are computed.

    Returns:
      A tuple (val_mean, val_std, far_mean) over the folds.
    """
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(np.arange(nrof_pairs))):
        if subtract_mean:
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        dist = distance(embeddings1 - mean, embeddings2 - mean, distance_metric)

        # Find the threshold that gives FAR = far_target
        far_train = np.array([
            calculate_val_far(candidate, dist[train_set], actual_issame[train_set])[1]
            for candidate in thresholds])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    return np.mean(val), np.std(val), np.mean(far)
def calculate_val_far(threshold, dist, actual_issame):
    """Compute the validation rate and false accept rate for a threshold.

    A pair is accepted when its distance is below `threshold`.

    Args:
      threshold: distance threshold.
      dist: array of pair distances.
      actual_issame: boolean array of ground-truth "same" flags.

    Returns:
      A tuple (val, far): the fraction of same pairs that are accepted and
      the fraction of different pairs that are accepted. A rate is 0.0 when
      there are no pairs of the corresponding kind.
    """
    predict_issame = np.less(dist, threshold)
    actual_isdiff = np.logical_not(actual_issame)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, actual_isdiff))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(actual_isdiff)
    # Guard the denominators the same way calculate_accuracy does.
    val = 0.0 if n_same == 0 else float(true_accept) / float(n_same)
    far = 0.0 if n_diff == 0 else float(false_accept) / float(n_diff)
    return val, far
def store_revision_info(src_path, output_dir, arg_string):
    """Write the arguments, TensorFlow version and git state to a text file.

    Runs 'git rev-parse HEAD' and 'git diff HEAD' in `src_path` and stores
    their output in '<output_dir>/revision_info.txt'. If a git command cannot
    be started, the command line and the OS error text are stored instead.

    Args:
      src_path: directory in which the git commands are run.
      output_dir: directory that receives 'revision_info.txt'.
      arg_string: text written after the 'arguments: ' label.
    """
    def _run_git(cmd):
        # Return the stripped stdout of `cmd`, or an error text if it cannot be started.
        try:
            gitproc = Popen(cmd, stdout=PIPE, cwd=src_path)
            (stdout, _) = gitproc.communicate()
        except OSError as e:
            return ' '.join(cmd) + ': ' + e.strerror
        output = stdout.strip()
        if not isinstance(output, str):
            # Python 3 returns bytes; decode so that '%s' does not write "b'...'".
            output = output.decode('utf-8', 'replace')
        return output

    # Get git hash
    git_hash = _run_git(['git', 'rev-parse', 'HEAD'])
    # Get local changes
    git_diff = _run_git(['git', 'diff', 'HEAD'])

    # Store a text file in the log directory
    rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
    with open(rev_info_filename, "w") as text_file:
        text_file.write('arguments: %s\n--------------------\n' % arg_string)
        text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__)  # @UndefinedVariable
        text_file.write('git hash: %s\n--------------------\n' % git_hash)
        text_file.write('%s' % git_diff)
def list_variables(filename):
    """Return the sorted variable names stored in a checkpoint.

    Args:
      filename: checkpoint path handed to training.NewCheckpointReader.

    Returns:
      Sorted list of the keys of the reader's variable-to-shape map.
    """
    checkpoint_reader = training.NewCheckpointReader(filename)
    shape_by_name = checkpoint_reader.get_variable_to_shape_map()
    return sorted(shape_by_name.keys())
def put_images_on_grid(images, shape=(16, 8)):
    """Arrange a batch of images on a grid with a 3-pixel border.

    Images are placed row by row. Cells beyond the number of images stay zero.
    images.shape[1] is used as the cell size on both axes, so square images
    are presumably expected -- verify against callers.

    Args:
      images: array indexed as [image, row, column, channel].
      shape: (columns, rows) of the grid.

    Returns:
      A float32 array holding the composed grid image.
    """
    nrof_images = images.shape[0]
    img_size = images.shape[1]
    bw = 3
    cols, rows = shape[0], shape[1]
    cell = img_size + bw

    img = np.zeros((rows * cell + bw, cols * cell + bw, 3), np.float32)
    for i in range(rows):
        x_start = i * cell + bw
        for j in range(cols):
            img_index = i * cols + j
            if img_index >= nrof_images:
                break
            y_start = j * cell + bw
            img[x_start:x_start + img_size, y_start:y_start + img_size, :] = images[img_index, :, :, :]
        # Leave the outer loop as well once the images are used up.
        if img_index >= nrof_images:
            break
    return img
def write_arguments_to_file(args, filename):
    """Write the attributes of an object to a text file, one per line.

    Args:
      args: object whose attributes (vars(args)) are written as 'key: value'.
      filename: path of the file to write; it is overwritten.
    """
    with open(filename, 'w') as f:
        f.writelines('%s: %s\n' % (key, str(value)) for key, value in iteritems(vars(args)))