From df2638b5bf2c4abd2d4aacc78d0cf3a648af0045 Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Sat, 29 Jul 2017 21:43:40 +0100
Subject: [PATCH] added more detailed scores

---
 sciencebeam_gym/trainer/evaluator.py              | 33 +++++++++++++
 .../trainer/models/pix2pix/evaluate.py            | 46 ++++++++++++++++---
 .../trainer/models/pix2pix/pix2pix_model.py       |  2 +-
 3 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/sciencebeam_gym/trainer/evaluator.py b/sciencebeam_gym/trainer/evaluator.py
index 230f940..978b1c8 100644
--- a/sciencebeam_gym/trainer/evaluator.py
+++ b/sciencebeam_gym/trainer/evaluator.py
@@ -56,6 +56,20 @@ def save_file(filename, data):
   with FileIO(filename, 'wb') as f:
     f.write(data)
 
+def precision_from_tp_fp(tp, fp):
+  return tp / (tp + fp)
+
+def recall_from_tp_fn(tp, fn):
+  return tp / (tp + fn)
+
+def f1_from_precision_recall(precision, recall):
+  return 2 * precision * recall / (precision + recall)
+
+def f1_from_tp_fp_fn(tp, fp, fn):
+  return f1_from_precision_recall(
+    precision_from_tp_fp(tp, fp),
+    recall_from_tp_fn(tp, fn)
+  )
 
 IMAGE_PREFIX = 'image_'
 
@@ -106,6 +120,10 @@ class Evaluator(object):
 
   def _add_evaluation_result_fetches(self, fetches, tensors):
     if tensors.evaluation_result:
+      fetches['tp'] = tensors.evaluation_result.tp
+      fetches['fp'] = tensors.evaluation_result.fp
+      fetches['fn'] = tensors.evaluation_result.fn
+      fetches['tn'] = tensors.evaluation_result.tn
       fetches['accuracy'] = tensors.evaluation_result.accuracy
       fetches['micro_f1'] = tensors.evaluation_result.micro_f1
     return fetches
@@ -114,6 +132,10 @@ class Evaluator(object):
     if accumulated_results is None:
       accumulated_results = []
     accumulated_results.append({
+      'tp': results['tp'],
+      'fp': results['fp'],
+      'fn': results['fn'],
+      'tn': results['tn'],
       'accuracy': results['accuracy'],
       'micro_f1': results['micro_f1'],
       'count': self.batch_size,
@@ -129,10 +151,21 @@ class Evaluator(object):
         global_step
       )
     )
+    tp = np.sum([r['tp'] for r in accumulated_results], axis=0)
+    fp = np.sum([r['fp'] for r in accumulated_results], axis=0)
+    fn = np.sum([r['fn'] for r in accumulated_results], axis=0)
+    tn = np.sum([r['tn'] for r in accumulated_results], axis=0)
+    f1 = f1_from_tp_fp_fn(tp.astype(float), fp, fn)
     scores_str = json.dumps({
       'global_step': global_step,
       'accuracy': float(np.mean([r['accuracy'] for r in accumulated_results])),
+      'tp': tp.tolist(),
+      'fp': fp.tolist(),
+      'fn': fn.tolist(),
+      'tn': tn.tolist(),
+      'f1': f1.tolist(),
       'micro_f1': float(np.mean([r['micro_f1'] for r in accumulated_results])),
+      'macro_f1': float(np.mean(f1)),
       'count': sum([r['count'] for r in accumulated_results])
     }, indent=2)
     with FileIO(scores_file, 'w') as f:
diff --git a/sciencebeam_gym/trainer/models/pix2pix/evaluate.py b/sciencebeam_gym/trainer/models/pix2pix/evaluate.py
index 7f1efe6..fc1ea75 100644
--- a/sciencebeam_gym/trainer/models/pix2pix/evaluate.py
+++ b/sciencebeam_gym/trainer/models/pix2pix/evaluate.py
@@ -11,10 +11,17 @@ EvaluationTensors = collections.namedtuple(
     "tp",
     "fp",
     "fn",
+    "tn",
+    "precision",
+    "recall",
+    "f1",
     "accuracy",
     "micro_precision",
     "micro_recall",
-    "micro_f1"
+    "micro_f1",
+    "macro_precision",
+    "macro_recall",
+    "macro_f1"
   ]
 )
 
@@ -24,28 +31,52 @@ def output_probabilities_to_class(outputs):
 def to_1d_vector(tensor):
   return tf.reshape(tensor, [-1])
 
+def precision_from_tp_fp(tp, fp):
+  return tp / (tp + fp)
+
+def recall_from_tp_fn(tp, fn):
+  return tp / (tp + fn)
+
+def f1_from_precision_recall(precision, recall):
+  return 2 * precision * recall / (precision + recall)
+
 def _evaluate_from_confusion_matrix(confusion, accuracy=None):
+  total = tf.reduce_sum(confusion)
   actual_p = tf.reduce_sum(confusion, axis=0)
   pred_p = tf.reduce_sum(confusion, axis=1)
   tp = tf.diag_part(confusion)
   fp = actual_p - tp
   fn = pred_p - tp
+  tn = total - tp - fp - fn
+  precision = precision_from_tp_fp(tp, fp)
+  recall = recall_from_tp_fn(tp, fn)
+  f1 = f1_from_precision_recall(precision, recall)
   total_tp = tf.reduce_sum(tp)
   total_fp = tf.reduce_sum(fp)
   total_fn = tf.reduce_sum(fn)
   # Note: micro averages (with equal weights) will lead to the same precision, recall, f1
-  micro_precision = total_tp / (total_tp + total_fp)
-  micro_recall = total_tp / (total_tp + total_fn)
-  micro_f1 = 2 * micro_precision * micro_recall / (micro_precision + micro_recall)
+  micro_precision = precision_from_tp_fp(total_tp, total_fp)
+  micro_recall = recall_from_tp_fn(total_tp, total_fn)
+  micro_f1 = f1_from_precision_recall(micro_precision, micro_recall)
+  macro_precision = tf.reduce_sum(precision)
+  macro_recall = tf.reduce_sum(recall)
+  macro_f1 = tf.reduce_sum(f1)
   return EvaluationTensors(
     confusion_matrix=confusion,
     tp=tp,
     fp=fp,
     fn=fn,
+    tn=tn,
+    precision=precision,
+    recall=recall,
+    f1=f1,
     accuracy=accuracy,
     micro_precision=micro_precision,
     micro_recall=micro_recall,
-    micro_f1=micro_f1
+    micro_f1=micro_f1,
+    macro_precision=macro_precision,
+    macro_recall=macro_recall,
+    macro_f1=macro_f1
   )
 
 def evaluate_predictions(labels, predictions, n_classes, has_unknown_class=False):
@@ -80,8 +111,11 @@ def evaluate_separate_channels(targets, outputs, has_unknown_class=False):
   )
 
-def evaluation_summary(evaluation_tensors):
+def evaluation_summary(evaluation_tensors, layer_labels):
   tf.summary.scalar("micro_precision", evaluation_tensors.micro_precision)
   tf.summary.scalar("micro_recall", evaluation_tensors.micro_recall)
   tf.summary.scalar("micro_f1", evaluation_tensors.micro_f1)
+  tf.summary.scalar("macro_f1", evaluation_tensors.macro_f1)
   tf.summary.scalar("accuracy", evaluation_tensors.accuracy)
+  for i, layer_label in enumerate(layer_labels):
+    tf.summary.scalar("f1_{}_{}".format(i, layer_label), evaluation_tensors.f1[i])
 
diff --git a/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py b/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py
index d5d32d0..a94615f 100644
--- a/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py
+++ b/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py
@@ -342,7 +342,7 @@ class Model(object):
         has_unknown_class=self.use_unknown_class
       )
       tensors.evaluation_result = evaluation_result
-      evaluation_summary(evaluation_result)
+      evaluation_summary(evaluation_result, self.dimension_labels)
 
     tensors.global_step = pix2pix_model.global_step
     tensors.train = pix2pix_model.train
--
GitLab
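
As a quick sanity check on the scores introduced above, the same quantities can be reproduced outside TensorFlow with plain NumPy. This is only an illustrative sketch: the 3-class confusion matrix is made up, the helper names mirror the ones added by the patch, and macro F1 is taken as the unweighted mean of the per-class F1 values, matching the 'macro_f1' entry written to the scores file in evaluator.py.

import numpy as np

def precision_from_tp_fp(tp, fp):
  return tp / (tp + fp)

def recall_from_tp_fn(tp, fn):
  return tp / (tp + fn)

def f1_from_precision_recall(precision, recall):
  return 2 * precision * recall / (precision + recall)

# Hypothetical 3-class confusion matrix; rows are true labels and columns are
# predictions (the convention used by tf.confusion_matrix).
confusion = np.array([
  [5., 1., 0.],
  [2., 7., 1.],
  [0., 1., 4.]
])

total = confusion.sum()
tp = np.diag(confusion)
fp = confusion.sum(axis=0) - tp  # predicted as the class, but actually another class
fn = confusion.sum(axis=1) - tp  # belongs to the class, but predicted as another class
tn = total - tp - fp - fn

# per-class scores, one value per class / output channel
precision = precision_from_tp_fp(tp, fp)
recall = recall_from_tp_fn(tp, fn)
f1 = f1_from_precision_recall(precision, recall)

# micro scores pool the counts over all classes first; as the note in
# evaluate.py says, the pooled precision, recall and F1 then coincide
micro_f1 = f1_from_precision_recall(
  precision_from_tp_fp(tp.sum(), fp.sum()),
  recall_from_tp_fn(tp.sum(), fn.sum())
)

# macro F1: unweighted mean of the per-class F1 values
macro_f1 = float(np.mean(f1))

print('per-class f1:', f1)
print('micro f1:', micro_f1, 'macro f1:', macro_f1)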