From 2f8fb51bf1c094858082cda6e6d466210d9ca989 Mon Sep 17 00:00:00 2001
From: moiseslodeiro <moises.lodeiro@gmail.com>
Date: Fri, 6 Jul 2018 09:48:10 +0000
Subject: [PATCH 1/2] Added sizeChecker.py to check that all bounding boxes
 are at least 33x33

---
 sizeChecker.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 sizeChecker.py

diff --git a/sizeChecker.py b/sizeChecker.py
new file mode 100644
index 00000000..0d095ced
--- /dev/null
+++ b/sizeChecker.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Check that every bounding box (xmin, ymin, xmax, ymax) in the xml files is at
+least 33px wide and high; --move puts offending files in wrong_data folders.
+Version: 0.1
+Author: Moisés Lodeiro-Santiago @ https://github.com/moiseslodeiro
+"""
+
+from xml.etree import ElementTree
+from termcolor import colored
+from os.path import isdir
+from os import makedirs, rmdir
+from sys import exit
+from glob import glob as xmlFiles
+import shutil
+import argparse
+
+# Directories
+train_directory = './images/train' # It should contain the xml files with bounding boxes
+test_directory = './images/test' # It should contain the xml files with bounding boxes
+MIN_SIZE = 33 # Smallest accepted bounding box width/height, in pixels
+
+# Keep calm and do not touch the rest of the code :3
+parser = argparse.ArgumentParser()
+parser.add_argument("--move", help="Put all wrong xml and images to a wrong_data folder inside each folder", action="store_true")
+args = parser.parse_args()
+
+if not isdir(train_directory) or not isdir(test_directory):
+    print(colored('[!]', 'yellow', attrs=['bold']), colored('The training or test directories do not exist'))
+    exit(1)
+else:
+    print(colored('[Ok]', 'green'), colored('Directories exists'))
+
+everythingWentAsExpected = True
+
+for tree in [train_directory, test_directory]:
+    if args.move and not isdir(tree + '/wrong_data'):
+        makedirs(tree + '/wrong_data')
+
+    for file in xmlFiles(tree + '/*.xml'):
+        xmlFile = ElementTree.parse(file)
+        for box in xmlFile.findall('object/bndbox'):
+            # Read the corners by tag name: getchildren() was removed in Python 3.9 and relied on child order
+            x_value = int(box.findtext('xmax')) - int(box.findtext('xmin'))
+            y_value = int(box.findtext('ymax')) - int(box.findtext('ymin'))
+            if x_value >= MIN_SIZE and y_value >= MIN_SIZE:
+                continue
+            print(colored('[!]', 'red'), 'File {} contains a bounding box smaller than {} in height or width'.format(file, MIN_SIZE))
+            print(colored('xmax - xmin', 'yellow', attrs=['bold']), x_value)
+            print(colored('ymax - ymin', 'yellow', attrs=['bold']), y_value)
+            everythingWentAsExpected = False
+            if args.move:
+                wrongPicture = xmlFile.find('filename')
+                try:
+                    shutil.move(file, tree + '/wrong_data/')
+                    shutil.move(tree + '/' + wrongPicture.text, tree + '/wrong_data/')
+                    print(colored('Files moved to ' + tree + '/wrong_data', 'blue'))
+                except Exception as e:
+                    print(colored(e, 'blue'))
+                break # The xml is no longer in tree, so further boxes would only retry the move
+
+if everythingWentAsExpected:
+    print(colored('[Ok]', 'green'), 'All bounding boxes are equal or larger than {} :-)'.format(MIN_SIZE))
+    for tree in [train_directory, test_directory]:
+        try:
+            if isdir(tree + '/wrong_data'): # The folder is only created by a --move run
+                rmdir(tree + '/wrong_data')
+        except OSError: # rmdir refuses to delete a folder that still has files in it
+            print(colored('[Info]', 'blue'), 'Directory {}/wrong_data was not removed because it contains some files'.format(tree))
+else:
+    print()
+    print(colored('[Error]', 'red'), ' (╯°□°)╯ ┻━┻')

From 1b7dad76d90786704efd0793229fd5e8b31305ba Mon Sep 17 00:00:00 2001
From: moiseslodeiro <moises.lodeiro@gmail.com>
Date: Fri, 6 Jul 2018 09:53:12 +0000
Subject: [PATCH 2/2] Changes in README to include sizeChecker.py as optional
 part after labeling pictures

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index e22cd0b9..6f4f6c57 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,12 @@ Download and install LabelImg, point it to your \images\train directory, and the
 
 LabelImg saves a .xml file containing the label data for each image. These .xml files will be used to generate TFRecords, which are one of the inputs to the TensorFlow trainer. Once you have labeled and saved each image, there will be one .xml file for each image in the \test and \train directories.
 
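+Optionally, run sizeChecker.py to verify that every bounding box is at least 33x33 pixels. With the `--move` flag, any .xml file containing a smaller box is moved, together with its image, into a `wrong_data` folder inside its directory.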
+
+```
+(tensorflow1) C:\tensorflow1\models\research\object_detection> python sizeChecker.py --move
+```
+
 ### 4. Generate Training Data
 With the images labeled, it’s time to generate the TFRecords that serve as input data to the TensorFlow training model. This tutorial uses the xml_to_csv.py and generate_tfrecord.py scripts from [Dat Tran’s Raccoon Detector dataset](https://github.com/datitran/raccoon_dataset), with some slight modifications to work with our directory structure.