Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ Download and install LabelImg, point it to your \images\train directory, and the

LabelImg saves a .xml file containing the label data for each image. These .xml files will be used to generate TFRecords, which are one of the inputs to the TensorFlow trainer. Once you have labeled and saved each image, there will be one .xml file for each image in the \test and \train directories.

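You can also verify that every bounding box meets the minimum size (at least 33 pixels in both width and height) by running sizeChecker.py. Pass the optional `--move` flag to move any offending .xml files and their images into a `wrong_data` folder inside each directory: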

```
(tensorflow1) C:\tensorflow1\models\research\object_detection> python sizeChecker.py --move
```

### 4. Generate Training Data
With the images labeled, it’s time to generate the TFRecords that serve as input data to the TensorFlow training model. This tutorial uses the xml_to_csv.py and generate_tfrecord.py scripts from [Dat Tran’s Raccoon Detector dataset](https://github.com/datitran/raccoon_dataset), with some slight modifications to work with our directory structure.

Expand Down
74 changes: 74 additions & 0 deletions sizeChecker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/python3
# -*- coding: utf-8 -*-

"""
This mini program looks for xmin, ymin, xmax and ymax values in order to
check if the figures have the proper size (min 33px)
Version: 0.1
Author: Moisés Lodeiro-Santiago @ https://github.com/moiseslodeiro
"""

from xml.etree import ElementTree
from termcolor import colored
from os.path import isdir
from os import makedirs, rmdir
from sys import exit
from glob import glob as xmlFiles
import shutil
import argparse

# Directories
# Each folder must contain the .xml annotation files (with bounding boxes)
# that are going to be checked.
train_directory = './images/train'
# Fixed: this used to point at './images/train' as well, so the test set was
# never inspected.
test_directory = './images/test'

# Command-line interface: the only option is the --move switch, which is
# False unless it is given on the command line.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--move",
    action="store_true",
    help="Put all wrong xml and images to a wrong_data folder inside each folder",
)
args = parser.parse_args()

# Stop early (exit status 1) unless both configured directories exist.
if isdir(train_directory) and isdir(test_directory):
    print(colored('[Ok]', 'green'), colored('Directories exists'))
else:
    print(colored('[!]', 'yellow', attrs=['bold']), colored('The training or test directories do not exist'))
    exit(1)

# Smallest width/height (in pixels) a bounding box is allowed to have.
minBoxSize = 33

# Becomes False as soon as one undersized bounding box is found; the final
# summary printed by the script reads this flag.
everythingWentAsExpected = True

for tree in [train_directory, test_directory]:
    wrongDataDir = tree + '/wrong_data'
    if args.move and not isdir(wrongDataDir):
        makedirs(wrongDataDir)

    for file in xmlFiles(tree + '/*.xml'):
        xmlFile = ElementTree.parse(file)
        fileIsWrong = False

        for box in xmlFile.findall('object/bndbox'):
            # Read the coordinates by tag name: Element.getchildren() was
            # removed in Python 3.9, and unpacking the children positionally
            # silently depended on their order inside <bndbox>.
            x_value = int(box.findtext('xmax')) - int(box.findtext('xmin'))
            y_value = int(box.findtext('ymax')) - int(box.findtext('ymin'))

            if x_value < minBoxSize or y_value < minBoxSize:
                print(colored('[!]', 'red'), 'File {} contains a bounding box smaller than {} in height or width'.format(file, minBoxSize))
                print(colored('xmax - xmin', 'yellow', attrs=['bold']), x_value)
                print(colored('ymax - ymin', 'yellow', attrs=['bold']), y_value)
                fileIsWrong = True
                everythingWentAsExpected = False

        # Move the annotation and its picture once per file, after every box
        # has been reported. Moving inside the box loop made the second
        # undersized box of a file fail because the file was already gone.
        if fileIsWrong and args.move:
            wrongPicture = xmlFile.findtext('filename')
            try:
                shutil.move(file, wrongDataDir + '/')
                # An annotation without a <filename> entry has no picture to move.
                if wrongPicture:
                    shutil.move(tree + '/' + wrongPicture, wrongDataDir + '/')
                print(colored('Files moved to ' + wrongDataDir, 'blue'))
            except (OSError, shutil.Error) as e:
                # Best effort: report the problem and keep checking other files.
                print(colored(str(e), 'blue'))

# Final summary. On success, also clean up any wrong_data folders.
if everythingWentAsExpected:
    # The check rejects anything below 33 pixels, so 33 is the real minimum.
    print(colored('[Ok]', 'green'), 'All bounding boxes are equal or larger than 33 :-)')
    # Handle each directory on its own so a failure on one does not skip the
    # other, and ignore folders that do not exist (e.g. the script was run
    # without --move) instead of reporting them as "containing files".
    for directory in (train_directory, test_directory):
        wrongDataDir = directory + '/wrong_data'
        if not isdir(wrongDataDir):
            continue
        try:
            rmdir(wrongDataDir)
        except OSError:
            # os.rmdir only removes empty directories.
            print(colored('[Info]', 'blue'), 'Directory {} was not removed because it contains some files'.format(wrongDataDir))

else:
    print()
    print(colored('[Error]', 'red'), ' (╯°□°)╯ ┻━┻')