Visualize Tesseract Box File

 A simple Python script (requiring only PIL) for visualising Tesseract box files.


#!/usr/bin/env python

"""
 box_visualisation.py
"""

import sys
import csv
import pathlib

from PIL import Image, ImageDraw


def main(image_name):
    """Draw the boxes of a Tesseract box file onto the matching image.

    The box file is looked up next to the image, using the image's name with
    a ``.box`` suffix.  Every box is outlined in red; the result is saved as
    ``<stem>_boxed<suffix>`` in the current working directory and then
    displayed with ``Image.show()``.

    Args:
        image_name: Path of the image the box file belongs to.
    """
    image_path = pathlib.Path(image_name)
    box_path = image_path.with_suffix('.box')

    # Read the box file inside a context manager so the handle is closed.
    # Tesseract box files are UTF-8 text.  QUOTE_NONE stops the csv module
    # from treating a '"' symbol as the start of a quoted field, and rows
    # with fewer than five fields (e.g. blank lines) carry no coordinates.
    with open(box_path, 'r', encoding='utf-8', newline='') as box_file:
        rows = csv.reader(box_file, delimiter=' ', quoting=csv.QUOTE_NONE)
        boxa = [row for row in rows if len(row) >= 5]

    with Image.open(image_name) as im:
        # Draw on an RGB(A) canvas so the red outline colour is valid even
        # when the source image is bilevel, greyscale or palette based.
        canvas = im if im.mode in ('RGB', 'RGBA') else im.convert('RGB')
        hi = canvas.size[1]
        draw = ImageDraw.Draw(canvas)
        for box in boxa:
            # <symbol> <left> <bottom> <right> <top> <page>
            left, bottom, right, top = (int(value) for value in box[1:5])
            # Box files measure y from the bottom edge, PIL from the top, so
            # the y axis is flipped.  Sorting guarantees x0 <= x1 and
            # y0 <= y1, which ImageDraw.rectangle requires.
            x0, x1 = sorted((left, right))
            y0, y1 = sorted((hi - top, hi - bottom))
            draw.rectangle([(x0, y0), (x1, y1)], fill=None,
                           outline=(255, 0, 0, 255), width=1)
        # Save and display while the source image is still open.
        canvas.save(image_path.stem + "_boxed" + image_path.suffix)
        canvas.show()

if __name__ == '__main__':
    # Exactly one positional argument (the image file) is needed; when it is
    # missing, print the usage line and stop before doing any work.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(f"Usage:\n\t{sys.argv[0]} image_name")
        sys.exit()
    # Any further arguments are ignored; only the first names the image.
    main(cli_args[0])
    
Do not forget to set TESSDATA_PREFIX. On Windows:
set TESSDATA_PREFIX=f:\Project\tessdata
or on Linux:
export TESSDATA_PREFIX=/usr/share/tesseract/tessdata
You can create your testing box file with:
tesseract eng.train.exp1.png eng.train.exp1 -l eng --oem 0 makebox
And then visualize it with:
python box_visualisation.py eng.train.exp1.png

Comments

Popular posts from this blog

Tesseract LSTM training (aka Makefile training)