Show OCR progress
The OCR process can take a while, so monitoring its progress can be helpful. Here is a simple example of how to do that in a console application.
Code
To set the terminal color you need to download Termcolor, a header-only C++ library:
curl -O https://raw.githubusercontent.com/ikalnytskyi/termcolor/master/include/termcolor/termcolor.hpp
The example program looks like this:
tesseract_progress.cpp
#include <atomic>
#include <chrono>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/ocrclass.h>
#include "termcolor.hpp"
/*
Toggle the visibility of the console cursor.
  show == true  -> cursor is made visible
  show == false -> cursor is hidden
MSVC builds go through the Win32 console API; every other build writes the
corresponding terminal escape sequence to stdout.
Source: https://github.com/p-ranav/indicators
*/
static inline void show_console_cursor(bool const show) {
#if !defined(_MSC_VER)
  // "\033[?25h" shows the cursor, "\033[?25l" hides it.
  const char *const escape_sequence = show ? "\033[?25h" : "\033[?25l";
  std::fputs(escape_sequence, stdout);
#else
  CONSOLE_CURSOR_INFO info;
  HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);
  // Read the current settings first so only the visibility flag is changed.
  GetConsoleCursorInfo(console, &info);
  info.bVisible = show;
  SetConsoleCursorInfo(console, &info);
#endif
}
/*
Simple progress bar

Redraws a 50-cell bar followed by the percentage on the current console line
(the leading "\r" moves back to the start of the line, so repeated calls
overwrite the previous bar instead of scrolling).

progress: percentage to display. Values outside 0..100 are clamped, because an
          out-of-range value would otherwise produce a negative std::string
          length and throw.
*/
void show_progress(int progress) {
  constexpr int kBarWidth = 100 / 2;  // one cell per 2 percent
  const int percent = progress < 0 ? 0 : (progress > 100 ? 100 : progress);
  const int filled = percent / 2;

  std::cout << termcolor::bold << termcolor::blue;  // termcolor.hpp
  // Byte 254 is intended as a filled-square glyph; NOTE(review): that holds
  // for the Windows OEM code page (CP437), other encodings may render it
  // differently.
  std::cout << "\r" << "[" << std::string(filled, static_cast<char>(254u));
  std::cout << std::string(kBarWidth - filled, ' ') << "]";
  std::cout << percent << "%";
  std::cout << termcolor::reset;  // termcolor.hpp
  // Flush explicitly: the line has no trailing newline to trigger a flush.
  std::cout.flush();
}
/*
Main loop

Runs the OCR of a hard-coded input image in one thread while a second thread
polls the Tesseract monitor structure and redraws the progress bar. The hOCR
output is written to a hard-coded output file.

Returns 0 when the image was read, recognized and written successfully,
1 otherwise.
*/
int main() {
  const char *filein = "f:/Project/tests/speccoll.png";
  const char *fileout = "myfile.html";
  // suppress leptonica error messages
  setMsgSeverity(L_SEVERITY_NONE);

  // State shared by the two threads. Both threads are joined before main
  // returns, so capturing these locals by reference is safe.
  ETEXT_DESC monitor{};
  std::atomic<bool> ocr_done(false);  // set by the OCR thread when it is finished
  bool ocr_ok = false;                // written by the OCR thread, read after join()

  show_console_cursor(false);

  // OCR process
  auto job_ocr = [&monitor, &ocr_done, &ocr_ok, filein, fileout]() {
    tesseract::TessBaseAPI api;
    // suppress tesseract debug messages to avoid progress interruption
#if defined(_WIN32)
    api.SetVariable("debug_file", "nul");
#else
    api.SetVariable("debug_file", "/dev/null");
#endif
    Pix *image = pixRead(filein);
    // Init() and Recognize() report success with 0.
    if (image != nullptr && api.Init(nullptr, "eng") == 0) {
      api.SetPageSegMode(tesseract::PSM_AUTO);
      api.SetImage(image);
      if (api.Recognize(&monitor) == 0) {
        // Write html output of OCR to file
        char *hocr = api.GetHOCRText(0);
        if (hocr != nullptr) {
          std::ofstream myfile(fileout);
          if (myfile.is_open()) {
            myfile << hocr;
            ocr_ok = myfile.good();
          }
          // The returned buffer is owned by the caller and must be released
          // with delete[].
          delete[] hocr;
        }
      }
    }
    if (image != nullptr) {
      pixDestroy(&image);
    }
    api.End();
    // Signal completion on every path (success or failure) so the monitor
    // thread can never wait forever.
    ocr_done.store(true);
  };

  // Monitoring process
  auto job_monitor = [&monitor, &ocr_done]() {
    // NOTE(review): `progress` is a plain field updated by the OCR thread, so
    // this polling read is unsynchronized; it is used for display only.
    while (!ocr_done.load()) {
      show_progress(monitor.progress);
      // Poll at a modest rate instead of spinning on the console.
      std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
    // Final redraw so the last value reported by the OCR run is shown.
    show_progress(monitor.progress);
  };

  std::thread thread1(job_ocr);
  std::thread thread2(job_monitor);
  thread1.join();
  thread2.join();

  // Finish the progress line so following output starts on a fresh line.
  std::cout << "\n";
  show_console_cursor(true);

  if (!ocr_ok) {
    std::cerr << "OCR of '" << filein << "' failed or '" << fileout
              << "' could not be written\n";
    return 1;
  }
  return 0;
}
Build
"c:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" x64
SET PATH=%PATH%;f:\win64\bin
SET TESSDATA_PREFIX=f:\Project\tessdata
cl /EHsc /Fe tesseract_progress.cpp /std:c11 /If:\win64\include /link /LIBPATH:f:/win64/lib tesseract41.lib leptonica-1.81.0.lib /machine:x64
PS: With Tesseract 5 you need to use tesseract::ETEXT_DESC instead of ETEXT_DESC.
Comments
Post a Comment