You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

54 lines
1.5 KiB

extern crate leptess;
use ocr_json_common::TextBox;
use leptess::{leptonica, tesseract};
use std::env;
use std::path::Path;
/* TODO: preprox here */
fn main() {
let mut ocr_rects = Vec::new();
let file_name = if env::args().count() == 2 {
env::args().nth(1).unwrap()
} else {
panic!("Please enter a target file path")
};
let image_path = Path::new(&file_name);
let mut api = tesseract::TessApi::new(Some("/usr/share/tessdata/"), "eng").unwrap();
let pix = leptonica::pix_read(image_path).unwrap();
api.set_image(&pix);
// detect bounding boxes for words
let boxes = api
.get_component_images(leptess::capi::TessPageIteratorLevel_RIL_WORD, true)
.unwrap();
let mut boxid = 0;
// run OCR on each word bounding box
for b in &boxes {
api.set_rectangle(&b);
let text = api.get_utf8_text().unwrap();
let confi = api.mean_text_conf();
let bref = b.as_ref();
/*
println!(
"[X: {}, Y: {}, W: {}, H: {}]: confidence: {}, text: {}",
bref.x, bref.y, bref.w, bref.h, confi, text
);*/
ocr_rects.push(TextBox {
id: boxid,
hint: text,
confidence: confi as u32,
x: bref.x,
y: bref.y,
height: bref.h as u32,
width: bref.w as u32,
});
boxid += 1;
}
let json = serde_json::to_string(&ocr_rects).unwrap();
println!("{}", json);
}