Tait Hoyem 2 years ago
parent 076caf2e45
commit 25edddfe67

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

@ -1,332 +0,0 @@
Found 166 textline image components.
BorrowedBox(0x5649038d9ed0), confidence: 34, text: ldeas | talt.tech
BorrowedBox(0x5649038da200), confidence: 10, text: .
BorrowedBox(0x5649038da0e0), confidence: 74, text: Home | tait.tech
BorrowedBox(0x5649038da140), confidence: 10, text: .
BorrowedBox(0x5649038da750), confidence: 73, text: O lait Hoyem - Dashboard
BorrowedBox(0x5649038d9fd0), confidence: 10, text: .
BorrowedBox(0x5649038d9e50), confidence: 84, text: () Problem loading page
BorrowedBox(0x5649038d9e70), confidence: 10, text: .
BorrowedBox(0x5649038d9e90), confidence: 81, text: () leptess/low_level_ocr_v
BorrowedBox(0x5649038da040), confidence: 10, text: .
BorrowedBox(0x5649038da060), confidence: 73, text: &) Transcript of the Proclar
BorrowedBox(0x5649038da080), confidence: 13, text: .
BorrowedBox(0x564901555e80), confidence: 46, text: -+
BorrowedBox(0x564901555ea0), confidence: 22, text: b4
BorrowedBox(0x564901555ec0), confidence: 51, text: Whereas.
BorrowedBox(0x5649038d0d10), confidence: 23, text: Ol
BorrowedBox(0x5649038d0d30), confidence: 83, text: the
BorrowedBox(0x5649038d0d50), confidence: 44, text: twenty-secona
BorrowedBox(0x5649038d9bd0), confidence: 53, text: aay
BorrowedBox(0x5649038d9bf0), confidence: 82, text: Of
BorrowedBox(0x5649038d9c10), confidence: 88, text: September,
BorrowedBox(0x5649038d9c30), confidence: 78, text: N
BorrowedBox(0x5649038d9cb0), confidence: 89, text: the
BorrowedBox(0x5649038d9cd0), confidence: 74, text: yedr
BorrowedBox(0x5649038d9cf0), confidence: 82, text: Of
BorrowedBox(0x5649038d9d10), confidence: 0, text:
BorrowedBox(0x56490155a6a0), confidence: 57, text: Our
BorrowedBox(0x56490155a6c0), confidence: 65, text: L Oro
BorrowedBox(0x56490155a6e0), confidence: 24, text: Orie
BorrowedBox(0x56490155a700), confidence: 5, text: thousanao
BorrowedBox(0x56490155a720), confidence: 50, text: elgnt
BorrowedBox(0x56490395a180), confidence: 44, text: Nnunareo
BorrowedBox(0x56490395a1a0), confidence: 54, text: =1ale
BorrowedBox(0x56490395a1c0), confidence: 38, text: SIXty-two,
BorrowedBox(0x56490395a1e0), confidence: 82, text: d
BorrowedBox(0x56490395a200), confidence: 89, text: proclamation
BorrowedBox(0x5649038d9d90), confidence: 62, text: Was
BorrowedBox(0x5649038d9db0), confidence: 0, text: ISSueq
BorrowedBox(0x5649038d9dd0), confidence: 68, text: Dy
BorrowedBox(0x5649038d9df0), confidence: 89, text: the
BorrowedBox(0x5649038d9e10), confidence: 14, text: Presiaent
BorrowedBox(0x5649038d1d70), confidence: 82, text: Of
BorrowedBox(0x5649038d1d90), confidence: 83, text: the
BorrowedBox(0x5649038d1db0), confidence: 41, text: Lniteao
BorrowedBox(0x5649038d1dd0), confidence: 80, text: States.
BorrowedBox(0x5649038d1df0), confidence: 89, text: containing,
BorrowedBox(0x5649038d1e10), confidence: 37, text: dlTOng
BorrowedBox(0x5649038d1e30), confidence: 86, text: other
BorrowedBox(0x564903955b30), confidence: 90, text: things,
BorrowedBox(0x564903955b50), confidence: 89, text: the
BorrowedBox(0x564903955b70), confidence: 85, text: following,
BorrowedBox(0x564903955b90), confidence: 61, text: 10
BorrowedBox(0x564903955bb0), confidence: 33, text: WITt:
BorrowedBox(0x564903955bd0), confidence: 68, text: "1 hat
BorrowedBox(0x564903955bf0), confidence: 64, text: Ol
BorrowedBox(0x564903955c10), confidence: 87, text: the
BorrowedBox(0x56490284df10), confidence: 63, text: first
BorrowedBox(0x56490284df30), confidence: 43, text: aay
BorrowedBox(0x56490284df50), confidence: 89, text: Of
BorrowedBox(0x56490284df70), confidence: 90, text: January,
BorrowedBox(0x56490284df90), confidence: 65, text: N
BorrowedBox(0x56490284dfb0), confidence: 88, text: the
BorrowedBox(0x56490284dfd0), confidence: 59, text: yedar
BorrowedBox(0x56490284dff0), confidence: 82, text: Of
BorrowedBox(0x564903961e20), confidence: 86, text: Our
BorrowedBox(0x564903961e40), confidence: 44, text: | Orao
BorrowedBox(0x564903961e60), confidence: 66, text: Ore
BorrowedBox(0x564903961e80), confidence: 22, text: thousana
BorrowedBox(0x564903961ea0), confidence: 49, text: elgnht
BorrowedBox(0x564903961ec0), confidence: 33, text: Nnunareo
BorrowedBox(0x564903961ee0), confidence: 58, text: ana
BorrowedBox(0x564903961f00), confidence: 34, text: Sixty-three,
BorrowedBox(0x564903961f20), confidence: 49, text: ||
BorrowedBox(0x56490394b740), confidence: 29, text: PEIrSOns
BorrowedBox(0x56490394b760), confidence: 56, text: nelo
BorrowedBox(0x56490394b780), confidence: 63, text: as
BorrowedBox(0x56490394b7a0), confidence: 72, text: glaves
BorrowedBox(0x56490394b7c0), confidence: 41, text: within
BorrowedBox(0x56490394b7e0), confidence: 40, text: dally
BorrowedBox(0x56490394b800), confidence: 88, text: State
BorrowedBox(0x56490394b820), confidence: 62, text: OFr
BorrowedBox(0x56490394b840), confidence: 36, text: designatead
BorrowedBox(0x5649027f6300), confidence: 90, text: part
BorrowedBox(0x5649027f6320), confidence: 89, text: Of
BorrowedBox(0x5649027f6340), confidence: 67, text: A
BorrowedBox(0x5649027f6360), confidence: 88, text: State,
BorrowedBox(0x5649027f6380), confidence: 84, text: the
BorrowedBox(0x5649027f63a0), confidence: 89, text: people
BorrowedBox(0x5649027f63c0), confidence: 88, text: whereof
BorrowedBox(0x5649027f63e0), confidence: 59, text: snall
BorrowedBox(0x5649027f6400), confidence: 89, text: then
BorrowedBox(0x5649027f6420), confidence: 55, text: De
BorrowedBox(0x5649027f4d00), confidence: 78, text: N
BorrowedBox(0x5649027f4d20), confidence: 87, text: rebellion
BorrowedBox(0x5649027f4d40), confidence: 82, text: against
BorrowedBox(0x5649027f4d60), confidence: 89, text: the
BorrowedBox(0x5649027f4d80), confidence: 40, text: Lniteao
BorrowedBox(0x5649027f4da0), confidence: 40, text: States.
BorrowedBox(0x5649027f4dc0), confidence: 76, text: snall
BorrowedBox(0x5649027f4de0), confidence: 2, text: De
BorrowedBox(0x5649027f4e00), confidence: 87, text: then,
BorrowedBox(0x5649027f4e20), confidence: 37, text: thenceforwaraq.
BorrowedBox(0x5649038da7c0), confidence: 0, text: N0
BorrowedBox(0x5649038da7e0), confidence: 87, text: forever
BorrowedBox(0x5649038da800), confidence: 61, text: free:
BorrowedBox(0x5649038da820), confidence: 54, text: =1ale
BorrowedBox(0x5649038da840), confidence: 83, text: the
BorrowedBox(0x5649038da860), confidence: 10, text: = xecutive
BorrowedBox(0x5649038da880), confidence: 85, text: (sovernment
BorrowedBox(0x5649038da8a0), confidence: 87, text: Of
BorrowedBox(0x5649038da8c0), confidence: 88, text: the
BorrowedBox(0x5649038da8e0), confidence: 33, text: Lniteao
BorrowedBox(0x5649027f6180), confidence: 90, text: States,
BorrowedBox(0x5649027f61a0), confidence: 0, text: including
BorrowedBox(0x5649027f61c0), confidence: 89, text: the
BorrowedBox(0x5649027f61e0), confidence: 88, text: military
BorrowedBox(0x5649027f6200), confidence: 58, text: ana
BorrowedBox(0x5649027f6220), confidence: 30, text: Nnaval
BorrowedBox(0x5649027f6240), confidence: 73, text: authority
BorrowedBox(0x5649027f6260), confidence: 57, text: thereof,
BorrowedBox(0x5649027f6280), confidence: 25, text: will
BorrowedBox(0x5649027f62a0), confidence: 90, text: recognize
BorrowedBox(0x5649038cef00), confidence: 54, text: =1ale
BorrowedBox(0x5649038cef20), confidence: 58, text: maintain
BorrowedBox(0x5649038cef40), confidence: 79, text: the
BorrowedBox(0x5649038cef60), confidence: 80, text: freedom
BorrowedBox(0x5649038cef80), confidence: 89, text: Of
BorrowedBox(0x5649038cefa0), confidence: 36, text: sSuch
BorrowedBox(0x5649038cefc0), confidence: 5, text: PEISOnS,
BorrowedBox(0x5649038cefe0), confidence: 54, text: =1ale
BorrowedBox(0x5649038cf000), confidence: 28, text: Wil
BorrowedBox(0x5649038cf020), confidence: 79, text: 00
BorrowedBox(0x5649038cf040), confidence: 77, text: 10
BorrowedBox(0x5649015589d0), confidence: 20, text: aACT
BorrowedBox(0x5649015589f0), confidence: 58, text: OFr
BorrowedBox(0x564901558a10), confidence: 38, text: aC1S
BorrowedBox(0x564901558a30), confidence: 81, text: 10
BorrowedBox(0x564901558a50), confidence: 27, text: [EPIresSS
BorrowedBox(0x564901558a70), confidence: 36, text: sSuch
BorrowedBox(0x564901558a90), confidence: 0, text: PEISOnS,
BorrowedBox(0x564901558ab0), confidence: 75, text: OFr
BorrowedBox(0x564901558ad0), confidence: 85, text: ally
BorrowedBox(0x564901558af0), confidence: 87, text: Of
BorrowedBox(0x564901558b10), confidence: 45, text: them.,
BorrowedBox(0x564903961530), confidence: 78, text: N
BorrowedBox(0x564903961550), confidence: 74, text: dlly
BorrowedBox(0x564903961570), confidence: 75, text: efforts
BorrowedBox(0x564903961590), confidence: 90, text: they
BorrowedBox(0x5649039615b0), confidence: 41, text: [T1dy
BorrowedBox(0x5649039615d0), confidence: 86, text: make
BorrowedBox(0x5649039615f0), confidence: 91, text: for
BorrowedBox(0x564903961610), confidence: 69, text: thelr
BorrowedBox(0x564903961630), confidence: 86, text: actual
BorrowedBox(0x564903961650), confidence: 18, text: freeadom.
BorrowedBox(0x564903961670), confidence: 60, text: T hat
BorrowedBox(0x56490390b0c0), confidence: 20, text: tho
BorrowedBox(0x56490390b0e0), confidence: 10, text: —vocritive
BorrowedBox(0x56490390b100), confidence: 53, text: will
BorrowedBox(0x56490390b120), confidence: 42, text: (YY)
BorrowedBox(0x56490390b140), confidence: 16, text: tho
BorrowedBox(0x56490390b160), confidence: 57, text: firet
BorrowedBox(0x56490390b180), confidence: 21, text: N \/
BorrowedBox(0x56490390b1a0), confidence: 69, text: N1
BorrowedBox(0x56490390b1c0), confidence: 15, text: 1aniiarv
BorrowedBox(0x56490390b1e0), confidence: 51, text: atoracain
BorrowedBox(0x56490390b200), confidence: 64, text: N\/

@ -1,12 +1,22 @@
extern crate leptess;
use ocr_json_common::TextBox;
use leptess::{leptonica, tesseract};
use std::env;
use std::path::Path;
fn main() {
let mut api = tesseract::TessApi::new(None, "eng").unwrap();
/* TODO: preprocessing goes here */
let pix = leptonica::pix_read(Path::new("./test.png")).unwrap();
fn main() {
let mut ocr_rects = Vec::new();
let file_name = if env::args().count() == 2 {
env::args().nth(1).unwrap()
} else {
panic!("Please enter a target file path")
};
let image_path = Path::new(&file_name);
let mut api = tesseract::TessApi::new(Some("/usr/share/tessdata/"), "eng").unwrap();
let pix = leptonica::pix_read(image_path).unwrap();
api.set_image(&pix);
// detect bounding boxes for words
@ -14,13 +24,30 @@ fn main() {
.get_component_images(leptess::capi::TessPageIteratorLevel_RIL_WORD, true)
.unwrap();
println!("Found {} textline image components.", boxes.get_n());
let mut boxid = 0;
// run OCR on each word bounding box
for b in &boxes {
api.set_rectangle(&b);
let text = api.get_utf8_text().unwrap();
let confi = api.mean_text_conf();
println!("{:?}, confidence: {}, text: {}", b, confi, text);
let bref = b.as_ref();
/*
println!(
"[X: {}, Y: {}, W: {}, H: {}]: confidence: {}, text: {}",
bref.x, bref.y, bref.w, bref.h, confi, text
);*/
ocr_rects.push(TextBox {
id: boxid,
hint: text,
confidence: confi as u32,
x: bref.x,
y: bref.y,
height: bref.h as u32,
width: bref.w as u32,
});
boxid += 1;
}
let json = serde_json::to_string(&ocr_rects).unwrap();
println!("{}", json);
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 246 KiB

Loading…
Cancel
Save