commit a35b34a93ddb65914bff75e479e7f7ccd03500cd Author: Tait Hoyem Date: Tue Dec 7 10:37:13 2021 -0700 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..5867c59 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,434 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "bindgen" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da379dbebc0b76ef63ca68d8fc6e71c0f13e59432e0987e508c1820e6ab5239" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "clap", + "env_logger", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "which", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cexpr" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa66045b9cb23c2e9c1520732030608b02ee07e5cfaa5a521ec15ded7fa24c90" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "leptess" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1574614507481dca4c64e4acc20dfc19f1e432295abae08b1741a6760998aeca" +dependencies = [ + "tesseract-plumbing", + "thiserror", +] + +[[package]] +name = "leptonica-plumbing" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d9c9a9c02ff523c808960bab7bbe64a93201e7a86942f276429ac43306a87ed" +dependencies = [ + "leptonica-sys", + "thiserror", +] + +[[package]] +name = "leptonica-sys" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c443a458c294a73785deab60047336251e5775c6be55c78cc8bcdee861e584d" +dependencies = [ + "bindgen", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libc" +version = "0.2.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98a04dce437184842841303488f70d0188c5f51437d2a834dc097eafa909a01" + +[[package]] +name = "libloading" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afe203d669ec979b7128619bae5a63b7b42e9203c1b29146079ee05e2f604b52" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "memchr", + "version_check", +] + +[[package]] +name = "ocr" +version = "0.1.0" +dependencies = [ + "leptess", +] + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "pkg-config" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1a3ea4f0dd7f1f3e512cf97bf100819aa547f36a6eccac8dbaae839eb92363e" + +[[package]] +name = "proc-macro2" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb37d2df5df740e582f28f8560cf425f52bb267d872fe58358eadb554909f07a" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "shlex" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "syn" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "termcolor" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "tesseract-plumbing" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49212e1ad9511a098143f1e990c5ab7401ef6c7bfb0e2c57ab385fa7f87d2a88" +dependencies = [ + "leptonica-plumbing", + "tesseract-sys", + "thiserror", +] + +[[package]] +name = "tesseract-sys" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92539cd2054a57b9736914fbd7c8d5e1379ae861959169675289261530f120f" +dependencies = [ + "bindgen", + "leptonica-sys", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "which" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724" +dependencies = [ + "libc", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d220c14 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "ocr" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +leptess = "0.13.1" diff --git a/output.txt b/output.txt new file mode 100644 index 0000000..f1ba712 --- /dev/null +++ b/output.txt @@ -0,0 +1,332 @@ +Found 166 textline image components. +BorrowedBox(0x5649038d9ed0), confidence: 34, text: ldeas | talt.tech + +BorrowedBox(0x5649038da200), confidence: 10, text: . + +BorrowedBox(0x5649038da0e0), confidence: 74, text: Home | tait.tech + +BorrowedBox(0x5649038da140), confidence: 10, text: . + +BorrowedBox(0x5649038da750), confidence: 73, text: O lait Hoyem - Dashboard + +BorrowedBox(0x5649038d9fd0), confidence: 10, text: . + +BorrowedBox(0x5649038d9e50), confidence: 84, text: () Problem loading page + +BorrowedBox(0x5649038d9e70), confidence: 10, text: . + +BorrowedBox(0x5649038d9e90), confidence: 81, text: () leptess/low_level_ocr_v + +BorrowedBox(0x5649038da040), confidence: 10, text: . + +BorrowedBox(0x5649038da060), confidence: 73, text: &) Transcript of the Proclar + +BorrowedBox(0x5649038da080), confidence: 13, text: . + +BorrowedBox(0x564901555e80), confidence: 46, text: -+ + +BorrowedBox(0x564901555ea0), confidence: 22, text: b4 + +BorrowedBox(0x564901555ec0), confidence: 51, text: Whereas. + +BorrowedBox(0x5649038d0d10), confidence: 23, text: Ol + +BorrowedBox(0x5649038d0d30), confidence: 83, text: the + +BorrowedBox(0x5649038d0d50), confidence: 44, text: twenty-secona + +BorrowedBox(0x5649038d9bd0), confidence: 53, text: aay + +BorrowedBox(0x5649038d9bf0), confidence: 82, text: Of + +BorrowedBox(0x5649038d9c10), confidence: 88, text: September, + +BorrowedBox(0x5649038d9c30), confidence: 78, text: N + +BorrowedBox(0x5649038d9cb0), confidence: 89, text: the + +BorrowedBox(0x5649038d9cd0), confidence: 74, text: yedr + +BorrowedBox(0x5649038d9cf0), confidence: 82, text: Of + +BorrowedBox(0x5649038d9d10), confidence: 0, text: +BorrowedBox(0x56490155a6a0), confidence: 57, text: Our + +BorrowedBox(0x56490155a6c0), confidence: 65, text: L Oro + +BorrowedBox(0x56490155a6e0), confidence: 24, text: Orie + +BorrowedBox(0x56490155a700), confidence: 5, text: thousanao + +BorrowedBox(0x56490155a720), confidence: 50, text: elgnt + +BorrowedBox(0x56490395a180), confidence: 44, text: Nnunareo + +BorrowedBox(0x56490395a1a0), confidence: 54, text: =1ale + +BorrowedBox(0x56490395a1c0), confidence: 38, text: SIXty-two, + +BorrowedBox(0x56490395a1e0), confidence: 82, text: d + +BorrowedBox(0x56490395a200), confidence: 89, text: proclamation + +BorrowedBox(0x5649038d9d90), confidence: 62, text: Was + +BorrowedBox(0x5649038d9db0), confidence: 0, text: ISSueq + +BorrowedBox(0x5649038d9dd0), confidence: 68, text: Dy + +BorrowedBox(0x5649038d9df0), confidence: 89, text: the + +BorrowedBox(0x5649038d9e10), confidence: 14, text: Presiaent + +BorrowedBox(0x5649038d1d70), confidence: 82, text: Of + +BorrowedBox(0x5649038d1d90), confidence: 83, text: the + +BorrowedBox(0x5649038d1db0), confidence: 41, text: Lniteao + +BorrowedBox(0x5649038d1dd0), confidence: 80, text: States. + +BorrowedBox(0x5649038d1df0), confidence: 89, text: containing, + +BorrowedBox(0x5649038d1e10), confidence: 37, text: dlTOng + +BorrowedBox(0x5649038d1e30), confidence: 86, text: other + +BorrowedBox(0x564903955b30), confidence: 90, text: things, + +BorrowedBox(0x564903955b50), confidence: 89, text: the + +BorrowedBox(0x564903955b70), confidence: 85, text: following, + +BorrowedBox(0x564903955b90), confidence: 61, text: 10 + +BorrowedBox(0x564903955bb0), confidence: 33, text: WITt: + +BorrowedBox(0x564903955bd0), confidence: 68, text: "1 hat + +BorrowedBox(0x564903955bf0), confidence: 64, text: Ol + +BorrowedBox(0x564903955c10), confidence: 87, text: the + +BorrowedBox(0x56490284df10), confidence: 63, text: first + +BorrowedBox(0x56490284df30), confidence: 43, text: aay + +BorrowedBox(0x56490284df50), confidence: 89, text: Of + +BorrowedBox(0x56490284df70), confidence: 90, text: January, + +BorrowedBox(0x56490284df90), confidence: 65, text: N + +BorrowedBox(0x56490284dfb0), confidence: 88, text: the + +BorrowedBox(0x56490284dfd0), confidence: 59, text: yedar + +BorrowedBox(0x56490284dff0), confidence: 82, text: Of + +BorrowedBox(0x564903961e20), confidence: 86, text: Our + +BorrowedBox(0x564903961e40), confidence: 44, text: | Orao + +BorrowedBox(0x564903961e60), confidence: 66, text: Ore + +BorrowedBox(0x564903961e80), confidence: 22, text: thousana + +BorrowedBox(0x564903961ea0), confidence: 49, text: elgnht + +BorrowedBox(0x564903961ec0), confidence: 33, text: Nnunareo + +BorrowedBox(0x564903961ee0), confidence: 58, text: ana + +BorrowedBox(0x564903961f00), confidence: 34, text: Sixty-three, + +BorrowedBox(0x564903961f20), confidence: 49, text: || + +BorrowedBox(0x56490394b740), confidence: 29, text: PEIrSOns + +BorrowedBox(0x56490394b760), confidence: 56, text: nelo + +BorrowedBox(0x56490394b780), confidence: 63, text: as + +BorrowedBox(0x56490394b7a0), confidence: 72, text: glaves + +BorrowedBox(0x56490394b7c0), confidence: 41, text: within + +BorrowedBox(0x56490394b7e0), confidence: 40, text: dally + +BorrowedBox(0x56490394b800), confidence: 88, text: State + +BorrowedBox(0x56490394b820), confidence: 62, text: OFr + +BorrowedBox(0x56490394b840), confidence: 36, text: designatead + +BorrowedBox(0x5649027f6300), confidence: 90, text: part + +BorrowedBox(0x5649027f6320), confidence: 89, text: Of + +BorrowedBox(0x5649027f6340), confidence: 67, text: A + +BorrowedBox(0x5649027f6360), confidence: 88, text: State, + +BorrowedBox(0x5649027f6380), confidence: 84, text: the + +BorrowedBox(0x5649027f63a0), confidence: 89, text: people + +BorrowedBox(0x5649027f63c0), confidence: 88, text: whereof + +BorrowedBox(0x5649027f63e0), confidence: 59, text: snall + +BorrowedBox(0x5649027f6400), confidence: 89, text: then + +BorrowedBox(0x5649027f6420), confidence: 55, text: De + +BorrowedBox(0x5649027f4d00), confidence: 78, text: N + +BorrowedBox(0x5649027f4d20), confidence: 87, text: rebellion + +BorrowedBox(0x5649027f4d40), confidence: 82, text: against + +BorrowedBox(0x5649027f4d60), confidence: 89, text: the + +BorrowedBox(0x5649027f4d80), confidence: 40, text: Lniteao + +BorrowedBox(0x5649027f4da0), confidence: 40, text: States. + +BorrowedBox(0x5649027f4dc0), confidence: 76, text: snall + +BorrowedBox(0x5649027f4de0), confidence: 2, text: De + +BorrowedBox(0x5649027f4e00), confidence: 87, text: then, + +BorrowedBox(0x5649027f4e20), confidence: 37, text: thenceforwaraq. + +BorrowedBox(0x5649038da7c0), confidence: 0, text: N0 + +BorrowedBox(0x5649038da7e0), confidence: 87, text: forever + +BorrowedBox(0x5649038da800), confidence: 61, text: free: + +BorrowedBox(0x5649038da820), confidence: 54, text: =1ale + +BorrowedBox(0x5649038da840), confidence: 83, text: the + +BorrowedBox(0x5649038da860), confidence: 10, text: = xecutive + +BorrowedBox(0x5649038da880), confidence: 85, text: (sovernment + +BorrowedBox(0x5649038da8a0), confidence: 87, text: Of + +BorrowedBox(0x5649038da8c0), confidence: 88, text: the + +BorrowedBox(0x5649038da8e0), confidence: 33, text: Lniteao + +BorrowedBox(0x5649027f6180), confidence: 90, text: States, + +BorrowedBox(0x5649027f61a0), confidence: 0, text: including + +BorrowedBox(0x5649027f61c0), confidence: 89, text: the + +BorrowedBox(0x5649027f61e0), confidence: 88, text: military + +BorrowedBox(0x5649027f6200), confidence: 58, text: ana + +BorrowedBox(0x5649027f6220), confidence: 30, text: Nnaval + +BorrowedBox(0x5649027f6240), confidence: 73, text: authority + +BorrowedBox(0x5649027f6260), confidence: 57, text: thereof, + +BorrowedBox(0x5649027f6280), confidence: 25, text: will + +BorrowedBox(0x5649027f62a0), confidence: 90, text: recognize + +BorrowedBox(0x5649038cef00), confidence: 54, text: =1ale + +BorrowedBox(0x5649038cef20), confidence: 58, text: maintain + +BorrowedBox(0x5649038cef40), confidence: 79, text: the + +BorrowedBox(0x5649038cef60), confidence: 80, text: freedom + +BorrowedBox(0x5649038cef80), confidence: 89, text: Of + +BorrowedBox(0x5649038cefa0), confidence: 36, text: sSuch + +BorrowedBox(0x5649038cefc0), confidence: 5, text: PEISOnS, + +BorrowedBox(0x5649038cefe0), confidence: 54, text: =1ale + +BorrowedBox(0x5649038cf000), confidence: 28, text: Wil + +BorrowedBox(0x5649038cf020), confidence: 79, text: 00 + +BorrowedBox(0x5649038cf040), confidence: 77, text: 10 + +BorrowedBox(0x5649015589d0), confidence: 20, text: aACT + +BorrowedBox(0x5649015589f0), confidence: 58, text: OFr + +BorrowedBox(0x564901558a10), confidence: 38, text: aC1S + +BorrowedBox(0x564901558a30), confidence: 81, text: 10 + +BorrowedBox(0x564901558a50), confidence: 27, text: [EPIresSS + +BorrowedBox(0x564901558a70), confidence: 36, text: sSuch + +BorrowedBox(0x564901558a90), confidence: 0, text: PEISOnS, + +BorrowedBox(0x564901558ab0), confidence: 75, text: OFr + +BorrowedBox(0x564901558ad0), confidence: 85, text: ally + +BorrowedBox(0x564901558af0), confidence: 87, text: Of + +BorrowedBox(0x564901558b10), confidence: 45, text: them., + +BorrowedBox(0x564903961530), confidence: 78, text: N + +BorrowedBox(0x564903961550), confidence: 74, text: dlly + +BorrowedBox(0x564903961570), confidence: 75, text: efforts + +BorrowedBox(0x564903961590), confidence: 90, text: they + +BorrowedBox(0x5649039615b0), confidence: 41, text: [T1dy + +BorrowedBox(0x5649039615d0), confidence: 86, text: make + +BorrowedBox(0x5649039615f0), confidence: 91, text: for + +BorrowedBox(0x564903961610), confidence: 69, text: thelr + +BorrowedBox(0x564903961630), confidence: 86, text: actual + +BorrowedBox(0x564903961650), confidence: 18, text: freeadom. + +BorrowedBox(0x564903961670), confidence: 60, text: T hat + +BorrowedBox(0x56490390b0c0), confidence: 20, text: tho + +BorrowedBox(0x56490390b0e0), confidence: 10, text: —vocritive + +BorrowedBox(0x56490390b100), confidence: 53, text: will + +BorrowedBox(0x56490390b120), confidence: 42, text: (YY) + +BorrowedBox(0x56490390b140), confidence: 16, text: tho + +BorrowedBox(0x56490390b160), confidence: 57, text: firet + +BorrowedBox(0x56490390b180), confidence: 21, text: N \/ + +BorrowedBox(0x56490390b1a0), confidence: 69, text: N1 + +BorrowedBox(0x56490390b1c0), confidence: 15, text: 1aniiarv + +BorrowedBox(0x56490390b1e0), confidence: 51, text: atoracain + +BorrowedBox(0x56490390b200), confidence: 64, text: N\/ + diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..79dac02 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,26 @@ +extern crate leptess; + +use leptess::{leptonica, tesseract}; +use std::path::Path; + +fn main() { + let mut api = tesseract::TessApi::new(None, "eng").unwrap(); + + let pix = leptonica::pix_read(Path::new("./test.png")).unwrap(); + api.set_image(&pix); + + // detect bounding boxes for words + let boxes = api + .get_component_images(leptess::capi::TessPageIteratorLevel_RIL_WORD, true) + .unwrap(); + + println!("Found {} textline image components.", boxes.get_n()); + + // run OCR on each word bounding box + for b in &boxes { + api.set_rectangle(&b); + let text = api.get_utf8_text().unwrap(); + let confi = api.mean_text_conf(); + println!("{:?}, confidence: {}, text: {}", b, confi, text); + } +} diff --git a/test.png b/test.png new file mode 100644 index 0000000..b7b8fc9 Binary files /dev/null and b/test.png differ