Tesseract4

OCR アプリケーション基礎編 2 †

　実用的な AI開発に向けて、文字認識エンジン「Tesseract」(テッセラクト)を使用した「OCR アプリケーション」を開発する。(その２)

OCR アプリケーション基礎編 2
- OCR プログラムの開発過程 2
- 更新履歴
参考資料

※ 最終更新:2022/01/17　

↑

OCR プログラムの開発過程 2 †

↑

設定ファイルを作成・編集する †

「PyYAML」パッケージを利用する。

Step 3 のシーケンスで帳票に対するデータの領域を定義し、出来上がったデータベースを利用して帳票の文字を検出する。
定義ファイルを「YAML」ファイルとして記述する。(キー名)
基本情報
・基本スケール(BasicCoordinates)　　　　　 ← 座標保存時の帳票サイズ　幅・高さ
・帳票ファイルのファイル名(FileName)
配置情報
・帳票名(KyeName)
・日付(Date)
・会社名(CompName)
・案件名(Title)
・税抜金額(B4Tax)
・税込み金額(TotalMoney)
配置情報の内容
・領域座標(Location)　　　　　　　　　　　 ← [x0,y0,x1,y1]
・前処理種別(PreProcess)　　　　　　　　　 ← 0=なし/1=白黒変換/2=罫線消去/4=印影消去
・検出テキスト(Text)

位置情報ファイル「output_test.yaml」

OCR YAML file test:
  BasicCoordinates:
  - 640
  - 480
  FileName: C:/filename
  KyeName:
    Location:
    - 10
    - 20
    - 30
    - 40
    PreProcess: 0
    Text: 請求書
  Date:
    Location:
    - 10
    - 20
    - 30
    - 40
    PreProcess: 0
    Text: 2022年01月11日
    :

記録する配置情報名の定義ファイル「output_table.yaml」
```
KeyTable:
- KyeName
- Date
- CompName
- Title
- B4Tax
- TotalMoney
```

サンプルプログラムの実行

(py37) $ cd ~/workspace_py37/tryocr
(py37) $ python3 mylib_yaml.py 
    :
['KyeName', 'Date', 'CompName', 'Title', 'B4Tax', 'TotalMoney'] 6
[640, 480] C:/filename
[10, 20, 30, 40] 0 請求書
[10, 20, 30, 40] 0 2022年01月11日
[10, 20, 30, 40] 0 会社名
[10, 20, 30, 40] 0 案件名
[10, 20, 30, 40] 0 税抜金額
[10, 20, 30, 40] 0 税込み金額

ソースコード

▼「mylib_yaml.py」

# -*- coding: utf-8 -*-
##------------------------------------------
##   My Library YAML file process
##
##               2022.01.11 Masahiro Izutsu
##------------------------------------------
## mylib_yaml.py
## 2022.01.14 追加書き込み対応

import yaml
import codecs
import os

class YamlProcess:
    logf = False
    out_path = None
    
    # 初期化
    #   out_path:   出力ファイル・パス
    #   flg:        ログ出力フラグ
    def __init__(self, out_path, flg = False):
        self.logf = flg
        self.out_path = out_path

    # セクション/キーの追加書き込み
    #   section:    セクション名
    #   key:        データ名
    #   data:       オブジェクト
    def write_append_key(self, section, key, data):
        if os.path.isfile(self.out_path):
            with codecs.open(self.out_path, 'r+', 'utf-8') as f:
                obj = None
                try:
                    obj = yaml.safe_load(f)
                except yaml.YAMLError as exc:
                    print(exc)

                if obj != None and section in obj:
                    obj[section][key] = data
                    f.seek(0)
                    yaml.dump(obj, f, encoding='utf-8', allow_unicode=True)                
                    if self.logf:
                        print(yaml.dump(obj))
                else:
                    new_data = ｛key: data｝
                    self.write_append(section, new_data)
        else:
            new_data = ｛key: data｝
            self.write_section(section, new_data)

    # セクションの追加書き込み
    #   section:    セクション名
    #   data:       オブジェクト
    def write_append(self, section, data):
        if os.path.isfile(self.out_path):
            with codecs.open(self.out_path, 'r+', 'utf-8') as f:
                try:
                    obj = yaml.safe_load(f)
                except yaml.YAMLError as exc:
                    print(exc)

                obj[section] = data
                f.seek(0)
                yaml.dump(obj, f, encoding='utf-8', allow_unicode=True)                

            if self.logf:
                print(yaml.dump(obj))
        else:
            self.write_section(section, data)

    # セクションの書き込み
    #   section:    セクション名
    #   data:       オブジェクト
    def write_section(self, section, data):
        obj = ｛section: data｝
        self.write_object(obj)

    # オブジェクトの書き込み
    #   obj:       オブジェクト
    def write_object(self, obj):
        with codecs.open(self.out_path, 'w', 'utf-8') as f:
            yaml.dump(obj, f, encoding='utf-8', allow_unicode=True)

        if self.logf:
            print(yaml.dump(obj))

    # セクション/サブ・セクション/キーの読み出し
    #   section:    セクション名
    #   sub_sec:    サブ・セクション名
    #   key:        データ名
    def load_section_sub_key(self, section, sub_sec, key):
        obj = self.load_section_key(section, sub_sec)
        data = None
        if obj != None and key in obj:
            data = obj[key]
        return data

    # セクション/キーの読み出し
    #   section:    セクション名
    #   key:        データ名
    def load_section_key(self, section, key):
        obj = self.load_section(section)
        data = None
        if obj != None and key in obj:
            data = obj[key]
        return data

    # セクションの読み出し
    #   戻り値:      セクション・オブジェクト
    def load_section(self, section):
        obj = self.load_object()
        data = None
        if obj != None and section in obj:
            data = obj[section]
        return data
        
    # オブジェクトの読み出し
    #   戻り値:      オブジェクト
    def load_object(self):
        with codecs.open(self.out_path, 'r', 'utf-8') as f:
            try:
                obj = yaml.safe_load(f)
            except yaml.YAMLError as exc:
                print(exc)
        return obj


##---------------
def main():
    from os.path import expanduser

    # 配置ファイル作成
    out_path1 = expanduser('./output_test.yaml')
    name1 = 'file1'
    myyaml1 = YamlProcess(out_path1, True)

    path = 'C:/filename'
    base_cord = [640, 480]
    locat = [10, 20, 30, 40]
    prepros = 0

    # 基本情報を書く
    data = ｛'.FileName': path, '.BasicCoordinates': base_cord｝
    myyaml1.write_append(name1, data)

    # 配置情報を書く
    data = ｛'Location': locat, 'PreProcess': prepros, 'Text': '請求書'｝
    myyaml1.write_append_key(name1, 'KyeName', data)
    data = ｛'Location': locat, 'PreProcess': prepros, 'Text': '2022年01月11日'｝
    myyaml1.write_append_key(name1, 'Date', data)
    data = ｛'Location': locat, 'PreProcess': prepros, 'Text': '会社名'｝
    myyaml1.write_append_key(name1, 'CompName', data)
    data = ｛'Location': locat, 'PreProcess': prepros, 'Text': '案件名'｝
    myyaml1.write_append_key(name1, 'Title', data)
    data = ｛'Location': locat, 'PreProcess': prepros, 'Text': '税抜金額'｝
    myyaml1.write_append_key(name1, 'B4Tax', data)
    data = ｛'Location': locat, 'PreProcess': prepros, 'Text': '税込み金額'｝
    myyaml1.write_append_key(name1, 'TotalMoney', data)

    # キーテーブル作成
    out_path2 = expanduser('./output_table.yaml')
    myyaml2 = YamlProcess(out_path2, True)

    key_table = ['KyeName', 'Date', 'CompName', 'Title', 'B4Tax', 'TotalMoney']
    myyaml2.write_append('KeyTable', key_table)

    #----------------------------------
    
    # キーテーブルを得る
    table = myyaml2.load_section('KeyTable')
    print(table, len(table))

    data = myyaml1.load_section(name1)

    # 基本情報を得る
    base_size = data['.BasicCoordinates']
    file_name = data['.FileName']
    print(base_size, file_name)
    
    # 配置情報を得る
    location = []
    prepros = []
    text = []
    i = 0
    for tn in table:
        location.append(data[tn]['Location'])
        prepros.append(data[tn]['PreProcess'])
        text.append(data[tn]['Text'])
        print(location[i], prepros[i], text[i])
        i = i + 1

if __name__ == "__main__":
    main()

サイトの都合で「{」「}」(半角括弧)が「｛」「｝」(全角括弧)になっているので注意‼

↑

Step 4 位置情報を入力・編集・ファイルするプログラムを考える †

Step 3 のプログラムでキー入力による入力前処理の指定をマウス操作でできるようにする。
本プログラムはできるだけ簡単な操作で、基本となる「請求書」画像ファイルから OCR に入力する文字の書かれた範囲と処理内容を記述した「配置情報ファイル」を作成することを目的とする。
作成した「配置情報ファイル」は次のステップからの「請求書」自動読み取りアプリケーションで利用する。

実行時に利用できるコマンドオプション

コマンドオプション	デフォールト設定	意味
-h, --help		ヘルプ表示
-i, --image	images/sample0.png	入力画像ファイル
-l, --language	jpn	言語
--layout	6	tesseractレイアウト(0-13)
-t, --title	n	タイトル表示 (y/n)
--log	y	ログ出力フラグ (y/n)
--mlt	y	連続編集フラグ (y/n)

実行に必要なアプリケーション設定ファイル「tryocr.yaml」

KeyDisp:
- 請求書
- 日  付
- 会社名
- 案件名
- 税抜金額
- 税込金額
KeyTable:
- KyeName
- Date
- CompName
- Title
- B4Tax
- TotalMoney
Template:　　　　　　　　　　　　　　　　← 登録したテンプレートのキーファイルをアプリが書く
- sample0.png

アプリケーション起動後の操作方法 (概要)
・伝票の表示はモニター画面の縦方向いっぱいに表示する。(座標系は元画像のサイズとする）
・動作指定は画面下部のステータスエリアのマウス操作、領域指定は伝票表示エリアのマウスドラッグにより行う。
・OCR 入力の前処理「白黒変換」「罫線消去」「印影消去」の指定はステータスエリアのマウスクリックで指定する (複数指定可)。
・設定情報(本版では 6項目)は「Save」ボタンをクリックすると自動的に遷移するが、任意に選択することもできる。
・設定情報の「Save」を行った項目は表示が'緑'に変わる。
・すべての項目に対して「Save」して現在の伝票の指定は終了となる。

・「Save」(登録）完了の項目も再度指定可能。最後に指定した内容が記録に残る。
・「Next」ボタンは伝票登録作業を終了して次の伝票を指定する。
・「Quit」ボタン, 'Esc''q'キー, ウインドウのクローズボタンは、本アプリケーションを終了する。

・領域の指定は該当エリアの左上でマウスの右クリック、赤い座標が表示されたら右下までドラッグして指定完了。(領域を赤で表示)
・「Save」で登録を行うとその領域の表示は緑に変わる。

実行結果

(py37) cd ~/workspace_py37/tryocr
(py37) $ python3 tryocr_step4.py

--- TryOCR Test Program Step-4 ---
 OpenCV version 4.5.2 

TryOCR Test Program Step-4: Starting application...
   - Image File   :  images/sample0.png
   - Language     :  jpn
   - Layout       :  6
   - Program Title:  n
   - Log flag     :  y
   - Multi flag   :  y

 file: <images/sample0.png>
 Screen size: width x height = 1680 x 1050 (pixels)
    :
 Finished.

作成される配置情報ファイル「tryocr_tmpl.yaml」

sample0.png:　　　　　　　　　　　　　　← テンプレートとなる元画像のファイル名
  .BasicCoordinates:　　　　　　　　　　← 元画像のサイズ [幅,高さ]
  - 1240
  - 1754
  .FileName: images/sample0.png 　　　　← 元画像のファイルパス
  B4Tax:　　　　　　　　　　　　　　　　← '税抜き価格'のデータ・セクション
    Location: 　　　　　　　　　　　　　← 　　領域 {Xmin, Ymin, Xmax, Ymax]
    - 1103
    - 1227
    - 1190
    - 1281
    PreProcess: 2 　　　　　　　　　　　← 　　前処理 [0=なし, 1=白黒変換, 2=罫線消去, 4=印影消去]
    Text: \8,500
  CompName: 　　　　　　　　　　　　　　← '会社名'のデータ・セクション
    Location: 　　　　　　　　　　　　　← 　　領域 {Xmin, Ymin, Xmax, Ymax]
    - 725
    - 333
    - 951
    :　　　　　　　　　　　　　　　　　　↓続く

ソースコード

▼「tryocr_step4.py」

# -*- coding: utf-8 -*-
##------------------------------------------
## TryOCR Test Programe Step-4  Ver 0.01
##   with tesseract & PyOCR & cvui
##          platform: linux / windows
##
##               2022.01.12 Masahiro Izutsu
##------------------------------------------
## tryocr_step4.py
##  2022.01.17  ver 0.01    エラー処理追加

## 前処理： '白黒２値', '罫線消去', '印影消去'
#   選択をマウスで

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# 定数定義
LINE_WORD_BOX_COLOR = (0, 0, 240)
WORD_BOX_COLOR = (255, 0, 0)
CONTENTS_COLOR = (0, 128, 0)
from os.path import expanduser
DEF_INPUT_FILE = expanduser('images/sample0.png')
CONFIG_FILE = expanduser('tryocr.yaml')
OUTPUT_FILE = expanduser('tryocr_templ.yaml')

# import処理
from PIL import Image
import sys

import pyocr
import pyocr.builders
import cv2
import cvui
import argparse
import myfunction
import numpy as np
import mylib_gui
import mylib_frame
import mylib_preprocess
import mylib_screen
import platform
from tkinter import filedialog
import mylib_yaml
import os

# タイトル・バージョン情報
title = 'TryOCR Test Program Step-4  Ver 0.01'
print(GREEN)
print('--- {} ---'.format(title))
print(' OpenCV version {} '.format(cv2.__version__))
print(NOCOLOR)

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = DEF_INPUT_FILE,
            help = 'Absolute path to image file. Default value is \'' + DEF_INPUT_FILE + '\'')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE',
            default = 'jpn',
            help = 'Language. Default value is \'jpn\'')
    parser.add_argument('--layout', metavar = 'LAYOUT',
            default = 6,
            help = 'Tesseract layout Default value is 6')
    parser.add_argument('-t', '--title', metavar = 'TITLE',
            default = 'n',
            help = 'Program title flag.(y/n) Default value is \'n\'')
    parser.add_argument('--log', metavar = 'LOG',
            default = 'y',
            help = 'Log flag.(y/n) Default value is \'y\'')
    parser.add_argument('--mlt', metavar = 'MULTI',
            default = 'y',
            help = 'Multi flag.(y/n) Default value is \'y\'')
    return parser

# モデル基本情報の表示
def display_info(image, lang, layout, titleflg, logflg, mltflg):
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    print('   - ' + YELLOW + 'Image File   : ' + NOCOLOR, image)
    print('   - ' + YELLOW + 'Language     : ' + NOCOLOR, lang)
    print('   - ' + YELLOW + 'Layout       : ' + NOCOLOR, layout)
    print('   - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
    print('   - ' + YELLOW + 'Log flag     : ' + NOCOLOR, logflg)
    print('   - ' + YELLOW + 'Multi flag   : ' + NOCOLOR, mltflg)

# 編集項目を探す
def ckeck_mode(mode):
    nxt = -1
    for n in range(len(mode)):
        if mode[n] == 0:
            nxt = n
            break
    return nxt

# 基本情報を書く
def write_base_info(myyaml, section, path, base_cord):
    data = {'.FileName': path, '.BasicCoordinates': base_cord}
    myyaml.write_append(section, data)

def write_base_info1(myyaml, section, path):
    myyaml.write_append_key(section, '.FileName', path)

def write_base_info2(myyaml, section, base_cord):
    myyaml.write_append_key(section, '.BasicCoordinates', base_cord)

# 配置情報を書く
def write_area_info(myyaml, section, key, locat, prepros, text):
    data = {'Location': locat, 'PreProcess': prepros, 'Text': text}
    myyaml.write_append_key(section, key, data)

# 配置情報を読む
def read_area_info(myyaml, section, sub_sec) :
    locat = myyaml.load_section_sub_key(section, sub_sec, 'Location')
    prepros = myyaml.load_section_sub_key(section, sub_sec, 'PreProcess')
    text = myyaml.load_section_sub_key(section, sub_sec, 'Text')
    return locat, prepros, text

# 登録セクション名(ファイル名) を書く
def write_section_name(myyaml, name):
    templ_lst = myyaml.load_section('Template')
    if templ_lst == None:
        templ_lst = []
    if name not in templ_lst:
        templ_lst.append(name)
    myyaml.write_append('Template', templ_lst)

# 画像注釈編集アプリケーション・メイン処理
def image_annotation(lena_frame_org, name, lang='jpn', layout=6, titleflg=False, logflag=False, mltflg=True):
    WINDOW_NAME = title
    ROI_WINDOW  = 'Cut-out area'
    ROI_POPUP   = 'OCR detection result Text'

    preprocess_mode = [0, 0, 0]
    set_mode = 0
    wlock1 = 0
    wlock2 = 0
    wlock3 = 0

    # 設定処理
    conf_file = CONFIG_FILE
    out_file = OUTPUT_FILE
    section_name = name
    myyaml_cnf = mylib_yaml.YamlProcess(conf_file)
    myyaml_out = mylib_yaml.YamlProcess(out_file)
    key_table = myyaml_cnf.load_section('KeyTable')
    key_disp = myyaml_cnf.load_section('KeyDisp')
    table_n = len(key_table)
    save_mode = [0] * table_n
    status_h = 90                               # ステータスラインの高さ
    
    # 日本語フォント指定
    if platform.system()=='Windows':
       fontPIL = 'meiryo.ttc'                   # メイリオ
    else:
       fontPIL = 'NotoSansCJK-Bold.ttc'         # ゴシック体

    # ディスプレイ解像度を得る (Ubuntu の場合 height - 64)
    monitor_height, monitor_width = mylib_screen.get_display_size(logflag)
    maxsize = monitor_height - 64 - 100

    # 画像の前処理
    imgpros = mylib_preprocess.ImagePreprocess(False)    # 初期化

    # OCR
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print(RED + "\nOCR tool Not found." + NOCOLOR)
        quit()
    tool = tools[0]

    # mylib_frame ライブラリ
    imgfr = mylib_frame.ImageFrame(lena_frame_org)       # 初期化
    imgfr.set_screen_size(monitor_width, monitor_height)

    lena_frame = imgfr.frame_resize(maxsize)
    lena_frame_h, lena_frame_w = lena_frame.shape[:2]

    # 画面ステータス領域 (画面下部 status_h pixel)の確保
    frame = np.zeros((lena_frame_h + status_h, lena_frame_w, 3), np.uint8)
    frame[:,:,:] = 200
    
    popup_frame = np.zeros((120, 500, 3), np.uint8)
    anchor = cvui.Point()
    roi = cvui.Rect(0, 0, 0, 0)
    working = False
    frame_h, frame_w = frame.shape[:2]
    outf = False

    org_h, org_w = imgfr.get_original_size()
    scale_h, scale_w = imgfr.get_scale()
    write_base_info2(myyaml_out, section_name, [org_w, org_h])
    if logflag:
        print('\n original h x w : {:=5} x {:=5}'.format(org_h, org_w))
        print(' display  h x w : {:=5} x {:=5}'.format(lena_frame_h, lena_frame_w))
        print(' scale    h x w : {:.3f} x {:.3f}'.format(scale_h, scale_w))
        print(' -----------')

    # Init cvui and tell it to create a OpenCV window, i.e. cv.namedWindow(WINDOW_NAME).
    cv2.namedWindow(WINDOW_NAME, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL) 
    cvui.init(WINDOW_NAME)

    # OCR 項目設定データ
    ocr_text = ''
    ocr_rect = [0, 0, 0, 0]

    # 設定データの保存場所
    text_save = []
    save_rect = []
    for n in range(table_n):
        text_save.append(ocr_text)
        save_rect.append(cvui.Rect(0, 0, 0, 0))

    # 登録済みの配置データ
    for n in range(table_n):
        # 配置情報の読み出し
        ocr_rect, pros, ocr_text = read_area_info(myyaml_out, name, key_table[n])
        save_mode[n] = 1 if ocr_rect != None and pros != None and ocr_text != None else 0
        if ocr_text == None:
            ocr_text = ''
        if ocr_rect == None:
            x0 = 0
            y0 = 0
            x1 = 0
            y1 = 0
        else:
            # オリジナル座標 → 表示座標
            x0, y0 = imgfr.get_org2res_xy(ocr_rect[0], ocr_rect[1])
            x1, y1 = imgfr.get_org2res_xy(ocr_rect[2], ocr_rect[3])

        # テキスト・領域の保存
        text_save[n] = ocr_text
        save_rect[n].x = x0
        save_rect[n].y = y0
        save_rect[n].width = x1 - x0
        save_rect[n].height = y1 - y0

        next = ckeck_mode(save_mode)
        if next < 0:
            set_mode = 0
            if logflag:
                print(' -Saved all parameters!!-')
        else:
            set_mode = next
    ## --

    # 処理ループ
    while (True):
        # 画面上部に画像を配置
        frame[0:lena_frame_h,:] = lena_frame
        frame[lena_frame_h:,:,:] = 200
        
        # マウス・イベント
        if cvui.mouse(cvui.LEFT_BUTTON, cvui.DOWN):
            if cvui.mouse().y < lena_frame_h:
                # マウスポインタにアンカーを配置
                anchor.x = cvui.mouse().x
                anchor.y = cvui.mouse().y

                # 作業中の通知(作業中はウインドウの更新しない)
                working = True

        if cvui.mouse(cvui.LEFT_BUTTON, cvui.IS_DOWN):
            if cvui.mouse().y < lena_frame_h:
                # 領域を設定
                width = cvui.mouse().x - anchor.x
                height = cvui.mouse().y - anchor.y
            
                roi.x = anchor.x + width if width < 0 else anchor.x
                roi.y = anchor.y + height if height < 0 else anchor.y
                roi.width = abs(width)
                roi.height = abs(height)

                # 座標とサイズを表示
                cvui.printf(frame, roi.x + 5, roi.y + 5, 0.3, 0xff0000, '(%d,%d)', roi.x, roi.y)
                cvui.printf(frame, cvui.mouse().x + 5, cvui.mouse().y + 5, 0.3, 0xff0000, 'w:%d, h:%d', roi.width, roi.height)

        if cvui.mouse(cvui.UP):
            if cvui.mouse().y < lena_frame_h:
                # 領域指定作業の終了
                working = False
                outf = True
                wlock1 = 0
                wlock2 = 0
                wlock3 = 0

        # 領域内を確認
        lenaRows, lenaCols, lenaChannels = lena_frame.shape
        if roi.x < 0:
            roi.x = 0
        if roi.y < 0:
            roi.y = 0
        if roi.x + roi.width > lena_frame_w:
            roi.width = lena_frame_w - roi.x
        if roi.y + roi.height > lena_frame_h:
            roi.height = lena_frame_h - roi.y
        
        # 設定領域をレンダリング
        cvui.rect(frame, roi.x, roi.y, roi.width, roi.height, 0xff0000)

        # 設定済み領域の表示
        for n in range(table_n):
            rc = save_rect[n]
            if rc.width > 0 and rc.height >0:
                cvui.rect(frame, rc.x, rc.y, rc.width, rc.height, 0x008000)

                # テキストの描画
                if len(text_save[n]) > 0:
                    myfunction.cv2_putText(img = frame,
                               text = text_save[n],
                               org = (rc.x, rc.y),
                               fontFace = fontPIL,
                               fontScale = 12,
                               color = (0,160,0),
                               mode = 0)

        # -------------------------
        # ステータスライン - 前処理：
        fs = 12
        xs = 80
        rc = cvui.Rect(10, lena_frame_h+8, xs-10, fs + 6)
        myfunction.cv2_putText(frame, '前処理：', (rc.x, rc.y), fontPIL, fs, (100,100,100), 1)
        cr = (0,0,0) if preprocess_mode[0] == 0 else (0,0,240)
        myfunction.cv2_putText(frame, '白黒２値', (rc.x+xs, rc.y), fontPIL, fs, cr, 1)
        cr = (0,0,0) if preprocess_mode[1] == 0 else (0,0,240)
        myfunction.cv2_putText(frame, '罫線消去', (rc.x+xs*2, rc.y), fontPIL, fs, cr, 1)
        cr = (0,0,0) if preprocess_mode[2] == 0 else (0,0,240)
        myfunction.cv2_putText(frame, '印影消去', (rc.x+xs*3, rc.y), fontPIL, fs, cr, 1)
                
        crx = 0x202020 if preprocess_mode[0] == 0 else 0xe00000
        cvui.rect(frame, rc.x+xs-2, rc.y, rc.width, rc.height, crx)
        crx = 0x202020 if preprocess_mode[1] == 0 else 0xe00000
        cvui.rect(frame, rc.x+xs*2-2, rc.y, rc.width, rc.height, crx)
        crx = 0x202020 if preprocess_mode[2] == 0 else 0xe00000
        cvui.rect(frame, rc.x+xs*3-2, rc.y, rc.width, rc.height, crx)

        status0 = cvui.iarea(rc.x+xs, rc.y, rc.width, rc.height);
        status1 = cvui.iarea(rc.x+xs*2, rc.y, rc.width, rc.height);
        status2 = cvui.iarea(rc.x+xs*3, rc.y, rc.width, rc.height);
        if status0 == cvui.CLICK:
            preprocess_mode[0] = 1 if preprocess_mode[0] == 0 else 0
            outf = True
        if status1 == cvui.CLICK:
            preprocess_mode[1] = 1 if preprocess_mode[1] == 0 else 0
            outf = True
        if status2 == cvui.CLICK:
            preprocess_mode[2] = 1 if preprocess_mode[2] == 0 else 0
            outf = True

        # ステータスライン - 設定項目：
        rc = cvui.Rect(10, lena_frame_h+32, xs-10, fs+8)
        myfunction.cv2_putText(frame, '設定項目：', (rc.x, rc.y), fontPIL, fs, (100,100,100), 1)
        n = 1
        for disp in key_disp:
            cr = (0,240,240) if set_mode == n - 1 else (100,100,100) if save_mode[n - 1] == 0 else (0,160,0)
            myfunction.cv2_putText(frame, disp, (rc.x + n*xs, rc.y), fontPIL, fs,  cr, 1)
            n = n + 1
        
        status10 = cvui.iarea(rc.x+xs, rc.y, rc.width, rc.height);
        status11 = cvui.iarea(rc.x+2*xs, rc.y, rc.width, rc.height);
        status12 = cvui.iarea(rc.x+3*xs, rc.y, rc.width, rc.height);
        status13 = cvui.iarea(rc.x+4*xs, rc.y, rc.width, rc.height);
        status14 = cvui.iarea(rc.x+5*xs, rc.y, rc.width, rc.height);
        status15 = cvui.iarea(rc.x+6*xs, rc.y, rc.width, rc.height);

        if status10 == cvui.CLICK:
            set_mode = 0
        if status11 == cvui.CLICK:
            set_mode = 1
        if status12 == cvui.CLICK:
            set_mode = 2
        if status13 == cvui.CLICK:
            set_mode = 3
        if status14 == cvui.CLICK:
            set_mode = 4
        if status15 == cvui.CLICK:
            set_mode = 5

        # ステータスライン - 作業ボタン
        btn_x = 90
        btn_y = lena_frame_h + 54
        btn_w = 70
        btn_h = 32

        # 'Save'
        if cvui.button(frame, btn_x, btn_y, "&Save"):
            if len(ocr_text) > 0:
                save_mode[set_mode] = 1
                save_rect[set_mode] = roi
                text_save[set_mode] = ocr_text
                
                # 配置情報の記録
                prepros = preprocess_mode[0] + preprocess_mode[1]*2 + preprocess_mode[2]*4
                write_area_info(myyaml_out, section_name, key_table[set_mode], ocr_rect, prepros, ocr_text)
                write_section_name(myyaml_cnf, section_name)

                if logflag:
                    print('\n Parameter Saving!!')
                    print(' key: ', key_table[set_mode])
                    print(' area:', ocr_rect)
                    print(' text:', ocr_text)
                    print(' poos:', preprocess_mode)
                next = ckeck_mode(save_mode)
                if next < 0:
                    set_mode = 0
                    if logflag:
                        print(' -Saved all parameters!!-')
                else:
                    set_mode = next
                roi = cvui.Rect(0, 0, 0, 0)
                ocr_text = ''
                cv2.destroyWindow(ROI_WINDOW)
                cv2.destroyWindow(ROI_POPUP)

        # 'Next'
        if mltflg:
            if cvui.button(frame, btn_x + btn_w + 10, btn_y, "&Next"):
                key = ord('n')
                break

        # 'Quit'
        if cvui.button(frame, btn_x + (btn_w + 10) * 4, btn_y, "&Quit"):
            key = ord('q')
            break

        # -------------------------

        # タイトル描画
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # ウインドウの更新
        cvui.update()

        # 画面の表示
        cv2.imshow(WINDOW_NAME, frame)
        if wlock1 < 10:
            cv2.moveWindow(WINDOW_NAME, 80, 0)
            wlock1 = wlock1 + 1
        else:
            wlock1 = 10
        
        # 得られた表示座標から元画像の位置を計算して画像を切り出す
        if outf and roi.area() > 50 and working == False:
            x0, y0 = imgfr.get_res2org_xy(roi.x, roi.y)
            x1, y1 = imgfr.get_res2org_xy(roi.x + roi.width, roi.y + roi.height)
            lenaRoi = lena_frame_org[y0 : y1, x0 : x1]
            ocr_rect = [x0, y0, x1, y1]
            
            # 前処理
            if preprocess_mode[2] != 0:
                lenaRoi = imgpros.image_processing_execution(lenaRoi, 4)
            if preprocess_mode[1] != 0:
                lenaRoi = imgpros.image_processing_execution(lenaRoi, 2)
            if preprocess_mode[0] != 0:
                lenaRoi = imgpros.image_processing_execution(lenaRoi, 1)

            # OCR入力画像表示 
            lenaRoi_h, lenaRoi_w = lenaRoi.shape[:2]
            cv2.namedWindow(ROI_WINDOW, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL) 
            cv2.imshow(ROI_WINDOW, lenaRoi)
            if wlock2 < 10:
                cv2.moveWindow(ROI_WINDOW, frame_w + 100, 0)
                wlock2 = wlock2 + 1
            else:
                wlock2 = 10

            # 切り出した領域を OCR
            # PILのイメージにする
            lenaRoi1 = cv2.cvtColor(lenaRoi, cv2.COLOR_RGB2BGR)
            imgRoi = Image.fromarray(lenaRoi1)

            # txt is a Python string
            ocr_text = tool.image_to_string(imgRoi, lang=lang,
                    builder=pyocr.builders.TextBuilder(tesseract_layout=layout))
            
            # テキストの描画
            if len(ocr_text)>0:
                msg = 'preprocess: {}  area: ({}, {}) - ({}, {})'.format(preprocess_mode, ocr_rect[0], ocr_rect[1], ocr_rect[2], ocr_rect[3])

                popup_frame[:,:,:] = 0
                cv2.rectangle(popup_frame, (0, 88), (500, 105), (255,0,0), -1)
                myfunction.cv2_putText(img = popup_frame,
                               text = ocr_text,
                               org = (15, 104),
                               fontFace = fontPIL,
                               fontScale = 12,
                               color = (255,255,255),
                               mode = 0)
                cv2.putText(popup_frame, msg, (0, 16), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)

            else:
                msg = '  Cannot be converted !!\n'
                msg0 = 'もう一度指定してくささい ‼'
                popup_frame[:,:,:] = 0
                myfunction.cv2_putText(img = popup_frame,
                               text = msg0,
                               org = (120, 60),
                               fontFace = fontPIL,
                               fontScale = 16,
                               color = (240, 240, 0),
                               mode = 0)

            cv2.namedWindow(ROI_POPUP, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL) 
            cv2.imshow(ROI_POPUP, popup_frame)
            if wlock3 < 10:
                cv2.moveWindow(ROI_POPUP, frame_w + 100, lenaRoi_h + 100)
                wlock3 = wlock3 + 1
            else:
                wlock3 = 10
            if outf and logflag:
                print(' ', ocr_text)
                print(' ', msg)
            outf = False

        key = cv2.waitKey(10)
        if key == 27 or key == 113:                     # 'esc' or 'q'
            break
        elif key >= ord('0') and key <= ord('7'):       # 前処理モード変更
            mode = key - ord('0')
            preprocess_mode[2] = 1 if mode & 0x4 != 0 else 0
            preprocess_mode[1] = 1 if mode & 0x2 != 0 else 0
            preprocess_mode[0] = 1 if mode & 0x1 != 0 else 0
            outf = True

        if not mylib_gui._is_visible(title):            # 'Close' button
            break

    cv2.destroyAllWindows()
    if logflag:
        print(' -----------\n')

    return key

# ** main関数 **
def main():
    loop_flg = True
    myyaml = mylib_yaml.YamlProcess(OUTPUT_FILE)
    
    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    filename = ARGS.image
    lang = ARGS.language
    layout = int(ARGS.layout)
    titleflg = ARGS.title
    logflg = ARGS.log
    logflag = True if logflg == 'y' else False 
    mltflg = ARGS.mlt
    mltflag = True if mltflg == 'y' else False 

    # 情報表示
    display_info(filename, lang, layout, titleflg, logflg, mltflg)
    while(loop_flg):
        if logflag:
            print('\n file: <{}>'.format(filename))

        # OpenCV でイメージを読む
        frame = cv2.imread(filename)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        # 画像注釈
        name = os.path.basename(filename)
        write_base_info1(myyaml, name, filename)
        ret = image_annotation(frame, name, lang, layout, titleflg, logflag, mltflag)
        if ret == 27 or ret == 113 or ret == -1:
            loop_flg = False

        # 画像ファイルの選択
        if loop_flg:
            filename = filedialog.askopenfilename(
                    title = "画像ファイルを開く",
                    filetypes = [("Image file", ".bmp .png .jpg .tif"),
                                 ("Bitmap", ".bmp"),
                                 ("PNG", ".png"),
                                 ("JPEG", ".jpg")],     # ファイルフィルタ
                    initialdir = "./"                   # 自分自身のディレクトリ
                    )
            if len(filename) == 0:
                break

    if logflag:
        print('\n Finished.')

# main関数エントリーポイント(実行開始)
if __name__ == "__main__":
    sys.exit(main())

↑

Step 5 配置情報ファイルを利用して伝票内を一括処理する †

Step 4 のプログラムで作成した「配置情報ファイル」に従って自動的に OCR 出力を得る。
先ずは、Step4 で用いた帳票画像ファイルと、同じ画像の解像度の異なる画像に対して動作確認を行う。
次のステップでは異なる帳票画像を複数用意してテンプレート登録し、そのテンプレートの中から一致するものを探すアルゴリズムを作成する。

実行時に利用できるコマンドオプション (Step 4 と同じ)

(py37) $ python3 tryocr_step5.py -h

--- TryOCR Test Program Step-5 ---
 OpenCV version 4.5.2 

usage: tryocr_step5.py [-h] [-i IMAGE_FILE] [-l LANGUAGE] [--layout LAYOUT]
                       [-t TITLE] [--log LOG] [--mlt MULTI]

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE_FILE, --image IMAGE_FILE
                        Absolute path to image file. Default value is
                        'images/sample0.png'
  -l LANGUAGE, --language LANGUAGE
                        Language. Default value is 'jpn'
  --layout LAYOUT       Tesseract layout Default value is 6
  -t TITLE, --title TITLE
                        Program title flag.(y/n) Default value is 'y'
  --log LOG             Log flag.(y/n) Default value is 'y'
  --mlt MULTI           Multi flag.(y/n) Default value is 'y'

実行結果

(py37) cd ~/workspace_py37/tryocr
(py37) $ python3 tryocr_step5.py

--- TryOCR Test Program Step-5 ---
 OpenCV version 4.5.2 

TryOCR Test Program Step-5: Starting application...
   - Image File   :  images/sample0.png
   - Language     :  jpn
   - Layout       :  6
   - Program Title:  n
   - Log flag     :  y
   - Multi flag   :  y

 file: <images/sample0.png>
 Screen size: width x height = 1680 x 1050 (pixels)

 original h x w :  1754 x  1240
 display  h x w :   886 x   626
 scale    h x w : 0.505 x 0.505
 tp-size  h x w :  1754 x  1240
 tp-scale h x w : 1.000 x 1.000
 -----------
  請 求 書
  preprocess: [0, 0, 0]  area: (493, 99) - (753, 178)
  2021年10月11日
  preprocess: [0, 0, 0]  area: (984, 249) - (1202, 299)
  井筒政弘
  preprocess: [0, 0, 0]  area: (771, 354) - (941, 406)
  PC4-25600(DDR4-3200) 16GB SO-DIMM
  preprocess: [1, 1, 0]  area: (117, 764) - (588, 826)
  \8,500
  preprocess: [0, 1, 0]  area: (1103, 1227) - (1190, 1281)
  \8,500
  preprocess: [0, 1, 0]  area: (1099, 1316) - (1190, 1348)
 -----------


 Finished.

ソースコード

▼「tryocr_step5.py」

# -*- coding: utf-8 -*-
##------------------------------------------
## TryOCR Test Programe Step-5  Ver 0.01
##   with tesseract & PyOCR & cvui
##          platform: linux / windows
##
##               2022.01.15 Masahiro Izutsu
##------------------------------------------
## tryocr_step5.py
##  2022.01.17  Ver 0.01    エラー処理追加

## 配置情報ファイルを使って処理する

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# 定数定義
LINE_WORD_BOX_COLOR = (0, 0, 240)
WORD_BOX_COLOR = (255, 0, 0)
CONTENTS_COLOR = (0, 128, 0)
from os.path import expanduser
DEF_INPUT_FILE = expanduser('images/sample1.png')
CONFIG_FILE = expanduser('tryocr.yaml')
MAPING_FILE = expanduser('tryocr_templ.yaml')

# import処理
from PIL import Image
import sys

import pyocr
import pyocr.builders
import cv2
import cvui
import argparse
import myfunction
import numpy as np
import mylib_gui
import mylib_frame
import mylib_preprocess
import mylib_screen
import platform
from tkinter import filedialog
import mylib_yaml
import os

# タイトル・バージョン情報
title = 'TryOCR Test Program Step-5  Ver 0.01'
print(GREEN)
print('--- {} ---'.format(title))
print(' OpenCV version {} '.format(cv2.__version__))
print(NOCOLOR)

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = DEF_INPUT_FILE,
            help = 'Absolute path to image file. Default value is \'' + DEF_INPUT_FILE + '\'')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE',
            default = 'jpn',
            help = 'Language. Default value is \'jpn\'')
    parser.add_argument('--layout', metavar = 'LAYOUT',
            default = 6,
            help = 'Tesseract layout Default value is 6')
    parser.add_argument('-t', '--title', metavar = 'TITLE',
            default = 'n',
            help = 'Program title flag.(y/n) Default value is \'n\'')
    parser.add_argument('--log', metavar = 'LOG',
            default = 'y',
            help = 'Log flag.(y/n) Default value is \'y\'')
    parser.add_argument('--mlt', metavar = 'MULTI',
            default = 'y',
            help = 'Multi flag.(y/n) Default value is \'y\'')
    return parser

# モデル基本情報の表示
def display_info(image, lang, layout, titleflg, logflg, mltflg):
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    print('   - ' + YELLOW + 'Image File   : ' + NOCOLOR, image)
    print('   - ' + YELLOW + 'Language     : ' + NOCOLOR, lang)
    print('   - ' + YELLOW + 'Layout       : ' + NOCOLOR, layout)
    print('   - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
    print('   - ' + YELLOW + 'Log flag     : ' + NOCOLOR, logflg)
    print('   - ' + YELLOW + 'Multi flag   : ' + NOCOLOR, mltflg)

# 基本情報を読む
def read_base_info1(myyaml, section):
    path = myyaml.load_section_key(section, '.FileName')
    return path

def read_base_info2(myyaml, section):
    base_cord = myyaml.load_section_key(section, '.BasicCoordinates')
    return base_cord

# 配置情報を読む
def read_area_info(myyaml, section, sub_sec) :
    locat = myyaml.load_section_sub_key(section, sub_sec, 'Location')
    prepros = myyaml.load_section_sub_key(section, sub_sec, 'PreProcess')
    text = myyaml.load_section_sub_key(section, sub_sec, 'Text')
    return locat, prepros, text

# 登録セクション名(ファイル名) を読む out: キー・リスト
def read_section_name(myyaml):
    templ_lst = myyaml.load_section('Template')
    return templ_lst

# 帳票 OCR・メイン処理
def image_ocr_process(lena_frame_org, name, lang='jpn', layout=6, titleflg=False, logflag=False, mltflg=True):
    WINDOW_NAME = title

    preprocess_mode = [0, 0, 0]
    set_mode = 0
    wlock1 = 0

    # 配置情報読み出し・設定処理
    conf_file = CONFIG_FILE
    map_file = MAPING_FILE
    myyaml_cnf = mylib_yaml.YamlProcess(conf_file, True)
    myyaml_map = mylib_yaml.YamlProcess(map_file, True)
    key_table = myyaml_cnf.load_section('KeyTable')
    key_disp = myyaml_cnf.load_section('KeyDisp')
    
    load_mode = [0] * len(key_table)
    status_h = 40                               # ステータスラインの高さ
    
    # 日本語フォント指定
    if platform.system()=='Windows':
       fontPIL = 'meiryo.ttc'                   # メイリオ
    else:
       fontPIL = 'NotoSansCJK-Bold.ttc'         # ゴシック体

    # ディスプレイ解像度を得る (Ubuntu の場合 height - 64)
    monitor_height, monitor_width = mylib_screen.get_display_size(logflag)
    maxsize = monitor_height - 64 - 100

    # 画像の前処理
    imgpros = mylib_preprocess.ImagePreprocess(False)    # 初期化

    # OCR
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print(RED + "\nOCR tool Not found." + NOCOLOR)
        quit()
    tool = tools[0]

    # mylib_frame ライブラリ
    imgfr = mylib_frame.ImageFrame(lena_frame_org)       # 初期化
    imgfr.set_screen_size(monitor_width, monitor_height)

    lena_frame = imgfr.frame_resize(maxsize)
    lena_frame_h, lena_frame_w = lena_frame.shape[:2]

    # 画面ステータス領域 (画面下部 status_h pixel)の確保
    frame = np.zeros((lena_frame_h + status_h, lena_frame_w, 3), np.uint8)
    frame[:,:,:] = 200
    
    popup_frame = np.zeros((120, 500, 3), np.uint8)
    anchor = cvui.Point()
    roi = cvui.Rect(0, 0, 0, 0)
    frame_h, frame_w = frame.shape[:2]
    outf = True

    org_h, org_w = imgfr.get_original_size()
    scale_h, scale_w = imgfr.get_scale()

    # テンプレート関連設定
    template_tbl = read_section_name(myyaml_cnf)
    section_name = template_tbl[0]              # !!! この版では最初のエントリを使う
    base_cord = read_base_info2(myyaml_map, section_name)
    base_scale_w = base_cord[0] / org_w
    base_scale_h = base_cord[1] / org_h
    table_n = len(key_table)

    if logflag:
        print('\n original h x w : {:=5} x {:=5}'.format(org_h, org_w))
        print(' display  h x w : {:=5} x {:=5}'.format(lena_frame_h, lena_frame_w))
        print(' scale    h x w : {:.3f} x {:.3f}'.format(scale_h, scale_w))
        print(' tp-size  h x w : {:=5} x {:=5}'.format(base_cord[1], base_cord[0]))
        print(' tp-scale h x w : {:.3f} x {:.3f}'.format(base_scale_h, base_scale_w))
        print(' -----------')

    # Init cvui and tell it to create a OpenCV window, i.e. cv.namedWindow(WINDOW_NAME).
    cv2.namedWindow(WINDOW_NAME, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL) 
    cvui.init(WINDOW_NAME)

    # OCR 項目設定データ
    ocr_text = ''
    ocr_rect = [0, 0, 0, 0]

    # 設定データの保存場所
    text_save = []
    rect_save = []
    for n in range(table_n):
        text_save.append(ocr_text)
        rect_save.append(ocr_rect)

    # メッセージ・ウインドウ設定
    msg_win_h = 100
    msg_win_w = 400
    msg_win_xof = 0 if lena_frame_w < msg_win_w else int((lena_frame_w - msg_win_w) / 2)
    msg_win_yof = 0 if lena_frame_h < msg_win_h else int((lena_frame_h - msg_win_h) / 2)
    msg_bg_color = (49, 52, 49)
    
    # 処理ループ
    while (True):
        # 画面上部に画像を配置
        frame[0:lena_frame_h,:] = lena_frame
        frame[lena_frame_h:,:,:] = 200

        if outf:
            cv2.rectangle(frame, (msg_win_xof, msg_win_yof), (msg_win_xof + msg_win_w, msg_win_yof + msg_win_h), (49, 52, 49), -1)
            myfunction.cv2_putText(img = frame,
                               text = 'ファイル名:  < {} >'.format(name),
                               org = (msg_win_xof + 40, msg_win_yof + 20),
                               fontFace = fontPIL,
                               fontScale = 12,
                               color = (255,255,255),
                               mode = 1)
            myfunction.cv2_putText(img = frame,
                               text = '処理中...',
                               org = (msg_win_xof + 180, msg_win_yof + 60),
                               fontFace = fontPIL,
                               fontScale = 12,
                               color = (240,240, 0),
                               mode = 1)

        else:
            for n in range(table_n):
                # 領域表示
                cvui.rect(frame, rect_save[n][0], rect_save[n][1], rect_save[n][2], rect_save[n][3], 0x008000)

                # テキストの描画
                if len(text_save[n]) > 0:
                    myfunction.cv2_putText(img = frame,
                               text = text_save[n],
                               org = (rect_save[n][0], rect_save[n][1]),
                               fontFace = fontPIL,
                               fontScale = 12,
                               color = (0,160,0),
                               mode = 0)
    
        # -------------------------
        # ステータスライン - 作業ボタン
        btn_x = 90
        btn_y = lena_frame_h + 4
        btn_w = 70
        btn_h = 32

        # 'Next'
        if mltflg:
            if cvui.button(frame, btn_x + btn_w + 10, btn_y, "&Next"):
                key = ord('n')
                break

        # 'Quit'
        if cvui.button(frame, btn_x + (btn_w + 10) * 4, btn_y, "&Quit"):
            key = ord('q')
            break

        # -------------------------

        # タイトル描画
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # ウインドウの更新
        cvui.update()

        # 画面の表示
        cv2.imshow(WINDOW_NAME, frame)
        if wlock1 < 10:
            cv2.moveWindow(WINDOW_NAME, 80, 0)
            wlock1 = wlock1 + 1
        else:
            wlock1 = 10

        key = cv2.waitKey(50)
        if key == 27 or key == 113:                     # 'esc' or 'q'
            break

        if not mylib_gui._is_visible(title):            # 'Close' button
            break

        # 登録セクションの処理
        if outf:
            for n in range(table_n):
                # 配置情報の読み出し
                ocr_rect, pros, ocr_text = read_area_info(myyaml_map, section_name, key_table[n])

                if pros == None:
                    pros = 0
                if ocr_text == None:
                    ocr_text = ''

                prepros = int(pros)
                preprocess_mode[2], mod = divmod(prepros, 4)
                preprocess_mode[1], preprocess_mode[0] = divmod(mod, 2)

                if ocr_rect == None:
                    roi.x = 0
                    roi.y = 0
                    roi.width = 0
                    roi.height = 0
                else:
                    # オリジナル座標 → 表示座標
                    x0, y0 = imgfr.get_org2res_xy(round(ocr_rect[0] / base_scale_w), round(ocr_rect[1] / base_scale_h))
                    x1, y1 = imgfr.get_org2res_xy(round(ocr_rect[2] / base_scale_w), round(ocr_rect[3] / base_scale_h))
                    roi.x = x0
                    roi.y = y0
                    roi.width = x1 - x0
                    roi.height = y1 - y0

                # 領域の保存
                rect_save[n] = [roi.x, roi.y, roi.width, roi.height]

                # 得られた表示座標から元画像の位置を計算して画像を切り出す
                if roi.area() > 50:
                    x0, y0 = imgfr.get_res2org_xy(roi.x, roi.y)
                    x1, y1 = imgfr.get_res2org_xy(roi.x + roi.width, roi.y + roi.height)
                    lenaRoi = lena_frame_org[y0 : y1, x0 : x1]
                    ocr_rect = [x0, y0, x1, y1]

                    # 前処理
                    if preprocess_mode[2] != 0:
                        lenaRoi = imgpros.image_processing_execution(lenaRoi, 4)
                    if preprocess_mode[1] != 0:
                        lenaRoi = imgpros.image_processing_execution(lenaRoi, 2)
                    if preprocess_mode[0] != 0:
                        lenaRoi = imgpros.image_processing_execution(lenaRoi, 1)

                    # 切り出した領域を OCR
                    # PILのイメージにする
                    lenaRoi1 = cv2.cvtColor(lenaRoi, cv2.COLOR_RGB2BGR)
                    imgRoi = Image.fromarray(lenaRoi1)

                    # txt is a Python string
                    text = tool.image_to_string(imgRoi, lang=lang,
                            builder=pyocr.builders.TextBuilder(tesseract_layout=layout))
                    text_save[n] = text
            
                    # テキストの保存
                    if len(text) > 0:
                        msg = 'preprocess: {}  area: ({}, {}) - ({}, {})'.format(preprocess_mode, ocr_rect[0], ocr_rect[1], ocr_rect[2], ocr_rect[3])

                        if outf and logflag:
                            print(' ', msg)
                            print(' ', text)

        outf = False

    cv2.destroyAllWindows()
    if logflag:
        print(' -----------\n')

    return key

# ** main関数 **
def main():
    loop_flg = True
    
    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    filename = ARGS.image
    lang = ARGS.language
    layout = int(ARGS.layout)
    titleflg = ARGS.title
    logflg = ARGS.log
    logflag = True if logflg == 'y' else False 
    mltflg = ARGS.mlt
    mltflag = True if mltflg == 'y' else False 

    # 情報表示
    display_info(filename, lang, layout, titleflg, logflg, mltflg)
    while(loop_flg):
        if logflag:
            print('\n file: <{}>'.format(filename))

        # OpenCV でイメージを読む
        frame = cv2.imread(filename)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        # 帳票 OCR・メイン処理
        name = os.path.basename(filename)
        ret = image_ocr_process(frame, name, lang, layout, titleflg, logflag, mltflag)
        if ret == 27 or ret == 113 or ret == -1:
            loop_flg = False

        # 画像ファイルの選択
        if loop_flg:
            filename = filedialog.askopenfilename(
                    title = "画像ファイルを開く",
                    filetypes = [("Image file", ".bmp .png .jpg .tif"),
                                 ("Bitmap", ".bmp"),
                                 ("PNG", ".png"),
                                 ("JPEG", ".jpg")],     # ファイルフィルタ
                    initialdir = "./"                   # 自分自身のディレクトリ
                    )
            if len(filename) == 0:
                break

    if logflag:
        print('\n Finished.')

# main関数エントリーポイント(実行開始)
if __name__ == "__main__":
    sys.exit(main())

→ 以降「OCR アプリケーション基礎編 3」へ続く

↑

更新履歴 †

2022/01/12 初版
2022/01/15 Step4・5 追加

↑

参考資料 †

PyYAML

Python 辞書型

その他

最新の20件

OCR アプリケーション基礎編 2 †

OCR プログラムの開発過程 2 †

設定ファイルを作成・編集する †

Step 4 位置情報を入力・編集・ファイルするプログラムを考える †

Step 5 配置情報ファイルを利用して伝票内を一括処理する †

更新履歴 †

参考資料 †