Tesseract5

OCR アプリケーション基礎編 3 †

　実用的な AI開発に向けて、文字認識エンジン「Tesseract」(テッセラクト)を使用した「OCR アプリケーション」を開発する。(その３)

OCR アプリケーション基礎編 3
- OCR プログラムの開発過程 3
- 更新履歴
参考資料

※ 最終更新:2022/01/28　

↑

OCR プログラムの開発過程 3 †

↑

OCR 文字列を検証する †

　OCR で得られた文字列の有効性を調べる方法を考える。

文字列から数字を取り出す

import re

# Collect every run of consecutive digits, in order of appearance.
text = "2022年1月17日"
digit_runs = re.compile(r"\d+")
result = digit_runs.findall(text)

# 実行結果
['2022', '1', '17']

年月日の数値の妥当性を調べる

import re
import datetime

yy = 2022
mm = 1
dd = 17
try:
    # datetime.date() raises ValueError for an impossible date (e.g. 2/31).
    # The result is deliberately NOT bound to the name `str`: that would hide
    # the builtin and break any later str(...) call in the same session.
    date = datetime.date(yy, mm, dd)
    string = date.strftime('%Y/%m/%d')
except ValueError:
    string = ''
    print('Invalid value error !!')

# 実行結果
2022/01/17

文字列から金額を取り出す

import re

# The backslash is escaped explicitly ('\8' is an invalid escape sequence).
text = '\\8,500'
money = ''
# Drop the thousands separator so the amount becomes one run of digits.
txt = text.replace(',', '')
if len(txt) > 0:
    # Search the comma-stripped string; searching the raw text would split
    # the amount into two numbers ('8' and '500').
    result = re.findall(r"\d+", txt)
    # Accept the text only when it contains exactly one number.
    if len(result) == 1:
        money = result[0]

# 実行結果
8500

文字列の空白を取る

import re

# Delete full-width spaces (U+3000), ASCII spaces and tabs.
text = '請 求 書'
blank_chars = re.compile(r"[\u3000 \t]")
string = blank_chars.sub("", text)

# 実行結果
請求書

サンプルプログラムの実行

(py37) $ cd ~/workspace_py37/tryocr
(py37) $ python3 mylib_text.py 
 get_str2num(): 2022年1月17日 →  ['2022', '1', '17']
 get_str2num(): 年月日 →  []
Invalid value error !!
 checkDate(): 2022 2 31 →  
 checkDate(): 2022 1 31 →  2022/01/31
 get_str2date(): 2022年1月17日 →  2022/01/17
Invalid value error !!
 get_str2date(): 2022年2月31日 →  
Invalid string error !!
 get_str2date(): 2月31日 →  
Invalid string error !!
 get_str2date(): 20,22年2月31日 →  
 get_str2mony():  \8,500 →  8500
 get_str2mony(): abc月日 →  
 remove_space(): 請 求 書 →  請求書

ソースコード

▼「mylib_text.py」

# -*- coding: utf-8 -*-
##------------------------------------------
##   My Library Text Convert
##
##               2022.01.17 Masahiro Izutsu
##------------------------------------------
## mylib_text.py

import re
import datetime
import csv

class TextConvert:
    """Helpers that validate and normalise strings obtained from OCR.

    Every method reports a failure through an "empty" return value
    ('' or []) instead of raising.
    """

    # Class-level default of the log flag; __init__ overrides it per instance.
    logf = False

    def __init__(self, flg = False):
        """Store the log flag.

        flg: when true, validation failures are reported with print().
        """
        self.logf = flg

    def get_str2num(self, text):
        """Return every run of digits found in ``text`` as a list of strings.

        An empty list means that the text contains no digits.
        """
        return re.findall(r"\d+", text)

    def checkDate(self, year, month, day):
        """Return the date formatted as 'YYYY/MM/DD', or '' when it is invalid.

        year, month, day: integers or strings accepted by int().
        """
        try:
            date = datetime.date(int(year), int(month), int(day))
            string = date.strftime('%Y/%m/%d')
        except (ValueError, OverflowError):
            # ValueError: impossible date or non-numeric text.
            # OverflowError: a number too large for datetime.date, which a
            # long run of misrecognised digits can produce.
            string = ''
            if self.logf:
                print('Invalid value error !!')
        return string

    def get_str2date(self, text):
        """Interpret ``text`` as a year/month/day string.

        The text must contain exactly three numbers; they are validated with
        checkDate(). Returns 'YYYY/MM/DD' on success and '' otherwise.
        """
        result = self.get_str2num(text)
        if len(result) != 3:
            if self.logf:
                print('Invalid string error !!')
            return ''
        return self.checkDate(result[0], result[1], result[2])

    def get_str2mony(self, text):
        """Extract an amount of money from ``text`` as a digit string.

        Commas and periods are removed first; the remaining text must contain
        exactly one number. Returns '' otherwise.
        """
        result = self.get_str2num(re.sub('[,.]', '', text))
        # An empty input simply yields no numbers, so no separate check is needed.
        return result[0] if len(result) == 1 else ''

    def remove_space(self, text):
        """Return ``text`` without full-width spaces, ASCII spaces and tabs."""
        return re.sub(r"[\u3000 \t]", "", text)


##---------------
def main():
    """Run every TextConvert method on sample inputs and print the results."""
    my_text_convert = TextConvert(True)

    # Date strings
    s = '2022年1月17日'
    print(' get_str2num():', s, '→ ', my_text_convert.get_str2num(s))
    s = '年月日'
    print(' get_str2num():', s, '→ ', my_text_convert.get_str2num(s))
    year = 2022
    month = 2
    day = 31
    print(' checkDate():', year, month,day, '→ ',my_text_convert.checkDate(year, month,day))
    month = 1
    print(' checkDate():', year, month,day, '→ ',my_text_convert.checkDate(year, month,day))
    s = '2022年1月17日'
    print(' get_str2date():', s, '→ ', my_text_convert.get_str2date(s))
    s = '2022年2月31日'
    print(' get_str2date():', s, '→ ', my_text_convert.get_str2date(s))
    s = '2月31日'
    print(' get_str2date():', s, '→ ', my_text_convert.get_str2date(s))
    s = '20,22年2月31日'
    print(' get_str2date():', s, '→ ', my_text_convert.get_str2date(s))
    #----------------

    # Amount strings. The backslash is escaped explicitly because '\8' is an
    # invalid escape sequence; the resulting string is unchanged.
    s = ' \\8,500'
    print(' get_str2mony():', s, '→ ', my_text_convert.get_str2mony(s))
    s = 'abc月日'
    print(' get_str2mony():', s, '→ ', my_text_convert.get_str2mony(s))
    #----------------

    # Blank removal
    s = '請 求 書'
    print(' remove_space():', s, '→ ', my_text_convert.remove_space(s))


if __name__ == "__main__":
    main()

↑

CSV ファイルを扱う †

　CSV ファイル作成方法を考える。

文字コードの問題
encoding="utf_8_sig"とすることで BOM(Byte Order Mark) 付きのUTF-8でCSVファイルを書き出すことが出来る。
「BOM」をつければ、UTF-8のままExcelで開いても文字化けしない CSV を作ることが出来る。

CSV ファイル書き込み

import csv

row_data = ['INDEX', '姓', '名']
# newline='' lets the csv writer control the line endings itself (as the csv
# module documentation requires), so the file is identical on every platform.
with open('sample.csv', 'w', encoding = 'utf_8_sig', newline = '') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(row_data)

CSV ファイル追加書き込み

import csv

row_data = [1, '井筒', '政弘']
# 'a' appends to the existing file; newline='' lets the csv writer control
# the line endings itself (as the csv module documentation requires).
with open('sample.csv', 'a', encoding = 'utf_8_sig', newline = '') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(row_data)

CSV ファイル読み出し

# Read the whole file as one string; 'utf_8_sig' strips the leading BOM.
with open('sample.csv', mode = 'r', encoding = 'utf_8_sig') as csv_file:
    string = csv_file.read()

print(type(string), string, sep = '\n')

CSV ファイルチェック (key = value があるかを調べる)

# Excerpt from CSVtreatment.check_csv(): s0 is the first row whose `key`
# column equals `value`, or None when there is no such row.
with open(self.path, 'r', encoding = self.encoding) as f:
    s = list(csv.DictReader(f))
    s0 = next((row for row in s if row[key] == value), None)

サンプルプログラムの実行

(py37) $ cd ~/workspace_py37/tryocr
(py37) $ python3 mylib_csv.py 
<class 'str'>
INDEX,姓,名
1,井筒,政弘
2,新田,孝

ソースコード

▼「mylib_csv.py」

# -*- coding: utf-8 -*-
##------------------------------------------
##   My Library CSV file treatment
##
##               2022.01.18 Masahiro Izutsu
##------------------------------------------
## mylib_csv.py

import csv

class CSVtreatment:
    """Write, append, read and search a single CSV file.

    Errors named in each method are swallowed; they are printed only when
    the log flag is set.
    """

    # Class-level defaults; __init__ overrides them per instance.
    logf = False
    encoding = ''
    path = ''

    def __init__(self, path, encoding = 'utf_8_sig', flg = False):
        """Remember the target file.

        path:     path of the CSV file
        encoding: text encoding used for every open() call
        flg:      when true, caught errors are printed
        """
        self.path = path
        self.encoding = encoding
        self.logf = flg

    def _log(self, err):
        """Print ``err`` when logging is enabled."""
        if self.logf:
            print(err)

    def _write_row(self, mode, row_data):
        """Open the file in ``mode`` ('w' or 'a') and write one row."""
        try:
            # newline='' lets the csv writer control the line endings itself,
            # as the csv module documentation requires.
            with open(self.path, mode, encoding = self.encoding, newline = '') as f:
                csv.writer(f, lineterminator='\n').writerow(row_data)
        except FileNotFoundError as e:
            self._log(e)
        except csv.Error as e:
            self._log(e)

    def write_csv(self, row_data):
        """Create (or truncate) the file and write ``row_data`` as its only row."""
        self._write_row('w', row_data)

    def append_csv(self, row_data):
        """Append ``row_data`` as a new row at the end of the file."""
        self._write_row('a', row_data)

    def read_csv(self):
        """Return the whole file as one string, or '' when it does not exist."""
        try:
            with open(self.path, 'r', encoding = self.encoding) as f:
                string = f.read()
        except FileNotFoundError as e:
            string = ''
            self._log(e)
        except csv.Error as e:
            string = ''
            self._log(e)

        return string

    def check_csv(self, key, value):
        """Return True when some row has ``value`` in the column named ``key``.

        The first line of the file is used as the header. A missing file or
        a header without a ``key`` column yields False.
        """
        s0 = None
        try:
            with open(self.path, 'r', encoding = self.encoding, newline = '') as f:
                reader = csv.DictReader(f)
                # `key in row` guards against a header that lacks the column,
                # which would otherwise raise KeyError.
                s0 = next((row for row in reader
                           if key in row and row[key] == value), None)
                if self.logf:
                    print('[mylib_csv:] ', s0)
        except FileNotFoundError as e:
            s0 = None
            self._log(e)
        except csv.Error as e:
            s0 = None
            self._log(e)

        return s0 is not None


##---------------
def main():
    """Demonstrate CSVtreatment: build a sample file, then inspect 'tryocr.csv'."""
    encoding = 'utf_8_sig'

    # Create the sample file: a header row followed by two data rows.
    sample = CSVtreatment('sample_csv_utf8sig.csv', encoding, True)
    sample.write_csv(['INDEX', '姓', '名'])
    for row_data in ([1, '井筒', '政弘'], [2, '新田', '孝']):
        sample.append_csv(row_data)

    contents = sample.read_csv()
    print(type(contents))
    print(contents)

    # Dump the OCR result file and look up one recorded file name.
    ocr_result = CSVtreatment('tryocr.csv', encoding, True)
    contents = ocr_result.read_csv()
    print(type(contents))
    print(contents)

    print (ocr_result.check_csv('ファイル名', 'sample0.png'))


if __name__ == "__main__":
    main()

↑

Step 6 OCR から得られた各項目の妥当性の検査と CSV 出力 †

Step 5 のプログラムを拡張して OCR 結果のテキストを項目ごとにチェックし CSV ファイルとして出力する。
参照する伝票フォームは Step 4 のプログラムで作成したものを使う。フォームの登録は複数可能だが、本版では最初に登録されたフォームを使用する。
複数フォームによる検索対応は今後のステップアップのテーマとする。
登録フォームの設定に従って、入力伝票ファイルを自動的に OCR 処理し、最終的に CSV ファイルに出力する。
同一ファイルを入力した場合(ファイル名が同じ)は同様に処理されるが、CSV ファイルには書き込まない。(重複書き込みなし)
既に CSV 出力したファイルが存在する場合は追記される。

実行時に利用できるコマンドオプション

コマンドオプション	デフォールト設定	意味
-h, --help		ヘルプ表示
-i, --image	images/sample1.png	入力画像ファイル
-l, --language	jpn	言語
--layout	6	tesseractレイアウト(0-13)
-t, --title	n	タイトル表示 (y/n)
--log	y	ログ出力フラグ (y/n)
--mlt	y	連続編集フラグ (y/n)
-o, --out	tryocr.csv	CSV 出力ファイル

(py37) $ python3 tryocr_step6.py -h

--- TryOCR Test Program Step-6  Ver 0.02 ---
 OpenCV version 4.5.3

usage: tryocr_step6.py [-h] [-i IMAGE_FILE] [-l LANGUAGE] [--layout LAYOUT]
                       [-t TITLE] [--log LOG] [--mlt MULTI] [-o CSV_OUT]

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE_FILE, --image IMAGE_FILE
                        Absolute path to image file. Default value is
                        'images/sample1.png'
  -l LANGUAGE, --language LANGUAGE
                        Language. Default value is 'jpn'
  --layout LAYOUT       Tesseract layout Default value is 6
  -t TITLE, --title TITLE
                        Program title flag.(y/n) Default value is 'n'
  --log LOG             Log flag.(y/n) Default value is 'y'
  --mlt MULTI           Multi flag.(y/n) Default value is 'y'
  -o CSV_OUT, --out CSV_OUT
                        CSV file path. Default value is tryocr.csv

実行結果

(py37) $ cd ~/workspace_py37/tryocr
(py37) $ python3 tryocr_step6.py


--- TryOCR Test Program Step-6  Ver 0.02 ---
 OpenCV version 4.5.3

TryOCR Test Program Step-6  Ver 0.02: Starting application...
   - Image File   :  images/sample1.png
   - Language     :  jpn
   - Layout       :  6
   - Program Title:  n
   - Log flag     :  y
   - Multi flag   :  y
   - Processed out:  tryocr.csv

 file: <images/sample1.png>
 Screen size: width x height = 2560 x 1440 (pixels)

 original h x w :  3508 x  2479
 display  h x w :  1276 x   902
 scale    h x w : 0.364 x 0.364
 tp-size  h x w :  1754 x  1239
 tp-scale h x w : 0.500 x 0.500
 -----------
QWindowsWindow::setGeometry: Unable to set geometry 98x69+81+31 (frame: 114x108+73+0) on QWidgetWindow/"TryOCR Test Program Step-6  Ver 0.02Window" on "\\.\DISPLAY1". Resulting geometry: 120x69+81+31 (frame: 136x108+73+0) margins: 8, 31, 8, 8 minimum size: 98x69 maximum size: 98x69 MINMAXINFO maxSize=0,0 maxpos=0,0 mintrack=114,108 maxtrack=114,108)
  preprocess: [0, 0, 0]  area: (973, 214) - (1512, 352)
  請 求 書
  preprocess: [0, 0, 0]  area: (1836, 506) - (2410, 575)
  2021年10月11日
  preprocess: [0, 0, 0]  area: (1410, 723) - (2369, 819)
  エムズファクトリー株式会社
  preprocess: [0, 0, 0]  area: (344, 646) - (1336, 720)
  ノートPC用メモリー
  preprocess: [0, 0, 0]  area: (2037, 2488) - (2380, 2538)
  \/.800
  preprocess: [0, 0, 0]  area: (2037, 2639) - (2380, 2694)
  \8,580
Already recorded !!
ファイル名,請求書,日  付,会社名,案件名,税抜金額,税込金額
sample1.png,請求書,2021/10/11,エムズファクトリー株式会社,ノートPC用メモリー,7800,8580
test_sample_001.png,請求書,2021/10/15,エムズファクトリー株式会社,DELL ノートパンソンコン,140000,154000
test_sample_002.png,請求書,2021/10/15,エムズファクトリー株式会社,DELL ノートパソコン 7520,230000,253000
sample0.png,請求書,2021/10/11,エムズファクトリー株式会社,ノートPC用メモリー,7800,8580

 -----------


 Finished.

ソースコード

▼「tryocr_step6.py」

# -*- coding: utf-8 -*-
##------------------------------------------
## TryOCR Test Program Step-6  Ver 0.02
##   with tesseract & PyOCR & cvui
##          platform: linux / windows
##
##               2022.01.15 Masahiro Izutsu
##------------------------------------------
## tryocr_step6.py
##  2022.01.17  ver 0.01    エラー処理追加
##  2022.01.19  ver 0.02    CSV出力

## 配置情報ファイルを使って処理する
## OCR 結果の項目を検査する

# Color Escape Code
# Escape sequences written to the console before/after coloured messages;
# NOCOLOR is printed afterwards to end the coloured span.
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# Constant definitions
# NOTE(review): the three colour tuples are not referenced in the visible
# part of this file; presumably they are drawing colours - confirm before use.
LINE_WORD_BOX_COLOR = (0, 0, 240)
WORD_BOX_COLOR = (255, 0, 0)
CONTENTS_COLOR = (0, 128, 0)
from os.path import expanduser
# Defaults of the -i/--image and -o/--out command-line options.
DEF_INPUT_FILE = expanduser('images/sample1.png')
DEF_CSV_PATH = expanduser('tryocr.csv')
# YAML files loaded by image_ocr_process(): the configuration file
# ('KeyTable', 'KeyDisp' and 'Template' sections) and the template file that
# holds the area layout of each registered form.
CONFIG_FILE = expanduser('tryocr.yaml')
MAPING_FILE = expanduser('tryocr_templ.yaml')

# import処理
from PIL import Image
import sys

import pyocr
import pyocr.builders
import cv2
import cvui
import argparse
import myfunction
import numpy as np
import mylib_gui
import mylib_frame
import mylib_preprocess
import mylib_screen
import platform
from tkinter import filedialog
import mylib_yaml
import mylib_text
import mylib_csv
import copy
import os

# タイトル・バージョン情報
# Program title (also used as the window name) and start-up banner.
title = 'TryOCR Test Program Step-6  Ver 0.02'
print(GREEN)
print(f'--- {title} ---')
print(f' OpenCV version {cv2.__version__} ')
print(NOCOLOR)

# Parses arguments for the application
def parse_args():
    """Build the command-line parser of the application.

    Returns the argparse.ArgumentParser itself (not the parsed arguments);
    the caller runs ``parse_args().parse_args()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = DEF_INPUT_FILE,
            help = 'Absolute path to image file. Default value is \'' + DEF_INPUT_FILE + '\'')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE',
            default = 'jpn',
            help = 'Language. Default value is \'jpn\'')
    # type=int makes argparse reject a non-numeric layout with a usage error
    # instead of letting it fail later in int() with a traceback.
    parser.add_argument('--layout', metavar = 'LAYOUT', type = int,
            default = 6,
            help = 'Tesseract layout Default value is 6')
    parser.add_argument('-t', '--title', metavar = 'TITLE',
            default = 'n',
            help = 'Program title flag.(y/n) Default value is \'n\'')
    parser.add_argument('--log', metavar = 'LOG',
            default = 'y',
            help = 'Log flag.(y/n) Default value is \'y\'')
    parser.add_argument('--mlt', metavar = 'MULTI',
            default = 'y',
            help = 'Multi flag.(y/n) Default value is \'y\'')
    parser.add_argument('-o', '--out', metavar = 'CSV_OUT',
            default = DEF_CSV_PATH,
            help = 'CSV file path. Default value is '+DEF_CSV_PATH)
    return parser

# モデル基本情報の表示
def display_info(image, lang, layout, titleflg, logflg, mltflg, outpath):
    """Print the start-up message followed by one line per run setting."""
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    settings = (
        ('Image File   : ', image),
        ('Language     : ', lang),
        ('Layout       : ', layout),
        ('Program Title: ', titleflg),
        ('Log flag     : ', logflg),
        ('Multi flag   : ', mltflg),
        ('Processed out: ', outpath),
    )
    for label, value in settings:
        print('   - ' + YELLOW + label + NOCOLOR, value)

# 基本情報を読む
def read_base_info1(myyaml, section):
    """Return the '.FileName' entry of ``section`` from the YAML accessor."""
    return myyaml.load_section_key(section, '.FileName')

def read_base_info2(myyaml, section):
    """Return the '.BasicCoordinates' entry of ``section`` from the YAML accessor."""
    return myyaml.load_section_key(section, '.BasicCoordinates')

# 配置情報を読む
def read_area_info(myyaml, section, sub_sec) :
    """Return the (Location, PreProcess, Text) entries of one area of a template."""
    return tuple(
        myyaml.load_section_sub_key(section, sub_sec, field)
        for field in ('Location', 'PreProcess', 'Text')
    )

# 登録セクション名(ファイル名) を読む out: キー・リスト
def read_section_name(myyaml):
    """Return the contents of the 'Template' section (the registered template keys)."""
    return myyaml.load_section('Template')

# 帳票 OCR・メイン処理
def image_ocr_process(lena_frame_org, filename, outpath, lang='jpn', layout=6, titleflg=False, logflag=False, mltflg=True):
    """OCR one form image with the registered template, display it and record the result as CSV.

    The areas listed for the first registered template are cut out of the
    image, pre-processed and passed to the OCR tool. When the recognised
    items pass form_check() they are converted and appended to ``outpath``,
    unless a row with the same file name is already recorded. The window
    stays open until the user presses a button or key, or closes it.

    Args:
        lena_frame_org: source image array (main() passes the cv2.imread() result).
        filename: path of the input image; its base name identifies the CSV row.
        outpath: path of the CSV output file.
        lang: language passed to the OCR tool.
        layout: tesseract layout value passed to the text builder.
        titleflg: the title is drawn on the image only when this equals 'y'.
        logflag: when true, progress information is printed.
        mltflg: when true, a "Next" button is shown.

    Returns:
        The key code that ended the loop: ord('n') for "Next", ord('q') for
        "Quit", otherwise the last value returned by cv2.waitKey().
    """
    input_name = os.path.basename(filename)
    output_name = os.path.basename(outpath)
    WINDOW_NAME = title

    preprocess_mode = [0, 0, 0]
    wlock1 = 0

    # Load the configuration and the template (area layout) files
    conf_file = CONFIG_FILE
    map_file = MAPING_FILE
    myyaml_cnf = mylib_yaml.YamlProcess(conf_file, True)
    myyaml_map = mylib_yaml.YamlProcess(map_file, True)
    key_table = myyaml_cnf.load_section('KeyTable')
    key_disp = myyaml_cnf.load_section('KeyDisp')

    status_h = 40                               # height of the status line

    # Japanese font used for the text drawn on the image
    if platform.system()=='Windows':
        fontPIL = 'meiryo.ttc'                  # Meiryo
    else:
        fontPIL = 'NotoSansCJK-Bold.ttc'        # Gothic

    # Get the display resolution (on Ubuntu: height - 64)
    monitor_height, monitor_width = mylib_screen.get_display_size(logflag)
    maxsize = monitor_height - 64 - 100

    # Image pre-processing
    imgpros = mylib_preprocess.ImagePreprocess(False)    # initialise

    # OCR
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print(RED + "\nOCR tool Not found." + NOCOLOR)
        # sys.exit() instead of quit(): quit() is only injected by the site
        # module, and the failure should be reported with a non-zero status.
        sys.exit(1)
    tool = tools[0]

    # mylib_frame library
    imgfr = mylib_frame.ImageFrame(lena_frame_org)       # initialise
    imgfr.set_screen_size(monitor_width, monitor_height)

    lena_frame = imgfr.frame_resize(maxsize)
    lena_frame_h, lena_frame_w = lena_frame.shape[:2]

    # Reserve the status area (status_h pixels at the bottom of the window)
    frame = np.zeros((lena_frame_h + status_h, lena_frame_w, 3), np.uint8)
    frame[:,:,:] = 200

    btn_x = 90
    btn_y = lena_frame_h + 4
    btn_w = 70

    roi = cvui.Rect(0, 0, 0, 0)

    # State of the processing loop:
    #   outf     - OCR has not produced an accepted result yet
    #   csvf     - OCR result accepted, CSV output pending
    #   csv_outf - processing finished, out_mode holds the outcome
    outf = True
    csvf = False
    csv_outf = False
    out_mode = 0
    out_count = 0

    org_h, org_w = imgfr.get_original_size()
    scale_h, scale_w = imgfr.get_scale()

    # Template related settings
    template_tbl = read_section_name(myyaml_cnf)
    section_name = template_tbl[0]              # !!! this version uses the first entry only
    base_cord = read_base_info2(myyaml_map, section_name)
    base_scale_w = base_cord[0] / org_w
    base_scale_h = base_cord[1] / org_h
    table_n = len(key_table)

    if logflag:
        print('\n original h x w : {:=5} x {:=5}'.format(org_h, org_w))
        print(' display  h x w : {:=5} x {:=5}'.format(lena_frame_h, lena_frame_w))
        print(' scale    h x w : {:.3f} x {:.3f}'.format(scale_h, scale_w))
        print(' tp-size  h x w : {:=5} x {:=5}'.format(base_cord[1], base_cord[0]))
        print(' tp-scale h x w : {:.3f} x {:.3f}'.format(base_scale_h, base_scale_w))
        print(' -----------')

    # Init cvui and tell it to create a OpenCV window, i.e. cv.namedWindow(WINDOW_NAME).
    cv2.namedWindow(WINDOW_NAME, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL)
    cvui.init(WINDOW_NAME)

    # Per-item storage: template text, raw OCR text, converted text, display rectangle
    text_tmpl = [''] * table_n
    text_save = [''] * table_n
    text_cnvt = [''] * table_n
    rect_save = [0] * table_n

    # Message window: a filled box in the middle of the image with two text lines
    def msg_window(out_text1, out_text2, color = (240,240, 0)):
        msg_win_h = 100
        msg_win_w = 400
        msg_win_xof = 0 if lena_frame_w < msg_win_w else int((lena_frame_w - msg_win_w) / 2)
        msg_win_yof = 0 if lena_frame_h < msg_win_h else int((lena_frame_h - msg_win_h) / 2)
        msg_bg_color = (49, 52, 49)

        cv2.rectangle(frame, (msg_win_xof, msg_win_yof), (msg_win_xof + msg_win_w, msg_win_yof + msg_win_h), msg_bg_color, -1)
        myfunction.cv2_putText(img = frame, text = out_text1,
               org = (msg_win_xof + 40, msg_win_yof + 20),
               fontFace = fontPIL, fontScale = 12, color = (255,255,255), mode = 1)
        myfunction.cv2_putText(img = frame, text = out_text2,
               org = (msg_win_xof + 160, msg_win_yof + 60),
               fontFace = fontPIL, fontScale = 12, color = color, mode = 1)
    # --
    # Convert the recognised texts item by item
    def text_convert(data):
        my_text_convert = mylib_text.TextConvert(True)
        s = [''] * table_n
        s[0] = my_text_convert.remove_space(data[0])    # document title (請求書)
        s[1] = my_text_convert.get_str2date(data[1])    # date
        s[2] = data[2]                                  # company name
        s[3] = data[3]                                  # subject
        s[4] = my_text_convert.get_str2mony(data[4])    # amount excluding tax
        s[5] = my_text_convert.get_str2mony(data[5])    # amount including tax
        return s
    # --
    # CSV output: returns True when a new row was appended, False when the
    # file name was already recorded
    def csv_output(row_data):
        encoding = 'utf_8_sig'
        key = 'ファイル名'
        my_csv_treatment = mylib_csv.CSVtreatment(outpath, encoding, False)
        s = my_csv_treatment.read_csv()
        if len(s) == 0:
            # Missing or empty file: start it with the header row
            data = copy.copy(key_disp)
            data.insert(0, key)
            my_csv_treatment.write_csv(data)

        isSaved = my_csv_treatment.check_csv(key, input_name)
        if not isSaved:
            data = copy.copy(row_data)
            data.insert(0, input_name)
            my_csv_treatment.append_csv(data)

        if logflag:
            # Report "already recorded" only when the row really existed;
            # previously it was printed after a fresh append as well.
            if isSaved:
                print('Already recorded !!')
            print(my_csv_treatment.read_csv())

        return not isSaved
    # --
    # OCR processing: fills text_tmpl, text_save and rect_save for every item
    def ocr_process():
        for n in range(table_n):
            # Read the area information of this item
            ocr_rect, pros, ocr_text = read_area_info(myyaml_map, section_name, key_table[n])
            if pros is None:
                pros = 0
            prepros = int(pros)
            # Split the pre-process code into its flags (values 4, 2 and 1)
            preprocess_mode[2], mod = divmod(prepros, 4)
            preprocess_mode[1], preprocess_mode[0] = divmod(mod, 2)

            if ocr_text is None:
                ocr_text = ''
            text_tmpl[n] = ocr_text

            if ocr_rect is None:
                roi.x = 0
                roi.y = 0
                roi.width = 0
                roi.height = 0
            else:
                # Template coordinates -> original coordinates -> display coordinates
                x0, y0 = imgfr.get_org2res_xy(round(ocr_rect[0] / base_scale_w), round(ocr_rect[1] / base_scale_h))
                x1, y1 = imgfr.get_org2res_xy(round(ocr_rect[2] / base_scale_w), round(ocr_rect[3] / base_scale_h))
                roi.x = x0
                roi.y = y0
                roi.width = x1 - x0
                roi.height = y1 - y0
            rect_save[n] = [roi.x, roi.y, roi.width, roi.height]

            # Map the display coordinates back to the original image and cut the area out
            if roi.area() > 50:
                x0, y0 = imgfr.get_res2org_xy(roi.x, roi.y)
                x1, y1 = imgfr.get_res2org_xy(roi.x + roi.width, roi.y + roi.height)
                lenaRoi = lena_frame_org[y0 : y1, x0 : x1]
                ocr_rect = [x0, y0, x1, y1]

                # Pre-processing
                if preprocess_mode[2] != 0:
                    lenaRoi = imgpros.image_processing_execution(lenaRoi, 4)
                if preprocess_mode[1] != 0:
                    lenaRoi = imgpros.image_processing_execution(lenaRoi, 2)
                if preprocess_mode[0] != 0:
                    lenaRoi = imgpros.image_processing_execution(lenaRoi, 1)

                # OCR the cut-out area
                # Convert it to a PIL image first
                lenaRoi1 = cv2.cvtColor(lenaRoi, cv2.COLOR_RGB2BGR)
                imgRoi = Image.fromarray(lenaRoi1)

                # txt is a Python string
                text = tool.image_to_string(imgRoi, lang=lang,
                            builder=pyocr.builders.TextBuilder(tesseract_layout=layout))

                # Save the text
                text_save[n] = str(text)
                if len(text) > 0:
                    msg = 'preprocess: {}  area: ({}, {}) - ({}, {})'.format(preprocess_mode, ocr_rect[0], ocr_rect[1], ocr_rect[2], ocr_rect[3])
                    if outf and logflag:
                        print(' ', msg)
                        print(' ', text)
    # --
    # Check whether the read texts match the template
    #   in: text lists form_tx (template), read_tx (OCR result)
    def form_check(form_tx, read_tx):
        my_text_convert = mylib_text.TextConvert(True)
        result = False
        s0 = my_text_convert.remove_space(form_tx[0])
        s1 = my_text_convert.remove_space(read_tx[0])
        if s0 == s1:                                   # title item matches
            s = my_text_convert.get_str2date(read_tx[1])
            if len(s) > 0:                             # date is valid
                s = my_text_convert.get_str2mony(read_tx[4])
                if len(s) > 1:                         # amount excl. tax has 2+ digits
                    s = my_text_convert.get_str2mony(read_tx[5])
                    if len(s) > 1:                     # amount incl. tax has 2+ digits
                        result = True
        return result
    # --

    # Processing loop
    while (True):
        # Put the image at the top of the window and clear the status area
        frame[0:lena_frame_h,:] = lena_frame
        frame[lena_frame_h:,:,:] = 200

        if outf:
            msg_color = (240,240, 0)
            out_text1 = 'ファイル名:  < {} >'.format(input_name)
            out_text2 = '処理中...'
            msg_window(out_text1, out_text2, msg_color)

        if csvf:
            msg_color = (240,240, 0)
            out_text1 = 'ファイル名:  < {} > → << {} >>'.format(input_name, output_name)
            out_text2 = 'CSV 出力中...'
            msg_window(out_text1, out_text2, msg_color)
            out_count = out_count + 1

        if csv_outf:
            out_text1 = 'ファイル名:  < {} > → << {} >>'.format(input_name, output_name)
            if out_mode == 1:
                msg_color = (240,240, 0)
                out_text2 = 'CSV 出力完了 !!'
            elif out_mode == 2:
                msg_color = (240,240, 0)
                out_text2 = 'CSV 出力済み !!'
            elif out_mode == 3:
                msg_color = (0,0, 240)
                out_text1 = 'ファイル名:  < {} >'.format(input_name)
                out_text2 = 'フォーム不一致 !'
            else:
                msg_color = (255,255,255)
                out_text2 = '   -------    '

            msg_window(out_text1, out_text2, msg_color)

        if not outf:
            for n in range(table_n):
                # Draw the area rectangle
                cvui.rect(frame, rect_save[n][0], rect_save[n][1], rect_save[n][2], rect_save[n][3], 0x008000)

                # Draw the raw OCR text and, above it, the converted text
                if len(text_save[n]) > 0:
                    myfunction.cv2_putText(img = frame,
                               text = text_save[n],
                               org = (rect_save[n][0], rect_save[n][1]),
                               fontFace = fontPIL,
                               fontScale = 12,
                               color = (0,160,0),
                               mode = 0)
                if len(text_cnvt[n]) > 0:
                    myfunction.cv2_putText(img = frame,
                               text = text_cnvt[n],
                               org = (rect_save[n][0], rect_save[n][1] - 14),
                               fontFace = fontPIL,
                               fontScale = 12,
                               color = (0,0,160),
                               mode = 0)

        # Status line - operation buttons
        if mltflg:
            if cvui.button(frame, btn_x + btn_w + 10, btn_y, "&Next"):
                key = ord('n')
                break

        if cvui.button(frame, btn_x + (btn_w + 10) * 4, btn_y, "&Quit"):
            key = ord('q')
            break

        # Draw the title
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # Show the window (it is moved to its position during the first 10 frames only)
        cvui.update()
        cv2.imshow(WINDOW_NAME, frame)
        if wlock1 < 10:
            cv2.moveWindow(WINDOW_NAME, 80, 0)
            wlock1 = wlock1 + 1
        else:
            wlock1 = 10

        # Key input
        key = cv2.waitKey(50)
        if key == 27 or key == 113:                     # 'esc' or 'q'
            break

        if not mylib_gui._is_visible(title):            # 'Close' button
            break

        # OCR with the registered template (runs after the first frame was shown)
        if outf and not csv_outf:
            ocr_process()
            ret = form_check(text_tmpl, text_save)
            if ret:
                text_cnvt = text_convert(text_save)
                outf = False
                csvf = True
                csv_outf = False
            else:
                out_mode = 3
                outf = True
                csvf = False
                csv_outf = True
            out_count = 0

        # CSV output (delayed until the "CSV 出力中..." message has been displayed)
        if csvf:
            if out_count > 1:
                ret = csv_output(text_cnvt)
                out_mode = 1 if ret else 2
                csvf = False
                csv_outf = True

    cv2.destroyAllWindows()
    if logflag:
        print(' -----------\n')

    return key

# ** main関数 **
def main():
    """Parse the command line, then OCR image files one after another.

    After each image a file dialog asks for the next one. The loop ends when
    image_ocr_process() returns 27 (esc), 113 ('q') or -1, or when the
    dialog is cancelled.
    """
    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    filename = ARGS.image
    lang = ARGS.language
    layout = int(ARGS.layout)
    titleflg = ARGS.title
    logflg = ARGS.log
    logflag = logflg == 'y'
    mltflg = ARGS.mlt
    mltflag = mltflg == 'y'
    outpath = ARGS.out

    # Show the settings
    display_info(filename, lang, layout, titleflg, logflg, mltflg, outpath)
    while True:
        if logflag:
            print('\n file: <{}>'.format(filename))

        # Read the image with OpenCV
        frame = cv2.imread(filename)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            # sys.exit() instead of quit(): quit() is only injected by the
            # site module, and the failure should yield a non-zero status.
            sys.exit(1)

        # Form OCR - main processing
        ret = image_ocr_process(frame, filename, outpath, lang, layout, titleflg, logflag, mltflag)
        if ret in (27, 113, -1):
            break

        # Select the next image file
        filename = filedialog.askopenfilename(
                title = "画像ファイルを開く",
                filetypes = [("Image file", ".bmp .png .jpg .tif"),
                             ("Bitmap", ".bmp"),
                             ("PNG", ".png"),
                             ("JPEG", ".jpg")],     # file filter
                initialdir = "./"                   # current directory
                )
        if len(filename) == 0:                      # dialog cancelled
            break

    if logflag:
        print('\n Finished.')

# Entry point of the script
if __name__ == "__main__":
    sys.exit(main())

→ 以降「OCR アプリケーションを作る 4」へ続く

↑

更新履歴 †

2022/01/16 初版
2022/01/18 CSV 追加
2022/01/19 Step 6 追加

↑

参考資料 †

文字列の検証

CSV ファイル

その他

最新の20件

OCR アプリケーション基礎編 3 †

OCR プログラムの開発過程 3 †

OCR 文字列を検証する †

CSV ファイルを扱う †

Step 6 OCR から得られた各項目の妥当性の検査と CSV 出力 †

更新履歴 †

参考資料 †