# -*- coding: utf-8 -*-
##------------------------------------------
## TryOCR Test Program Step-6 Ver 0.02
## with tesseract & PyOCR & cvui
## platform: linux / windows
##
## 2022.01.15 Masahiro Izutsu
##------------------------------------------
## tryocr_step6.py
## 2022.01.17 ver 0.01 エラー処理追加
## 2022.01.19 ver 0.02 CSV出力
## 配置情報ファイルを使って処理する
## OCR 結果の項目を検査する
# Color Escape Code
# ANSI SGR escape sequences for console output: "1;3x" selects bold plus a
# foreground colour, and "0" (NOCOLOR) resets all attributes.
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
# Three-component colour tuples; not referenced again in the visible source.
# NOTE(review): channel order is presumably BGR (OpenCV convention) - confirm.
LINE_WORD_BOX_COLOR = (0, 0, 240)
WORD_BOX_COLOR = (255, 0, 0)
CONTENTS_COLOR = (0, 128, 0)
from os.path import expanduser
# Default file locations. expanduser() only rewrites a leading '~', so these
# relative literals pass through unchanged and resolve against the current
# working directory.
DEF_INPUT_FILE = expanduser('images/sample1.png')
DEF_CSV_PATH = expanduser('tryocr.csv')
CONFIG_FILE = expanduser('tryocr.yaml')
MAPING_FILE = expanduser('tryocr_templ.yaml')
# import処理
from PIL import Image
import sys
import pyocr
import pyocr.builders
import cv2
import cvui
import argparse
import myfunction
import numpy as np
import mylib_gui
import mylib_frame
import mylib_preprocess
import mylib_screen
import platform
from tkinter import filedialog
import mylib_yaml
import mylib_text
import mylib_csv
import copy
import os
# Title / version information
# `title` is also read by display_info() and used as the OpenCV window name.
title = 'TryOCR Test Program Step-6 Ver 0.02'
# Banner printed at module top level (i.e. whenever this file is executed or
# imported), wrapped between the GREEN and NOCOLOR escape codes.
print(GREEN)
print('--- {} ---'.format(title))
print(' OpenCV version {} '.format(cv2.__version__))
print(NOCOLOR)
# Parses arguments for the application
def parse_args():
    """Build the command-line parser for the application.

    The parser is returned un-parsed; the caller invokes ``.parse_args()``
    on it.

    Options:
        -i / --image     input image path (default: DEF_INPUT_FILE)
        -l / --language  OCR language (default: 'jpn')
        --layout         Tesseract layout number, parsed as int (default: 6)
        -t / --title     'y'/'n' flag for drawing the program title (default: 'n')
        --log            'y'/'n' flag for console logging (default: 'y')
        --mlt            'y'/'n' flag for multi-file mode (default: 'y')
        -o / --out       CSV output path (default: DEF_CSV_PATH)

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = DEF_INPUT_FILE,
                        help = 'Absolute path to image file. Default value is \'' + DEF_INPUT_FILE + '\'')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE',
                        default = 'jpn',
                        help = 'Language. Default value is \'jpn\'')
    # type=int makes argparse reject a non-numeric layout with a normal usage
    # error instead of letting a later int() conversion raise ValueError.
    # Callers that still wrap the value in int() keep working unchanged.
    parser.add_argument('--layout', metavar = 'LAYOUT', type = int,
                        default = 6,
                        help = 'Tesseract layout Default value is 6')
    parser.add_argument('-t', '--title', metavar = 'TITLE',
                        default = 'n',
                        help = 'Program title flag.(y/n) Default value is \'n\'')
    parser.add_argument('--log', metavar = 'LOG',
                        default = 'y',
                        help = 'Log flag.(y/n) Default value is \'y\'')
    parser.add_argument('--mlt', metavar = 'MULTI',
                        default = 'y',
                        help = 'Multi flag.(y/n) Default value is \'y\'')
    parser.add_argument('-o', '--out', metavar = 'CSV_OUT',
                        default = DEF_CSV_PATH,
                        help = 'CSV file path. Default value is '+DEF_CSV_PATH)
    return parser
# Display the basic run information
def display_info(image, lang, layout, titleflg, logflg, mltflg, outpath):
    """Print the start-up banner followed by one line per run setting.

    Each setting is printed as a yellow label followed by its value; the
    values are printed as given, without any conversion.
    """
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    settings = (
        ('Image File : ', image),
        ('Language : ', lang),
        ('Layout : ', layout),
        ('Program Title: ', titleflg),
        ('Log flag : ', logflg),
        ('Multi flag : ', mltflg),
        ('Processed out: ', outpath),
    )
    for label, value in settings:
        print(' - ' + YELLOW + label + NOCOLOR, value)
# Read basic information
def read_base_info1(myyaml, section):
    """Return the value stored under the '.FileName' key of *section*.

    The lookup is delegated to ``myyaml.load_section_key``.
    """
    return myyaml.load_section_key(section, '.FileName')
def read_base_info2(myyaml, section):
    """Return the value stored under the '.BasicCoordinates' key of *section*.

    The lookup is delegated to ``myyaml.load_section_key``.
    """
    return myyaml.load_section_key(section, '.BasicCoordinates')
# Read layout (placement) information
def read_area_info(myyaml, section, sub_sec) :
    """Fetch the 'Location', 'PreProcess' and 'Text' entries of one sub-section.

    Each entry is read through ``myyaml.load_section_sub_key`` in that order.

    Returns:
        tuple: ``(location, preprocess, text)`` exactly as returned by the
        three lookups.
    """
    location, preprocess, text = (
        myyaml.load_section_sub_key(section, sub_sec, field)
        for field in ('Location', 'PreProcess', 'Text')
    )
    return location, preprocess, text
# Read the registered section names (file names). out: list of keys
def read_section_name(myyaml):
    """Return whatever ``myyaml.load_section('Template')`` yields."""
    return myyaml.load_section('Template')
# Form OCR - main processing
# Shows one image in a cvui/OpenCV window, OCRs the regions listed in the
# template YAML, checks the result against the template and appends a row to
# the CSV file, then keeps the window alive until a key/button/close event.
#   lena_frame_org : image array; it is wrapped by mylib_frame.ImageFrame and
#                    cropped as lena_frame_org[y0:y1, x0:x1]
#   filename       : input path; only its basename is used (display, CSV key)
#   outpath        : CSV path handed to mylib_csv.CSVtreatment
#   lang, layout   : passed to tool.image_to_string / TextBuilder(tesseract_layout=...)
#   titleflg       : compared against the string 'y' below (the default False never matches)
#   logflag        : enables the console prints
#   mltflg         : enables the "&Next" button
# Returns `key`: the last cv2.waitKey(50) value, or ord('n') / ord('q') when a
# button was clicked.
# NOTE(review): the indentation of this file has been lost, so the nesting of
# some statements (the log prints in csv_output, the "&Quit" button relative
# to `if mltflg`, the second cv2_putText in the draw loop) cannot be confirmed
# from this text - verify against the original source before restructuring.
def image_ocr_process(lena_frame_org, filename, outpath, lang='jpn', layout=6, titleflg=False, logflag=False, mltflg=True):
input_name = os.path.basename(filename)
output_name = os.path.basename(outpath)
WINDOW_NAME = title
# preprocess_mode holds three flags decoded from the 'PreProcess' value in ocr_process()
preprocess_mode = [0, 0, 0]
# set_mode is not referenced again within this function
set_mode = 0
# wlock1 counts the first frames during which the window is (re)positioned
wlock1 = 0
# Load layout (placement) information and settings
conf_file = CONFIG_FILE
map_file = MAPING_FILE
myyaml_cnf = mylib_yaml.YamlProcess(conf_file, True)
myyaml_map = mylib_yaml.YamlProcess(map_file, True)
key_table = myyaml_cnf.load_section('KeyTable')
key_disp = myyaml_cnf.load_section('KeyDisp')
# load_mode is not referenced again within this function
load_mode = [0] * len(key_table)
status_h = 40 # height of the status line
# Japanese font selection
if platform.system()=='Windows':
fontPIL = 'meiryo.ttc' # Meiryo
else:
fontPIL = 'NotoSansCJK-Bold.ttc' # Gothic typeface
# Get the display resolution (on Ubuntu: height - 64)
monitor_height, monitor_width = mylib_screen.get_display_size(logflag)
maxsize = monitor_height - 64 - 100
# Image preprocessing
imgpros = mylib_preprocess.ImagePreprocess(False) # initialize
# OCR
tools = pyocr.get_available_tools()
if len(tools) == 0:
print(RED + "\nOCR tool Not found." + NOCOLOR)
quit()
# The first available tool is used
tool = tools[0]
# mylib_frame library
imgfr = mylib_frame.ImageFrame(lena_frame_org) # initialize
imgfr.set_screen_size(monitor_width, monitor_height)
lena_frame = imgfr.frame_resize(maxsize)
lena_frame_h, lena_frame_w = lena_frame.shape[:2]
# Reserve the status area (status_h pixels at the bottom of the screen)
frame = np.zeros((lena_frame_h + status_h, lena_frame_w, 3), np.uint8)
frame[:,:,:] = 200
# Button geometry inside the status area
btn_x = 90
btn_y = lena_frame_h + 4
btn_w = 70
btn_h = 32
# popup_frame, anchor, frame_h and frame_w are not referenced again within this function
popup_frame = np.zeros((120, 500, 3), np.uint8)
anchor = cvui.Point()
# roi is reused as a scratch rectangle by ocr_process()
roi = cvui.Rect(0, 0, 0, 0)
frame_h, frame_w = frame.shape[:2]
# State flags of the processing loop:
#   outf     - OCR still pending ("processing" message is drawn)
#   csvf     - CSV output pending ("CSV output in progress" message is drawn)
#   csv_outf - final result message is drawn
#   out_mode - selects the final message (1: written, 2: already recorded, 3: form mismatch)
outf = True
csvf = False
csv_outf = False
out_mode = 0
out_count = 0
org_h, org_w = imgfr.get_original_size()
scale_h, scale_w = imgfr.get_scale()
# Template-related settings
template_tbl = read_section_name(myyaml_cnf)
section_name = template_tbl[0] # !!! this version uses the first entry
base_cord = read_base_info2(myyaml_map, section_name)
# Ratio between the template's base size and this image's original size
base_scale_w = base_cord[0] / org_w
base_scale_h = base_cord[1] / org_h
table_n = len(key_table)
if logflag:
print('\n original h x w : {:=5} x {:=5}'.format(org_h, org_w))
print(' display h x w : {:=5} x {:=5}'.format(lena_frame_h, lena_frame_w))
print(' scale h x w : {:.3f} x {:.3f}'.format(scale_h, scale_w))
print(' tp-size h x w : {:=5} x {:=5}'.format(base_cord[1], base_cord[0]))
print(' tp-scale h x w : {:.3f} x {:.3f}'.format(base_scale_h, base_scale_w))
print(' -----------')
# Init cvui and tell it to create a OpenCV window, i.e. cv.namedWindow(WINDOW_NAME).
cv2.namedWindow(WINDOW_NAME, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL)
cvui.init(WINDOW_NAME)
# Storage for per-item OCR data (one slot per KeyTable entry):
#   text_tmpl - expected text from the template, text_save - raw OCR text,
#   text_cnvt - converted text, rect_save - display rectangle [x, y, w, h]
text_tmpl = [''] * table_n
text_save = [''] * table_n
text_cnvt = [''] * table_n
rect_save = [0] * table_n
# csv_savef is not referenced again within this function
csv_savef = False
# Message window
# Draws a filled 400x100 box (centred when the image is large enough) onto
# `frame` with out_text1 in white and out_text2 in `color`.
def msg_window(out_text1, out_text2, color = (240,240, 0)):
msg_win_h = 100
msg_win_w = 400
msg_win_xof = 0 if lena_frame_w < msg_win_w else int((lena_frame_w - msg_win_w) / 2)
msg_win_yof = 0 if lena_frame_h < msg_win_h else int((lena_frame_h - msg_win_h) / 2)
# msg_bg_color is assigned but the rectangle below repeats the literal instead
msg_bg_color = (49, 52, 49)
cv2.rectangle(frame, (msg_win_xof, msg_win_yof), (msg_win_xof + msg_win_w, msg_win_yof + msg_win_h), (49, 52, 49), -1)
myfunction.cv2_putText(img = frame, text = out_text1,
org = (msg_win_xof + 40, msg_win_yof + 20),
fontFace = fontPIL, fontScale = 12, color = (255,255,255), mode = 1)
myfunction.cv2_putText(img = frame, text = out_text2,
org = (msg_win_xof + 160, msg_win_yof + 60),
fontFace = fontPIL, fontScale = 12, color = color, mode = 1)
# --
# Convert the recognized text
# Indices 0..5 are hard-coded, so this assumes KeyTable has at least six
# entries in the order given by the trailing comments - TODO confirm against
# the YAML configuration.
def text_convert(data):
my_text_convert = mylib_text.TextConvert(True)
s = [''] * table_n
s[0] = my_text_convert.remove_space(data[0]) # invoice
s[1] = my_text_convert.get_str2date(data[1]) # date
s[2] = data[2] # company name
s[3] = data[3] # project name
s[4] = my_text_convert.get_str2mony(data[4]) # amount excluding tax
s[5] = my_text_convert.get_str2mony(data[5]) # amount including tax
return s
# --
# CSV output
# Writes a header row when the CSV is empty, then appends row_data (prefixed
# with the input file name) unless check_csv() reports the file name as already
# present. Returns True when a row was appended (`not isSaved`).
def csv_output(row_data):
encoding = 'utf_8_sig'
key = 'ファイル名'
my_csv_treatment = mylib_csv.CSVtreatment(outpath, encoding, False)
s = my_csv_treatment.read_csv()
if len(s) == 0:
# Copy so that inserting the file-name column does not modify key_disp
data = copy.copy(key_disp)
data.insert(0, key)
my_csv_treatment.write_csv(data)
isSaved = my_csv_treatment.check_csv(key, input_name)
if not isSaved:
data = copy.copy(row_data)
data.insert(0, input_name)
my_csv_treatment.append_csv(data)
# NOTE(review): with indentation lost it is unclear whether the
# 'Already recorded !!' print is limited to the already-saved case - confirm.
if logflag:
print('Already recorded !!')
print(my_csv_treatment.read_csv())
return not isSaved
# --
# OCR processing
# For every KeyTable entry: read its template data, map the template rectangle
# to display coordinates, crop the original image, preprocess, run OCR and
# store the results in text_tmpl / rect_save / text_save.
def ocr_process():
for n in range(table_n):
# Read the layout information
ocr_rect, pros, ocr_text = read_area_info(myyaml_map, section_name, key_table[n])
if pros == None:
pros = 0
prepros = int(pros)
# Split prepros into quotient by 4, then quotient/remainder by 2:
# preprocess_mode = [prepros % 2, (prepros % 4) // 2, prepros // 4]
preprocess_mode[2], mod = divmod(prepros, 4)
preprocess_mode[1], preprocess_mode[0] = divmod(mod, 2)
if ocr_text == None:
ocr_text = ''
text_tmpl[n] = ocr_text
if ocr_rect == None:
roi.x = 0
roi.y = 0
roi.width = 0
roi.height = 0
else:
# original coordinates -> display coordinates
# (template coordinates are first divided by the template/original scale)
x0, y0 = imgfr.get_org2res_xy(round(ocr_rect[0] / base_scale_w), round(ocr_rect[1] / base_scale_h))
x1, y1 = imgfr.get_org2res_xy(round(ocr_rect[2] / base_scale_w), round(ocr_rect[3] / base_scale_h))
roi.x = x0
roi.y = y0
roi.width = x1 - x0
roi.height = y1 - y0
rect_save[n] = [roi.x, roi.y, roi.width, roi.height]
# Compute the position in the original image from the display coordinates and crop
# Regions whose area is 50 or less are not cropped
if roi.area() > 50:
x0, y0 = imgfr.get_res2org_xy(roi.x, roi.y)
x1, y1 = imgfr.get_res2org_xy(roi.x + roi.width, roi.y + roi.height)
lenaRoi = lena_frame_org[y0 : y1, x0 : x1]
ocr_rect = [x0, y0, x1, y1]
# Preprocessing
# Each set flag triggers image_processing_execution with code 4, 2 or 1, in that order
if preprocess_mode[2] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 4)
if preprocess_mode[1] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 2)
if preprocess_mode[0] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 1)
# OCR the cropped region
# Convert to a PIL image
lenaRoi1 = cv2.cvtColor(lenaRoi, cv2.COLOR_RGB2BGR)
imgRoi = Image.fromarray(lenaRoi1)
# txt is a Python string
text = tool.image_to_string(imgRoi, lang=lang,
builder=pyocr.builders.TextBuilder(tesseract_layout=layout))
# Save the text
text_save[n] = str(text)
if len(text) > 0:
msg = 'preprocess: {} area: ({}, {}) - ({}, {})'.format(preprocess_mode, ocr_rect[0], ocr_rect[1], ocr_rect[2], ocr_rect[3])
if outf and logflag:
print(' ', msg)
print(' ', text)
# --
# Check whether the template matches. in: text lists form_tx, read_tx
# Returns True only when item 0 matches the template (spaces removed), item 1
# converts to a non-empty date string and items 4 and 5 convert to money
# strings longer than one character.
def form_check(form_tx, read_tx):
my_text_convert = mylib_text.TextConvert(True)
result = False
s0 = my_text_convert.remove_space(form_tx[0])
s1 = my_text_convert.remove_space(read_tx[0])
if s0 == s1: # invoice item matches
s = my_text_convert.get_str2date(read_tx[1])
if len(s) > 0: # date is valid
s = my_text_convert.get_str2mony(read_tx[4])
if len(s) > 1: # tax-excluded amount has 2 or more digits
s = my_text_convert.get_str2mony(read_tx[5])
if len(s) > 1: # tax-included amount has 2 or more digits
result = True
return result
# --
# Processing loop
# One iteration per displayed frame (cv2.waitKey(50) paces the loop)
while (True):
# Place the image in the upper part of the screen
frame[0:lena_frame_h,:] = lena_frame
frame[lena_frame_h:,:,:] = 200
if outf:
msg_color = (240,240, 0)
out_text1 = 'ファイル名: < {} >'.format(input_name)
out_text2 = '処理中...'
msg_window(out_text1, out_text2, msg_color)
if csvf:
msg_color = (240,240, 0)
out_text1 = 'ファイル名: < {} > → << {} >>'.format(input_name, output_name)
out_text2 = 'CSV 出力中...'
msg_window(out_text1, out_text2, msg_color)
# out_count is checked below (`out_count > 1`) before the CSV is written
out_count = out_count + 1
if csv_outf:
out_text1 = 'ファイル名: < {} > → << {} >>'.format(input_name, output_name)
if out_mode == 1:
msg_color = (240,240, 0)
out_text2 = 'CSV 出力完了 !!'
elif out_mode == 2:
msg_color = (240,240, 0)
out_text2 = 'CSV 出力済み !!'
elif out_mode == 3:
msg_color = (0,0, 240)
out_text1 = 'ファイル名: < {} >'.format(input_name)
out_text2 = 'フォーム不一致 !'
else:
msg_color = (255,255,255)
out_text2 = ' ------- '
msg_window(out_text1, out_text2, msg_color)
if not outf:
for n in range(table_n):
# Draw the region
cvui.rect(frame, rect_save[n][0], rect_save[n][1], rect_save[n][2], rect_save[n][3], 0x008000)
# Draw the text
# Raw OCR text at the rectangle origin, converted text 14 pixels above it
if len(text_save[n]) > 0:
myfunction.cv2_putText(img = frame,
text = text_save[n],
org = (rect_save[n][0], rect_save[n][1]),
fontFace = fontPIL,
fontScale = 12,
color = (0,160,0),
mode = 0)
if len(text_cnvt[n]) > 0:
myfunction.cv2_putText(img = frame,
text = text_cnvt[n],
org = (rect_save[n][0], rect_save[n][1] - 14),
fontFace = fontPIL,
fontScale = 12,
color = (0,0,160),
mode = 0)
# Status line - action buttons
# A clicked button sets `key` itself and leaves the loop before waitKey runs
if mltflg:
if cvui.button(frame, btn_x + btn_w + 10, btn_y, "&Next"):
key = ord('n')
break
if cvui.button(frame, btn_x + (btn_w + 10) * 4, btn_y, "&Quit"):
key = ord('q')
break
# Draw the title
# titleflg is compared as a string; main() passes the raw '--title' value
if (titleflg == 'y'):
cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=(200, 200, 0), lineType=cv2.LINE_AA)
# Show the screen
cvui.update()
cv2.imshow(WINDOW_NAME, frame)
# The window is moved to (80, 0) only during the first 10 frames
if wlock1 < 10:
cv2.moveWindow(WINDOW_NAME, 80, 0)
wlock1 = wlock1 + 1
else:
wlock1 = 10
# Key input
key = cv2.waitKey(50)
if key == 27 or key == 113: # 'esc' or 'q'
break
if not mylib_gui._is_visible(title): # 'Close' button
break
# Process the registered section
# Runs after the "processing" message has been shown; on a template match
# the loop moves on to the CSV stage, otherwise the mismatch message
# (out_mode 3) is selected.
if outf and not csv_outf:
ocr_process()
ret = form_check(text_tmpl, text_save)
if ret:
text_cnvt = text_convert(text_save)
outf = False
csvf = True
csv_outf = False
else:
out_mode = 3
outf = True
csvf = False
csv_outf = True
out_count = 0
# CSV output processing
if csvf:
if out_count > 1:
ret = csv_output(text_cnvt)
# 1: row appended, 2: file name was already recorded
out_mode = 1 if ret else 2
csvf = False
csv_outf = True
cv2.destroyAllWindows()
if logflag:
print(' -----------\n')
return key
# ** main function **
def main():
    """Run the application: parse the options, then OCR images until the user quits.

    After each image the user is asked for the next file through a Tk file
    dialog. The loop ends when image_ocr_process() returns 27 (Esc),
    113 ('q') or -1, or when the dialog is cancelled.

    Raises:
        SystemExit: with status 1 when the selected image cannot be read.
    """
    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    filename = ARGS.image
    lang = ARGS.language
    layout = int(ARGS.layout)
    titleflg = ARGS.title
    logflg = ARGS.log
    logflag = logflg == 'y'
    mltflg = ARGS.mlt
    mltflag = mltflg == 'y'
    outpath = ARGS.out

    # Show the run settings (the raw 'y'/'n' strings are displayed)
    display_info(filename, lang, layout, titleflg, logflg, mltflg, outpath)

    while True:
        if logflag:
            print('\n file: <{}>'.format(filename))

        # Read the image with OpenCV; cv2.imread returns None on failure
        frame = cv2.imread(filename)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            # sys.exit instead of quit(): quit() is an interactive helper
            # installed by the site module and may be missing, and an
            # unreadable input should yield a non-zero exit status.
            sys.exit(1)

        # Form OCR - main processing
        ret = image_ocr_process(frame, filename, outpath, lang, layout, titleflg, logflag, mltflag)
        if ret in (27, 113, -1):
            break

        # Select the next image file
        filename = filedialog.askopenfilename(
            title = "画像ファイルを開く",
            filetypes = [("Image file", ".bmp .png .jpg .tif"),
                         ("Bitmap", ".bmp"),
                         ("PNG", ".png"),
                         ("JPEG", ".jpg")],  # file filter
            initialdir = "./"  # the current directory
        )
        # An empty result means the dialog was cancelled
        if len(filename) == 0:
            break

    if logflag:
        print('\n Finished.')
# Script entry point: run main() and pass its return value to sys.exit()
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)