# -*- coding: utf-8 -*-
##------------------------------------------
## FormOCR Program Ver 0.01
## with tesseract & PyOCR & cvui
## platform: linux / windows
##
## 2022.04.13 Masahiro Izutsu
##------------------------------------------
## formocr.py (tryocr_go.py ver 0.03)
# Program title; printed in the start-up banner and used as WINDOW_NAME.
title = 'FormOCR Program Ver 0.01'
# Color Escape Code (ANSI sequences used to color terminal messages)
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
LINE_WORD_BOX_COLOR = (0, 0, 240)
WORD_BOX_COLOR = (255, 0, 0)
CONTENTS_COLOR = (0, 128, 0)
from os.path import expanduser
# Default file names. They contain no '~', so expanduser() leaves them as
# plain relative names (resolved against the current working directory).
DEF_CSV_PATH = expanduser('formocr.csv')
CONFIG_FILE = expanduser('formocr.yaml')
MAPING_FILE = expanduser('formocr_templ.yaml')
# import処理
from PIL import Image
import sys
import pyocr
import pyocr.builders
import cv2
import cvui
import argparse
import myfunction
import numpy as np
import mylib_gui
import mylib_frame
import mylib_preprocess
import mylib_screen
import platform
from tkinter import filedialog
import mylib_yaml
import mylib_text
import mylib_csv
import copy
import os
import tkinter
from tkinter import messagebox
import datetime
import mylib_file
import mylib_tilt
import formocr_edit
# 入力パラメータの定義
def parse_args():
    """Print the start-up banner and build the command-line parser.

    Returns:
        argparse.ArgumentParser: parser with the language, layout, log,
        CSV output, multi-file and tilt-check options registered. The
        caller is expected to invoke ``.parse_args()`` on it.
    """
    # Title / version banner
    banner_lines = (
        GREEN,
        '--- {} ---'.format(title),
        ' OpenCV version {} '.format(cv2.__version__),
        NOCOLOR,
    )
    for banner_line in banner_lines:
        print(banner_line)

    # (flags, metavar, default, help) -- registered in this order so the
    # generated --help output keeps the same option sequence.
    option_table = (
        (('-l', '--language'), 'LANGUAGE', 'jpn',
         "Language. Default value is 'jpn'"),
        (('--layout',), 'LAYOUT', 6,
         'Tesseract layout Default value is 6'),
        (('--log',), 'LOG', 'n',
         "Log flag.(y/n) Default value is 'n'"),
        (('-o', '--out'), 'CSV_OUT', DEF_CSV_PATH,
         'CSV file path. Default value is ' + DEF_CSV_PATH),
        (('--mlt',), 'MULTI', 'y',
         "Multi flag.(y/n) Default value is 'y'"),
        (('--tilt',), 'TILT', 'n',
         "Tilt Check flag.(y/n) Default value is 'n'"),
    )

    parser = argparse.ArgumentParser()
    for flags, meta, fallback, description in option_table:
        parser.add_argument(*flags, metavar=meta, default=fallback,
                            help=description)
    return parser
# 入力パラメータの表示
def display_info(lang, layout, logflg, outpath, mltflg, tltflg):
    """Print the effective start-up parameters to the terminal.

    Each argument is echoed as given (the y/n flags are shown as their raw
    command-line strings), one per line, followed by an empty line.
    """
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    report = (
        ('Language : ', lang),
        ('Layout : ', layout),
        ('Log flag : ', logflg),
        ('Processed out: ', outpath),
        ('Multi flag : ', mltflg),
        ('Tilt chk flag: ', tltflg),
    )
    for caption, value in report:
        print(' - ' + YELLOW + caption + NOCOLOR, value)
    print('')
# 基本情報を読む
def read_base_info1(myyaml, section):
    """Return the value stored under the '.FileName' key of *section*.

    *myyaml* must provide ``load_section_key(section, key)``; its result is
    passed through unchanged.
    """
    return myyaml.load_section_key(section, '.FileName')
def read_base_info2(myyaml, section):
    """Return the value stored under the '.BasicCoordinates' key of *section*.

    *myyaml* must provide ``load_section_key(section, key)``; its result is
    passed through unchanged.
    """
    return myyaml.load_section_key(section, '.BasicCoordinates')
# 配置情報を読む
def read_area_info(myyaml, section, sub_sec) :
    """Read the layout entry *sub_sec* of template *section*.

    Returns:
        tuple: ``(Location, PreProcess, Text, Offset)`` exactly as returned
        by ``myyaml.load_section_sub_key`` for each of those keys, queried
        in that order.
    """
    wanted_keys = ('Location', 'PreProcess', 'Text', 'Offset')
    return tuple(
        myyaml.load_section_sub_key(section, sub_sec, item_key)
        for item_key in wanted_keys
    )
# 登録セクション名(ファイル名) を読む out: キー・リスト
def read_section_name(myyaml):
    """Return the content of the 'Template' section (the registered names).

    The value comes straight from ``myyaml.load_section('Template')``;
    callers in this file treat ``None`` as "nothing registered".
    """
    return myyaml.load_section('Template')
# OpenCV イメージファイル読む (日本語ファイル名対応)
def imread(filename):
    """Read an image with OpenCV, including non-ASCII paths on Windows.

    The previous Windows workaround renamed the file to 'tmp_name' and
    changed the working directory while reading. Any failure in between
    left the user's file renamed and the process in another directory.
    The file bytes are now read with NumPy and decoded in memory, so
    neither the file nor the working directory is touched.

    Args:
        filename: path of the image file.

    Returns:
        The decoded BGR image array, or None when the file cannot be read
        or decoded (same convention as cv2.imread).
    """
    if platform.system() != 'Windows':
        return cv2.imread(filename)

    try:
        raw_bytes = np.fromfile(filename, dtype=np.uint8)
    except OSError:
        # Missing or unreadable file: report it the way cv2.imread does.
        return None
    if raw_bytes.size == 0:
        # imdecode rejects an empty buffer; treat it as "unreadable".
        return None
    # IMREAD_COLOR is the default mode of cv2.imread.
    return cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
# 帳票 OCR・メイン処理
def image_ocr_process(filename, outpath, lang='jpn', layout=6, logflag=False, regflag = True):
    """Match one form image against the registered templates and record it.

    The image is read, every template listed in the 'Template' section of
    CONFIG_FILE is tried (most recently registered first) by OCR-ing the
    areas stored in MAPING_FILE, and on the first match the converted texts
    are appended to the CSV file *outpath*.

    Args:
        filename: path of the image to process.
        outpath: path of the CSV output file.
        lang: OCR language passed to the OCR tool.
        layout: tesseract layout value passed to the pyocr builders.
        logflag: print detailed progress information when True.
        regflag: False when no template is registered; in that case the
            user is asked whether to open the template editor.

    Returns:
        -1 when the image cannot be read; ord('n') after a match or after
        the file was handled as "no match"; otherwise the value returned by
        formocr_edit.image_annotation() when the user chose to edit.

    Side effects: assigns the module globals section_name, key_table,
    input_name and key_disp.

    NOTE(review): the source this was documented from had lost its
    indentation; the nesting below is a reconstruction -- confirm it
    against the original file.
    """
    global section_name, key_table, input_name, key_disp
    # -- Convert the recognized texts
    def text_convert(data):
        """Normalize the item texts in *data* with mylib_text.TextConvert."""
        my_text_convert = mylib_text.TextConvert(False)
        # Indexes 0..5 are written below, so table_n is assumed to be >= 6.
        s = [''] * table_n
        s[0] = my_text_convert.replace_key_strings(data[0]) # invoice keyword
        s[1] = my_text_convert.get_str2date(data[1]) # date
        s[2] = my_text_convert.replace_company_strings(data[2]) # company name
        s[3] = my_text_convert.replace_Title_strings(data[3]) # subject name
        s[4] = my_text_convert.get_str2mony(data[4]) # amount excluding tax
        s[5] = my_text_convert.get_str2mony(data[5]) # amount including tax
        return s
    # -- CSV output
    def csv_output(row_data):
        """Append *row_data* to the CSV unless input_name is already recorded.

        Returns True when a new row was appended, False when check_csv()
        reported that the file name is already present.
        """
        global section_name, key_table, input_name, key_disp
        encoding = 'utf_8_sig'
        key = 'ファイル名'
        form ='適用フォーム'
        my_csv_treatment = mylib_csv.CSVtreatment(outpath, encoding, False)
        s = my_csv_treatment.read_csv()
        if len(s) == 0:
            # Empty CSV: write the header row first
            # (file-name column, display keys, applied-form column).
            data = copy.copy(key_disp)
            data.insert(0, key)
            data.append(form)
            my_csv_treatment.write_csv(data)
        isSaved = my_csv_treatment.check_csv(key, input_name)
        if not isSaved:
            # Copy so the caller's list is not modified by insert().
            data = copy.copy(row_data)
            data.insert(0, input_name)
            my_csv_treatment.append_csv(data)
            if logflag:
                print(' Output Data: ', row_data)
        return not isSaved
    # -- OCR processing
    def ocr_process():
        """OCR every item area of template section_name and validate it.

        Returns True only when all items up to index 5 passed their checks
        and the tax-included amount is larger than the tax-excluded one.
        Fills text_tmpl / text_save of the enclosing scope as it goes.
        """
        global section_name, key_table, input_name, key_disp
        my_text_convert = mylib_text.TextConvert(False)
        sr = ' Check → '
        result = False
        # Position shift of this form relative to the template; computed
        # from item 0 and applied to the areas of the following items.
        form_ofsx = 0
        form_ofsy = 0
        for n in range(table_n):
            # Read the layout information of item n
            ocr_rect, pros, ocr_text, offset = read_area_info(myyaml_map, section_name, key_table[n])
            if offset == None:
                ofsx = 0
                ofsy = 0
            else:
                ofsx = offset[0]
                ofsy = offset[1]
            if pros == None:
                pros = 0
            # Decode the stored pre-processing value into per-step flags
            # (8 -> [3], 4 -> [2], 2 -> [1], 1 -> [0]).
            prepros = int(pros)
            preprocess_mode[3], mod = divmod(prepros, 8)
            preprocess_mode[2], mod = divmod(mod, 4)
            preprocess_mode[1], preprocess_mode[0] = divmod(mod, 2)
            if ocr_text == None:
                break
            text_tmpl[n] = ocr_text
            if ocr_rect == None:
                break
            else:
                # Offset correction
                # NOTE(review): this updates the list returned by the YAML
                # helper in place -- confirm the helper returns a fresh list.
                ocr_rect[0] = ocr_rect[0] + form_ofsx
                ocr_rect[1] = ocr_rect[1] + form_ofsy
                ocr_rect[2] = ocr_rect[2] + form_ofsx
                ocr_rect[3] = ocr_rect[3] + form_ofsy
                # Original coordinates -> display coordinates
                x0, y0 = imgfr.get_org2res_xy(round(ocr_rect[0] / base_scale_w), round(ocr_rect[1] / base_scale_h))
                x1, y1 = imgfr.get_org2res_xy(round(ocr_rect[2] / base_scale_w), round(ocr_rect[3] / base_scale_h))
                roi.x = x0
                roi.y = y0
                roi.width = x1 - x0
                roi.height = y1 - y0
            if roi.area() > 50:
                # (disabled) compute the source position from the display coordinates
                #x0, y0 = imgfr.get_res2org_xy(roi.x, roi.y)
                #x1, y1 = imgfr.get_res2org_xy(roi.x + roi.width, roi.y + roi.height)
                # Convert the template coordinates to the coordinate system
                # of the source image and cut the area out
                x0 = round(ocr_rect[0] / base_scale_w)
                y0 = round(ocr_rect[1] / base_scale_h)
                x1 = round(ocr_rect[2] / base_scale_w)
                y1 = round(ocr_rect[3] / base_scale_h)
                lenaRoi = lena_frame_org[y0 : y1, x0 : x1]
                ocr_rect = [x0, y0, x1, y1]
                text = ''
                if lenaRoi.shape[0] > 0 and lenaRoi.shape[1] > 0:
                    # Pre-processing (one step per flag that is set)
                    if preprocess_mode[3] != 0:
                        lenaRoi = imgpros.image_processing_execution(lenaRoi, 1)
                    if preprocess_mode[2] != 0:
                        lenaRoi = imgpros.image_processing_execution(lenaRoi, 4)
                    if preprocess_mode[1] != 0:
                        lenaRoi = imgpros.image_processing_execution(lenaRoi, 2)
                    if preprocess_mode[0] != 0:
                        lenaRoi = imgpros.image_processing_execution(lenaRoi, 5)
                    # OCR the cut-out area
                    # Convert to a PIL image (swap the R and B channels first)
                    lenaRoi1 = cv2.cvtColor(lenaRoi, cv2.COLOR_RGB2BGR)
                    imgRoi = Image.fromarray(lenaRoi1)
                    text, x0, y0, x1, y1 = image2string(imgRoi, n, lang, layout)
                # Save the text
                text_save[n] = str(text)
                if len(text) > 0:
                    if n == 0: # invoice keyword
                        # Compute the form position shift from where the
                        # text was found; skipped when no offset is stored.
                        form_ofsx = 0 if ofsx == 0 and ofsy == 0 else round(x0 * base_scale_w) - ofsx
                        form_ofsy = 0 if ofsx == 0 and ofsy == 0 else round(y0 * base_scale_h) - ofsy
                        tmpl = my_text_convert.replace_key_strings(text_tmpl[n])
                        text = my_text_convert.replace_key_strings(text_save[n])
                        if logflag:
                            print(YELLOW + ' * tmpl text:', text_tmpl[n], '→', tmpl)
                            print(' * ocr text:', text_save[n], '→', text, NOCOLOR)
                        # The keyword must equal the template's keyword.
                        if text == tmpl:
                            sr = sr + GREEN + ' {}:Yes'.format(n)
                        else:
                            sr = sr + YELLOW + ' {}:No'.format(n)
                            break
                    if n == 1: # date
                        text = my_text_convert.get_str2date(text_save[n])
                        if logflag:
                            print(YELLOW + ' * ocr text:', text_save[n], '→', text, NOCOLOR)
                        # A non-empty conversion result counts as a valid date.
                        if len(text) > 0:
                            sr = sr + GREEN + ' {}:Yes'.format(n)
                        else:
                            sr = sr + YELLOW + ' {}:No'.format(n)
                            break
                    if n == 2: # company name
                        text = my_text_convert.replace_company_strings(text_save[n])
                        if logflag:
                            print(YELLOW + ' * ocr text:', text_save[n], '→', text, NOCOLOR)
                        sr = sr + GREEN + ' {}:Yes'.format(n)
                    if n == 3: # subject name
                        text = my_text_convert.replace_Title_strings(text_save[n])
                        if logflag:
                            print(YELLOW + ' * ocr text:', text_save[n], '→', text, NOCOLOR)
                        sr = sr + GREEN + ' {}:Yes'.format(n)
                    if n == 4: # amount excluding tax
                        text = my_text_convert.get_str2mony(text_save[n])
                        if logflag:
                            print(YELLOW + ' * ocr text:', text_save[n], '→', text, NOCOLOR)
                        if len(text) > 1: # two or more characters
                            sr = sr + GREEN + ' {}:Yes'.format(n)
                        else:
                            sr = sr + YELLOW + ' {}:No'.format(n)
                            break
                    if n == 5: # amount including tax
                        text0 = my_text_convert.get_str2mony(text_save[n - 1])
                        text = my_text_convert.get_str2mony(text_save[n])
                        if logflag:
                            print(YELLOW + ' * ocr text:', text_save[n], '→', text, NOCOLOR)
                            # NOTE(review): the two labels below look swapped:
                            # 'total mony' prints item n - 1 (tax-excluded) and
                            # 'b4tax' prints item n (tax-included) -- confirm.
                            print(YELLOW + ' * total mony:', text_save[n - 1], '→', text0)
                            print(' * b4tax:', text_save[n], '→', text, NOCOLOR)
                        if len(text) > 1: # two or more characters
                            # NOTE(review): int() raises ValueError if the
                            # converter ever returns a non-numeric string.
                            mony0 = int(text0)
                            mony1 = int(text)
                            if mony1 > mony0: # the tax-included amount is larger
                                sr = sr + GREEN + ' {}:Yes all match'.format(n)
                                result = True
                            else:
                                sr = sr + YELLOW + ' {}:No*'.format(n)
                                break
                        else:
                            sr = sr + YELLOW + ' {}:No'.format(n)
                            break
                    if logflag:
                        if n == 0:
                            print(' text ofset : ', x0, y0)
                            print(' form value : ', ofsx, ofsy)
                            print(' base scale : {:.3f} x {:.3f}'.format(base_scale_w, base_scale_h))
                            print(' form Ofset : ', form_ofsx, form_ofsy)
                        msg = 'preprocess: {} area: ({}, {}) - ({}, {})'.format(preprocess_mode, ocr_rect[0], ocr_rect[1], ocr_rect[2], ocr_rect[3])
                        print(' ', msg)
                        print(' ', text)
                else:
                    # Nothing recognized for this item: the form does not match.
                    break
            else:
                # Area too small to OCR: the form does not match.
                break
            if result:
                break
        if logflag:
            print(sr + NOCOLOR)
        return result
    # -- OCR text from image data
    def image2string(img, mode, lang, layout):
        """Run the OCR tool on *img*.

        Returns (text, x0, y0, x1, y1). For mode 0 the text and position of
        the first detected line box are returned; for other modes only the
        text is produced and the coordinates stay 0.
        """
        text = ''
        x0 = 0
        y0 = 0
        x1 = 0
        y1 = 0
        if mode == 0:
            line_and_word_boxes = tool.image_to_string(img, lang=lang,
                builder=pyocr.builders.LineBoxBuilder(tesseract_layout=layout))
            for lw_box in line_and_word_boxes:
                text = lw_box.content # processed assuming a single line
                position = lw_box.position
                x0 = position[0][0]
                y0 = position[0][1]
                x1 = position[1][0]
                y1 = position[1][1]
                break
        else:
            lng = 'eng' if mode == 4 or mode == 5 else lang # amounts are read as English
            text = tool.image_to_string(img, lang=lng,
                builder=pyocr.builders.TextBuilder(tesseract_layout=layout))
        return text, x0, y0, x1, y1
    # -- Check whether a template matches. in: text lists form_tx, read_tx
    # NOTE(review): form_check is not called anywhere in this function.
    def form_check(form_tx, read_tx):
        """Return True when the converted texts satisfy all template checks."""
        cnv_form_tx = text_convert(form_tx)
        cnv_read_tx = text_convert(read_tx)
        result = False
        if cnv_form_tx[0] == cnv_read_tx[0]: # the invoice keyword matches
            if len(cnv_read_tx[1]) > 0: # the date is valid
                if len(cnv_read_tx[4]) > 1: # tax-excluded amount has 2+ characters
                    if len(cnv_read_tx[5]) > 1: # tax-included amount has 2+ characters
                        mony0 = int(cnv_read_tx[4])
                        mony1 = int(cnv_read_tx[5])
                        if mony1 > mony0: # the tax-included amount is larger
                            result = True
        return result
    # --
    if logflag:
        print('\n file: <{}>'.format(filename))
    # Read the image with OpenCV
    lena_frame_org = imread(filename)
    key = -1
    if lena_frame_org is None:
        print(RED + "\nUnable to read the input." + NOCOLOR)
        return key
    input_name = os.path.basename(filename)
    output_name = os.path.basename(outpath)
    WINDOW_NAME = title
    print(' << {} >> '.format(input_name))
    preprocess_mode = [0, 0, 0, 0]
    # NOTE(review): several locals assigned below (set_mode, wlock1,
    # load_mode, fontPIL, frame, btn_*, popup_frame, anchor, csvf, out_mode,
    # out_count, ...) are not read again in this function.
    set_mode = 0
    wlock1 = 0
    # Read the layout information / settings
    conf_file = CONFIG_FILE
    map_file = MAPING_FILE
    myyaml_cnf = mylib_yaml.YamlProcess(conf_file, False)
    myyaml_map = mylib_yaml.YamlProcess(map_file, False)
    key_table = myyaml_cnf.load_section('KeyTable')
    key_disp = myyaml_cnf.load_section('KeyDisp')
    load_mode = [0] * len(key_table)
    status_h = 40 # height of the status line
    # Japanese font selection
    if platform.system()=='Windows':
        fontPIL = 'meiryo.ttc' # Meiryo
    else:
        fontPIL = 'NotoSansCJK-Bold.ttc' # Gothic typeface
    # Get the display resolution (on Ubuntu: height - 64)
    monitor_height, monitor_width = mylib_screen.get_display_size(logflag)
    maxsize = monitor_height - 64 - 100
    # Image pre-processing
    imgpros = mylib_preprocess.ImagePreprocess(False) # initialization
    # OCR
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print(RED + "\nOCR tool Not found." + NOCOLOR)
        # NOTE(review): quit() comes from the site module; sys.exit() is the
        # usual choice in scripts -- consider replacing.
        quit()
    tool = tools[0]
    # mylib_frame library
    imgfr = mylib_frame.ImageFrame(lena_frame_org) # initialization
    imgfr.set_screen_size(monitor_width, monitor_height)
    lena_frame = imgfr.frame_resize(maxsize)
    lena_frame_h, lena_frame_w = lena_frame.shape[:2]
    # Reserve the status area (status_h pixels at the bottom of the screen)
    frame = np.zeros((lena_frame_h + status_h, lena_frame_w, 3), np.uint8)
    frame[:,:,:] = 200
    btn_x = 90
    btn_y = lena_frame_h + 4
    btn_w = 70
    btn_h = 32
    popup_frame = np.zeros((120, 500, 3), np.uint8)
    anchor = cvui.Point()
    # roi is updated by ocr_process() for every item area.
    roi = cvui.Rect(0, 0, 0, 0)
    frame_h, frame_w = frame.shape[:2]
    csvf = False
    out_mode = 0
    out_count = 0
    org_h, org_w = imgfr.get_original_size()
    scale_h, scale_w = imgfr.get_scale()
    # Search the registered templates
    template_tbl = read_section_name(myyaml_cnf)
    ret = False
    if template_tbl != None:
        if input_name in template_tbl: # if registered, move it to the end of the table
            template_tbl.remove(input_name)
            template_tbl.append(input_name)
        template_tbl.reverse() # search from the most recent registration
        for section_name in template_tbl:
            base_cord = read_base_info2(myyaml_map, section_name)
            if base_cord != None:
                # Ratio between the template's base size and this image.
                base_scale_w = base_cord[0] / org_w
                base_scale_h = base_cord[1] / org_h
                table_n = len(key_table)
                if logflag:
                    print('\n Check Form : < {} >'.format(section_name))
                    print(' original h x w : {:=5} x {:=5}'.format(org_h, org_w))
                    print(' display h x w : {:=5} x {:=5}'.format(lena_frame_h, lena_frame_w))
                    print(' scale h x w : {:.3f} x {:.3f}'.format(scale_h, scale_w))
                    print(' tp-size h x w : {:=5} x {:=5}'.format(base_cord[1], base_cord[0]))
                    print(' tp-scale h x w : {:.3f} x {:.3f}'.format(base_scale_h, base_scale_w))
                    print(' -----------')
                # Storage for the OCR item data (filled by ocr_process)
                text_tmpl = [''] * table_n
                text_save = [''] * table_n
                text_cnvt = [''] * table_n
                csv_savef = False
                ret = ocr_process()
                if logflag:
                    if ret:
                        print('\n Template Form: ', section_name)
                    else:
                        print(YELLOW + ' ** Form mismatch !! **' + NOCOLOR +' < {} >'.format(section_name))
                if ret:
                    break
        template_tbl.reverse() # restore the original order
    my_file = mylib_file.FileTreatment(False)
    key = ord('q')
    if ret: # a template matched
        # '*' marks the case where the file itself is the registered template.
        ss = '*' if input_name == section_name else ''
        print(GREEN + ' matching form: {} {}'.format(section_name, ss) + NOCOLOR)
        text_cnvt = text_convert(text_save)
        text_cnvt.append(section_name)
        if logflag:
            print(' Outout file: ', output_name)
        ret = csv_output(text_cnvt)
        if not ret:
            print(GREEN + ' ** Already recorded !! **' + NOCOLOR)
        # If the file is inside the -err folder, move it back
        if my_file.ckeck_erfolder(filename):
            my_file.move_file(filename)
        # Put the matched template back at the end of the table
        template_tbl.remove(section_name)
        template_tbl.append(section_name)
        myyaml_cnf.write_append('Template', template_tbl)
        key = ord('n')
    else: # no matching template
        print(RED + ' -- No matching form ! -- << {} >> '.format(input_name) + NOCOLOR)
        is_errfol = my_file.ckeck_erfolder(filename)
        ret = False
        if is_errfol or not regflag:
            # Ask whether to open the template editor for this file.
            msg = '< {} >\n\n登録されていません. 編集しますか?'.format(input_name)
            ret = messagebox.askyesno('確認', msg)
        if ret:
            key = formocr_edit.image_annotation(filename, lang, layout, logflag, False)
        else:
            # Move the file to the -err folder
            if not is_errfol:
                my_file.move_file(filename)
            key = ord('n')
    return key
# ** main関数 **
def main():
    """Run the FormOCR front end.

    Parses the command line, then either processes every image file of a
    directory chosen by the user (multi flag 'y') or a single chosen image
    file. A file is processed again while image_ocr_process() returns the
    's' key code; in directory mode ESC (27) or 'q' stops the whole run.

    NOTE(review): written from a source whose indentation was lost; the
    placement of the '>> OCR process >>' and 'Finished.' messages outside
    the tilt-check branch is the assumed structure -- confirm.
    """
    # A Tk instance is needed for the dialogs; its own window stays hidden.
    root = tkinter.Tk()
    root.withdraw()

    # Command-line parameters
    options = parse_args().parse_args()
    lang = options.language
    layout = int(options.layout)
    outpath = options.out
    logflag = options.log == 'y'
    mltflag = options.mlt == 'y'
    tltflag = options.tilt == 'y'

    display_info(lang, layout, options.log, outpath, options.mlt, options.tilt)

    # regflag is False only when the config has no registered templates.
    registered = read_section_name(mylib_yaml.YamlProcess(CONFIG_FILE, False))
    regflag = registered is not None

    def timestamp():
        """Return the current local time formatted with '%X'."""
        return datetime.datetime.now().time().strftime('%X')

    def process_until_settled(path):
        """Process *path*, repeating while the result is the 's' key code."""
        while True:
            code = image_ocr_process(path, outpath, lang, layout, logflag, regflag)
            if code != ord('s'):
                return code
            print(' ** Run again !! **')

    if mltflag:
        # Batch mode: process all image files of the selected directory.
        dirname = filedialog.askdirectory(initialdir = './', title = '対象ディレクトリを選択', mustexist = True)
        if dirname:
            if tltflag:
                print(' >> tilt check >>', timestamp())
                mylib_tilt.tilt_image_fol(dirname, 100, 200, 20, 'non', logflag, True)
            print('\n >> OCR process >>', timestamp())
            my_file = mylib_file.FileTreatment(False)
            img_ext = {'.bmp', '.png', '.jpeg', '.jpg', '.tif'}
            for filename in my_file.get_file_list_sel(dirname + '/*', img_ext):
                # ESC (27) or 'q' (113) aborts the remaining files.
                if process_until_settled(filename) in (27, 113):
                    break
    else:
        # Single-file mode
        filename = filedialog.askopenfilename(title = '画像ファイルを開く', filetypes = [("Image file", ".bmp .png .jpeg .jpg .tif"), ("Bitmap", ".bmp"), ("PNG", ".png"), ("JPEG", ".jpeg"), ("JPG", ".jpg")], initialdir = './')
        if filename:
            if tltflag:
                print(' >> tilt check >>')
                mylib_tilt.tilt_image(filename, 100, 200, 20, 'non', logflag, True)
            print('\n >> OCR process >>')
            process_until_settled(filename)

    print('\n Finished. ', timestamp())
# main関数エントリーポイント(実行開始)
# Run main() only when executed as a script; Ctrl-C is reported with a
# short colored message instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print(RED + ' Keyboard Interrupt !!' + NOCOLOR)
# -*- coding: utf-8 -*-
##------------------------------------------
## FormOCR Template Edit Program Ver 0.01
## with tesseract & PyOCR & cvui
## platform: linux / windows
##
## 2022.04.13 Masahiro Izutsu
##------------------------------------------
## formocr_edit.py (tryocr_edit.py ver 0.03)
# Program title; printed in the start-up banner.
# NOTE(review): the file header comment says Ver 0.01 -- confirm which is current.
title = 'FormOCR Template Edit Program Ver 0.02'
# Color Escape Code (ANSI sequences used to color terminal messages)
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
LINE_WORD_BOX_COLOR = (0, 0, 240)
WORD_BOX_COLOR = (255, 0, 0)
CONTENTS_COLOR = (0, 128, 0)
from os.path import expanduser
# Settings file (read) and template file (written by the editor). The names
# contain no '~', so expanduser() leaves them as plain relative names.
CONFIG_FILE = expanduser('formocr.yaml')
OUTPUT_FILE = expanduser('formocr_templ.yaml')
# import処理
from PIL import Image
import sys
import pyocr
import pyocr.builders
import cv2
import cvui
import argparse
import myfunction
import numpy as np
import mylib_gui
import mylib_frame
import mylib_preprocess
import mylib_screen
import platform
from tkinter import filedialog
import mylib_yaml
import mylib_text
import os
import tkinter
# 入力パラメータの定義
def parse_args():
    """Print the start-up banner and build the command-line parser.

    Returns:
        argparse.ArgumentParser: parser with the language, layout, log and
        multi-file options registered. The caller is expected to invoke
        ``.parse_args()`` on it.
    """
    # Title / version banner
    banner_lines = (
        GREEN,
        '--- {} ---'.format(title),
        ' OpenCV version {} '.format(cv2.__version__),
        NOCOLOR,
    )
    for banner_line in banner_lines:
        print(banner_line)

    # (flags, metavar, default, help) -- registered in this order so the
    # generated --help output keeps the same option sequence.
    option_table = (
        (('-l', '--language'), 'LANGUAGE', 'jpn',
         "Language. Default value is 'jpn'"),
        (('--layout',), 'LAYOUT', 6,
         'Tesseract layout Default value is 6'),
        (('--log',), 'LOG', 'y',
         "Log flag.(y/n) Default value is 'y'"),
        (('--mlt',), 'MULTI', 'y',
         "Multi flag.(y/n) Default value is 'y'"),
    )

    parser = argparse.ArgumentParser()
    for flags, meta, fallback, description in option_table:
        parser.add_argument(*flags, metavar=meta, default=fallback,
                            help=description)
    return parser
# 入力パラメータの表示
def display_info(lang, layout, logflg, mltflg):
    """Print the effective start-up parameters to the terminal.

    Each argument is echoed as given, one per line, followed by an empty
    line.
    """
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    report = (
        ('Language : ', lang),
        ('Layout : ', layout),
        ('Log flag : ', logflg),
        ('Multi flag : ', mltflg),
    )
    for caption, value in report:
        print(' - ' + YELLOW + caption + NOCOLOR, value)
    print('')
# 編集項目を探す
def ckeck_mode(mode):
    """Find the next item that still needs editing.

    Returns the index of the first entry of *mode* that is not 1, or -1
    when every entry is 1 (or *mode* is empty).
    """
    return next(
        (position for position, state in enumerate(mode) if state != 1),
        -1,
    )
# 基本情報を書く
def write_base_info(myyaml, section, path, base_cord):
    """Write both base entries of *section* in one call.

    Passes ``{'.FileName': path, '.BasicCoordinates': base_cord}`` to
    ``myyaml.write_append(section, ...)``.
    """
    myyaml.write_append(
        section,
        {'.FileName': path, '.BasicCoordinates': base_cord},
    )
def write_base_info1(myyaml, section, path):
    """Store *path* under the '.FileName' key of *section*."""
    file_name_key = '.FileName'
    myyaml.write_append_key(section, file_name_key, path)
def write_base_info2(myyaml, section, base_cord):
    """Store *base_cord* under the '.BasicCoordinates' key of *section*."""
    coordinates_key = '.BasicCoordinates'
    myyaml.write_append_key(section, coordinates_key, base_cord)
# 基本情報を読む
def read_base_info1(myyaml, section):
    """Return the value stored under the '.FileName' key of *section*.

    *myyaml* must provide ``load_section_key(section, key)``; its result is
    passed through unchanged.
    """
    return myyaml.load_section_key(section, '.FileName')
def read_base_info2(myyaml, section):
    """Return the value stored under the '.BasicCoordinates' key of *section*.

    *myyaml* must provide ``load_section_key(section, key)``; its result is
    passed through unchanged.
    """
    return myyaml.load_section_key(section, '.BasicCoordinates')
# 配置情報を書く
def write_area_info(myyaml, section, key, locat, prepros, text):
    """Store the layout entry *key* of *section* (without an offset).

    The entry is a dict with the keys 'Location', 'PreProcess' and 'Text',
    handed to ``myyaml.write_append_key``.
    """
    entry = dict(Location=locat, PreProcess=prepros, Text=text)
    myyaml.write_append_key(section, key, entry)
# 配置情報を書く2
def write_area_info2(myyaml, section, key, locat, prepros, text, offset):
    """Store the layout entry *key* of *section* including its offset.

    The entry is a dict with the keys 'Location', 'PreProcess', 'Text' and
    'Offset', handed to ``myyaml.write_append_key``.
    """
    entry = dict(Location=locat, PreProcess=prepros, Text=text, Offset=offset)
    myyaml.write_append_key(section, key, entry)
# 配置情報を読む
def read_area_info(myyaml, section, sub_sec) :
    """Read the layout entry *sub_sec* of template *section*.

    Returns:
        tuple: ``(Location, PreProcess, Text, Offset)`` exactly as returned
        by ``myyaml.load_section_sub_key`` for each of those keys, queried
        in that order.
    """
    wanted_keys = ('Location', 'PreProcess', 'Text', 'Offset')
    return tuple(
        myyaml.load_section_sub_key(section, sub_sec, item_key)
        for item_key in wanted_keys
    )
# 登録セクション名(ファイル名) を書く
def write_section_name(myyaml, name):
    """Register *name* as the most recent entry of the 'Template' section.

    The current list is loaded (a missing section counts as empty), any
    existing occurrence of *name* is removed, *name* is appended at the
    end, and the list is written back with ``myyaml.write_append``.
    """
    registered = myyaml.load_section('Template')
    if registered is None:
        # No table yet (first registration)
        registered = []
    if name in registered:
        # Already registered: drop the old position so it ends up last
        registered.remove(name)
    registered.append(name)
    myyaml.write_append('Template', registered)
# 登録セクション名(ファイル名) を読む out: キー・リスト
def read_section_name(myyaml):
    """Return the content of the 'Template' section (the registered names).

    The value comes straight from ``myyaml.load_section('Template')``;
    callers in this file treat ``None`` as "nothing registered".
    """
    return myyaml.load_section('Template')
# 画像ファイルの選択
def select_image_file(title, initdir):
    """Show a file-open dialog for image files and return the chosen path.

    Args:
        title: dialog title.
        initdir: directory the dialog starts in.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns for the selection.
    """
    image_filters = [
        ("Image file", ".bmp .png .jpeg .jpg .tif"),
        ("Bitmap", ".bmp"),
        ("PNG", ".png"),
        ("JPEG", ".jpeg"),
        ("JPG", ".jpg"),
    ]
    return filedialog.askopenfilename(
        title=title,
        filetypes=image_filters,
        initialdir=initdir,
    )
# OpenCV イメージファイル読む (日本語ファイル名対応)
def imread(filename):
    """Read an image with OpenCV, including non-ASCII paths on Windows.

    The previous Windows workaround renamed the file to 'tmp_name' and
    changed the working directory while reading. Any failure in between
    left the user's file renamed and the process in another directory.
    The file bytes are now read with NumPy and decoded in memory, so
    neither the file nor the working directory is touched.

    Args:
        filename: path of the image file.

    Returns:
        The decoded BGR image array, or None when the file cannot be read
        or decoded (same convention as cv2.imread).
    """
    if platform.system() != 'Windows':
        return cv2.imread(filename)

    try:
        raw_bytes = np.fromfile(filename, dtype=np.uint8)
    except OSError:
        # Missing or unreadable file: report it the way cv2.imread does.
        return None
    if raw_bytes.size == 0:
        # imdecode rejects an empty buffer; treat it as "unreadable".
        return None
    # IMREAD_COLOR is the default mode of cv2.imread.
    return cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
# 画像注釈編集アプリケーション・メイン処理
def image_annotation(filename, lang='jpn', layout=6, logflag=False, mltflg=False, loadflg=False):
global set_mode, ocr_text, ocr_rect, roi, save_mode, preprocess_mode, save_preprocess_mode, save_rect, text_save
# -- 読み取りテキストの変換
def ocr_text_convert(text, mode):
my_text_convert = mylib_text.TextConvert(False)
s = ''
if mode ==0:
s = my_text_convert.replace_key_strings(text) # 請求書
elif mode == 1:
s = my_text_convert.get_str2date(text) # 日 付
elif mode == 2:
s = my_text_convert.replace_company_strings(text) # 会社名
elif mode == 3:
s = my_text_convert.replace_Title_strings(text) # 案件名
elif mode == 4:
s = my_text_convert.get_str2mony(text) # 税抜金額
else:
s = my_text_convert.get_str2mony(text) # 税込金額
return s
# -- 前処理モードの保存
def save_preprosess_mode(prmode, n):
global set_mode, ocr_text, ocr_rect, roi, save_mode, preprocess_mode, save_preprocess_mode, save_rect, text_save
for i in range(4):
save_preprocess_mode[n * 4 + i] = preprocess_mode[i]
# -- 前処理モードの呼び出し
def load_preprocess_mode(n):
global set_mode, ocr_text, ocr_rect, roi, save_mode, preprocess_mode, save_preprocess_mode, save_rect, text_save
prmode = [0] * 4
for i in range(4):
prmode[i] = save_preprocess_mode[n * 4 + i]
return prmode
# -- 登録済みの配置データ
def load_annotation_data(sec_name, base_w, base_h, svmode = 1):
global set_mode, ocr_text, ocr_rect, roi, save_mode, preprocess_mode, save_preprocess_mode, save_rect, text_save
# save_rect: 表示座標
# ocr_rect: 基準(フォーム)座標
# form_rect: フォームオフセット値
base_scale_w = base_w / org_w
base_scale_h = base_h / org_h
form_ofsx = 0
form_ofsy = 0
for n in range(table_n):
# 配置情報の読み出し
rect, pros, ocr_text, offset = read_area_info(myyaml_out, sec_name, key_table[n])
save_mode[n] = 1 if rect != None and pros != None and ocr_text != None else 0
if offset == None:
ofsx = 0
ofsy = 0
else:
ofsx = offset[0]
ofsy = offset[1]
if pros == None:
pros = 0
prepros = int(pros)
preprocess_mode[3], mod = divmod(prepros, 8)
preprocess_mode[2], mod = divmod(mod, 4)
preprocess_mode[1], preprocess_mode[0] = divmod(mod, 2)
save_preprosess_mode(preprocess_mode, n)
if ocr_text == None:
ocr_text = ''
if rect == None:
x0 = 0
y0 = 0
x1 = 0
y1 = 0
roi.x = 0
roi.y = 0
roi.width = 0
roi.height = 0
else:
# オフセット補正
rect[0] = rect[0] + form_ofsx
rect[1] = rect[1] + form_ofsy
rect[2] = rect[2] + form_ofsx
rect[3] = rect[3] + form_ofsy
# オリジナル座標 → 表示座標
x0, y0 = imgfr.get_org2res_xy(round(rect[0] / base_scale_w), round(rect[1] / base_scale_h))
x1, y1 = imgfr.get_org2res_xy(round(rect[2] / base_scale_w), round(rect[3] / base_scale_h))
roi.x = x0
roi.y = y0
roi.width = x1 - x0
roi.height = y1 - y0
# 領域の保存
save_rect[n].x = x0
save_rect[n].y = y0
save_rect[n].width = x1 - x0
save_rect[n].height = y1 - y0
# 得られた表示座標から元画像の位置を計算して画像を切り出す
if roi.area() > 50:
# 得られた表示座標から元画像の位置を計算して画像を切り出す
#x0, y0 = imgfr.get_res2org_xy(roi.x, roi.y)
#x1, y1 = imgfr.get_res2org_xy(roi.x + roi.width, roi.y + roi.height)
# フォームの座標を元座標の座標系に変換して画像を切り出す
x0 = round(rect[0] / base_scale_w)
y0 = round(rect[1] / base_scale_h)
x1 = round(rect[2] / base_scale_w)
y1 = round(rect[3] / base_scale_h)
lenaRoi = lena_frame_org[y0 : y1, x0 : x1]
ocr_rect = [x0, y0, x1, y1]
if lenaRoi.shape[0] > 0 and lenaRoi.shape[1] > 0:
# 前処理
if preprocess_mode[3] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 1)
if preprocess_mode[2] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 4)
if preprocess_mode[1] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 2)
if preprocess_mode[0] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 5)
# 切り出した領域を OCR
# PILのイメージにする
lenaRoi1 = cv2.cvtColor(lenaRoi, cv2.COLOR_RGB2BGR)
imgRoi = Image.fromarray(lenaRoi1)
text, x0, y0, x1, y1 = image2string(imgRoi, n, lang, layout)
# フォーム位置ずれ計算
if n == 0 :
form_ofsx = 0 if ofsx == 0 and ofsy == 0 else round(x0 * base_scale_w) - ofsx
form_ofsy = 0 if ofsx == 0 and ofsy == 0 else round(y0 * base_scale_h) - ofsy
ocr_text = str(text)
if logflag:
if n == 0:
print(' Check Form : < {} >'.format(sec_name))
print(' text ofset : ', x0, y0)
print(' form value : ', ofsx, ofsy)
print(' base scale : {:.3f} x {:.3f}'.format(base_scale_w, base_scale_h))
print(' form Ofset : ', form_ofsx, form_ofsy)
print(YELLOW + ' OCR text:' + NOCOLOR, ocr_text, ' → ', ocr_text_convert(ocr_text, n))
# テキストの保存
text_save[n] = ocr_text_convert(ocr_text, n)
if len(text_save[n]) > 0:
save_mode[n] = svmode
set_mode = 0
preprocess_mode = load_preprocess_mode(0)
next = ckeck_mode(save_mode)
chenge_mode(next)
# -- 画像データから OCR テキスト
def image2string(img, mode, lang, layout):
text = ''
x0 = 0
y0 = 0
x1 = 0
y1 = 0
if mode == 0:
line_and_word_boxes = tool.image_to_string(img, lang=lang,
builder=pyocr.builders.LineBoxBuilder(tesseract_layout=layout))
for lw_box in line_and_word_boxes:
text = lw_box.content # 1行前提で処理
position = lw_box.position
x0 = position[0][0]
y0 = position[0][1]
x1 = position[1][0]
y1 = position[1][1]
break
else:
lng = 'eng' if mode == 4 or mode == 5 else lang # 金額は英語で
text = tool.image_to_string(img, lang=lng,
builder=pyocr.builders.TextBuilder(tesseract_layout=layout))
return text, x0, y0, x1, y1
# -- 登録済みデータの消去
def clear_annotation_data():
global set_mode, ocr_text, ocr_rect, roi, save_mode, preprocess_mode, save_preprocess_mode, save_rect, text_save
for n in range(table_n):
text_save[n] = ''
save_rect[n].x = 0
save_rect[n].y = 0
save_rect[n].width = 0
save_rect[n].height = 0
save_mode[n] = 0
set_mode = 0
# -- モード変更
def chenge_mode(mode):
global set_mode, ocr_text, ocr_rect, roi, save_mode, preprocess_mode, save_preprocess_mode, save_rect, text_save
if mode < 0:
mode = 0
save_preprosess_mode(preprocess_mode, set_mode)
set_mode = mode
ocr_rect[0], ocr_rect[1] = imgfr.get_res2org_xy(save_rect[set_mode].x, save_rect[set_mode].y)
ocr_rect[2], ocr_rect[3] = imgfr.get_res2org_xy(save_rect[set_mode].x + save_rect[set_mode].width, save_rect[set_mode].y + save_rect[set_mode].height)
preprocess_mode = load_preprocess_mode(set_mode)
roi = cvui.Rect(0, 0, 0, 0)
ocr_text = ''
## --
if logflag:
print('\n file: <{}>'.format(filename))
# OpenCV でイメージを読む
lena_frame_org = imread(filename)
key = -1
if lena_frame_org is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
return key
# 設定処理
conf_file = CONFIG_FILE
out_file = OUTPUT_FILE
base_savef = False # 基本情報記録フラグ
section_name = os.path.basename(filename)
myyaml_cnf = mylib_yaml.YamlProcess(conf_file)
myyaml_out = mylib_yaml.YamlProcess(out_file)
key_table = myyaml_cnf.load_section('KeyTable')
key_disp = myyaml_cnf.load_section('KeyDisp')
table_n = len(key_table)
save_mode = [0] * table_n
save_preprocess_mode = [0] * 4 * table_n # 2次元配列がうまく行かないので1次元で処理
status_h = 90 # ステータスラインの高さ
# ファイル名が ASCII 文字のみの場合はファイル名をウィンドウタイトルに表示
WINDOW_NAME = 'FormOCR Editter : {}'.format(section_name) if section_name.isascii() else 'FormOCR Editter'
ROI_WINDOW = 'Cut-out area'
ROI_POPUP = 'OCR detection result Text'
preprocess_mode = [0, 0, 0, 0]
set_mode = 0
wlock1 = 0
wlock2 = 0
wlock3 = 0
# 日本語フォント指定
if platform.system()=='Windows':
fontPIL = 'meiryo.ttc' # メイリオ
else:
fontPIL = 'NotoSansCJK-Bold.ttc' # ゴシック体
# ディスプレイ解像度を得る (Ubuntu の場合 height - 64)
monitor_height, monitor_width = mylib_screen.get_display_size(logflag)
maxsize = monitor_height - 64 - 180
# 画像の前処理
imgpros = mylib_preprocess.ImagePreprocess(False) # 初期化
# OCR
tools = pyocr.get_available_tools()
if len(tools) == 0:
print(RED + "\nOCR tool Not found." + NOCOLOR)
quit()
tool = tools[0]
# mylib_frame ライブラリ
imgfr = mylib_frame.ImageFrame(lena_frame_org) # 初期化
imgfr.set_screen_size(monitor_width, monitor_height)
lena_frame = imgfr.frame_resize(maxsize)
lena_frame_h, lena_frame_w = lena_frame.shape[:2]
# 画面ステータス領域 (画面下部 status_h pixel)の確保
frame = np.zeros((lena_frame_h + status_h, lena_frame_w, 3), np.uint8)
frame[:,:,:] = 200
popup_frame = np.zeros((120, 500, 3), np.uint8)
anchor = cvui.Point()
roi = cvui.Rect(0, 0, 0, 0)
working = False
frame_h, frame_w = frame.shape[:2]
outf = False
org_h, org_w = imgfr.get_original_size()
scale_h, scale_w = imgfr.get_scale()
if logflag:
print('\n original h x w : {:=5} x {:=5}'.format(org_h, org_w))
print(' display h x w : {:=5} x {:=5}'.format(lena_frame_h, lena_frame_w))
print(' scale h x w : {:.3f} x {:.3f}'.format(scale_h, scale_w))
print(' -----------')
# Init cvui and tell it to create a OpenCV window, i.e. cv.namedWindow(WINDOW_NAME).
cv2.namedWindow(WINDOW_NAME, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL)
cvui.init(WINDOW_NAME)
# OCR 項目設定データ
ocr_text = ''
ocr_rect = [0, 0, 0, 0]
ocr_form_ofset = [0, 0] # モード0 のテキスト位置(設定時のみのフォーム固有のデータ)
# 設定データの保存場所
text_save = []
save_rect = []
for n in range(table_n):
text_save.append('')
save_rect.append(cvui.Rect(0, 0, 0, 0))
# 登録済みの配置データ
template_tbl = read_section_name(myyaml_cnf)
if template_tbl != None:
load_annotation_data(section_name, org_w, org_h)
# 処理ループ
while (True):
# 画面上部に画像を配置
frame[0:lena_frame_h,:] = lena_frame
frame[lena_frame_h:,:,:] = 200
# マウス・イベント
if cvui.mouse(cvui.LEFT_BUTTON, cvui.DOWN):
if cvui.mouse().y < lena_frame_h:
# マウスポインタにアンカーを配置
anchor.x = cvui.mouse().x
anchor.y = cvui.mouse().y
# 作業中の通知(作業中はウインドウの更新しない)
working = True
if cvui.mouse(cvui.LEFT_BUTTON, cvui.IS_DOWN):
if cvui.mouse().y < lena_frame_h:
# 領域を設定
width = cvui.mouse().x - anchor.x
height = cvui.mouse().y - anchor.y
roi.x = anchor.x + width if width < 0 else anchor.x
roi.y = anchor.y + height if height < 0 else anchor.y
roi.width = abs(width)
roi.height = abs(height)
# 座標とサイズを表示
cvui.printf(frame, roi.x + 5, roi.y + 5, 0.3, 0xff0000, '(%d,%d)', roi.x, roi.y)
cvui.printf(frame, cvui.mouse().x + 5, cvui.mouse().y + 5, 0.3, 0xff0000, 'w:%d, h:%d', roi.width, roi.height)
if cvui.mouse(cvui.UP):
if cvui.mouse().y < lena_frame_h:
# 領域指定作業の終了
working = False
outf = True
wlock1 = 0
wlock2 = 0
wlock3 = 0
# 領域内を確認
lenaRows, lenaCols, lenaChannels = lena_frame.shape
if roi.x < 0:
roi.x = 0
if roi.y < 0:
roi.y = 0
if roi.x + roi.width > lena_frame_w:
roi.width = lena_frame_w - roi.x
if roi.y + roi.height > lena_frame_h:
roi.height = lena_frame_h - roi.y
# 設定領域をレンダリング
cvui.rect(frame, roi.x, roi.y, roi.width, roi.height, 0xff0000)
# 設定済み領域の表示
for n in range(table_n):
cor0 = 0x008000 if save_mode[n] == 1 else 0x800000
cor1 = (0, 160, 0) if save_mode[n] == 1 else (0, 0, 160)
rc = save_rect[n]
if rc.width > 0 and rc.height >0:
cvui.rect(frame, rc.x, rc.y, rc.width, rc.height, cor0)
# テキストの描画
if len(text_save[n]) > 0:
myfunction.cv2_putText(img = frame, text = text_save[n], org = (rc.x, rc.y), fontFace = fontPIL, fontScale = 12, color = cor1, mode = 0)
# -------------------------
# ステータスライン - 前処理:
fs = 12
xs = 80
rc = cvui.Rect(10, lena_frame_h+8, xs-10, fs + 6)
myfunction.cv2_putText(frame, '前処理:', (rc.x, rc.y), fontPIL, fs, (100,100,100), 1)
cr = (0,0,0) if preprocess_mode[3] == 0 else (0,0,240)
myfunction.cv2_putText(frame, '白黒2値', (rc.x+xs, rc.y), fontPIL, fs, cr, 1)
cr = (0,0,0) if preprocess_mode[0] == 0 else (0,0,240)
myfunction.cv2_putText(frame, '黒色抽出', (rc.x+xs*2, rc.y), fontPIL, fs, cr, 1)
cr = (0,0,0) if preprocess_mode[1] == 0 else (0,0,240)
myfunction.cv2_putText(frame, '罫線消去', (rc.x+xs*3, rc.y), fontPIL, fs, cr, 1)
cr = (0,0,0) if preprocess_mode[2] == 0 else (0,0,240)
myfunction.cv2_putText(frame, '印影消去', (rc.x+xs*4, rc.y), fontPIL, fs, cr, 1)
crx = 0x202020 if preprocess_mode[3] == 0 else 0xe00000
cvui.rect(frame, rc.x+xs-2, rc.y, rc.width, rc.height, crx)
crx = 0x202020 if preprocess_mode[0] == 0 else 0xe00000
cvui.rect(frame, rc.x+xs*2-2, rc.y, rc.width, rc.height, crx)
crx = 0x202020 if preprocess_mode[1] == 0 else 0xe00000
cvui.rect(frame, rc.x+xs*3-2, rc.y, rc.width, rc.height, crx)
crx = 0x202020 if preprocess_mode[2] == 0 else 0xe00000
cvui.rect(frame, rc.x+xs*4-2, rc.y, rc.width, rc.height, crx)
status3 = cvui.iarea(rc.x+xs, rc.y, rc.width, rc.height);
status0 = cvui.iarea(rc.x+xs*2, rc.y, rc.width, rc.height);
status1 = cvui.iarea(rc.x+xs*3, rc.y, rc.width, rc.height);
status2 = cvui.iarea(rc.x+xs*4, rc.y, rc.width, rc.height);
if status3 == cvui.CLICK:
preprocess_mode[3] = 1 if preprocess_mode[3] == 0 else 0
outf = True
if status0 == cvui.CLICK:
preprocess_mode[0] = 1 if preprocess_mode[0] == 0 else 0
outf = True
if status1 == cvui.CLICK:
preprocess_mode[1] = 1 if preprocess_mode[1] == 0 else 0
outf = True
if status2 == cvui.CLICK:
preprocess_mode[2] = 1 if preprocess_mode[2] == 0 else 0
outf = True
# ステータスライン - 設定項目:
rc = cvui.Rect(10, lena_frame_h+32, xs-10, fs+8)
myfunction.cv2_putText(frame, '設定項目:', (rc.x, rc.y), fontPIL, fs, (100,100,100), 1)
n = 1
for disp in key_disp:
cr = (0,160,160) if set_mode == n - 1 else (100,100,100) if save_mode[n - 1] == 0 else (0,160,0)
myfunction.cv2_putText(frame, disp, (rc.x + n*xs, rc.y), fontPIL, fs, cr, 1)
n = n + 1
status10 = cvui.iarea(rc.x+xs, rc.y, rc.width, rc.height);
status11 = cvui.iarea(rc.x+2*xs, rc.y, rc.width, rc.height);
status12 = cvui.iarea(rc.x+3*xs, rc.y, rc.width, rc.height);
status13 = cvui.iarea(rc.x+4*xs, rc.y, rc.width, rc.height);
status14 = cvui.iarea(rc.x+5*xs, rc.y, rc.width, rc.height);
status15 = cvui.iarea(rc.x+6*xs, rc.y, rc.width, rc.height);
if status10 == cvui.CLICK:
chenge_mode(0)
if status11 == cvui.CLICK:
chenge_mode(1)
if status12 == cvui.CLICK:
chenge_mode(2)
if status13 == cvui.CLICK:
chenge_mode(3)
if status14 == cvui.CLICK:
chenge_mode(4)
if status15 == cvui.CLICK:
chenge_mode(5)
# ステータスライン - 作業ボタン
btn_x = 90
btn_y = lena_frame_h + 54
btn_w = 70
btn_h = 32
# 'Load'
if loadflg:
if cvui.button(frame, btn_x - btn_w - 10, btn_y, "&Load"):
fname = select_image_file('画像ファイルを開く', './')
if len(fname) > 0:
clear_annotation_data()
sec_name = os.path.basename(fname)
base = read_base_info2(myyaml_out, sec_name)
if base !=None:
load_annotation_data(sec_name, base[0], base[1], 2)
# 'Save'
if cvui.button(frame, btn_x, btn_y, "&Save"):
smod = set_mode
if ocr_text == '':
ocr_text = text_save[set_mode]
if roi.width == 0 or roi.height == 0:
roi = save_rect[set_mode]
if len(ocr_text) > 0:
ocr_save_text = ocr_text_convert(ocr_text, set_mode)
save_mode[set_mode] = 1
save_rect[set_mode] = roi
text_save[set_mode] = ocr_save_text
# 基本情報の記録
if not base_savef:
write_base_info1(myyaml_out, section_name, filename)
write_base_info2(myyaml_out, section_name, [org_w, org_h])
base_savef = True
# 配置情報の記録
prepros = preprocess_mode[0] + preprocess_mode[1]*2 + preprocess_mode[2]*4 + preprocess_mode[3]*8
if set_mode == 0:
write_area_info2(myyaml_out, section_name, key_table[set_mode], ocr_rect, prepros, ocr_save_text, ocr_form_ofset)
else:
write_area_info(myyaml_out, section_name, key_table[set_mode], ocr_rect, prepros, ocr_save_text)
write_section_name(myyaml_cnf, section_name)
if logflag:
print('\n Parameter Saving!!')
print(' key: ', key_table[set_mode])
print(' area:', ocr_rect)
print(' text:', ocr_save_text)
print(' pros:', preprocess_mode)
if set_mode == 0:
print(' ofset:', ocr_form_ofset)
next = ckeck_mode(save_mode)
chenge_mode(next)
cv2.destroyWindow(ROI_WINDOW)
cv2.destroyWindow(ROI_POPUP)
# 'Next'
if mltflg:
if cvui.button(frame, btn_x + btn_w + 10, btn_y, "&Next"):
key = ord('n')
break
# 'Quit'
if cvui.button(frame, btn_x + (btn_w + 10) * 4, btn_y, "&Quit"):
next = ckeck_mode(save_mode)
if next < 0:
key = ord('s')
if logflag:
print(GREEN + ' - Saved all parameters !! -' + NOCOLOR)
else:
if logflag:
print(YELLOW + ' - Program quit !! -' + NOCOLOR)
key = ord('q')
break
# -------------------------
# ウインドウの更新
cvui.update()
# 画面の表示
cv2.imshow(WINDOW_NAME, frame)
if wlock1 < 10:
cv2.moveWindow(WINDOW_NAME, 80, 0)
wlock1 = wlock1 + 1
else:
wlock1 = 10
# 得られた表示座標から元画像の位置を計算して画像を切り出す
if outf and roi.area() > 50 and working == False:
x0, y0 = imgfr.get_res2org_xy(roi.x, roi.y)
x1, y1 = imgfr.get_res2org_xy(roi.x + roi.width, roi.y + roi.height)
lenaRoi = lena_frame_org[y0 : y1, x0 : x1]
ocr_rect = [x0, y0, x1, y1]
# 前処理
if preprocess_mode[3] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 1)
if preprocess_mode[2] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 4)
if preprocess_mode[1] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 2)
if preprocess_mode[0] != 0:
lenaRoi = imgpros.image_processing_execution(lenaRoi, 5)
# OCR入力画像表示
lenaRoi_h, lenaRoi_w = lenaRoi.shape[:2]
cv2.namedWindow(ROI_WINDOW, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL)
cv2.imshow(ROI_WINDOW, lenaRoi)
if wlock2 < 10:
cv2.moveWindow(ROI_WINDOW, frame_w + 100, 0)
wlock2 = wlock2 + 1
else:
wlock2 = 10
# 切り出した領域を OCR
# PILのイメージにする
lenaRoi1 = cv2.cvtColor(lenaRoi, cv2.COLOR_RGB2BGR)
imgRoi = Image.fromarray(lenaRoi1)
lng = 'eng' if set_mode == 4 or set_mode == 5 else lang # 金額は英語で
ocr_text, x0, y0, x1, y1 = image2string(imgRoi, set_mode, lang, layout)
ocr_form_ofset[0] = x0
ocr_form_ofset[1] = y0
# テキストの描画
if len(ocr_text)>0:
msg = 'preprocess: {} area: ({}, {}) - ({}, {})'.format(preprocess_mode, ocr_rect[0], ocr_rect[1], ocr_rect[2], ocr_rect[3])
popup_frame[:,:,:] = 0
cv2.rectangle(popup_frame, (0, 88), (500, 105), (255,0,0), -1)
myfunction.cv2_putText(img = popup_frame,
text = ocr_text,
org = (15, 104),
fontFace = fontPIL,
fontScale = 12,
color = (255,255,255),
mode = 0)
cv2.putText(popup_frame, msg, (0, 16), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)
else:
msg = ' Cannot be converted !!\n'
msg0 = 'もう一度指定してくささい ‼'
popup_frame[:,:,:] = 0
myfunction.cv2_putText(img = popup_frame,
text = msg0,
org = (120, 60),
fontFace = fontPIL,
fontScale = 16,
color = (240, 240, 0),
mode = 0)
cv2.namedWindow(ROI_POPUP, flags=cv2.WINDOW_AUTOSIZE | cv2.WINDOW_GUI_NORMAL)
cv2.imshow(ROI_POPUP, popup_frame)
if wlock3 < 10:
cv2.moveWindow(ROI_POPUP, frame_w + 100, lenaRoi_h + 100)
wlock3 = wlock3 + 1
else:
wlock3 = 10
if outf and logflag:
print(' ', ocr_text)
print(' ', msg)
outf = False
key = cv2.waitKey(10)
if key == 27 or key == ord('q') or key == ord('s'): # 'esc' or 'q' or 's'
break
if not mylib_gui._is_visible(WINDOW_NAME): # 'Close' button
break
cv2.destroyAllWindows()
if logflag:
print(' -----------\n')
return key
# ** main関数 **
def main():
    """Run the interactive annotation session from the command line.

    Parses the command-line options, prints the run information, then
    repeatedly asks the user for an image file and opens the annotation
    editor on it.  The session ends when the file dialog returns an empty
    selection or when the editor returns one of the terminating codes
    (Esc, 'q', 's', or -1).

    Returns:
        None, so ``sys.exit(main())`` exits with status 0.
    """
    # Create the Tk root object and hide its window.
    tk_root = tkinter.Tk()
    tk_root.withdraw()

    # Command-line options.
    options = parse_args().parse_args()
    lang = options.language
    layout = int(options.layout)
    log_opt = options.log
    multi_opt = options.mlt
    # The y/n option strings are turned into booleans for the editor.
    logflag = log_opt == 'y'
    mltflag = multi_opt == 'y'

    # Show the effective settings (the raw y/n strings are passed here).
    display_info(lang, layout, log_opt, multi_opt)

    # Editor return codes that end the session: Esc, 'q', 's', and -1.
    stop_codes = (27, ord('q'), ord('s'), -1)

    while True:
        # Ask for the next image file; an empty selection ends the session.
        filename = select_image_file('画像ファイルを開く', './')
        if len(filename) == 0:
            break

        ret = image_annotation(filename, lang, layout, logflag, mltflag, True)
        if ret in stop_codes:
            break

    if logflag:
        print('\n Finished.')
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)