# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Image Classification
##
## model: squeezenet1.1
##
## 2021.04.12 Masahiro Izutsu
##------------------------------------------
## image_classification.py
## 2021.06.23 fps display
# Color Escape Codes (ANSI terminal colors for console messages)
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'   # reset to default color
YELLOW = '\033[1;33m'
# Constant definitions
WINDOW_WIDTH = 640  # maximum display width for still images (larger inputs are shrunk)
TEXT_COLOR = (255, 255, 255) # white text
from os.path import expanduser
# Default model path: FP32 SqueezeNet 1.1 IR (.bin weights expected alongside)
MODEL_DEF = expanduser('~/model/public/FP32/squeezenet1.1.xml')
# Load OpenVINO Inference Engine modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
# Standard-library and project imports
import sys
import cv2
import numpy as np
import argparse
import myfunction
import mylib
# Title and version banner printed at startup
title = 'Image Classification'
banner_lines = (GREEN, f'--- {title} ---', cv2.__version__)
for line in banner_lines:
    print(line)
print('OpenVINO inference_engine:', get_version())
print(NOCOLOR)
# Parses arguments for the application
def parse_args():
    """Build the command-line argument parser for the demo.

    Returns:
        argparse.ArgumentParser: parser exposing -i/--image, -m/--model,
        -d/--device, -l/--label, -t/--title, -s/--speed and -o/--out.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', metavar='IMAGE_FILE', type=str, default='cam',
                        help='Absolute path to image file or cam for camera stream.')
    parser.add_argument('-m', '--model', type=str,
                        default=MODEL_DEF,
                        # space added so the concatenated help text reads correctly
                        help='Model Path to an .xml file with a trained model. '
                             'Default value is ' + MODEL_DEF)
    parser.add_argument('-d', '--device', default='CPU', type=str,
                        help='Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                             'acceptable. The demo will look for a suitable plugin for the device specified. '
                             'Default value is CPU')
    parser.add_argument('-l', '--label', metavar='LABEL', default='./synset_words_jp.txt',
                        # fixed typo: was "Default value is c./synset_words_jp.txt"
                        help='Absolute path to labels file. '
                             'Default value is ./synset_words_jp.txt')
    parser.add_argument('-t', '--title', metavar='TITLE',
                        default='y',
                        help='Program title flag.(y/n) Default value is \'y\'')
    parser.add_argument('-s', '--speed', metavar='SPEED',
                        default='y',
                        # fixed typo: "calue" -> "value"
                        help='Speed display flag.(y/n) Default value is \'y\'')
    parser.add_argument('-o', '--out', metavar='IMAGE_OUT',
                        default='non',
                        help='Processed image file path. Default value is \'non\'')
    return parser
# Display basic model information
def display_info(image, model, device, label, titleflg, speedflg, outpath):
    """Print the startup banner followed by the effective runtime settings."""
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    settings = (
        ('Image File : ', image),
        ('Model : ', model),
        ('Device : ', device),
        ('Label : ', label),
        ('Program Title: ', titleflg),
        ('Speed flag : ', speedflg),
        ('Processed out: ', outpath),
    )
    for caption, value in settings:
        print(' - ' + YELLOW + caption + NOCOLOR, value)
# Determine the kind of a file from its content.
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. movie file)
#               'NotFound'          file does not exist
#
# NOTE: imghdr was deprecated by PEP 594 and removed in Python 3.13;
# fall back to a minimal magic-number sniffer when it is unavailable.
try:
    import imghdr
except ImportError:
    imghdr = None

# (signature, type name) pairs matching imghdr's naming for common formats
_IMAGE_SIGNATURES = (
    (b'\x89PNG\r\n\x1a\n', 'png'),
    (b'\xff\xd8\xff', 'jpeg'),
    (b'GIF87a', 'gif'),
    (b'GIF89a', 'gif'),
    (b'BM', 'bmp'),
    (b'II*\x00', 'tiff'),
    (b'MM\x00*', 'tiff'),
)

def is_pict(filename):
    """Classify *filename* by content; see the table in the header comment.

    Always returns a string ('None' — the string — for non-images, so the
    caller can compare against the literal 'None' / 'NotFound').
    """
    try:
        if imghdr is not None:
            imgtype = imghdr.what(filename)
        else:
            with open(filename, 'rb') as f:
                head = f.read(32)
            imgtype = next((name for sig, name in _IMAGE_SIGNATURES
                            if head.startswith(sig)), None)
    except FileNotFoundError:
        imgtype = 'NotFound'
    return str(imgtype)
# ** main function **
def main():
    """Classify frames from a camera stream or image file with SqueezeNet.

    Parses the command line, loads the IR model via the OpenVINO Inference
    Engine, then loops: preprocess a frame, run inference, overlay the top-3
    labels with their scores (plus optional fps counter and title), display
    the frame, and optionally record the processed output to a file.
    """
    # Japanese-capable font used by the PIL-based text overlay helper
    fontPIL = 'NotoSansCJK-Bold.ttc'
    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    label_path = ARGS.label
    titleflg = ARGS.title
    speedflg = ARGS.speed
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        # is_pict returns the string 'None' for non-image (movie) files
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()
    model = ARGS.model
    device = ARGS.device
    outpath = ARGS.out
    # Load class labels (one label per line)
    labels = np.loadtxt(label_path, dtype='str', delimiter='\n')
    # Load the model (IR .xml; .bin weights expected at the same path)
    ie = IECore()
    net = ie.read_network(model = model, weights = model[:-4] + '.bin')
    exec_net = ie.load_network(network = net, device_name = device)
    # Get the input and output blob keys
    input_blob = net.input_info['data'].name
    out_blob = next(iter(net.outputs))
    # Show the effective runtime settings
    display_info(input_stream, model, device, label_path, titleflg, speedflg, outpath)
    # Prepare the input source
    if (isstream):
        # Camera / video stream
        cap = cv2.VideoCapture(input_stream)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
        img_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        img_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    else:
        # Read the still image file
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # Shrink to WINDOW_WIDTH keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # single pass through the loop
        img_width = frame.shape[1]
        img_height = frame.shape[0]
    # Record processed output: step 1 (open the video writer)
    if (outpath != 'non'):
        if (isstream):
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
            outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))
    # Initialize measurements
    # NOTE(review): assumes mylib.fpsWithTick exposes get()/get_average() -- confirm in mylib
    fpsWithTick = mylib.fpsWithTick()
    frame_count = 0
    fps_total = 0
    fpsWithTick.get()  # start fps measurement
    # Main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # Convert the frame to the network input format
        img = cv2.resize(frame, (227, 227))  # resize to the network input size
        img = img.transpose((2, 0, 1))       # HWC > CHW
        img = np.expand_dims(img, axis=0)    # CHW > BCHW
        # Run inference
        out = exec_net.infer(inputs={input_blob : img})
        # Take only the needed blob from the result dict
        out = out[out_blob]
        # Drop singleton dimensions
        out = np.squeeze(out)
        # Indices of the top-3 scores, descending
        index_order = np.argsort(out)[::-1][:3]
        # Initial y coordinate for the text overlay
        text_y = img_height - 80
        # Show label and score for each of the top-3 indices
        for index in index_order:
            # Strip the first 10 characters (id prefix) from the label line
            label = labels[index]
            label = label[10:]
            # Score as a percentage string, rounded to one decimal place
            value = out[index] * 100
            value = round(value, 1)
            value = str(value) + '% '
            # Draw the text via the PIL-based helper (renders Japanese labels)
            # NOTE(review): cv2_putText signature assumed from this call site -- confirm in myfunction
            myfunction.cv2_putText(img = frame,
                            text = value + label,
                            org = (10, text_y),
                            fontFace = fontPIL,
                            fontScale = 18,
                            color = (240, 180, 0),
                            mode = 0)
            # Advance y for the next result line
            text_y = text_y + 30
        # Compute fps
        fps = fpsWithTick.get()
        st_fps = 'fps: {:>6.2f}'.format(fps)
        if (speedflg == 'y'):
            cv2.rectangle(frame, (10, 38), (95, 55), (90, 90, 90), -1)
            cv2.putText(frame, st_fps, (15, 50), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)
        # Draw the program title
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)
        # Show the frame
        window_name = title + " (hit 'q' or 'esc' key to exit)"
        cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
        cv2.imshow(window_name, frame)
        # Record processed output: step 2 (write this frame)
        if (outpath != 'non'):
            if (isstream):
                outvideo.write(frame)
            else:
                cv2.imwrite(outpath, frame)
        # Exit when 'esc'/'q' is pressed or the window is closed;
        # for still images this inner loop blocks until the user quits
        breakflg = False
        while(True):
            key = cv2.waitKey(1)
            # a negative aspect-ratio property signals the window was closed
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if key == 27 or key == 113 or (prop_val < 0.0):  # 'esc' or 'q'
                breakflg = True
                break
            if (isstream):
                break
        if ((breakflg == False) and isstream):
            # Read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False
    # Cleanup
    if (isstream):
        cap.release()
    # Record processed output: step 3 (release the writer)
    if (outpath != 'non'):
        if (isstream):
            outvideo.release()
    cv2.destroyAllWindows()
    print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
    print('\n Finished.')
# main() entry point (execution starts here)
if __name__ == "__main__":
    sys.exit(main())