# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Real-time sentiment analysis
##
## model: face-detection-retail-0004
## emotions-recognition-retail-0003
##
## 2021.03.25 Masahiro Izutsu
## 2021.06.23 fps display
##------------------------------------------
# ANSI color escape codes
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
WINDOW_WIDTH = 640
TEXT_COLOR = (255, 255, 255) # white text
from os.path import expanduser
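# Default model paths: OpenVINO FP32 IR files assumed to live under
# ~/model/intel/FP32/; adjust if your models are downloaded elsewhere.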
MODEL_DEF_DETECT = expanduser('~/model/intel/FP32/face-detection-retail-0004.xml')
MODEL_DEF_EMO = expanduser('~/model/intel/FP32/emotions-recognition-retail-0003.xml')
# Imports
import sys
import cv2
import numpy as np
import argparse
import myfunction
import mylib
from pngoverlay import PNGOverlay
# Load OpenVINO Inference Engine modules
from openvino.inference_engine import IECore, get_version
# Title and version information
title = 'Real-time sentiment analysis 2'
print(GREEN)
print('--- {} ---'.format(title))
print('OpenCV:', cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)
# Parses arguments for the application
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = 'cam',
help = 'Absolute path to image file or cam for camera stream.')
parser.add_argument('-m_dt', '--m_detector', type = str,
default = MODEL_DEF_DETECT,
                        help = 'Path to the face detector .xml model file. '
                               'Default value is ' + MODEL_DEF_DETECT)
parser.add_argument('-m_re', '--m_recognition', type = str,
default = MODEL_DEF_EMO,
                        help = 'Path to the emotion recognition .xml model file. '
                               'Default value is ' + MODEL_DEF_EMO)
parser.add_argument('-d', '--device', default = 'CPU', type = str,
help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
'acceptable. The demo will look for a suitable plugin for the device specified. '
'Default value is CPU')
parser.add_argument('-l', '--language', metavar = 'LANGUAGE',
default = 'jp',
help = 'Language.(jp/en) Default value is \'jp\'')
parser.add_argument('-t', '--title', metavar = 'TITLE',
default = 'y',
help = 'Program title flag.(y/n) Default value is \'y\'')
parser.add_argument('-s', '--speed', metavar = 'SPEED',
default = 'y',
                        help = 'Speed display flag.(y/n) Default value is \'y\'')
parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT',
default = 'non',
help = 'Processed image file path. Default value is \'non\'')
return parser
# Display basic model information
def display_info(image, detector, recognition, device, lang, titleflg, speedflg, outpath):
print(YELLOW + title + ': Starting application...' + NOCOLOR)
print(' - ' + YELLOW + 'Image File : ' + NOCOLOR, image)
print(' - ' + YELLOW + 'm_detect : ' + NOCOLOR, detector)
print(' - ' + YELLOW + 'm_recognition: ' + NOCOLOR, recognition)
print(' - ' + YELLOW + 'Device : ' + NOCOLOR, device)
print(' - ' + YELLOW + 'Language : ' + NOCOLOR, lang)
print(' - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
print(' - ' + YELLOW + 'Speed flag : ' + NOCOLOR, speedflg)
print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)
# Determine the type of an image file
# Returns: 'jpeg', 'png', ...  image file
#          'None'              not an image file (e.g. a video file)
#          'NotFound'          the file does not exist
import imghdr
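# Note: imghdr is deprecated since Python 3.11 (removed in 3.13); it is
# available on the older Python versions this 2021 OpenVINO API targets.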
def is_pict(filename):
try:
imgtype = imghdr.what(filename)
    except FileNotFoundError:
imgtype = 'NotFound'
return str(imgtype)
# ** main function **
def main():
    # Font for Japanese text rendering
fontPIL = 'NotoSansCJK-Bold.ttc'
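    # The font name is passed through to myfunction.cv2_putText; it is assumed
    # that NotoSansCJK-Bold.ttc can be resolved on this system (e.g. installed
    # system-wide or placed next to the script).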
# Argument parsing and parameter setting
ARGS = parse_args().parse_args()
input_stream = ARGS.image
lang = ARGS.language
titleflg = ARGS.title
speedflg = ARGS.speed
if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
input_stream = 0
isstream = True
else:
filetype = is_pict(input_stream)
isstream = filetype == 'None'
if (filetype == 'NotFound'):
            print(RED + "\nInput file not found." + NOCOLOR)
quit()
    model_detector = ARGS.m_detector
    model_recognition = ARGS.m_recognition
device = ARGS.device
outpath = ARGS.out
    # Emotion labels
if (lang == 'jp'):
list_emotion = ['平静', '嬉しい', '悲しい', '驚き', '怒り']
else:
list_emotion = ['neutral', 'happy', 'sad', 'surprise', 'anger']
    # Emotion color tables (BGR)
color_emotion = [(255, 255, 0), ( 0, 255, 0), ( 0, 255, 255), (255, 0, 255), ( 0, 0, 255)]
bkcolor_emotion = [(120, 120, 70), ( 70, 120, 70), ( 70, 120, 120), (120, 70, 120), ( 70, 70, 120)]
textcolor_emotion = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255)]
    # Create emotion icon overlay instances
icon_neutral = PNGOverlay('../image/icon_neutral.png')
icon_happy = PNGOverlay('../image/icon_happy.png')
icon_sad = PNGOverlay('../image/icon_sad.png')
icon_surprise = PNGOverlay('../image/icon_surprise.png')
icon_anger = PNGOverlay('../image/icon_anger.png')
    # Collect the icon instances in a list indexed by emotion id
icon_emotion = [icon_neutral, icon_happy, icon_sad, icon_surprise, icon_anger]
    # Load the model (face detection)
ie = IECore()
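    # read_network() pairs the .xml topology with its .bin weights; the weights
    # path is derived by swapping the '.xml' extension for '.bin'.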
net = ie.read_network(model=model_detector, weights=model_detector[:-4] + '.bin')
exec_net = ie.load_network(network=net, device_name=device)
    # Load the model (emotion classification)
net_emotion = ie.read_network(model=model_recognition, weights=model_recognition[:-4] + '.bin')
exec_net_emotion = ie.load_network(network=net_emotion, device_name=device)
    # Display information
display_info(input_stream, model_detector, model_recognition, device, lang, titleflg, speedflg, outpath)
    # Prepare the input
if (isstream):
        # Camera stream
cap = cv2.VideoCapture(input_stream)
ret, frame = cap.read()
loopflg = cap.isOpened()
else:
        # Read a still image file
frame = cv2.imread(input_stream)
if frame is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
        # Resize with a fixed aspect ratio
img_h, img_w = frame.shape[:2]
if (img_w > WINDOW_WIDTH):
height = round(img_h * (WINDOW_WIDTH / img_w))
frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # loop once (still image)
    # Record the processed result: step 1 (open the video writer)
if (outpath != 'non'):
if (isstream):
fps = int(cap.get(cv2.CAP_PROP_FPS))
out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))
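            # Note: the writer is opened at the capture's native resolution;
            # camera frames are written unresized in the main loop below.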
    # Initialize measurement values
fpsWithTick = mylib.fpsWithTick()
frame_count = 0
fps_total = 0
    fpsWithTick.get()  # start fps measurement
    # Main loop
while (loopflg):
if frame is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
        # Convert to the network input format
        img = cv2.resize(frame, (300, 300))    # resize
        img = img.transpose((2, 0, 1))         # HWC -> CHW
        img = np.expand_dims(img, axis=0)      # add batch dimension
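        # face-detection-retail-0004 expects a [1, 3, 300, 300] BGR blob on its
        # 'data' input, which the three steps above produce.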
        # Run inference
out = exec_net.infer(inputs={'data': img})
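        # The 'detection_out' blob has shape [1, 1, N, 7]; each row holds
        # [image_id, label, conf, x_min, y_min, x_max, y_max] with the box
        # coordinates normalized to [0, 1].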
        # Extract only the required data from the output
out = out['detection_out']
        out = np.squeeze(out)  # drop all dimensions of size 1
        # Process each detected face region, one at a time
for detection in out:
            # Get the confidence value
confidence = float(detection[2])
            # Scale the bounding box coordinates to the input image
xmin = int(detection[3] * frame.shape[1])
ymin = int(detection[4] * frame.shape[0])
xmax = int(detection[5] * frame.shape[1])
ymax = int(detection[6] * frame.shape[0])
            # Run emotion inference and draw the bounding box only when conf > 0.5
if confidence > 0.5:
                # Clamp the face region to the frame; without this, negative
                # min values in particular would break the slice below
                xmin = max(xmin, 0)
                ymin = max(ymin, 0)
                xmax = min(xmax, frame.shape[1])
                ymax = min(ymax, frame.shape[0])
                # Crop only the face region
frame_face = frame[ ymin:ymax, xmin:xmax ]
                # Convert to the network input format
                img = cv2.resize(frame_face, (64, 64))    # resize
                img = img.transpose((2, 0, 1))            # HWC -> CHW
                img = np.expand_dims(img, axis=0)         # add batch dimension
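                # emotions-recognition-retail-0003 expects a [1, 3, 64, 64] BGR
                # blob on its 'data' input.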
                # Run inference
out = exec_net_emotion.infer(inputs={'data': img})
                # Extract only the required data from the output
out = out['prob_emotion']
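                # 'prob_emotion' has shape [1, 5, 1, 1]: softmax probabilities
                # for (neutral, happy, sad, surprise, anger), in that order.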
                out = np.squeeze(out)  # drop unneeded dimensions
                # Take the index with the highest output value
emoid = np.argmax(out)
emotion = list_emotion[emoid]
                # Draw the emotion label text
cv2.rectangle(frame, (10, frame.shape[0] - 242), (100, frame.shape[0] - 218), bkcolor_emotion[emoid], -1)
myfunction.cv2_putText(img = frame,
text = emotion,
org = (20, frame.shape[0] - 220),
fontFace = fontPIL,
fontScale = 20,
color = textcolor_emotion[emoid],
mode = 0)
                # Draw the bounding box
cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color=(240, 180, 0), thickness=3)
                # Draw the score bar graph
str_emotion = ['neu', 'hap', 'sad', 'sur', 'ang']
text_x = 10
text_y = frame.shape[0] - 180
rect_x = 80
rect_y = frame.shape[0] - 200
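                # Each bar's length is proportional to its class probability
                # (out[i] is in [0, 1], so a bar is at most 300 px long).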
for i in range(5):
cv2.putText(frame, str_emotion[i], (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (240, 180, 0), 2)
cv2.rectangle(frame, (rect_x, rect_y), (rect_x + int(300 * out[i]), rect_y + 20), color=(240, 180, 0), thickness=-1)
text_y = text_y + 40
rect_y = rect_y + 40
                # Show the face icon
icon_emotion[emoid].show(frame, frame.shape[1] - 110, frame.shape[0] - 110)
                # Stop after the first face
break
        # Compute the FPS
fps = fpsWithTick.get()
st_fps = 'fps: {:>6.2f}'.format(fps)
if (speedflg == 'y'):
cv2.rectangle(frame, (10, 38), (95, 55), (90, 90, 90), -1)
cv2.putText(frame, st_fps, (15, 50), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)
        # Draw the title
if (titleflg == 'y'):
cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)
        # Display the image
window_name = title + " (hit 'q' or 'esc' key to exit)"
cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
cv2.imshow(window_name, frame)
        # Record the processed result: step 2 (write the frame / save the image)
if (outpath != 'non'):
if (isstream):
outvideo.write(frame)
else:
cv2.imwrite(outpath, frame)
        # Exit when 'q' or ESC is pressed, or when the window is closed
breakflg = False
while(True):
key = cv2.waitKey(1)
prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
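            # getWindowProperty() returns a negative value once the user has
            # closed the window, so this doubles as a close-button check.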
if key == 27 or key == 113 or (prop_val < 0.0): # 'esc' or 'q'
breakflg = True
break
if (isstream):
break
        if (not breakflg) and isstream:
            # Read the next frame
            ret, frame = cap.read()
            if not ret:
                break
loopflg = cap.isOpened()
else:
loopflg = False
    # Cleanup
if (isstream):
cap.release()
    # Record the processed result: step 3 (release the video writer)
if (outpath != 'non'):
if (isstream):
outvideo.release()
cv2.destroyAllWindows()
print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
print('\n Finished.')
# main() entry point (execution starts here)
if __name__ == "__main__":
sys.exit(main())