Building a Neural Compute application based on the pretrained models and sample programs of the OpenVINO™ toolkit.
Using face-recognition inference models, we build a virtual try-on application for glasses and hats.
```python
# Item coordinates relative to EyePoint_left
item_x_EPL = item.width / 2 - EPL_x
item_y_EPL = item.height / 2 - EPL_y

# Item coordinates relative to the left eye
item_x_eyeleft = item_x_EPL * item_scale
item_y_eyeleft = item_y_EPL * item_scale
```
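The two steps above can be traced with concrete numbers. This is a minimal sketch; the item size, EyePoint position, and `item_scale` below are illustrative assumptions, not values from the actual program.

```python
# Hypothetical glasses PNG: 600x200 px, left EyePoint at (160, 100)
class Item:
    width, height = 600, 200

item = Item()
EPL_x, EPL_y = 160.0, 100.0   # left EyePoint inside the item image (assumed)
item_scale = 0.5              # eye distance / EyePoint distance (assumed)

# Item center relative to EyePoint_left
item_x_EPL = item.width / 2 - EPL_x    # 300 - 160 = 140.0
item_y_EPL = item.height / 2 - EPL_y   # 100 - 100 = 0.0

# Scaled to face size: item center relative to the left eye
item_x_eyeleft = item_x_EPL * item_scale
item_y_eyeleft = item_y_EPL * item_scale
print(item_x_eyeleft, item_y_eyeleft)  # 70.0 0.0
```

So with these assumed numbers, the item center sits 70 px to the right of the left eye, at the same height.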
Note that `math.atan2(b, a)` returns the angle in radians, not degrees.
```python
# atan2 check code
import math

angle = math.atan2(1.00, 1.73)
print(math.degrees(angle))
```
```python
# Angle of the eye line
eye_angle = math.atan2(eye_right_y - eye_left_y, eye_right_x - eye_left_x)
```
```python
# Angle of the EyePoint line
EP_angle = math.atan2(EPR_y - EPL_y, EPR_x - EPL_x)
```

If the EyePoints are already tilted in the item image, only the angle still missing relative to the eye line needs to be compensated for. In code this is a simple subtraction.
```python
# Item rotation angle
item_angle = eye_angle - EP_angle
```
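Putting the three angle formulas together with concrete numbers: the landmark and EyePoint coordinates below are illustrative assumptions (a face tilted about 30° and horizontal glasses, so EP_angle is 0 and item_angle equals eye_angle).

```python
import math

# Assumed eye landmarks: the face is tilted (dy=100, dx=173 -> ~30 degrees)
eye_left_x, eye_left_y = 100.0, 200.0
eye_right_x, eye_right_y = 273.0, 300.0

# Assumed EyePoints inside the item image: horizontal, so EP_angle = 0
EPL_x, EPL_y = 160.0, 100.0
EPR_x, EPR_y = 440.0, 100.0

eye_angle = math.atan2(eye_right_y - eye_left_y, eye_right_x - eye_left_x)
EP_angle = math.atan2(EPR_y - EPL_y, EPR_x - EPL_x)

# Rotate the item only by the angle it is still missing
item_angle = eye_angle - EP_angle
print(math.degrees(item_angle))
```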
```python
import cv2
import numpy as np
from pngoverlay import PNGOverlay

# Create a blank white image
img = np.zeros((600, 800, 3), np.uint8) + 255

# Create a PNGOverlay instance
item = PNGOverlay('image/6629_trim_small.png')

# Draw the transparent PNG
item.rotate(60)
item.show(img, 400, 300)

cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
```
```python
# (x1, y1) rotated by item_angle
x2 = x1 * math.cos(theta) - y1 * math.sin(theta)
y2 = x1 * math.sin(theta) + y1 * math.cos(theta)
```

(x1, y1) corresponds to (item_x_eyeleft, item_y_eyeleft), so this rotation has to be computed between the "item coordinates (left-eye basis)" step and the final "item coordinates" step in the code.
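Applying the rotation formula to the left-eye-based item coordinates looks like this; the 30° angle and (70, 0) input coordinates are the assumed values carried over from the earlier sketches, not program output.

```python
import math

# Assumed inputs: item_angle of 30 degrees, item center 70 px right of the eye
item_angle = math.radians(30.0)
item_x_eyeleft, item_y_eyeleft = 70.0, 0.0

theta = item_angle
# Standard 2D rotation of (x1, y1) about the origin (the left eye)
x2 = item_x_eyeleft * math.cos(theta) - item_y_eyeleft * math.sin(theta)
y2 = item_x_eyeleft * math.sin(theta) + item_y_eyeleft * math.cos(theta)
print(x2, y2)
```

With these inputs the offset (70, 0) rotates to roughly (60.6, 35.0): the item center moves down and slightly inward, following the tilted face.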
Command option | Default | Meaning |
-h, --help | - | Show help |
-item, --itemindex | 0 | Try-on item type (0-9) |
-i, --image | cam | Camera (cam) or a video/still-image file |
-m_dt, --m_detector | (required) | Face-detection model in IR format |
-m_lm, --m_landmarks | (required) | Landmark-extraction model in IR format |
-d, --device | (required) | Target device (CPU/MYRIAD) |
-t, --title | y | Show title (y/n) |
-s, --speed | y | Show speed measurement (y/n) |
-o, --out | non | File path for the processed output |
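The option table maps naturally onto `argparse`. This is a minimal sketch of how such a parser could look, using the long option names from the help output below; the exact defaults and help strings in the real `virtual_fitting.py` may differ.

```python
import argparse

# Minimal argparse sketch matching the option table (illustrative, not the
# actual parser from virtual_fitting.py)
parser = argparse.ArgumentParser()
parser.add_argument('-item', '--itemindex', type=int, default=0,
                    help='Item Index number (0-9)')
parser.add_argument('-i', '--image', default='cam',
                    help="Path to an image/video file, or 'cam' for camera stream")
parser.add_argument('-m_dt', '--m_detector',
                    help='Path to the IR face-detection model (.xml)')
parser.add_argument('-m_lm', '--m_landmarks',
                    help='Path to the IR landmarks-regression model (.xml)')
parser.add_argument('-d', '--device', default='CPU',
                    help='Target device (CPU/MYRIAD)')
parser.add_argument('-t', '--title', default='y', help='Show title (y/n)')
parser.add_argument('-s', '--speed', default='y', help='Show FPS (y/n)')
parser.add_argument('-o', '--out', default='non',
                    help='Output file path for the processed result')

# Example invocation
args = parser.parse_args(['-item', '3', '-d', 'MYRIAD'])
print(args.itemindex, args.device, args.image)  # 3 MYRIAD cam
```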
```
$ python3 virtual_fitting.py -h
--- Virtual Fitting ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3
usage: virtual_fitting.py [-h] [-item ITEMINDEX] [-i IMAGE_FILE]
                          [-m_dt M_DETECTOR] [-m_lm M_LANDMARKS] [-d DEVICE]
                          [-t TITLE] [-s SPEED] [-o IMAGE_OUT]

optional arguments:
  -h, --help            show this help message and exit
  -item ITEMINDEX, --itemindex ITEMINDEX
                        Item Index number (0-9)
  -i IMAGE_FILE, --image IMAGE_FILE
                        Absolute path to image file or cam for camera stream.
  -m_dt M_DETECTOR, --m_detector M_DETECTOR
                        Detector Path to an .xml file with a trained model.
                        Default value is /home/mizutu/model/intel/FP32/face-detection-retail-0005.xml
  -m_lm M_LANDMARKS, --m_landmarks M_LANDMARKS
                        Landmarks Path to an .xml file with a trained model.
                        Default value is /home/mizutu/model/intel/FP32/landmarks-regression-retail-0009.xml
  -d DEVICE, --device DEVICE
                        Optional. Specify a target device to infer on. CPU,
                        GPU, FPGA, HDDL or MYRIAD is acceptable. The demo
                        will look for a suitable plugin for the device
                        specified. Default value is CPU
  -t TITLE, --title TITLE
                        Program title flag.(y/n) Default value is 'y'
  -s SPEED, --speed SPEED
                        Speed display flag.(y/n) Default calue is 'y'
  -o IMAGE_OUT, --out IMAGE_OUT
                        Processed image file path. Default value is 'non'
```
```
$ python3 virtual_fitting.py -item 0 -i ~/Images/photo.jpg
--- Virtual Fitting ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3
virtual_fitting.py:161: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
  input_blob_face = next(iter(net_face.inputs))
Virtual Fitting: Starting application...
 - Item File    : ../../Images/parts/glass01.png
 - Image File   : /home/mizutu/Images/photo.jpg
 - m_detect     : /home/mizutu/model/intel/FP32/face-detection-retail-0005.xml
 - m_recognition: /home/mizutu/model/intel/FP32/landmarks-regression-retail-0009.xml
 - Device       : CPU
 - Program Title: y
 - Speed flag   : y
 - Processed out: non
FPS average: 47.50
Finished.
```
```
$ python3 virtual_fitting.py -item 3 -i ~/Images/photo4.jpg
--- Virtual Fitting ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3
virtual_fitting.py:161: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
  input_blob_face = next(iter(net_face.inputs))
Virtual Fitting: Starting application...
 - Item File    : ../../Images/parts/glass04.png
 - Image File   : /home/mizutu/Images/photo4.jpg
 - m_detect     : /home/mizutu/model/intel/FP32/face-detection-retail-0005.xml
 - m_recognition: /home/mizutu/model/intel/FP32/landmarks-regression-retail-0009.xml
 - Device       : CPU
 - Program Title: y
 - Speed flag   : y
 - Processed out: non
FPS average: 44.50
Finished.
```
$ cd ~/workspace/apps

◦ CPU
$ python3 virtual_fitting.py -item 0 -i ~/Images/photo.jpg
$ python3 virtual_fitting.py -item 0 -i ~/Videos/emotion2.mp4
$ python3 virtual_fitting.py -item 0 -i ~/Videos/emotion3.mp4
$ python3 virtual_fitting.py -item 0 -i ~/Videos/head_pose.mp4
$ python3 virtual_fitting.py -item 0 -i cam

◦ NCS2 (MYRIAD)
$ python3 virtual_fitting.py -item 0 -m_dt ~/model/intel/FP16/face-detection-retail-0005.xml -m_lm ~/model/intel/FP16/landmarks-regression-retail-0009.xml -d MYRIAD -i ~/Images/photo.jpg
$ python3 virtual_fitting.py -item 0 -m_dt ~/model/intel/FP16/face-detection-retail-0005.xml -m_lm ~/model/intel/FP16/landmarks-regression-retail-0009.xml -d MYRIAD -i ~/Videos/emotion2.mp4
$ python3 virtual_fitting.py -item 0 -m_dt ~/model/intel/FP16/face-detection-retail-0005.xml -m_lm ~/model/intel/FP16/landmarks-regression-retail-0009.xml -d MYRIAD -i ~/Videos/emotion3.mp4
$ python3 virtual_fitting.py -item 0 -m_dt ~/model/intel/FP16/face-detection-retail-0005.xml -m_lm ~/model/intel/FP16/landmarks-regression-retail-0009.xml -d MYRIAD -i ~/Videos/head_pose.mp4
$ python3 virtual_fitting.py -item 0 -m_dt ~/model/intel/FP16/face-detection-retail-0005.xml -m_lm ~/model/intel/FP16/landmarks-regression-retail-0009.xml -d MYRIAD -i cam