Having become able to modify source code to some extent, I will now build Neural Compute Applications based on the results so far.

The first application locates face regions in an image and infers each face's emotion with deep learning.
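Before the option table and the full emotion.py listing, here is a minimal sketch of the two-stage pipeline the application uses (face detection followed by emotion classification). The model paths and test image are placeholders to adjust for your environment; the API calls match the 2021.x inference_engine used throughout this section.

    # Minimal two-stage sketch: face-detection-adas-0001 -> emotions-recognition-retail-0003.
    # Model paths and the image path are placeholders.
    import cv2
    import numpy as np
    from openvino.inference_engine import IECore

    ie = IECore()
    face_net = ie.read_network('../FP16/face-detection-adas-0001.xml',
                               '../FP16/face-detection-adas-0001.bin')
    emo_net = ie.read_network('../FP16/emotions-recognition-retail-0003.xml',
                              '../FP16/emotions-recognition-retail-0003.bin')
    face_exec = ie.load_network(face_net, 'MYRIAD')
    emo_exec = ie.load_network(emo_net, 'MYRIAD')

    frame = cv2.imread('../image/emo2.jpg')                        # placeholder image
    blob = cv2.resize(frame, (672, 384)).transpose(2, 0, 1)[None]  # HWC -> NCHW, 672x384 input
    detections = np.squeeze(face_exec.infer({'data': blob})['detection_out'])

    for det in detections:
        if float(det[2]) <= 0.5:                                   # confidence gate
            continue
        x0, y0 = int(det[3] * frame.shape[1]), int(det[4] * frame.shape[0])
        x1, y1 = int(det[5] * frame.shape[1]), int(det[6] * frame.shape[0])
        face = frame[max(y0, 0):y1, max(x0, 0):x1]                 # crop the face region
        emo_in = cv2.resize(face, (64, 64)).transpose(2, 0, 1)[None]
        probs = np.squeeze(emo_exec.infer({'data': emo_in})['prob_emotion'])
        print((x0, y0, x1, y1),
              ['neutral', 'happy', 'sad', 'surprise', 'anger'][int(np.argmax(probs))])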
Command option | Default setting | Meaning |
-h, --help | - | Show help |
-i, --image | cam | Camera (cam) or input image file |
-l, --language | jp | Language (en/jp) |
-t, --title | y | Title display (y/n) |
pi@raspberrypi:~/workspace/apps $ python3 emotion.py -h

--- Emotion Recognition ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

usage: emotion.py [-h] [-i IMAGE_FILE] [-l LANGUAGE] [-t TITLE]

Image classifier using Intel® Neural Compute Stick 2.

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE_FILE, --image IMAGE_FILE
                        Absolute path to image file or cam for camera stream.
  -l LANGUAGE, --language LANGUAGE
                        Language.
  -t TITLE, --title TITLE
                        Program title flag.
$ cd ~/workspace/apps
pi@raspberrypi:~/workspace/apps $ python3 emotion.py

--- Emotion Recognition ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

Emotion Recognition: Starting application...
   - Plugin:        Myriad
   - Image File:    0
   - Language:      jp
   - Input Shape1:  data
   - Output Shape1: detection_out
   - Input Shape2:  data
   - Output Shape2: prob_emotion
   - Program Title: y

 Finished.
pi@raspberrypi:~/workspace/apps $ python3 emotion.py -i ../image/emo2.jpg

--- Emotion Recognition ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

Emotion Recognition: Starting application...
   - Plugin:        Myriad
   - Image File:    ../image/emo2.jpg
   - Language:      jp
   - Input Shape1:  data
   - Output Shape1: detection_out
   - Input Shape2:  data
   - Output Shape2: prob_emotion
   - Program Title: y

 Finished.
pi@raspberrypi:~/workspace/apps $ python3 emotion.py -i ../image/emo1.jpg
pi@raspberrypi:~/workspace/apps $ python3 emotion.py -i ../image/emo3.jpg
pi@raspberrypi:~/workspace/apps $ python3 emotion.py -i ../../Videos/video-test.mp4
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Emotion Recognition
##
## model: face-detection-adas-0001
##        emotions-recognition-retail-0003
##
## 2021.02.24 Masahiro Izutsu
##------------------------------------------
## emotion.py

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# Constants
DEVICE = "MYRIAD"
MODULE_FACE = '../FP16/face-detection-adas-0001'
MODULE_AGE = '../FP16/emotions-recognition-retail-0003'
WINDOW_WIDTH = 640
TEXT_COLOR = (255, 255, 255)                # white text

# Modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

import sys
import cv2
import numpy as np
import argparse
import myfunction

# Title and version information
title = 'Emotion Recognition'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser(description = 'Image classifier using Intel® Neural Compute Stick 2.')
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = 'cam',
                        help = 'Absolute path to image file or cam for camera stream.')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE', default = 'jp',
                        help = 'Language.')
    parser.add_argument('-t', '--title', metavar = 'TITLE', default = 'y',
                        help = 'Program title flag.')   # was 'Language.' (copy-paste slip)
    return parser

# Print basic model information
def display_info(image, lang, input_blob, out_blob, input_blob_emo, out_blob_emo, titleflg):
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    print('   - ' + YELLOW + 'Plugin:       ' + NOCOLOR + 'Myriad')
    print('   - ' + YELLOW + 'Image File:   ' + NOCOLOR, image)
    print('   - ' + YELLOW + 'Language:     ' + NOCOLOR, lang)
    print('   - ' + YELLOW + 'Input Shape1: ' + NOCOLOR, input_blob)
    print('   - ' + YELLOW + 'Output Shape1:' + NOCOLOR, out_blob)
    print('   - ' + YELLOW + 'Input Shape2: ' + NOCOLOR, input_blob_emo)
    print('   - ' + YELLOW + 'Output Shape2:' + NOCOLOR, out_blob_emo)
    print('   - ' + YELLOW + 'Program Title:' + NOCOLOR, titleflg)

# Determine the input file type
#   returns: 'jpeg', 'png', ...  image file
#            'None'              not a still image (e.g. a movie file)
#            'NotFound'          file does not exist
import imghdr
def is_pict(filename):
    try:
        imgtype = imghdr.what(filename)
    except FileNotFoundError as e:
        imgtype = 'NotFound'
    return str(imgtype)

# ** main **
def main():
    # Japanese font
    fontPIL = 'NotoSansCJK-Bold.ttc'

    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    lang = ARGS.language
    titleflg = ARGS.title
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()

    # Emotion labels
    if (lang == 'jp'):
        list_emotion = ['平静', '嬉しい', '悲しい', '驚き', '怒り']
    else:
        list_emotion = ['neutral', 'happy', 'sad', 'surprise', 'anger']

    # Per-emotion colors (box / label background / label text)
    color_emotion = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 255), (0, 0, 255)]
    bkcolor_emotion = [(120, 120, 70), (70, 120, 70), (70, 120, 120), (120, 70, 120), (70, 70, 120)]
    textcolor_emotion = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255)]

    # Load the face detection model: face-detection-adas-0001
    ie = IECore()
    net = ie.read_network(model = MODULE_FACE + '.xml', weights = MODULE_FACE + '.bin')
    exec_net = ie.load_network(network = net, device_name = DEVICE)

    # I/O setup (face detection)
    input_blob = net.input_info['data'].name
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.input_info[input_blob].input_data.shape

    # Load the emotion model: emotions-recognition-retail-0003
    net_emo = ie.read_network(model = MODULE_AGE + '.xml', weights = MODULE_AGE + '.bin')
    exec_net_emo = ie.load_network(network = net_emo, device_name = DEVICE)

    # I/O setup (emotion) -- read from net_emo (the original queried net here by mistake)
    input_blob_emo = net_emo.input_info['data'].name
    out_blob_emo = next(iter(net_emo.outputs))
    n_emo, c_emo, h_emo, w_emo = net_emo.input_info[input_blob_emo].input_data.shape

    # Show information
    display_info(input_stream, lang, input_blob, out_blob, input_blob_emo, out_blob_emo, titleflg)

    # Prepare the input
    if (isstream):
        # camera
        cap = cv2.VideoCapture(input_stream)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
    else:
        # still image
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # resize while keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True                          # loop once

    # Main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        # Convert to the network input format
        img = cv2.resize(frame, (w, h))         # resize
        img = img.transpose((2, 0, 1))          # HWC > CHW
        img = np.expand_dims(img, axis=0)       # add batch dimension

        # Inference (face detection)
        out = exec_net.infer(inputs={'data': img})

        # Extract the needed data from the output
        out = out['detection_out']
        out = np.squeeze(out)                   # drop all size-1 dimensions

        # Process each detected face region
        for detection in out:
            # confidence value
            confidence = float(detection[2])

            # scale the bounding box to the input image
            xmin = int(detection[3] * frame.shape[1])
            ymin = int(detection[4] * frame.shape[0])
            xmax = int(detection[5] * frame.shape[1])
            ymax = int(detection[6] * frame.shape[0])

            # draw the box only when confidence exceeds 0.5
            if confidence > 0.5:
                # Clamp the face region to the frame; uncorrected min values in particular cause an error
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax > frame.shape[1]:
                    xmax = frame.shape[1]
                if ymax > frame.shape[0]:
                    ymax = frame.shape[0]

                # crop the face region
                frame_face = frame[ymin:ymax, xmin:xmax]

                # convert to the network input format
                img = cv2.resize(frame_face, (64, 64))      # resize
                img = img.transpose((2, 0, 1))              # HWC > CHW
                img = np.expand_dims(img, axis=0)           # add batch dimension

                # Inference (emotion)
                out = exec_net_emo.infer(inputs={'data': img})

                # Extract the needed data and take the argmax as the emotion id
                out = out['prob_emotion']
                out = np.squeeze(out)
                emoid = np.argmax(out)
                emotion = list_emotion[emoid]

                # Draw the bounding box (face region) and label
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymin), bkcolor_emotion[emoid], -1)
                # cv2.putText(frame, emotion, (xmin, ymin-4), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=cor, lineType=cv2.LINE_AA)
                myfunction.cv2_putText(img = frame, text = emotion, org = (xmin+2, ymin-4),
                                       fontFace = fontPIL, fontScale = 12, color = textcolor_emotion[emoid], mode = 0)
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymax), color_emotion[emoid], thickness = 1)

        # Draw the title
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # Show the image
        window_name = title + ' (hit key to exit)'
        cv2.imshow(window_name, frame)
        cv2.moveWindow(window_name, 10, 40)

        # Exit when any key is pressed or the window is closed
        breakflg = False
        while (True):
            key = cv2.waitKey(1)
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if ((key != -1) or (prop_val < 0.0)):
                breakflg = True
                break
            if (isstream):
                break

        if ((breakflg == False) and isstream):
            # read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False

    # Cleanup
    if (isstream):
        cap.release()
    cv2.destroyAllWindows()
    print('\n Finished.')

# Entry point
if __name__ == "__main__":
    sys.exit(main())
Command option | Default setting | Meaning |
-h, --help | - | Show help |
-i, --image | cam | Camera (cam) or movie / still image file |
-m_dt, --m_detector | ~/model/intel/FP32/face-detection-adas-0001.xml | Face detection model in IR format |
-m_re, --m_recognition | ~/model/intel/FP32/emotions-recognition-retail-0003.xml | Emotion recognition model in IR format |
-d, --device | CPU | Target device (CPU/MYRIAD) |
-l, --language | jp | Language (en/jp) |
-t, --title | y | Title display (y/n) |
-s, --speed | y | Speed (FPS) display (y/n) |
-o, --out | non | File path for recording the processed result |
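None of the run logs below exercise -o. As an illustrative example (the output path here is a placeholder), recording the processed result of a movie file should look like this; for a movie input the frames are written with cv2.VideoWriter (mp4v), and for a still image the result is saved with cv2.imwrite:

$ python3 emotion2.py -i ~/Videos/video-test.mp4 -o ~/Videos/video-out.mp4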
$ python3 emotion2.py -h

--- Emotion Recognition 2 ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

usage: emotion2.py [-h] [-i IMAGE_FILE] [-m_dt M_DETECTOR] [-m_re M_RECOGNITION]
                   [-d DEVICE] [-l LANGUAGE] [-t TITLE] [-s SPEED] [-o IMAGE_OUT]

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE_FILE, --image IMAGE_FILE
                        Absolute path to image file or cam for camera stream.
  -m_dt M_DETECTOR, --m_detector M_DETECTOR
                        Detector Path to an .xml file with a trained model. Default value is
                        /home/mizutu/model/intel/FP32/face-detection-adas-0001.xml
  -m_re M_RECOGNITION, --m_recognition M_RECOGNITION
                        Emotion Path to an .xml file with a trained model. Default value is
                        /home/mizutu/model/intel/FP32/emotions-recognition-retail-0003.xml
  -d DEVICE, --device DEVICE
                        Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL
                        or MYRIAD is acceptable. The demo will look for a suitable plugin
                        for the device specified. Default value is CPU
  -l LANGUAGE, --language LANGUAGE
                        Language.(jp/en) Default value is 'jp'
  -t TITLE, --title TITLE
                        Program title flag.(y/n) Default value is 'y'
  -s SPEED, --speed SPEED
                        Speed display flag.(y/n) Default value is 'y'
  -o IMAGE_OUT, --out IMAGE_OUT
                        Processed image file path. Default value is 'non'
$ python3 emotion2.py -i ~/Images/emo2.jpg

--- Emotion Recognition 2 ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

Emotion Recognition 2: Starting application...
   - Image File   :  /home/mizutu/Images/emo2.jpg
   - m_detect     :  /home/mizutu/model/intel/FP32/face-detection-adas-0001.xml
   - m_recognition:  /home/mizutu/model/intel/FP32/emotions-recognition-retail-0003.xml
   - Device       :  CPU
   - Language     :  jp
   - Input Shape1 :  data
   - Output Shape1:  detection_out
   - Input Shape2 :  data
   - Output Shape2:  prob_emotion
   - Program Title:  y
   - Speed flag   :  y
   - Processed out:  non

FPS average:      19.00

 Finished.
$ python3 emotion2.py -i ~/Images/emo1.jpg

--- Emotion Recognition 2 ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

Emotion Recognition 2: Starting application...
   - Image File   :  /home/mizutu/Images/emo1.jpg
   - m_detect     :  /home/mizutu/model/intel/FP32/face-detection-adas-0001.xml
   - m_recognition:  /home/mizutu/model/intel/FP32/emotions-recognition-retail-0003.xml
   - Device       :  CPU
   - Language     :  jp
   - Input Shape1 :  data
   - Output Shape1:  detection_out
   - Input Shape2 :  data
   - Output Shape2:  prob_emotion
   - Program Title:  y
   - Speed flag   :  y
   - Processed out:  non

FPS average:      22.40

 Finished.
$ cd ~/workspace/apps

◦ CPU
$ python3 emotion2.py -i ~/Images/emo2.jpg
$ python3 emotion2.py -i ~/Images/emo1.jpg
$ python3 emotion2.py -i ~/Images/emo3.jpg
$ python3 emotion2.py -i ~/Videos/video-test.mp4
$ python3 emotion2.py -i cam

◦ NCS2 (MYRIAD)
$ python3 emotion2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/emotions-recognition-retail-0003.xml -i ~/Images/emo2.jpg -d MYRIAD
$ python3 emotion2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/emotions-recognition-retail-0003.xml -i ~/Images/emo1.jpg -d MYRIAD
$ python3 emotion2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/emotions-recognition-retail-0003.xml -i ~/Images/emo3.jpg -d MYRIAD
$ python3 emotion2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/emotions-recognition-retail-0003.xml -i ~/Videos/video-test.mp4 -d MYRIAD
$ python3 emotion2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/emotions-recognition-retail-0003.xml -i cam -d MYRIAD
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Emotion Recognition
##
## model: face-detection-adas-0001
##        emotions-recognition-retail-0003
##
## 2021.02.24 Masahiro Izutsu
##------------------------------------------
## 2021.03.25 model/device parameter
## 2021.06.23 fps display

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# Constants
WINDOW_WIDTH = 640
TEXT_COLOR = (255, 255, 255)                # white text

from os.path import expanduser
MODEL_DEF_FACE = expanduser('~/model/intel/FP32/face-detection-adas-0001.xml')
MODEL_DEF_EMO = expanduser('~/model/intel/FP32/emotions-recognition-retail-0003.xml')

# Modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

import sys
import cv2
import numpy as np
import argparse
import myfunction
import mylib

# Title and version information
title = 'Emotion Recognition 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = 'cam',
                        help = 'Absolute path to image file or cam for camera stream.')
    parser.add_argument('-m_dt', '--m_detector', type=str, default = MODEL_DEF_FACE,
                        help = 'Detector Path to an .xml file with a trained model. '
                               'Default value is ' + MODEL_DEF_FACE)
    parser.add_argument('-m_re', '--m_recognition', type=str, default = MODEL_DEF_EMO,
                        help = 'Emotion Path to an .xml file with a trained model. '
                               'Default value is ' + MODEL_DEF_EMO)
    parser.add_argument('-d', '--device', default='CPU', type=str,
                        help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                               'acceptable. The demo will look for a suitable plugin for the device specified. '
                               'Default value is CPU')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE', default = 'jp',
                        help = 'Language.(jp/en) Default value is \'jp\'')
    parser.add_argument('-t', '--title', metavar = 'TITLE', default = 'y',
                        help = 'Program title flag.(y/n) Default value is \'y\'')
    parser.add_argument('-s', '--speed', metavar = 'SPEED', default = 'y',
                        help = 'Speed display flag.(y/n) Default value is \'y\'')
    parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT', default = 'non',
                        help = 'Processed image file path. Default value is \'non\'')
    return parser

# Print basic model information
def display_info(image, detector, recognition, device, lang, input_blob, out_blob, input_blob_emo, out_blob_emo, titleflg, speedflg, outpath):
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    print('   - ' + YELLOW + 'Image File   : ' + NOCOLOR, image)
    print('   - ' + YELLOW + 'm_detect     : ' + NOCOLOR, detector)
    print('   - ' + YELLOW + 'm_recognition: ' + NOCOLOR, recognition)
    print('   - ' + YELLOW + 'Device       : ' + NOCOLOR, device)
    print('   - ' + YELLOW + 'Language     : ' + NOCOLOR, lang)
    print('   - ' + YELLOW + 'Input Shape1 : ' + NOCOLOR, input_blob)
    print('   - ' + YELLOW + 'Output Shape1: ' + NOCOLOR, out_blob)
    print('   - ' + YELLOW + 'Input Shape2 : ' + NOCOLOR, input_blob_emo)
    print('   - ' + YELLOW + 'Output Shape2: ' + NOCOLOR, out_blob_emo)
    print('   - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
    print('   - ' + YELLOW + 'Speed flag   : ' + NOCOLOR, speedflg)
    print('   - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)

# Determine the input file type
#   returns: 'jpeg', 'png', ...  image file
#            'None'              not a still image (e.g. a movie file)
#            'NotFound'          file does not exist
import imghdr
def is_pict(filename):
    try:
        imgtype = imghdr.what(filename)
    except FileNotFoundError as e:
        imgtype = 'NotFound'
    return str(imgtype)

# ** main **
def main():
    # Japanese font
    fontPIL = 'NotoSansCJK-Bold.ttc'

    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    lang = ARGS.language
    titleflg = ARGS.title
    speedflg = ARGS.speed
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()
    model_detector = ARGS.m_detector
    model_recognition = ARGS.m_recognition
    device = ARGS.device
    outpath = ARGS.out

    # Emotion labels
    if (lang == 'jp'):
        list_emotion = ['平静', '嬉しい', '悲しい', '驚き', '怒り']
    else:
        list_emotion = ['neutral', 'happy', 'sad', 'surprise', 'anger']

    # Per-emotion colors (box / label background / label text)
    color_emotion = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 255), (0, 0, 255)]
    bkcolor_emotion = [(120, 120, 70), (70, 120, 70), (70, 120, 120), (120, 70, 120), (70, 70, 120)]
    textcolor_emotion = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255)]

    # Load the face detection model: face-detection-adas-0001
    ie = IECore()
    net = ie.read_network(model = model_detector, weights = model_detector[:-4] + '.bin')
    exec_net = ie.load_network(network = net, device_name = device)

    # I/O setup (face detection)
    input_blob = net.input_info['data'].name
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.input_info[input_blob].input_data.shape

    # Load the emotion model: emotions-recognition-retail-0003
    net_emo = ie.read_network(model = model_recognition, weights = model_recognition[:-4] + '.bin')
    exec_net_emo = ie.load_network(network = net_emo, device_name = device)

    # I/O setup (emotion) -- read from net_emo (the original queried net here by mistake)
    input_blob_emo = net_emo.input_info['data'].name
    out_blob_emo = next(iter(net_emo.outputs))
    n_emo, c_emo, h_emo, w_emo = net_emo.input_info[input_blob_emo].input_data.shape

    # Show information
    display_info(input_stream, model_detector, model_recognition, device, lang,
                 input_blob, out_blob, input_blob_emo, out_blob_emo, titleflg, speedflg, outpath)

    # Prepare the input
    if (isstream):
        # camera
        cap = cv2.VideoCapture(input_stream)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
    else:
        # still image
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # resize while keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True                          # loop once

    # Record the result: step 1 (open the video writer)
    if (outpath != 'non'):
        if (isstream):
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
            outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))

    # Initialize the speed measurement
    fpsWithTick = mylib.fpsWithTick()
    frame_count = 0
    fps_total = 0
    fpsWithTick.get()                           # start fps measurement

    # Main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        # Convert to the network input format
        img = cv2.resize(frame, (w, h))         # resize
        img = img.transpose((2, 0, 1))          # HWC > CHW
        img = np.expand_dims(img, axis=0)       # add batch dimension

        # Inference (face detection)
        out = exec_net.infer(inputs={'data': img})

        # Extract the needed data from the output
        out = out['detection_out']
        out = np.squeeze(out)                   # drop all size-1 dimensions

        # Process each detected face region
        for detection in out:
            # confidence value
            confidence = float(detection[2])

            # scale the bounding box to the input image
            xmin = int(detection[3] * frame.shape[1])
            ymin = int(detection[4] * frame.shape[0])
            xmax = int(detection[5] * frame.shape[1])
            ymax = int(detection[6] * frame.shape[0])

            # draw the box only when confidence exceeds 0.5
            if confidence > 0.5:
                # Clamp the face region to the frame; uncorrected min values in particular cause an error
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax > frame.shape[1]:
                    xmax = frame.shape[1]
                if ymax > frame.shape[0]:
                    ymax = frame.shape[0]

                # crop the face region
                frame_face = frame[ymin:ymax, xmin:xmax]

                # convert to the network input format
                img = cv2.resize(frame_face, (64, 64))      # resize
                img = img.transpose((2, 0, 1))              # HWC > CHW
                img = np.expand_dims(img, axis=0)           # add batch dimension

                # Inference (emotion)
                out = exec_net_emo.infer(inputs={'data': img})

                # Extract the needed data and take the argmax as the emotion id
                out = out['prob_emotion']
                out = np.squeeze(out)
                emoid = np.argmax(out)
                emotion = list_emotion[emoid]

                # Draw the bounding box (face region) and label
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymin), bkcolor_emotion[emoid], -1)
                # cv2.putText(frame, emotion, (xmin, ymin-4), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=cor, lineType=cv2.LINE_AA)
                myfunction.cv2_putText(img = frame, text = emotion, org = (xmin+2, ymin-4),
                                       fontFace = fontPIL, fontScale = 12, color = textcolor_emotion[emoid], mode = 0)
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymax), color_emotion[emoid], thickness = 1)

        # Calculate and draw FPS
        fps = fpsWithTick.get()
        st_fps = 'fps: {:>6.2f}'.format(fps)
        if (speedflg == 'y'):
            cv2.rectangle(frame, (10, 38), (95, 55), (90, 90, 90), -1)
            cv2.putText(frame, st_fps, (15, 50), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)

        # Draw the title
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # Show the image
        window_name = title + " (hit 'q' or 'esc' key to exit)"
        cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
        cv2.imshow(window_name, frame)

        # Record the result: step 2 (write a frame / a still image)
        if (outpath != 'non'):
            if (isstream):
                outvideo.write(frame)
            else:
                cv2.imwrite(outpath, frame)

        # Exit on 'esc' or 'q', or when the window is closed
        breakflg = False
        while (True):
            key = cv2.waitKey(1)
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if key == 27 or key == 113 or (prop_val < 0.0):     # 'esc' or 'q'
                breakflg = True
                break
            if (isstream):
                break

        if ((breakflg == False) and isstream):
            # read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False

    # Cleanup
    if (isstream):
        cap.release()

    # Record the result: step 3 (close the video writer)
    if (outpath != 'non'):
        if (isstream):
            outvideo.release()

    cv2.destroyAllWindows()
    print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
    print('\n Finished.')

# Entry point
if __name__ == "__main__":
    sys.exit(main())
The next application locates face regions in an image and infers age and gender with deep learning.
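The age/gender model reports age as a normalized value in its age_conv3 output and gender as a two-way softmax in prob. As a standalone sketch of the decode step the program below performs (the raw output values here are made up for illustration):

    import numpy as np

    # Decode age-gender-recognition-retail-0013 outputs, as the program below does.
    # 'out' mimics the inference result dict; the raw values are made up.
    out = {'age_conv3': np.array([[[[0.253]]]]),        # age / 100
           'prob': np.array([[[[0.12]], [[0.88]]]])}    # [female, male] softmax

    age = out['age_conv3'][0][0][0][0] * 100            # -> 25.3 years
    gender = ('Female', 'Male')[int(np.argmax(out['prob'][0]))]
    print('{}: {:>5.1f}'.format(gender, age))           # Male:  25.3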
Command option | Default setting | Meaning |
-h, --help | - | Show help |
-i, --image | cam | Camera (cam) or input image file |
-l, --language | jp | Language (en/jp) |
-t, --title | y | Title display (y/n) |
pi@raspberrypi:~/workspace/apps $ python3 age_gender.py -h

--- Age/Gender Recognition ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

usage: age_gender.py [-h] [-i IMAGE_FILE] [-l LANGUAGE] [-t TITLE]

Image classifier using Intel® Neural Compute Stick 2.

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE_FILE, --image IMAGE_FILE
                        Absolute path to image file or cam for camera stream.
  -l LANGUAGE, --language LANGUAGE
                        Language.
  -t TITLE, --title TITLE
                        Program title flag.
pi@raspberrypi:~/workspace/apps $ python3 age_gender.py

--- Age/Gender Recognition ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

Age/Gender Recognition: Starting application...
   - Plugin:        Myriad
   - Image File:    0
   - Language:      jp
   - Input Shape1:  data
   - Output Shape1: detection_out
   - Input Shape2:  data
   - Output Shape2: age_conv3
   - Program Title: y

 Finished.
pi@raspberrypi:~/workspace/apps $ python3 age_gender.py -i ../image/main001.jpg

--- Age/Gender Recognition ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

Age/Gender Recognition: Starting application...
   - Plugin:        Myriad
   - Image File:    ../image/main001.jpg
   - Language:      jp
   - Input Shape1:  data
   - Output Shape1: detection_out
   - Input Shape2:  data
   - Output Shape2: age_conv3
   - Program Title: y

 Finished.
pi@raspberrypi:~/workspace/apps $ python3 age_gender.py -i ../image/emo2.jpg
pi@raspberrypi:~/workspace/apps $ python3 age_gender.py -i ../image/photo3.jpg
pi@raspberrypi:~/workspace/apps $ python3 age_gender.py -i ../../Videos/video-test.mp4
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Age/Gender Recognition
##
## model: face-detection-adas-0001
##        age-gender-recognition-retail-0013
##
## 2021.02.24 Masahiro Izutsu
##------------------------------------------
## age_gender.py

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# Constants
DEVICE = "MYRIAD"
MODULE_FACE = '../FP16/face-detection-adas-0001'
MODULE_AGE = '../FP16/age-gender-recognition-retail-0013'
WINDOW_WIDTH = 640
BOX_COLOR_M = (0, 255, 0)
BOX_COLOR_F = (0, 0, 255)
LABEL_BG_COLOR_M = (70, 120, 70)            # greyish green background for text
LABEL_BG_COLOR_F = (70, 70, 120)            # greyish red background for text
TEXT_COLOR = (255, 255, 255)                # white text

# Modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

import sys
import cv2
import numpy as np
import argparse
import myfunction

# Title and version information
title = 'Age/Gender Recognition'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser(description = 'Image classifier using Intel® Neural Compute Stick 2.')
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type = str, default = 'cam',
                        help = 'Absolute path to image file or cam for camera stream.')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE', default = 'jp',
                        help = 'Language.')
    parser.add_argument('-t', '--title', metavar = 'TITLE', default = 'y',
                        help = 'Program title flag.')   # was 'Language.' (copy-paste slip)
    return parser

# Print basic model information
def display_info(image, lang, input_blob, out_blob, input_blob_age, out_blob_age, titleflg):
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    print('   - ' + YELLOW + 'Plugin:       ' + NOCOLOR + 'Myriad')
    print('   - ' + YELLOW + 'Image File:   ' + NOCOLOR, image)
    print('   - ' + YELLOW + 'Language:     ' + NOCOLOR, lang)
    print('   - ' + YELLOW + 'Input Shape1: ' + NOCOLOR, input_blob)
    print('   - ' + YELLOW + 'Output Shape1:' + NOCOLOR, out_blob)
    print('   - ' + YELLOW + 'Input Shape2: ' + NOCOLOR, input_blob_age)
    print('   - ' + YELLOW + 'Output Shape2:' + NOCOLOR, out_blob_age)
    print('   - ' + YELLOW + 'Program Title:' + NOCOLOR, titleflg)

# Determine the input file type
#   returns: 'jpeg', 'png', ...  image file
#            'None'              not a still image (e.g. a movie file)
#            'NotFound'          file does not exist
import imghdr
def is_pict(filename):
    try:
        imgtype = imghdr.what(filename)
    except FileNotFoundError as e:
        imgtype = 'NotFound'
    return str(imgtype)

# ** main **
def main():
    # Japanese font
    fontPIL = 'NotoSansCJK-Bold.ttc'

    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    lang = ARGS.language
    titleflg = ARGS.title
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()

    # Gender labels
    if (lang == 'jp'):
        label = ('女性: ', '男性: ')
    else:
        label = ('Female: ', 'Male: ')

    # Load the face detection model: face-detection-adas-0001
    ie = IECore()
    net = ie.read_network(model = MODULE_FACE + '.xml', weights = MODULE_FACE + '.bin')
    exec_net = ie.load_network(network = net, device_name = DEVICE)

    # I/O setup (face detection)
    input_blob = net.input_info['data'].name
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.input_info[input_blob].input_data.shape

    # Load the age/gender model: age-gender-recognition-retail-0013
    net_age = ie.read_network(model = MODULE_AGE + '.xml', weights = MODULE_AGE + '.bin')
    exec_net_age = ie.load_network(network = net_age, device_name = DEVICE)

    # I/O setup (age/gender) -- read from net_age (the original queried net here by mistake)
    input_blob_age = net_age.input_info['data'].name
    out_blob_age = next(iter(net_age.outputs))
    n_age, c_age, h_age, w_age = net_age.input_info[input_blob_age].input_data.shape

    # Show information
    display_info(input_stream, lang, input_blob, out_blob, input_blob_age, out_blob_age, titleflg)

    # Prepare the input
    if (isstream):
        # camera
        cap = cv2.VideoCapture(input_stream)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
    else:
        # still image
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # resize while keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True                          # loop once

    # Main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        # Convert to the network input format
        img = cv2.resize(frame, (w, h))         # resize
        img = img.transpose((2, 0, 1))          # HWC > CHW
        img = np.expand_dims(img, axis=0)       # add batch dimension

        # Inference (face detection)
        out = exec_net.infer(inputs={'data': img})

        # Extract the needed data from the output
        out = out['detection_out']
        out = np.squeeze(out)                   # drop all size-1 dimensions

        # Process each detected face region
        for detection in out:
            # confidence value
            confidence = float(detection[2])

            # scale the bounding box to the input image
            xmin = int(detection[3] * frame.shape[1])
            ymin = int(detection[4] * frame.shape[0])
            xmax = int(detection[5] * frame.shape[1])
            ymax = int(detection[6] * frame.shape[0])

            # draw the box only when confidence exceeds 0.5
            if confidence > 0.5:
                # Clamp the face region to the frame; uncorrected min values in particular cause an error
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax > frame.shape[1]:
                    xmax = frame.shape[1]
                if ymax > frame.shape[0]:
                    ymax = frame.shape[0]

                # crop the face region
                frame_face = frame[ymin:ymax, xmin:xmax]

                # convert to the network input format
                img = cv2.resize(frame_face, (62, 62))      # resize
                img = img.transpose((2, 0, 1))              # HWC > CHW
                img = np.expand_dims(img, axis=0)           # add batch dimension

                # Inference (age/gender)
                out = exec_net_age.infer(inputs={'data': img})

                # Extract the needed data: age is a normalized value, gender a 2-way softmax
                age = out['age_conv3']
                prob = out['prob']
                age = age[0][0][0][0] * 100
                gender = label[np.argmax(prob[0])]
                if gender == label[0]:
                    box_color = BOX_COLOR_F
                    label_bgcolor = LABEL_BG_COLOR_F
                else:
                    box_color = BOX_COLOR_M
                    label_bgcolor = LABEL_BG_COLOR_M
                out_str = gender+':'+'{:>5.1f}'.format(age)
                label_text_color = TEXT_COLOR

                # Draw the bounding box (face region) and label
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymin), label_bgcolor, -1)
                # cv2.putText(frame, out_str, (xmin, ymin-4), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=cor, lineType=cv2.LINE_AA)
                myfunction.cv2_putText(img = frame, text = out_str, org = (xmin+2, ymin-4),
                                       fontFace = fontPIL, fontScale = 12, color = label_text_color, mode = 0)
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymax), box_color, thickness = 1)

        # Draw the title
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # Show the image
        window_name = title + ' (hit key to exit)'
        cv2.imshow(window_name, frame)
        cv2.moveWindow(window_name, 10, 40)

        # Exit when any key is pressed or the window is closed
        breakflg = False
        while (True):
            key = cv2.waitKey(1)
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if ((key != -1) or (prop_val < 0.0)):
                breakflg = True
                break
            if (isstream):
                break

        if ((breakflg == False) and isstream):
            # read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False

    # Cleanup
    if (isstream):
        cap.release()
    cv2.destroyAllWindows()
    print('\n Finished.')

# Entry point
if __name__ == "__main__":
    sys.exit(main())
Command option | Default setting | Meaning |
-h, --help | - | Show help |
-i, --image | cam | Camera (cam) or movie / still image file |
-m_dt, --m_detector | ~/model/intel/FP32/face-detection-adas-0001.xml | Face detection model in IR format |
-m_re, --m_recognition | ~/model/intel/FP32/age-gender-recognition-retail-0013.xml | Age/gender analysis model in IR format |
-d, --device | CPU | Target device (CPU/MYRIAD) |
-l, --language | jp | Language (en/jp) |
-t, --title | y | Title display (y/n) |
-s, --speed | y | Speed (FPS) display (y/n) |
-o, --out | non | File path for recording the processed result |
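As with emotion2.py, -o is not exercised in the logs below; an illustrative invocation (output path is a placeholder) that records the processed movie would be:

$ python3 age_gender2.py -i ~/Videos/video-test.mp4 -o ~/Videos/video-out.mp4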
$ python3 age_gender2.py -h

--- Age/Gender Recognition 2 ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

usage: age_gender2.py [-h] [-i IMAGE_FILE] [-m_dt M_DETECTOR] [-m_re M_RECOGNITION]
                      [-d DEVICE] [-l LANGUAGE] [-t TITLE] [-s SPEED] [-o IMAGE_OUT]

optional arguments:
  -h, --help            show this help message and exit
  -i IMAGE_FILE, --image IMAGE_FILE
                        Absolute path to image file or cam for camera stream.
  -m_dt M_DETECTOR, --m_detector M_DETECTOR
                        Detector Path to an .xml file with a trained model. Default value is
                        /home/mizutu/model/intel/FP32/face-detection-adas-0001.xml
  -m_re M_RECOGNITION, --m_recognition M_RECOGNITION
                        Recognition Path to an .xml file with a trained model. Default value is
                        /home/mizutu/model/intel/FP32/age-gender-recognition-retail-0013.xml
  -d DEVICE, --device DEVICE
                        Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL
                        or MYRIAD is acceptable. The demo will look for a suitable plugin
                        for the device specified. Default value is CPU
  -l LANGUAGE, --language LANGUAGE
                        Language.(jp/en) Default value is 'jp'
  -t TITLE, --title TITLE
                        Program title flag.(y/n) Default value is 'y'
  -s SPEED, --speed SPEED
                        Speed display flag.(y/n) Default value is 'y'
  -o IMAGE_OUT, --out IMAGE_OUT
                        Processed image file path. Default value is 'non'
$ python3 age_gender2.py -i ~/Images/emo2.jpg

--- Age/Gender Recognition 2 ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

Age/Gender Recognition 2: Starting application...
   - Image File   :  /home/mizutu/Images/emo2.jpg
   - m_detect     :  /home/mizutu/model/intel/FP32/face-detection-adas-0001.xml
   - m_recognition:  /home/mizutu/model/intel/FP32/age-gender-recognition-retail-0013.xml
   - Device       :  CPU
   - Language     :  jp
   - Input Shape1 :  data
   - Output Shape1:  detection_out
   - Input Shape2 :  data
   - Output Shape2:  age_conv3
   - Program Title:  y
   - Speed flag   :  y
   - Processed out:  non

FPS average:       3.30

 Finished.
$ python3 age_gender2.py -i ~/Images/photo3.jpg

--- Age/Gender Recognition 2 ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

Age/Gender Recognition 2: Starting application...
   - Image File   :  /home/mizutu/Images/photo3.jpg
   - m_detect     :  /home/mizutu/model/intel/FP32/face-detection-adas-0001.xml
   - m_recognition:  /home/mizutu/model/intel/FP32/age-gender-recognition-retail-0013.xml
   - Device       :  CPU
   - Language     :  jp
   - Input Shape1 :  data
   - Output Shape1:  detection_out
   - Input Shape2 :  data
   - Output Shape2:  age_conv3
   - Program Title:  y
   - Speed flag   :  y
   - Processed out:  non

FPS average:      14.80

 Finished.
$ cd ~/workspace/apps

◦ CPU
$ python3 age_gender2.py -i ~/Images/emo2.jpg
$ python3 age_gender2.py -i ~/Images/main001.jpg
$ python3 age_gender2.py -i ~/Images/photo3.jpg
$ python3 age_gender2.py -i ~/Videos/video-test.mp4
$ python3 age_gender2.py -i cam

◦ NCS2 (MYRIAD)
$ python3 age_gender2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/age-gender-recognition-retail-0013.xml -i ~/Images/emo2.jpg -d MYRIAD
$ python3 age_gender2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/age-gender-recognition-retail-0013.xml -i ~/Images/main001.jpg -d MYRIAD
$ python3 age_gender2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/age-gender-recognition-retail-0013.xml -i ~/Images/photo3.jpg -d MYRIAD
$ python3 age_gender2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/age-gender-recognition-retail-0013.xml -i ~/Videos/video-test.mp4 -d MYRIAD
$ python3 age_gender2.py -m_dt ~/model/intel/FP16/face-detection-adas-0001.xml -m_re ~/model/intel/FP16/age-gender-recognition-retail-0013.xml -i cam -d MYRIAD
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Age/Gender Recognition
##
## model: face-detection-adas-0001
##        age-gender-recognition-retail-0013
##
## 2021.02.24 Masahiro Izutsu
##------------------------------------------
## 2021.03.25 model/device parameter
## 2021.06.23 fps display

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# Constants
WINDOW_WIDTH = 640
BOX_COLOR_M = (0, 255, 0)
BOX_COLOR_F = (0, 0, 255)
LABEL_BG_COLOR_M = (70, 120, 70)            # greyish green background for text
LABEL_BG_COLOR_F = (70, 70, 120)            # greyish red background for text
TEXT_COLOR = (255, 255, 255)                # white text

from os.path import expanduser
MODEL_DEF_FACE = expanduser('~/model/intel/FP32/face-detection-adas-0001.xml')
MODEL_DEF_AGE = expanduser('~/model/intel/FP32/age-gender-recognition-retail-0013.xml')

# Modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

import sys
import cv2
import numpy as np
import argparse
import myfunction
import mylib

# Title and version information
title = 'Age/Gender Recognition 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type=str, default = 'cam',
                        help = 'Absolute path to image file or cam for camera stream.')
    parser.add_argument('-m_dt', '--m_detector', type=str, default = MODEL_DEF_FACE,
                        help = 'Detector Path to an .xml file with a trained model. '
                               'Default value is ' + MODEL_DEF_FACE)
    parser.add_argument('-m_re', '--m_recognition', type=str, default = MODEL_DEF_AGE,
                        help = 'Recognition Path to an .xml file with a trained model. '
                               'Default value is ' + MODEL_DEF_AGE)
    parser.add_argument('-d', '--device', default = 'CPU', type=str,
                        help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                               'acceptable. The demo will look for a suitable plugin for the device specified. '
                               'Default value is CPU')
    parser.add_argument('-l', '--language', metavar = 'LANGUAGE', default = 'jp',
                        help = 'Language.(jp/en) Default value is \'jp\'')
    parser.add_argument('-t', '--title', metavar = 'TITLE', default = 'y',
                        help = 'Program title flag.(y/n) Default value is \'y\'')
    parser.add_argument('-s', '--speed', metavar = 'SPEED', default = 'y',
                        help = 'Speed display flag.(y/n) Default value is \'y\'')
    parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT', default = 'non',
                        help = 'Processed image file path. Default value is \'non\'')
    return parser

# Print basic model information
def display_info(image, detector, recognition, device, lang, input_blob, out_blob, input_blob_age, out_blob_age, titleflg, speedflg, outpath):
    print(YELLOW + title + ': Starting application...' + NOCOLOR)
    print('   - ' + YELLOW + 'Image File   : ' + NOCOLOR, image)
    print('   - ' + YELLOW + 'm_detect     : ' + NOCOLOR, detector)
    print('   - ' + YELLOW + 'm_recognition: ' + NOCOLOR, recognition)
    print('   - ' + YELLOW + 'Device       : ' + NOCOLOR, device)
    print('   - ' + YELLOW + 'Language     : ' + NOCOLOR, lang)
    print('   - ' + YELLOW + 'Input Shape1 : ' + NOCOLOR, input_blob)
    print('   - ' + YELLOW + 'Output Shape1: ' + NOCOLOR, out_blob)
    print('   - ' + YELLOW + 'Input Shape2 : ' + NOCOLOR, input_blob_age)
    print('   - ' + YELLOW + 'Output Shape2: ' + NOCOLOR, out_blob_age)
    print('   - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
    print('   - ' + YELLOW + 'Speed flag   : ' + NOCOLOR, speedflg)
    print('   - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)

# Determine the input file type
#   returns: 'jpeg', 'png', ...  image file
#            'None'              not a still image (e.g. a movie file)
#            'NotFound'          file does not exist
import imghdr
def is_pict(filename):
    try:
        imgtype = imghdr.what(filename)
    except FileNotFoundError as e:
        imgtype = 'NotFound'
    return str(imgtype)

# ** main **
def main():
    # Japanese font
    fontPIL = 'NotoSansCJK-Bold.ttc'

    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    lang = ARGS.language
    titleflg = ARGS.title
    speedflg = ARGS.speed
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()
    model_detector = ARGS.m_detector
    model_recognition = ARGS.m_recognition
    device = ARGS.device
    outpath = ARGS.out

    # Gender labels
    if (lang == 'jp'):
        label = ('女性: ', '男性: ')
    else:
        label = ('Female: ', 'Male: ')

    # Load the face detection model: face-detection-adas-0001
    ie = IECore()
    net = ie.read_network(model = model_detector, weights = model_detector[:-4] + '.bin')
    exec_net = ie.load_network(network = net, device_name = device)

    # I/O setup (face detection)
    input_blob = net.input_info['data'].name
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.input_info[input_blob].input_data.shape

    # Load the age/gender model: age-gender-recognition-retail-0013
    net_age = ie.read_network(model = model_recognition, weights = model_recognition[:-4] + '.bin')
    exec_net_age = ie.load_network(network = net_age, device_name = device)

    # I/O setup (age/gender) -- read from net_age (the original queried net here by mistake)
    input_blob_age = net_age.input_info['data'].name
    out_blob_age = next(iter(net_age.outputs))
    n_age, c_age, h_age, w_age = net_age.input_info[input_blob_age].input_data.shape

    # Show information
    display_info(input_stream, model_detector, model_recognition, device, lang,
                 input_blob, out_blob, input_blob_age, out_blob_age, titleflg, speedflg, outpath)

    # Prepare the input
    if (isstream):
        # camera
        cap = cv2.VideoCapture(input_stream)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
    else:
        # still image
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # resize while keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True                          # loop once

    # Record the result: step 1 (open the video writer)
    if (outpath != 'non'):
        if (isstream):
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
            outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))

    # Initialize the speed measurement
    fpsWithTick = mylib.fpsWithTick()
    frame_count = 0
    fps_total = 0
    fpsWithTick.get()                           # start fps measurement

    # Main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        # Convert to the network input format
        img = cv2.resize(frame, (w, h))         # resize
        img = img.transpose((2, 0, 1))          # HWC > CHW
        img = np.expand_dims(img, axis=0)       # add batch dimension

        # Inference (face detection)
        out = exec_net.infer(inputs={'data': img})

        # Extract the needed data from the output
        out = out['detection_out']
        out = np.squeeze(out)                   # drop all size-1 dimensions

        # Process each detected face region
        for detection in out:
            # confidence value
            confidence = float(detection[2])

            # scale the bounding box to the input image
            xmin = int(detection[3] * frame.shape[1])
            ymin = int(detection[4] * frame.shape[0])
            xmax = int(detection[5] * frame.shape[1])
            ymax = int(detection[6] * frame.shape[0])

            # draw the box only when confidence exceeds 0.5
            if confidence > 0.5:
                # Clamp the face region to the frame; uncorrected min values in particular cause an error
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax > frame.shape[1]:
                    xmax = frame.shape[1]
                if ymax > frame.shape[0]:
                    ymax = frame.shape[0]

                # crop the face region
                frame_face = frame[ymin:ymax, xmin:xmax]

                # convert to the network input format
                img = cv2.resize(frame_face, (62, 62))      # resize
                img = img.transpose((2, 0, 1))              # HWC > CHW
                img = np.expand_dims(img, axis=0)           # add batch dimension

                # Inference (age/gender)
                out = exec_net_age.infer(inputs={'data': img})

                # Extract the needed data: age is a normalized value, gender a 2-way softmax
                age = out['age_conv3']
                prob = out['prob']
                age = age[0][0][0][0] * 100
                gender = label[np.argmax(prob[0])]
                if gender == label[0]:
                    box_color = BOX_COLOR_F
                    label_bgcolor = LABEL_BG_COLOR_F
                else:
                    box_color = BOX_COLOR_M
                    label_bgcolor = LABEL_BG_COLOR_M
                out_str = gender+':'+'{:>5.1f}'.format(age)
                label_text_color = TEXT_COLOR

                # Draw the bounding box (face region) and label
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymin), label_bgcolor, -1)
                # cv2.putText(frame, out_str, (xmin, ymin-4), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=cor, lineType=cv2.LINE_AA)
                myfunction.cv2_putText(img = frame, text = out_str, org = (xmin+2, ymin-4),
                                       fontFace = fontPIL, fontScale = 12, color = label_text_color, mode = 0)
                cv2.rectangle(frame, (xmin, ymin-20), (xmax, ymax), box_color, thickness = 1)

        # Calculate and draw FPS
        fps = fpsWithTick.get()
        st_fps = 'fps: {:>6.2f}'.format(fps)
        if (speedflg == 'y'):
            cv2.rectangle(frame, (10, 38), (95, 55), (90, 90, 90), -1)
            cv2.putText(frame, st_fps, (15, 50), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)

        # Draw the title
        if (titleflg == 'y'):
            cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # Show the image
        window_name = title + " (hit 'q' or 'esc' key to exit)"
        cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
        cv2.imshow(window_name, frame)

        # Record the result: step 2 (write a frame / a still image)
        if (outpath != 'non'):
            if (isstream):
                outvideo.write(frame)
            else:
                cv2.imwrite(outpath, frame)

        # Exit on 'esc' or 'q', or when the window is closed
        breakflg = False
        while (True):
            key = cv2.waitKey(1)
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if key == 27 or key == 113 or (prop_val < 0.0):     # 'esc' or 'q'
                breakflg = True
                break
            if (isstream):
                break

        if ((breakflg == False) and isstream):
            # read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False

    # Cleanup
    if (isstream):
        cap.release()

    # Record the result: step 3 (close the video writer)
    if (outpath != 'non'):
        if (isstream):
            outvideo.release()

    cv2.destroyAllWindows()
    print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
    print('\n Finished.')

# Entry point
if __name__ == "__main__":
    sys.exit(main())
The last application detects 80 classes of objects in an image with deep learning. (A worked example of the metric behind --iou follows the option table below.)
Command option | Default setting | Meaning |
-h, --help | - | Show help |
--ir | ../public/yolo-v3-tiny-tf/FP16/yolo-v3-tiny-tf.xml | Trained model file in IR format |
-l, --labels | coco.names_jp | Object label file |
-i, --input | cam | Camera (cam) or input image file |
--threshold | 0.60 (DETECTION_THRESHOLD) | Detection threshold for objects |
--iou | 0.25 | Intersection-over-union threshold for overlapping objects |
-t, --title | y | Title display (y/n) |
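--threshold discards low-scoring boxes, while --iou controls duplicate suppression: two boxes whose intersection-over-union reaches the threshold are treated as the same object and only the higher-scoring one survives. Here is a small self-contained check of that metric in the same (centerX, centerY, width, height) box format as get_intersection_over_union in the listing below; the box values are made up:

    # Worked IoU example in the (centerX, centerY, width, height) format
    # used by get_intersection_over_union below; box values are made up.
    def iou(b1, b2):
        ix = min(b1[0] + b1[2] / 2, b2[0] + b2[2] / 2) - max(b1[0] - b1[2] / 2, b2[0] - b2[2] / 2)
        iy = min(b1[1] + b1[3] / 2, b2[1] + b2[3] / 2) - max(b1[1] - b1[3] / 2, b2[1] - b2[3] / 2)
        inter = ix * iy if ix > 0 and iy > 0 else 0
        return inter / (b1[2] * b1[3] + b2[2] * b2[3] - inter)

    a = (100, 100, 80, 60)       # two heavily overlapping boxes
    b = (110, 105, 80, 60)
    print(round(iou(a, b), 2))   # 0.67 -> above --iou 0.25, so treated as duplicates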
pi@raspberrypi:~/workspace/apps $ python3 object_detect_yolo3.py -h

--- TinyYOLO V3 Object detection ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

usage: object_detect_yolo3.py [-h] [--ir IR_File] [-l LABEL_FILE]
                              [-i IMAGE_FILE or cam] [--threshold FLOAT]
                              [--iou FLOAT] [-t TITLE]

Image classifier using Intel® Neural Compute Stick 2.

optional arguments:
  -h, --help            show this help message and exit
  --ir IR_File          Absolute path to the neural network IR xml file.
  -l LABEL_FILE, --labels LABEL_FILE
                        Absolute path to labels file.
  -i IMAGE_FILE or cam, --input IMAGE_FILE or cam
                        Absolute path to image file or cam for camera stream.
  --threshold FLOAT     Threshold for detection.
  --iou FLOAT           Intersection Over Union.
  -t TITLE, --title TITLE
                        Program title flag.
$ cd ~/workspace/apps
pi@raspberrypi:~/workspace/apps $ python3 object_detect_yolo3.py

--- TinyYOLO V3 Object detection ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

Running OpenVINO NCS Tensorflow TinyYolo v3 example...

 Displaying image with objects detected in GUI...
 Click in the GUI window and hit any key to exit.
object_detect_yolo3.py:272: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
  input_blob = next(iter(net.inputs))
Tiny Yolo v3: Starting application...
   - Plugin:        Myriad
   - IR File:       ../public/yolo-v3-tiny-tf/FP16/yolo-v3-tiny-tf.xml
   - Input Shape:   [1, 3, 416, 416]
   - Output Shapes:
       - output #0 name: conv2d_12/Conv2D/YoloRegion
           - output shape: [1, 255, 26, 26]
       - output #1 name: conv2d_9/Conv2D/YoloRegion
           - output shape: [1, 255, 13, 13]
   - Labels File:   coco.names_jp
   - Image File:    0
   - Threshold:     0.6
   - Intersection Over Union:  0.25
   - Program Title: y
object_detect_yolo3.py:335: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  all_output_results = req_handle.outputs

 Finished.
$ cd ~/workspace/apps
pi@raspberrypi:~/workspace/apps $ python3 object_detect_yolo3.py --threshold 0.001

--- TinyYOLO V3 Object detection ---
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2

Running OpenVINO NCS Tensorflow TinyYolo v3 example...

 Displaying image with objects detected in GUI...
 Click in the GUI window and hit any key to exit.
object_detect_yolo3.py:272: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
  input_blob = next(iter(net.inputs))
Tiny Yolo v3: Starting application...
   - Plugin:        Myriad
   - IR File:       ../public/yolo-v3-tiny-tf/FP16/yolo-v3-tiny-tf.xml
   - Input Shape:   [1, 3, 416, 416]
   - Output Shapes:
       - output #0 name: conv2d_12/Conv2D/YoloRegion
           - output shape: [1, 255, 26, 26]
       - output #1 name: conv2d_9/Conv2D/YoloRegion
           - output shape: [1, 255, 13, 13]
   - Labels File:   coco.names_jp
   - Image File:    0
   - Threshold:     0.001
   - Intersection Over Union:  0.25
   - Program Title: y
object_detect_yolo3.py:335: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  all_output_results = req_handle.outputs

 Finished.
pi@raspberrypi:~/workspace/apps $ python3 object_detect_yolo3.py -i ../image/desk-image.jpg
pi@raspberrypi:~/workspace/apps $ python3 object_detect_yolo3.py -i ../../Videos/car.mp4
# -*- coding: utf-8 -*- ##------------------------------------------ ## OpenVINO™ toolkit ## Object detection ## ## model: yolo-v3-tiny-tf ## ## 2021.02.24 Masahiro Izutsu ##------------------------------------------ ## object_detect_yolo3.py # Color Escape Code GREEN = '\033[1;32m' RED = '\033[1;31m' NOCOLOR = '\033[0m' YELLOW = '\033[1;33m' # 定数定義 DEVICE = "MYRIAD" WINDOW_WIDTH = 640 # モジュール読み込み from openvino.inference_engine import IECore from openvino.inference_engine import get_version # import処理 import sys import numpy as np import cv2 import argparse import myfunction # タイトル・バージョン情報 title = 'TinyYOLO V3 Object detection' print(GREEN) print('--- {} ---'.format(title)) print(cv2.__version__) print("OpenVINO inference_engine:", get_version()) print(NOCOLOR) # Adjust these thresholds DETECTION_THRESHOLD = 0.60 IOU_THRESHOLD = 0.25 # Tiny yolo anchor box values anchors = [10,14, 23,27, 37,58, 81,82, 135,169, 344,319] # Used for display BOX_COLOR = (0,255,0) LABEL_BG_COLOR = (70, 120, 70) # greyish green background for text TEXT_COLOR = (255, 255, 255) # white text TEXT_FONT = cv2.FONT_HERSHEY_SIMPLEX WINDOW_SIZE_W = 640 WINDOW_SIZE_H = 480 # Parses arguments for the application def parse_args(): parser = argparse.ArgumentParser(description = 'Image classifier using \ Intel® Neural Compute Stick 2.' ) parser.add_argument( '--ir', metavar = 'IR_File', type=str, default = '../public/yolo-v3-tiny-tf/FP16/yolo-v3-tiny-tf.xml', help = 'Absolute path to the neural network IR xml file.') parser.add_argument( '-l', '--labels', metavar = 'LABEL_FILE', type=str, default = 'coco.names_jp', help='Absolute path to labels file.') parser.add_argument( '-i', '--input', metavar = 'IMAGE_FILE or cam', type=str, default = 'cam', help = 'Absolute path to image file or cam for camera stream.') parser.add_argument( '--threshold', metavar = 'FLOAT', type=float, default = DETECTION_THRESHOLD, help = 'Threshold for detection.') parser.add_argument( '--iou', metavar = 'FLOAT', type=float, default = IOU_THRESHOLD, help = 'Intersection Over Union.') parser.add_argument( '-t', '--title', metavar = 'TITLE', default = 'y', help = 'Language.') return parser # creates a mask to remove duplicate objects (boxes) and their related probabilities and classifications # that should be considered the same object. This is determined by how similar the boxes are # based on the intersection-over-union metric. # box_list is as list of boxes (4 floats for centerX, centerY and Length and Width) def get_duplicate_box_mask(box_list, iou_threshold): # The intersection-over-union threshold to use when determining duplicates. # objects/boxes found that are over this threshold will be # considered the same object max_iou = iou_threshold box_mask = np.ones(len(box_list)) for i in range(len(box_list)): if box_mask[i] == 0: continue for j in range(i + 1, len(box_list)): if get_intersection_over_union(box_list[i], box_list[j]) >= max_iou: if box_list[i][4] < box_list[j][4]: box_list[i], box_list[j] = box_list[j], box_list[i] box_mask[j] = 0.0 filter_iou_mask = np.array(box_mask > 0.0, dtype='bool') return filter_iou_mask # Evaluate the intersection-over-union for two boxes # The intersection-over-union metric determines how close # two boxes are to being the same box. The closer the boxes # are to being the same, the closer the metric will be to 1.0 # box_1 and box_2 are arrays of 4 numbers which are the (x, y) # points that define the center of the box and the length and width of # the box. 
# Returns the intersection-over-union (between 0.0 and 1.0)
# for the two boxes specified.
def get_intersection_over_union(box_1, box_2):
    # one dimension of the intersecting box
    intersection_dim_1 = min(box_1[0]+0.5*box_1[2], box_2[0]+0.5*box_2[2]) - \
                         max(box_1[0]-0.5*box_1[2], box_2[0]-0.5*box_2[2])
    # the other dimension of the intersecting box
    intersection_dim_2 = min(box_1[1]+0.5*box_1[3], box_2[1]+0.5*box_2[3]) - \
                         max(box_1[1]-0.5*box_1[3], box_2[1]-0.5*box_2[3])
    if intersection_dim_1 < 0 or intersection_dim_2 < 0:
        # no intersection area
        intersection_area = 0
    else:
        # intersection area is the product of the intersection dimensions
        intersection_area = intersection_dim_1 * intersection_dim_2
    # The union area is the area of both boxes added together minus the intersection
    # area, which would otherwise be counted twice (by definition it is in each box).
    union_area = box_1[2]*box_1[3] + box_2[2]*box_2[3] - intersection_area
    # now we can return the intersection over union
    iou = intersection_area / union_area
    #print("iou: ", iou)
    return iou

# display basic model information
def display_info(input_shape, net_outputs, image, ir, labels, threshold, iou_threshold, titleflg):
    output_nodes = []
    output_iter = iter(net_outputs)
    for i in range(len(net_outputs)):
        output_nodes.append(next(output_iter))
    print(YELLOW + 'Tiny Yolo v3: Starting application...' + NOCOLOR)
    print(' - ' + YELLOW + 'Plugin: ' + NOCOLOR + 'Myriad')
    print(' - ' + YELLOW + 'IR File: ' + NOCOLOR, ir)
    print(' - ' + YELLOW + 'Input Shape: ' + NOCOLOR, input_shape)
    print(' - ' + YELLOW + 'Output Shapes:' + NOCOLOR)
    for j in range(len(output_nodes)):
        print('     - ' + YELLOW + 'output #' + str(j) + ' name: ' + NOCOLOR + output_nodes[j])
        print('       - output shape: ' + NOCOLOR + str(net_outputs[output_nodes[j]].shape))
    print(' - ' + YELLOW + 'Labels File: ' + NOCOLOR, labels)
    print(' - ' + YELLOW + 'Image File: ' + NOCOLOR, image)
    print(' - ' + YELLOW + 'Threshold: ' + NOCOLOR, threshold)
    print(' - ' + YELLOW + 'Intersection Over Union: ' + NOCOLOR, iou_threshold)
    print(' - ' + YELLOW + 'Program Title:' + NOCOLOR, titleflg)

# This function parses the output results from tiny yolo v3.
# The results are transposed so the output shape is (1, 13, 13, 255) or (1, 26, 26, 255);
# the original is (1, 255, h, w). Tiny yolo does detection on two different scales,
# using a 13x13 grid and a 26x26 grid.
# This is how the output is parsed:
# Imagine the image being split up into a 13x13 or 26x26 grid, where each grid cell
# contains 3 anchor boxes. For each of those 3 anchor boxes there are 85 values:
# 80 class probabilities + 4 coordinate values + 1 box confidence score = 85 values.
# That results in each grid cell having 255 values (85 values x 3 anchor boxes).
def parseTinyYoloV3Output(output_node_results, filtered_objects, source_image_width, source_image_height,
                          scaled_w, scaled_h, detection_threshold, num_labels):
    # transpose the output node results
    output_node_results = output_node_results.transpose(0, 2, 3, 1)
    output_h = output_node_results.shape[1]
    output_w = output_node_results.shape[2]

    # 80 class scores + 4 coordinate values + 1 objectness score = 85 values
    # 85 values * 3 prior box scores per grid cell = 255 values
    # 255 values * either 26x26 or 13x13 grid cells
    num_of_classes = num_labels
    num_anchor_boxes_per_cell = 3

    # Set the anchor offset depending on the output result shape
    anchor_offset = 0
    if output_w == 13:
        anchor_offset = 2 * 3
    elif output_w == 26:
        anchor_offset = 2 * 0

    # used to calculate approximate coordinates of the bounding box
    x_ratio = float(source_image_width) / scaled_w
    y_ratio = float(source_image_height) / scaled_h

    # Filter out low scoring results
    output_size = output_w * output_h
    for result_counter in range(output_size):
        row = result_counter // output_w
        col = result_counter % output_w
        for anchor_boxes in range(num_anchor_boxes_per_cell):
            # Check the box confidence score of the anchor box: how likely the box contains an object.
            box_confidence_score = output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 4]
            if box_confidence_score < detection_threshold:
                continue
            # Calculate the x, y, width, and height of the box
            x_center = (col + output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 0]) / output_w * scaled_w
            y_center = (row + output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 1]) / output_h * scaled_h
            width = np.exp(output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 2]) * anchors[anchor_offset + 2 * anchor_boxes]
            height = np.exp(output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 3]) * anchors[anchor_offset + 2 * anchor_boxes + 1]
            # Now check the anchor box for the highest class probabilities. If a probability
            # exceeds the threshold, save the box coordinates, class score and class id.
            for class_id in range(num_of_classes):
                class_probability = output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 5 + class_id]
                # The class confidence score is the box confidence score times the class probability
                class_confidence_score = class_probability * box_confidence_score
                if class_confidence_score < detection_threshold:
                    continue
                # Calculate the bounding box top-left and bottom-right vertexes
                xmin = max(int((x_center - width / 2) * x_ratio), 0)
                ymin = max(int((y_center - height / 2) * y_ratio), 0)
                xmax = min(int(xmin + width * x_ratio), source_image_width - 1)
                ymax = min(int(ymin + height * y_ratio), source_image_height - 1)
                filtered_objects.append((xmin, ymin, xmax, ymax, class_confidence_score, class_id))

# Determine the type of the input file.
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
    try:
        imgtype = imghdr.what(filename)
    except FileNotFoundError:
        imgtype = 'NotFound'
    return str(imgtype)

# ** main function **
def main():
    # Japanese-capable font for labels
    fontPIL = 'NotoSansCJK-Bold.ttc'

    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.input
    labels = ARGS.labels
    titleflg = ARGS.title
    if ARGS.input.lower() == "cam" or ARGS.input.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()
    ir = ARGS.ir
    detection_threshold = ARGS.threshold
    iou_threshold = ARGS.iou

    # Prepare Categories
    with open(labels) as labels_file:
        label_list = labels_file.read().splitlines()

    print(YELLOW + 'Running OpenVINO NCS Tensorflow TinyYolo v3 example...' + NOCOLOR)
    print('\n Displaying image with objects detected in GUI...')
    print(' Click in the GUI window and hit any key to exit.')

    ####################### 1. Create ie core and network #######################
    # Select the myriad plugin and IRs to be used
    ie = IECore()
    net = ie.read_network(model = ir, weights = ir[:-3] + 'bin')

    # Set up the input blobs
    input_blob = next(iter(net.inputs))
    input_shape = net.inputs[input_blob].shape

    # Display model information
    display_info(input_shape, net.outputs, input_stream, ir, labels, detection_threshold, iou_threshold, titleflg)

    # Load the network and get the network input shape information
    exec_net = ie.load_network(network = net, device_name = DEVICE)
    n, c, network_input_h, network_input_w = input_shape

    # prepare the input
    if (isstream):
        # camera
        cap = cv2.VideoCapture(input_stream)
        cap.set(cv2.CAP_PROP_FPS, 30)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, WINDOW_SIZE_W)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, WINDOW_SIZE_H)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
    else:
        # read a still image file
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # resize, keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # single pass

    # Width and height calculations. These will be used to scale the bounding boxes
    source_image_width = frame.shape[1]
    source_image_height = frame.shape[0]
    scaled_w = int(source_image_width * min(network_input_w/source_image_width, network_input_w/source_image_height))
    scaled_h = int(source_image_height * min(network_input_h/source_image_width, network_input_h/source_image_height))

    # main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        ####################### 2. Preprocessing #######################
        # Image preprocessing
        # frame = cv2.flip(frame, 1)
        display_image = frame

        # Image preprocessing (resize, transpose, reshape)
        input_image = cv2.resize(frame, (network_input_w, network_input_h), cv2.INTER_LINEAR)
        input_image = input_image.astype(np.float32)
        input_image = np.transpose(input_image, (2, 0, 1))
        reshaped_image = input_image.reshape((n, c, network_input_h, network_input_w))

        ####################### 3. Perform Inference #######################
        # Perform the inference asynchronously
        req_handle = exec_net.start_async(request_id=0, inputs={input_blob: reshaped_image})
        status = req_handle.wait()

        ####################### 4. Get results #######################
        all_output_results = req_handle.outputs

        ####################### 5. Post processing for results #######################
        # Post-processing for tiny yolo v3 consists of the following steps:
        # 1. Parse the output and filter out low scores
        # 2. Filter out duplicates using intersection over union
        # 3. Draw boxes and text

        ## 1. Tiny yolo v3 has two outputs and we check/parse both of them
        filtered_objects = []
        for output_node_results in all_output_results.values():
            parseTinyYoloV3Output(output_node_results, filtered_objects, source_image_width, source_image_height,
                                  scaled_w, scaled_h, detection_threshold, len(label_list))

        ## 2. Filter out duplicate objects from all detected objects
        filtered_mask = get_duplicate_box_mask(filtered_objects, iou_threshold)

        ## 3. Draw rectangles and set up display texts
        for object_index in range(len(filtered_objects)):
            if filtered_mask[object_index] == True:
                # get all values from the filtered object list
                xmin = filtered_objects[object_index][0]
                ymin = filtered_objects[object_index][1]
                xmax = filtered_objects[object_index][2]
                ymax = filtered_objects[object_index][3]
                confidence = filtered_objects[object_index][4]
                class_id = filtered_objects[object_index][5]

                # Set up the text for display
                cv2.rectangle(display_image, (xmin, ymin), (xmax, ymin+20), LABEL_BG_COLOR, -1)
                # cv2.putText(display_image, label_list[class_id] + ': %.2f' % confidence, (xmin+5, ymin+15), TEXT_FONT, 0.5, TEXT_COLOR, 1)
                myfunction.cv2_putText(img = display_image,
                                       text = label_list[class_id] + ': %.2f' % confidence,
                                       org = (xmin+5, ymin+18), fontFace = fontPIL, fontScale = 14,
                                       color = TEXT_COLOR, mode = 0)
                # Set up the bounding box
                cv2.rectangle(display_image, (xmin, ymin), (xmax, ymax), BOX_COLOR, 1)

        # draw the title
        if (titleflg == 'y'):
            cv2.putText(display_image, title, (10, 30),
                        cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # show the image
        window_name = title + ' (hit key to exit)'
        cv2.imshow(window_name, display_image)
        cv2.moveWindow(window_name, 10, 40)

        # exit when any key is pressed or the window is closed
        breakflg = False
        while (True):
            key = cv2.waitKey(1)
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if ((key != -1) or (prop_val < 0.0)):
                breakflg = True
                break
            if (isstream):
                break

        if ((breakflg == False) and isstream):
            # read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False

    # cleanup
    if (isstream):
        cap.release()
    cv2.destroyAllWindows()
    del net
    del exec_net
    print('\n Finished.')

# entry point
if __name__ == "__main__":
    sys.exit(main())
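The duplicate filter above hinges entirely on get_intersection_over_union(). One detail worth knowing: filtered_objects stores corner coordinates (xmin, ymin, xmax, ymax), while the IoU helper interprets its first four values as (centerX, centerY, width, height) per its comment block, so the value compared against iou_threshold is an approximation rather than an exact corner-based IoU. The following standalone sketch (independent of the script) shows with concrete numbers how the center-based format maps to an IoU value and how the 0.25 threshold decides whether two detections collapse into one:

# Standalone IoU check for two boxes given as (centerX, centerY, width, height),
# mirroring the logic of get_intersection_over_union() above.
def iou(b1, b2):
    w = min(b1[0] + b1[2] / 2, b2[0] + b2[2] / 2) - max(b1[0] - b1[2] / 2, b2[0] - b2[2] / 2)
    h = min(b1[1] + b1[3] / 2, b2[1] + b2[3] / 2) - max(b1[1] - b1[3] / 2, b2[1] - b2[3] / 2)
    inter = w * h if w > 0 and h > 0 else 0
    return inter / (b1[2] * b1[3] + b2[2] * b2[3] - inter)

# Two 100x100 boxes whose centers are 50 px apart:
# intersection = 50 * 100 = 5000, union = 10000 + 10000 - 5000 = 15000
print(iou((100, 100, 100, 100), (150, 100, 100, 100)))  # 0.333... -> merged at iou_threshold 0.25
print(iou((100, 100, 100, 100), (300, 100, 100, 100)))  # 0.0      -> kept as two objects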
Command option | Default | Meaning |
-h, --help | - | show help and exit |
--ir | ~/model/public/FP32/yolo-v3-tiny-tf.xml | trained model file in IR format |
-lb, --labels | coco.names_jp | label file (coco.names_jp/coco.names) |
-i, --input | cam | camera (cam) or video/still image file |
-d, --device | CPU | target device (CPU/MYRIAD) |
--threshold | 0.60 | object detection threshold |
--iou | 0.25 | object overlap (IoU) threshold |
-t, --title | y | title display (y/n) |
-s, --speed | y | speed (FPS) display (y/n) |
-o, --out | non | file path for saving the processed output |
$ python3 object_detect_yolo3_2.py -h
--- TinyYOLO V3 Object detection ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

usage: object_detect_yolo3_2.py [-h] [--ir IR_File] [-lb LABEL_FILE]
                                [-i IMAGE_FILE or cam] [-d DEVICE]
                                [--threshold FLOAT] [--iou FLOAT] [-t TITLE]
                                [-s SPEED] [-o IMAGE_OUT]

optional arguments:
  -h, --help            show this help message and exit
  --ir IR_File          Absolute path to the neural network IR xml file.
                        Default value is /home/mizutu/model/public/FP32/yolo-v3-tiny-tf.xml
  -lb LABEL_FILE, --labels LABEL_FILE
                        Absolute path to labels file. Default value is coco.names_jp
  -i IMAGE_FILE or cam, --input IMAGE_FILE or cam
                        Absolute path to image file or cam for camera stream.
  -d DEVICE, --device DEVICE
                        Optional. Specify a target device to infer on. CPU, GPU,
                        FPGA, HDDL or MYRIAD is acceptable. The demo will look
                        for a suitable plugin for the device specified.
                        Default value is CPU
  --threshold FLOAT     Threshold for detection.
  --iou FLOAT           Intersection Over Union.
  -t TITLE, --title TITLE
                        Program title flag. (y/n) Default value is 'y'
  -s SPEED, --speed SPEED
                        Speed display flag. (y/n) Default value is 'y'
  -o IMAGE_OUT, --out IMAGE_OUT
                        Processed image file path. Default value is 'non'
$ python3 object_detect_yolo3_2.py -i ~/Videos/car.mp4
--- TinyYOLO V3 Object detection ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

Running OpenVINO NCS Tensorflow TinyYolo v3 example...

 Displaying image with objects detected in GUI...
 Click in the GUI window and hit any key to exit.
object_detect_yolo3_2.py:289: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
  input_blob = next(iter(net.inputs))
Tiny Yolo v3: Starting application...
 - IR File      : /home/mizutu/model/public/FP32/yolo-v3-tiny-tf.xml
 - Input Shape  : [1, 3, 416, 416]
 - Output Shapes:
     - output #0 name: conv2d_12/Conv2D/YoloRegion
       - output shape: [1, 255, 26, 26]
     - output #1 name: conv2d_9/Conv2D/YoloRegion
       - output shape: [1, 255, 13, 13]
 - Labels File  : coco.names_jp
 - Image File   : /home/mizutu/Videos/car.mp4
 - Threshold    : 0.6
 - Intersection Over Union: 0.25
 - Device       : CPU
 - Program Title: y
 - Speed flag   : y
 - Processed out: non
object_detect_yolo3_2.py:367: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  all_output_results = req_handle.outputs

FPS average:      12.10

 Finished.

$ python3 object_detect_yolo3_2.py -i ~/Videos/car_person.mp4
--- TinyYOLO V3 Object detection ---
4.5.2-openvino
OpenVINO inference_engine: 2.1.2021.3.0-2787-60059f2c755-releases/2021/3

Running OpenVINO NCS Tensorflow TinyYolo v3 example...

 Displaying image with objects detected in GUI...
 Click in the GUI window and hit any key to exit.
object_detect_yolo3_2.py:289: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
  input_blob = next(iter(net.inputs))
Tiny Yolo v3: Starting application...
 - IR File      : /home/mizutu/model/public/FP32/yolo-v3-tiny-tf.xml
 - Input Shape  : [1, 3, 416, 416]
 - Output Shapes:
     - output #0 name: conv2d_12/Conv2D/YoloRegion
       - output shape: [1, 255, 26, 26]
     - output #1 name: conv2d_9/Conv2D/YoloRegion
       - output shape: [1, 255, 13, 13]
 - Labels File  : coco.names_jp
 - Image File   : /home/mizutu/Videos/car_person.mp4
 - Threshold    : 0.6
 - Intersection Over Union: 0.25
 - Device       : CPU
 - Program Title: y
 - Speed flag   : y
 - Processed out: non
object_detect_yolo3_2.py:367: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  all_output_results = req_handle.outputs

FPS average:      12.50

 Finished.
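The two DeprecationWarnings in the transcripts come from the legacy net.inputs and req_handle.outputs properties. If you want to silence them, the 2021.x Inference Engine Python API exposes the same data through input_info and output_blobs; the lines below are a sketch of drop-in replacements inside the script (same variable names as in the source, behavior should be unchanged):

# Replacement for: input_blob = next(iter(net.inputs))
input_blob = next(iter(net.input_info))
# Replacement for: input_shape = net.inputs[input_blob].shape
input_shape = net.input_info[input_blob].input_data.shape
# Replacement for: all_output_results = req_handle.outputs
all_output_results = {name: blob.buffer for name, blob in req_handle.output_blobs.items()}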
$ cd ~/workspace/apps
CPU:
$ python3 object_detect_yolo3_2.py -i ~/Videos/car.mp4
$ python3 object_detect_yolo3_2.py -i ~/Videos/car_person.mp4
$ python3 object_detect_yolo3_2.py -i ~/Images/desk-image.jpg
$ python3 object_detect_yolo3_2.py -i cam
NCS2 (MYRIAD):
$ python3 object_detect_yolo3_2.py --ir ~/model/public/FP16/yolo-v3-tiny-tf.xml -i ~/Videos/car.mp4 -d MYRIAD
$ python3 object_detect_yolo3_2.py --ir ~/model/public/FP16/yolo-v3-tiny-tf.xml -i ~/Videos/car_person.mp4 -d MYRIAD
$ python3 object_detect_yolo3_2.py --ir ~/model/public/FP16/yolo-v3-tiny-tf.xml -i ~/Images/desk-image.jpg -d MYRIAD
$ python3 object_detect_yolo3_2.py --ir ~/model/public/FP16/yolo-v3-tiny-tf.xml -i cam -d MYRIAD
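The only difference between the CPU and NCS2 command groups above is the device name and the matching IR precision (FP32 for the CPU, FP16 for the NCS2). A small launcher sketch makes that pairing explicit; run_detection() is a hypothetical helper, and the model paths are assumed to match the layout used in the commands above:

import subprocess
from os.path import expanduser

def run_detection(source, device='CPU'):
    """Launch object_detect_yolo3_2.py with the IR precision that matches the device."""
    precision = 'FP16' if device == 'MYRIAD' else 'FP32'  # the NCS2 runs FP16 models
    ir = expanduser('~/model/public/{}/yolo-v3-tiny-tf.xml'.format(precision))
    subprocess.run(['python3', 'object_detect_yolo3_2.py', '--ir', ir, '-i', source, '-d', device])

run_detection(expanduser('~/Videos/car.mp4'), device='MYRIAD')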
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Object detection
##
## model: yolo-v3-tiny-tf
##
## 2021.02.24 Masahiro Izutsu
##------------------------------------------
## 2021.03.25 device parameter
## 2021.06.23 fps display

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# constant definitions
WINDOW_WIDTH = 640
from os.path import expanduser
MODEL_DEF_DETECT = expanduser('~/model/public/FP32/yolo-v3-tiny-tf.xml')

# load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

# imports
import sys
import numpy as np
import cv2
import argparse
import myfunction
import mylib

# title and version information
title = 'TinyYOLO V3 Object detection'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)

# Adjust these thresholds
DETECTION_THRESHOLD = 0.60
IOU_THRESHOLD = 0.25

# Tiny yolo anchor box values
anchors = [10,14, 23,27, 37,58, 81,82, 135,169, 344,319]

# Used for display
BOX_COLOR = (0,255,0)
LABEL_BG_COLOR = (70, 120, 70)  # greyish green background for text
TEXT_COLOR = (255, 255, 255)    # white text
TEXT_FONT = cv2.FONT_HERSHEY_SIMPLEX
WINDOW_SIZE_W = 640
WINDOW_SIZE_H = 480

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--ir', metavar = 'IR_File', type = str, default = MODEL_DEF_DETECT,
                        help = 'Absolute path to the neural network IR xml file. '
                               'Default value is ' + MODEL_DEF_DETECT)
    parser.add_argument('-lb', '--labels', metavar = 'LABEL_FILE', type = str, default = 'coco.names_jp',
                        help = 'Absolute path to labels file. Default value is coco.names_jp')
    parser.add_argument('-i', '--input', metavar = 'IMAGE_FILE or cam', type = str, default = 'cam',
                        help = 'Absolute path to image file or cam for camera stream.')
    parser.add_argument('-d', '--device', default = 'CPU', type = str,
                        help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                               'acceptable. The demo will look for a suitable plugin for the device specified. '
                               'Default value is CPU')
    parser.add_argument('--threshold', metavar = 'FLOAT', type = float, default = DETECTION_THRESHOLD,
                        help = 'Threshold for detection.')
    parser.add_argument('--iou', metavar = 'FLOAT', type = float, default = IOU_THRESHOLD,
                        help = 'Intersection Over Union.')
    parser.add_argument('-t', '--title', metavar = 'TITLE', default = 'y',
                        help = 'Program title flag. (y/n) Default value is \'y\'')
    parser.add_argument('-s', '--speed', metavar = 'SPEED', default = 'y',
                        help = 'Speed display flag. (y/n) Default value is \'y\'')
    parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT', default = 'non',
                        help = 'Processed image file path. Default value is \'non\'')
    return parser

# Creates a mask to remove duplicate objects (boxes) and their related probabilities and
# classifications that should be considered the same object. This is determined by how
# similar the boxes are based on the intersection-over-union metric.
# box_list is a list of boxes (4 floats for centerX, centerY and Length and Width)
def get_duplicate_box_mask(box_list, iou_threshold):
    # The intersection-over-union threshold to use when determining duplicates.
    # Objects/boxes found that are over this threshold will be considered the same object.
    max_iou = iou_threshold
    box_mask = np.ones(len(box_list))
    for i in range(len(box_list)):
        if box_mask[i] == 0:
            continue
        for j in range(i + 1, len(box_list)):
            if get_intersection_over_union(box_list[i], box_list[j]) >= max_iou:
                if box_list[i][4] < box_list[j][4]:
                    box_list[i], box_list[j] = box_list[j], box_list[i]
                box_mask[j] = 0.0
    filter_iou_mask = np.array(box_mask > 0.0, dtype='bool')
    return filter_iou_mask

# Evaluate the intersection-over-union for two boxes.
# The intersection-over-union metric determines how close two boxes are to being the
# same box. The closer the boxes are to being the same, the closer the metric will be to 1.0.
# box_1 and box_2 are arrays of 4 numbers: the (x, y) point that defines the center of
# the box plus the length and width of the box.
# Returns the intersection-over-union (between 0.0 and 1.0) for the two boxes specified.
def get_intersection_over_union(box_1, box_2):
    # one dimension of the intersecting box
    intersection_dim_1 = min(box_1[0]+0.5*box_1[2], box_2[0]+0.5*box_2[2]) - \
                         max(box_1[0]-0.5*box_1[2], box_2[0]-0.5*box_2[2])
    # the other dimension of the intersecting box
    intersection_dim_2 = min(box_1[1]+0.5*box_1[3], box_2[1]+0.5*box_2[3]) - \
                         max(box_1[1]-0.5*box_1[3], box_2[1]-0.5*box_2[3])
    if intersection_dim_1 < 0 or intersection_dim_2 < 0:
        # no intersection area
        intersection_area = 0
    else:
        # intersection area is the product of the intersection dimensions
        intersection_area = intersection_dim_1 * intersection_dim_2
    # The union area is the area of both boxes added together minus the intersection
    # area, which would otherwise be counted twice (by definition it is in each box).
    union_area = box_1[2]*box_1[3] + box_2[2]*box_2[3] - intersection_area
    # now we can return the intersection over union
    iou = intersection_area / union_area
    #print("iou: ", iou)
    return iou

# display basic model information
def display_info(input_shape, net_outputs, image, ir, labels, threshold, iou_threshold, device, titleflg, speedflg, outpath):
    output_nodes = []
    output_iter = iter(net_outputs)
    for i in range(len(net_outputs)):
        output_nodes.append(next(output_iter))
    print(YELLOW + 'Tiny Yolo v3: Starting application...' + NOCOLOR)
    print(' - ' + YELLOW + 'IR File      : ' + NOCOLOR, ir)
    print(' - ' + YELLOW + 'Input Shape  : ' + NOCOLOR, input_shape)
    print(' - ' + YELLOW + 'Output Shapes: ' + NOCOLOR)
    for j in range(len(output_nodes)):
        print('     - ' + YELLOW + 'output #' + str(j) + ' name: ' + NOCOLOR + output_nodes[j])
        print('       - output shape: ' + NOCOLOR + str(net_outputs[output_nodes[j]].shape))
    print(' - ' + YELLOW + 'Labels File  : ' + NOCOLOR, labels)
    print(' - ' + YELLOW + 'Image File   : ' + NOCOLOR, image)
    print(' - ' + YELLOW + 'Threshold    : ' + NOCOLOR, threshold)
    print(' - ' + YELLOW + 'Intersection Over Union: ' + NOCOLOR, iou_threshold)
    print(' - ' + YELLOW + 'Device       : ' + NOCOLOR, device)
    print(' - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
    print(' - ' + YELLOW + 'Speed flag   : ' + NOCOLOR, speedflg)
    print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)

# This function parses the output results from tiny yolo v3.
# The results are transposed so the output shape is (1, 13, 13, 255) or (1, 26, 26, 255);
# the original is (1, 255, h, w). Tiny yolo does detection on two different scales,
# using a 13x13 grid and a 26x26 grid.
# This is how the output is parsed:
# Imagine the image being split up into a 13x13 or 26x26 grid, where each grid cell
# contains 3 anchor boxes. For each of those 3 anchor boxes there are 85 values:
# 80 class probabilities + 4 coordinate values + 1 box confidence score = 85 values.
# That results in each grid cell having 255 values (85 values x 3 anchor boxes).
def parseTinyYoloV3Output(output_node_results, filtered_objects, source_image_width, source_image_height,
                          scaled_w, scaled_h, detection_threshold, num_labels):
    # transpose the output node results
    output_node_results = output_node_results.transpose(0, 2, 3, 1)
    output_h = output_node_results.shape[1]
    output_w = output_node_results.shape[2]

    # 80 class scores + 4 coordinate values + 1 objectness score = 85 values
    # 85 values * 3 prior box scores per grid cell = 255 values
    # 255 values * either 26x26 or 13x13 grid cells
    num_of_classes = num_labels
    num_anchor_boxes_per_cell = 3

    # Set the anchor offset depending on the output result shape
    anchor_offset = 0
    if output_w == 13:
        anchor_offset = 2 * 3
    elif output_w == 26:
        anchor_offset = 2 * 0

    # used to calculate approximate coordinates of the bounding box
    x_ratio = float(source_image_width) / scaled_w
    y_ratio = float(source_image_height) / scaled_h

    # Filter out low scoring results
    output_size = output_w * output_h
    for result_counter in range(output_size):
        row = result_counter // output_w
        col = result_counter % output_w
        for anchor_boxes in range(num_anchor_boxes_per_cell):
            # Check the box confidence score of the anchor box: how likely the box contains an object.
            box_confidence_score = output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 4]
            if box_confidence_score < detection_threshold:
                continue
            # Calculate the x, y, width, and height of the box
            x_center = (col + output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 0]) / output_w * scaled_w
            y_center = (row + output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 1]) / output_h * scaled_h
            width = np.exp(output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 2]) * anchors[anchor_offset + 2 * anchor_boxes]
            height = np.exp(output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 3]) * anchors[anchor_offset + 2 * anchor_boxes + 1]
            # Now check the anchor box for the highest class probabilities. If a probability
            # exceeds the threshold, save the box coordinates, class score and class id.
            for class_id in range(num_of_classes):
                class_probability = output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 5 + class_id]
                # The class confidence score is the box confidence score times the class probability
                class_confidence_score = class_probability * box_confidence_score
                if class_confidence_score < detection_threshold:
                    continue
                # Calculate the bounding box top-left and bottom-right vertexes
                xmin = max(int((x_center - width / 2) * x_ratio), 0)
                ymin = max(int((y_center - height / 2) * y_ratio), 0)
                xmax = min(int(xmin + width * x_ratio), source_image_width - 1)
                ymax = min(int(ymin + height * y_ratio), source_image_height - 1)
                filtered_objects.append((xmin, ymin, xmax, ymax, class_confidence_score, class_id))

# Determine the type of the input file.
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
    try:
        imgtype = imghdr.what(filename)
    except FileNotFoundError:
        imgtype = 'NotFound'
    return str(imgtype)

# ** main function **
def main():
    # Japanese-capable font for labels
    fontPIL = 'NotoSansCJK-Bold.ttc'

    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.input
    labels = ARGS.labels
    titleflg = ARGS.title
    speedflg = ARGS.speed
    if ARGS.input.lower() == "cam" or ARGS.input.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()
    ir = ARGS.ir
    detection_threshold = ARGS.threshold
    iou_threshold = ARGS.iou
    device = ARGS.device
    outpath = ARGS.out

    # Prepare Categories
    with open(labels) as labels_file:
        label_list = labels_file.read().splitlines()

    print(YELLOW + 'Running OpenVINO NCS Tensorflow TinyYolo v3 example...' + NOCOLOR)
    print('\n Displaying image with objects detected in GUI...')
    print(' Click in the GUI window and hit any key to exit.')

    ####################### 1. Create ie core and network #######################
    # Select the plugin and IRs to be used
    ie = IECore()
    net = ie.read_network(model = ir, weights = ir[:-3] + 'bin')

    # Set up the input blobs
    input_blob = next(iter(net.inputs))
    input_shape = net.inputs[input_blob].shape

    # Display model information
    display_info(input_shape, net.outputs, input_stream, ir, labels, detection_threshold,
                 iou_threshold, device, titleflg, speedflg, outpath)

    # Load the network and get the network input shape information
    exec_net = ie.load_network(network = net, device_name = device)
    n, c, network_input_h, network_input_w = input_shape

    # prepare the input
    if (isstream):
        # camera
        cap = cv2.VideoCapture(input_stream)
        cap.set(cv2.CAP_PROP_FPS, 30)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, WINDOW_SIZE_W)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, WINDOW_SIZE_H)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
    else:
        # read a still image file
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # resize, keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # single pass

    # recording of processed results, step 1
    if (outpath != 'non'):
        if (isstream):
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
            outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))

    # Width and height calculations. These will be used to scale the bounding boxes
    source_image_width = frame.shape[1]
    source_image_height = frame.shape[0]
    scaled_w = int(source_image_width * min(network_input_w/source_image_width, network_input_w/source_image_height))
    scaled_h = int(source_image_height * min(network_input_h/source_image_width, network_input_h/source_image_height))

    # initialize measurements
    fpsWithTick = mylib.fpsWithTick()
    frame_count = 0
    fps_total = 0
    fpsWithTick.get()  # start fps measurement

    # main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        ####################### 2. Preprocessing #######################
        # Image preprocessing
        # frame = cv2.flip(frame, 1)
        display_image = frame

        # Image preprocessing (resize, transpose, reshape)
        input_image = cv2.resize(frame, (network_input_w, network_input_h), cv2.INTER_LINEAR)
        input_image = input_image.astype(np.float32)
        input_image = np.transpose(input_image, (2, 0, 1))
        reshaped_image = input_image.reshape((n, c, network_input_h, network_input_w))

        ####################### 3. Perform Inference #######################
        # Perform the inference asynchronously
        req_handle = exec_net.start_async(request_id=0, inputs={input_blob: reshaped_image})
        status = req_handle.wait()

        ####################### 4. Get results #######################
        all_output_results = req_handle.outputs

        ####################### 5. Post processing for results #######################
        # Post-processing for tiny yolo v3 consists of the following steps:
        # 1. Parse the output and filter out low scores
        # 2. Filter out duplicates using intersection over union
        # 3. Draw boxes and text

        ## 1. Tiny yolo v3 has two outputs and we check/parse both of them
        filtered_objects = []
        for output_node_results in all_output_results.values():
            parseTinyYoloV3Output(output_node_results, filtered_objects, source_image_width, source_image_height,
                                  scaled_w, scaled_h, detection_threshold, len(label_list))

        ## 2. Filter out duplicate objects from all detected objects
        filtered_mask = get_duplicate_box_mask(filtered_objects, iou_threshold)

        ## 3. Draw rectangles and set up display texts
        for object_index in range(len(filtered_objects)):
            if filtered_mask[object_index] == True:
                # get all values from the filtered object list
                xmin = filtered_objects[object_index][0]
                ymin = filtered_objects[object_index][1]
                xmax = filtered_objects[object_index][2]
                ymax = filtered_objects[object_index][3]
                confidence = filtered_objects[object_index][4]
                class_id = filtered_objects[object_index][5]

                # Set up the text for display
                cv2.rectangle(display_image, (xmin, ymin), (xmax, ymin+20), LABEL_BG_COLOR, -1)
                # cv2.putText(display_image, label_list[class_id] + ': %.2f' % confidence, (xmin+5, ymin+15), TEXT_FONT, 0.5, TEXT_COLOR, 1)
                myfunction.cv2_putText(img = display_image,
                                       text = label_list[class_id] + ': %.2f' % confidence,
                                       org = (xmin+5, ymin+18), fontFace = fontPIL, fontScale = 14,
                                       color = TEXT_COLOR, mode = 0)
                # Set up the bounding box
                cv2.rectangle(display_image, (xmin, ymin), (xmax, ymax), BOX_COLOR, 1)

        # compute FPS
        fps = fpsWithTick.get()
        st_fps = 'fps: {:>6.2f}'.format(fps)
        if (speedflg == 'y'):
            cv2.rectangle(frame, (10, 38), (95, 55), (90, 90, 90), -1)
            cv2.putText(frame, st_fps, (15, 50),
                        cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)

        # draw the title
        if (titleflg == 'y'):
            cv2.putText(display_image, title, (10, 30),
                        cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # show the image
        window_name = title + " (hit 'q' or 'esc' key to exit)"
        cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
        cv2.imshow(window_name, display_image)

        # recording of processed results, step 2
        if (outpath != 'non'):
            if (isstream):
                outvideo.write(display_image)
            else:
                cv2.imwrite(outpath, display_image)

        # exit when the 'esc' or 'q' key is pressed or the window is closed
        breakflg = False
        while (True):
            key = cv2.waitKey(1)
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if key == 27 or key == 113 or (prop_val < 0.0):  # 'esc' or 'q'
                breakflg = True
                break
            if (isstream):
                break

        if ((breakflg == False) and isstream):
            # read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False

    # cleanup
    if (isstream):
        cap.release()

    # recording of processed results, step 3
    if (outpath != 'non'):
        if (isstream):
            outvideo.release()

    cv2.destroyAllWindows()
    del net
    del exec_net
    print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
    print('\n Finished.')

# entry point
if __name__ == "__main__":
    sys.exit(main())
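mylib.fpsWithTick is an external helper that is not listed on this page; the script only relies on get() returning the instantaneous frame rate and get_average() returning the run average. For reference, a minimal stand-in with the same interface could look like the following. This is an assumption about the helper, not its actual source:

import time

class fpsWithTick:
    """Hypothetical stand-in for mylib.fpsWithTick: measures frames per second."""
    def __init__(self):
        self._start = time.perf_counter()
        self._last = self._start
        self._count = 0

    def get(self):
        # per-frame fps based on the time elapsed since the previous call
        now = time.perf_counter()
        dt = now - self._last
        self._last = now
        self._count += 1
        return 1.0 / dt if dt > 0 else 0.0

    def get_average(self):
        # average fps over the whole run
        elapsed = time.perf_counter() - self._start
        return self._count / elapsed if elapsed > 0 else 0.0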
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Object detection
##
## model: yolo-v3-tiny-tf
##
## 2021.02.24 Masahiro Izutsu
##------------------------------------------
## 2021.03.25 device parameter
## 2021.06.23 fps display
## 2021.10.23 display color
## object_detect_yolo3_3.py

# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'

# constant definitions
WINDOW_WIDTH = 640
from os.path import expanduser
MODEL_DEF_DETECT = expanduser('~/model/public/FP32/yolo-v3-tiny-tf.xml')

# load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

# imports
import sys
import numpy as np
import cv2
import argparse
import myfunction
import mylib
import color_table80

TEXT_COLOR = color_table80.CR_white

# title and version information
title = 'TinyYOLO V3 Object detection'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)

# Adjust these thresholds
DETECTION_THRESHOLD = 0.60
IOU_THRESHOLD = 0.25

# Tiny yolo anchor box values
anchors = [10,14, 23,27, 37,58, 81,82, 135,169, 344,319]

# Used for display (fixed colors replaced by per-class colors from color_table80)
#BOX_COLOR = (0,255,0)
#LABEL_BG_COLOR = (70, 120, 70)  # greyish green background for text
#TEXT_COLOR = (255, 255, 255)    # white text
TEXT_FONT = cv2.FONT_HERSHEY_SIMPLEX
WINDOW_SIZE_W = 640
WINDOW_SIZE_H = 480

# Parses arguments for the application
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--ir', metavar = 'IR_File', type = str, default = MODEL_DEF_DETECT,
                        help = 'Absolute path to the neural network IR xml file. '
                               'Default value is ' + MODEL_DEF_DETECT)
    parser.add_argument('-lb', '--labels', metavar = 'LABEL_FILE', type = str, default = 'coco.names_jp',
                        help = 'Absolute path to labels file. Default value is coco.names_jp')
    parser.add_argument('-i', '--input', metavar = 'IMAGE_FILE or cam', type = str, default = 'cam',
                        help = 'Absolute path to image file or cam for camera stream.')
    parser.add_argument('-d', '--device', default = 'CPU', type = str,
                        help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
                               'acceptable. The demo will look for a suitable plugin for the device specified. '
                               'Default value is CPU')
    parser.add_argument('--threshold', metavar = 'FLOAT', type = float, default = DETECTION_THRESHOLD,
                        help = 'Threshold for detection.')
    parser.add_argument('--iou', metavar = 'FLOAT', type = float, default = IOU_THRESHOLD,
                        help = 'Intersection Over Union.')
    parser.add_argument('-t', '--title', metavar = 'TITLE', default = 'y',
                        help = 'Program title flag. (y/n) Default value is \'y\'')
    parser.add_argument('-s', '--speed', metavar = 'SPEED', default = 'y',
                        help = 'Speed display flag. (y/n) Default value is \'y\'')
    parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT', default = 'non',
                        help = 'Processed image file path. Default value is \'non\'')
    return parser

# Creates a mask to remove duplicate objects (boxes) and their related probabilities and
# classifications that should be considered the same object. This is determined by how
# similar the boxes are based on the intersection-over-union metric.
# box_list is a list of boxes (4 floats for centerX, centerY and Length and Width)
def get_duplicate_box_mask(box_list, iou_threshold):
    # The intersection-over-union threshold to use when determining duplicates.
    # Objects/boxes found that are over this threshold will be considered the same object.
    max_iou = iou_threshold
    box_mask = np.ones(len(box_list))
    for i in range(len(box_list)):
        if box_mask[i] == 0:
            continue
        for j in range(i + 1, len(box_list)):
            if get_intersection_over_union(box_list[i], box_list[j]) >= max_iou:
                if box_list[i][4] < box_list[j][4]:
                    box_list[i], box_list[j] = box_list[j], box_list[i]
                box_mask[j] = 0.0
    filter_iou_mask = np.array(box_mask > 0.0, dtype='bool')
    return filter_iou_mask

# Evaluate the intersection-over-union for two boxes.
# The intersection-over-union metric determines how close two boxes are to being the
# same box. The closer the boxes are to being the same, the closer the metric will be to 1.0.
# box_1 and box_2 are arrays of 4 numbers: the (x, y) point that defines the center of
# the box plus the length and width of the box.
# Returns the intersection-over-union (between 0.0 and 1.0) for the two boxes specified.
def get_intersection_over_union(box_1, box_2):
    # one dimension of the intersecting box
    intersection_dim_1 = min(box_1[0]+0.5*box_1[2], box_2[0]+0.5*box_2[2]) - \
                         max(box_1[0]-0.5*box_1[2], box_2[0]-0.5*box_2[2])
    # the other dimension of the intersecting box
    intersection_dim_2 = min(box_1[1]+0.5*box_1[3], box_2[1]+0.5*box_2[3]) - \
                         max(box_1[1]-0.5*box_1[3], box_2[1]-0.5*box_2[3])
    if intersection_dim_1 < 0 or intersection_dim_2 < 0:
        # no intersection area
        intersection_area = 0
    else:
        # intersection area is the product of the intersection dimensions
        intersection_area = intersection_dim_1 * intersection_dim_2
    # The union area is the area of both boxes added together minus the intersection
    # area, which would otherwise be counted twice (by definition it is in each box).
    union_area = box_1[2]*box_1[3] + box_2[2]*box_2[3] - intersection_area
    # now we can return the intersection over union
    iou = intersection_area / union_area
    #print("iou: ", iou)
    return iou

# display basic model information
def display_info(input_shape, net_outputs, image, ir, labels, threshold, iou_threshold, device, titleflg, speedflg, outpath):
    output_nodes = []
    output_iter = iter(net_outputs)
    for i in range(len(net_outputs)):
        output_nodes.append(next(output_iter))
    print(YELLOW + 'Tiny Yolo v3: Starting application...' + NOCOLOR)
    print(' - ' + YELLOW + 'IR File      : ' + NOCOLOR, ir)
    print(' - ' + YELLOW + 'Input Shape  : ' + NOCOLOR, input_shape)
    print(' - ' + YELLOW + 'Output Shapes: ' + NOCOLOR)
    for j in range(len(output_nodes)):
        print('     - ' + YELLOW + 'output #' + str(j) + ' name: ' + NOCOLOR + output_nodes[j])
        print('       - output shape: ' + NOCOLOR + str(net_outputs[output_nodes[j]].shape))
    print(' - ' + YELLOW + 'Labels File  : ' + NOCOLOR, labels)
    print(' - ' + YELLOW + 'Image File   : ' + NOCOLOR, image)
    print(' - ' + YELLOW + 'Threshold    : ' + NOCOLOR, threshold)
    print(' - ' + YELLOW + 'Intersection Over Union: ' + NOCOLOR, iou_threshold)
    print(' - ' + YELLOW + 'Device       : ' + NOCOLOR, device)
    print(' - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
    print(' - ' + YELLOW + 'Speed flag   : ' + NOCOLOR, speedflg)
    print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)

# This function parses the output results from tiny yolo v3.
# The results are transposed so the output shape is (1, 13, 13, 255) or (1, 26, 26, 255);
# the original is (1, 255, h, w). Tiny yolo does detection on two different scales,
# using a 13x13 grid and a 26x26 grid.
# This is how the output is parsed:
# Imagine the image being split up into a 13x13 or 26x26 grid, where each grid cell
# contains 3 anchor boxes. For each of those 3 anchor boxes there are 85 values:
# 80 class probabilities + 4 coordinate values + 1 box confidence score = 85 values.
# That results in each grid cell having 255 values (85 values x 3 anchor boxes).
def parseTinyYoloV3Output(output_node_results, filtered_objects, source_image_width, source_image_height,
                          scaled_w, scaled_h, detection_threshold, num_labels):
    # transpose the output node results
    output_node_results = output_node_results.transpose(0, 2, 3, 1)
    output_h = output_node_results.shape[1]
    output_w = output_node_results.shape[2]

    # 80 class scores + 4 coordinate values + 1 objectness score = 85 values
    # 85 values * 3 prior box scores per grid cell = 255 values
    # 255 values * either 26x26 or 13x13 grid cells
    num_of_classes = num_labels
    num_anchor_boxes_per_cell = 3

    # Set the anchor offset depending on the output result shape
    anchor_offset = 0
    if output_w == 13:
        anchor_offset = 2 * 3
    elif output_w == 26:
        anchor_offset = 2 * 0

    # used to calculate approximate coordinates of the bounding box
    x_ratio = float(source_image_width) / scaled_w
    y_ratio = float(source_image_height) / scaled_h

    # Filter out low scoring results
    output_size = output_w * output_h
    for result_counter in range(output_size):
        row = result_counter // output_w
        col = result_counter % output_w
        for anchor_boxes in range(num_anchor_boxes_per_cell):
            # Check the box confidence score of the anchor box: how likely the box contains an object.
            box_confidence_score = output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 4]
            if box_confidence_score < detection_threshold:
                continue
            # Calculate the x, y, width, and height of the box
            x_center = (col + output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 0]) / output_w * scaled_w
            y_center = (row + output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 1]) / output_h * scaled_h
            width = np.exp(output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 2]) * anchors[anchor_offset + 2 * anchor_boxes]
            height = np.exp(output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 3]) * anchors[anchor_offset + 2 * anchor_boxes + 1]
            # Now check the anchor box for the highest class probabilities. If a probability
            # exceeds the threshold, save the box coordinates, class score and class id.
            for class_id in range(num_of_classes):
                class_probability = output_node_results[0][row][col][anchor_boxes * (num_of_classes + 5) + 5 + class_id]
                # The class confidence score is the box confidence score times the class probability
                class_confidence_score = class_probability * box_confidence_score
                if class_confidence_score < detection_threshold:
                    continue
                # Calculate the bounding box top-left and bottom-right vertexes
                xmin = max(int((x_center - width / 2) * x_ratio), 0)
                ymin = max(int((y_center - height / 2) * y_ratio), 0)
                xmax = min(int(xmin + width * x_ratio), source_image_width - 1)
                ymax = min(int(ymin + height * y_ratio), source_image_height - 1)
                filtered_objects.append((xmin, ymin, xmax, ymax, class_confidence_score, class_id))

# Determine the type of the input file.
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
    try:
        imgtype = imghdr.what(filename)
    except FileNotFoundError:
        imgtype = 'NotFound'
    return str(imgtype)

# ** main function **
def main():
    # Japanese-capable font for labels
    fontPIL = 'NotoSansCJK-Bold.ttc'

    # Argument parsing and parameter setting
    ARGS = parse_args().parse_args()
    input_stream = ARGS.input
    labels = ARGS.labels
    titleflg = ARGS.title
    speedflg = ARGS.speed
    if ARGS.input.lower() == "cam" or ARGS.input.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if (filetype == 'NotFound'):
            print(RED + "\ninput file Not found." + NOCOLOR)
            quit()
    ir = ARGS.ir
    detection_threshold = ARGS.threshold
    iou_threshold = ARGS.iou
    device = ARGS.device
    outpath = ARGS.out

    # Prepare Categories
    with open(labels) as labels_file:
        label_list = labels_file.read().splitlines()

    print(YELLOW + 'Running OpenVINO NCS Tensorflow TinyYolo v3 example...' + NOCOLOR)
    print('\n Displaying image with objects detected in GUI...')
    print(' Click in the GUI window and hit any key to exit.')

    ####################### 1. Create ie core and network #######################
    # Select the plugin and IRs to be used
    ie = IECore()
    net = ie.read_network(model = ir, weights = ir[:-3] + 'bin')

    # Set up the input blobs
    input_blob = next(iter(net.inputs))
    input_shape = net.inputs[input_blob].shape

    # Display model information
    display_info(input_shape, net.outputs, input_stream, ir, labels, detection_threshold,
                 iou_threshold, device, titleflg, speedflg, outpath)

    # Load the network and get the network input shape information
    exec_net = ie.load_network(network = net, device_name = device)
    n, c, network_input_h, network_input_w = input_shape

    # prepare the input
    if (isstream):
        # camera
        cap = cv2.VideoCapture(input_stream)
        cap.set(cv2.CAP_PROP_FPS, 30)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, WINDOW_SIZE_W)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, WINDOW_SIZE_H)
        ret, frame = cap.read()
        loopflg = cap.isOpened()
    else:
        # read a still image file
        frame = cv2.imread(input_stream)
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()
        # resize, keeping the aspect ratio
        img_h, img_w = frame.shape[:2]
        if (img_w > WINDOW_WIDTH):
            height = round(img_h * (WINDOW_WIDTH / img_w))
            frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # single pass

    # recording of processed results, step 1
    if (outpath != 'non'):
        if (isstream):
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
            outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))

    # Width and height calculations. These will be used to scale the bounding boxes
    source_image_width = frame.shape[1]
    source_image_height = frame.shape[0]
    scaled_w = int(source_image_width * min(network_input_w/source_image_width, network_input_w/source_image_height))
    scaled_h = int(source_image_height * min(network_input_h/source_image_width, network_input_h/source_image_height))

    # initialize measurements
    fpsWithTick = mylib.fpsWithTick()
    frame_count = 0
    fps_total = 0
    fpsWithTick.get()  # start fps measurement

    # main loop
    while (loopflg):
        if frame is None:
            print(RED + "\nUnable to read the input." + NOCOLOR)
            quit()

        ####################### 2. Preprocessing #######################
        # Image preprocessing
        # frame = cv2.flip(frame, 1)
        display_image = frame

        # Image preprocessing (resize, transpose, reshape)
        input_image = cv2.resize(frame, (network_input_w, network_input_h), cv2.INTER_LINEAR)
        input_image = input_image.astype(np.float32)
        input_image = np.transpose(input_image, (2, 0, 1))
        reshaped_image = input_image.reshape((n, c, network_input_h, network_input_w))

        ####################### 3. Perform Inference #######################
        # Perform the inference asynchronously
        req_handle = exec_net.start_async(request_id=0, inputs={input_blob: reshaped_image})
        status = req_handle.wait()

        ####################### 4. Get results #######################
        all_output_results = req_handle.outputs

        ####################### 5. Post processing for results #######################
        # Post-processing for tiny yolo v3 consists of the following steps:
        # 1. Parse the output and filter out low scores
        # 2. Filter out duplicates using intersection over union
        # 3. Draw boxes and text

        ## 1. Tiny yolo v3 has two outputs and we check/parse both of them
        filtered_objects = []
        for output_node_results in all_output_results.values():
            parseTinyYoloV3Output(output_node_results, filtered_objects, source_image_width, source_image_height,
                                  scaled_w, scaled_h, detection_threshold, len(label_list))

        ## 2. Filter out duplicate objects from all detected objects
        filtered_mask = get_duplicate_box_mask(filtered_objects, iou_threshold)

        ## 3. Draw rectangles and set up display texts
        for object_index in range(len(filtered_objects)):
            if filtered_mask[object_index] == True:
                # get all values from the filtered object list
                xmin = filtered_objects[object_index][0]
                ymin = filtered_objects[object_index][1]
                xmax = filtered_objects[object_index][2]
                ymax = filtered_objects[object_index][3]
                confidence = filtered_objects[object_index][4]
                class_id = filtered_objects[object_index][5]

                # per-class display colors
                BOX_COLOR = color_table80.get_boder_bgr80(class_id)
                LABEL_BG_COLOR = color_table80.get_back_bgr80(class_id)

                # Set up the text for display
                cv2.rectangle(display_image, (xmin, ymin), (xmax, ymin+20), LABEL_BG_COLOR, -1)
                # cv2.putText(display_image, label_list[class_id] + ': %.2f' % confidence, (xmin+5, ymin+15), TEXT_FONT, 0.5, TEXT_COLOR, 1)
                myfunction.cv2_putText(img = display_image,
                                       text = label_list[class_id] + ': %.2f' % confidence,
                                       org = (xmin+5, ymin+18), fontFace = fontPIL, fontScale = 14,
                                       color = TEXT_COLOR, mode = 0)
                # Set up the bounding box
                cv2.rectangle(display_image, (xmin, ymin), (xmax, ymax), BOX_COLOR, 1)

        # compute FPS
        fps = fpsWithTick.get()
        st_fps = 'fps: {:>6.2f}'.format(fps)
        if (speedflg == 'y'):
            cv2.rectangle(frame, (10, 38), (95, 55), (90, 90, 90), -1)
            cv2.putText(frame, st_fps, (15, 50),
                        cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)

        # draw the title
        if (titleflg == 'y'):
            cv2.putText(display_image, title, (10, 30),
                        cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

        # show the image
        window_name = title + " (hit 'q' or 'esc' key to exit)"
        cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
        cv2.imshow(window_name, display_image)

        # recording of processed results, step 2
        if (outpath != 'non'):
            if (isstream):
                outvideo.write(display_image)
            else:
                cv2.imwrite(outpath, display_image)

        # exit when the 'esc' or 'q' key is pressed or the window is closed
        breakflg = False
        while (True):
            key = cv2.waitKey(1)
            prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
            if key == 27 or key == 113 or (prop_val < 0.0):  # 'esc' or 'q'
                breakflg = True
                break
            if (isstream):
                break

        if ((breakflg == False) and isstream):
            # read the next frame
            ret, frame = cap.read()
            if ret == False:
                break
            loopflg = cap.isOpened()
        else:
            loopflg = False

    # cleanup
    if (isstream):
        cap.release()

    # recording of processed results, step 3
    if (outpath != 'non'):
        if (isstream):
            outvideo.release()

    cv2.destroyAllWindows()
    del net
    del exec_net
    print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
    print('\n Finished.')

# entry point
if __name__ == "__main__":
    sys.exit(main())
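color_table80 is another external helper not listed on this page; it assigns each of the 80 COCO classes its own box and label-background color. A hypothetical minimal version with the same three entry points (CR_white, get_boder_bgr80(), get_back_bgr80()) could be built from an evenly spaced HSV sweep, as sketched below; the actual module may define its colors differently:

# color_table80.py (sketch): 80 distinct per-class colors for OpenCV drawing
import colorsys

CR_white = (255, 255, 255)

def _bgr(class_id, value):
    # spread 80 hues evenly around the HSV wheel and return a BGR tuple for OpenCV
    r, g, b = colorsys.hsv_to_rgb((class_id % 80) / 80.0, 1.0, value)
    return (int(b * 255), int(g * 255), int(r * 255))

def get_boder_bgr80(class_id):
    # bright border color for the bounding box
    return _bgr(class_id, 1.0)

def get_back_bgr80(class_id):
    # darker variant of the same hue for the label background
    return _bgr(class_id, 0.5)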