$ python3 dbface-infer2.py -h
--- DBFace single-stage face detector 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
usage: dbface-infer2.py [-h] [-i IMAGE_FILE] [-m MODEL] [-d DEVICE] [-t TITLE]
[-s SPEED] [-o IMAGE_OUT]
optional arguments:
-h, --help show this help message and exit
-i IMAGE_FILE, --image IMAGE_FILE
Absolute path to image file. Default value is
./selfie.jpg
-m MODEL, --model MODEL
                        Path to an .xml file with a trained detector model.
                        Default value is ./ir-models/dbface-4vga-
                        fp16/dbface.xml
-d DEVICE, --device DEVICE
Optional. Specify a target device to infer on. CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The demo will
look for a suitable plugin for the device specified.
Default value is CPU
-t TITLE, --title TITLE
                        Program title flag (y/n). Default value is 'y'
-s SPEED, --speed SPEED
                        Speed display flag (y/n). Default value is 'y'
-o IMAGE_OUT, --out IMAGE_OUT
Processed image file path. Default value is 'non'
▼ Modified source file "dbface-infer2.py"
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## DBFace single-stage face detector
##
## 2021.07.23 Masahiro Izutsu
##------------------------------------------
## https://github.com/yas-sim/dbface-on-openvino
# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
# DL model for the DBFace face detector
MODEL_DEF = './ir-models/dbface-4vga-fp16/dbface.xml'
WINDOW_WIDTH = 640
# Imports
import os
import sys
import argparse
import math
import time
import numpy as np
from numpy.lib.stride_tricks import as_strided
import cv2
import mylib
# Load OpenVINO modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
# Title and version information
title = 'DBFace single-stage face detector 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)
# Command-line argument parsing
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type=str,
default = 'selfie.jpg',
help = 'Absolute path to image file. Default value is ./selfie.jpg')
parser.add_argument('-m', '--model', type=str, default = MODEL_DEF,
        help = 'Path to an .xml file with a trained detector model. '
               'Default value is ' + MODEL_DEF)
parser.add_argument('-d', '--device', default = 'CPU', type=str,
help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
'acceptable. The demo will look for a suitable plugin for the device specified. '
'Default value is CPU')
parser.add_argument('-t', '--title', metavar = 'TITLE',
default = 'y',
        help = 'Program title flag (y/n). Default value is \'y\'')
parser.add_argument('-s', '--speed', metavar = 'SPEED',
default = 'y',
        help = 'Speed display flag (y/n). Default value is \'y\'')
parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT',
default = 'non',
help = 'Processed image file path. Default value is \'non\'')
return parser
# Display basic information
def display_info(image, model, device, titleflg, speedflg, outpath):
print(YELLOW + title + ': Starting application...' + NOCOLOR)
print(' - ' + YELLOW + 'Image File : ' + NOCOLOR, image)
print(' - ' + YELLOW + 'model : ' + NOCOLOR, model)
print(' - ' + YELLOW + 'Device : ' + NOCOLOR, device)
print(' - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
print(' - ' + YELLOW + 'Speed flag : ' + NOCOLOR, speedflg)
print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)
# Determine the type of an input file
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
try:
imgtype = imghdr.what(filename)
except FileNotFoundError as e:
imgtype = 'NotFound'
return str(imgtype)
# ----------------------------------------------------------------------------
def _exp(v):
if isinstance(v, tuple) or isinstance(v, list):
return [_exp(item) for item in v]
elif isinstance(v, np.ndarray):
return np.array([_exp(item) for item in v], v.dtype)
gate = 1
base = np.exp(1)
if abs(v) < gate:
return v * base
if v > 0:
return np.exp(v)
else:
return -np.exp(-v)
def IOU(rec1, rec2):
cx1, cy1, cx2, cy2 = rec1
gx1, gy1, gx2, gy2 = rec2
S_rec1 = (cx2 - cx1 + 1) * (cy2 - cy1 + 1)
S_rec2 = (gx2 - gx1 + 1) * (gy2 - gy1 + 1)
x1 = max(cx1, gx1)
y1 = max(cy1, gy1)
x2 = min(cx2, gx2)
y2 = min(cy2, gy2)
w = max(0, x2 - x1 + 1)
h = max(0, y2 - y1 + 1)
area = w * h
iou = area / (S_rec1 + S_rec2 - area)
return iou
def NMS(objs, iou=0.5):
if objs is None or len(objs) <= 1:
return objs
objs = sorted(objs, key=lambda obj: obj[1], reverse=True)
keep = []
flags = [0] * len(objs)
for index, obj in enumerate(objs):
if flags[index] != 0:
continue
keep.append(obj)
for j in range(index + 1, len(objs)):
if flags[j] == 0 and IOU(obj[0], objs[j][0]) > iou:
flags[j] = 1
return keep
def max_pooling(x, kernel_size, stride=1, padding=1):
x = np.pad(x, padding, mode='constant')
output_shape = ((x.shape[0] - kernel_size)//stride + 1,
(x.shape[1] - kernel_size)//stride + 1)
kernel_size = (kernel_size, kernel_size)
x_w = as_strided(x, shape=output_shape + kernel_size, strides=(stride*x.strides[0], stride*x.strides[1]) + x.strides)
x_w = x_w.reshape(-1, *kernel_size)
return x_w.max(axis=(1, 2)).reshape(output_shape)
def detect(hm, box, landmark, threshold=0.4, nms_iou=0.5):
hm_pool = max_pooling(hm[0,0,:,:], 3, 1, 1) # 1,1,240,320
interest_points = ((hm==hm_pool) * hm) # screen out low-conf pixels
flat = interest_points.ravel() # flatten
indices = np.argsort(flat)[::-1] # index sort
scores = np.array([ flat[idx] for idx in indices ])
hm_height, hm_width = hm.shape[2:]
ys = indices // hm_width
xs = indices % hm_width
box = box.reshape(box.shape[1:]) # 4,240,320
    landmark = landmark.reshape(landmark.shape[1:]) # 10,240,320
stride = 4
objs = []
for cx, cy, score in zip(xs, ys, scores):
if score < threshold:
break
x, y, r, b = box[:, cy, cx]
xyrb = (np.array([cx, cy, cx, cy]) + [-x, -y, r, b]) * stride
x5y5 = landmark[:, cy, cx]
x5y5 = (_exp(x5y5 * 4) + ([cx]*5 + [cy]*5)) * stride
box_landmark = list(zip(x5y5[:5], x5y5[5:]))
objs.append([xyrb, score, box_landmark])
return NMS(objs, iou=nms_iou)
def drawBBox(image, bbox, color=(0,255,0), thickness=2, textcolor=(0, 0, 0), landmarkcolor=(0, 0, 255)):
text = f"{bbox[1]:.2f}"
xyrb = bbox[0]
x, y, r, b = int(xyrb[0]), int(xyrb[1]), int(xyrb[2]), int(xyrb[3])
w = r - x + 1
h = b - y + 1
cv2.rectangle(image, (x, y, r-x+1, b-y+1), color, thickness, 16)
border = int(thickness / 2)
pos = (x + 3, y - 5)
cv2.rectangle(image, (x - border, y - 21, w + thickness, 21), color, -1, 16)
cv2.putText(image, text, pos, 0, 0.5, textcolor, 1, 16)
landmark = bbox[2]
if len(landmark)>0:
for i in range(len(landmark)):
x, y = landmark[i][:2]
cv2.circle(image, (int(x), int(y)), 3, landmarkcolor, -1, 16)
def main():
    # Parse command-line arguments
ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if filetype == 'NotFound':
            print(RED + "\nInput file not found." + NOCOLOR)
            quit()
model = ARGS.model
device = ARGS.device
titleflg = ARGS.title
speedflg = ARGS.speed
outpath = ARGS.out
display_info(input_stream, model, device, titleflg, speedflg, outpath)
ie = IECore()
net = ie.read_network(model, model[:-4] + '.bin')
exenet = ie.load_network(net, device)
inblobs = (list(net.inputs.keys()))
outblobs = (list(net.outputs.keys()))
print(inblobs, outblobs)
# ['x'] ['Conv_525', 'Exp_527', 'Sigmoid_526']
inshapes = [ net.inputs [i].shape for i in inblobs ]
outshapes = [ net.outputs[i].shape for i in outblobs ]
print(inshapes, outshapes)
# 4vga : [[1, 3, 960, 1280]] [[1, 10, 240, 320], [1, 4, 240, 320], [1, 1, 240, 320]]
    # Assign output node index by checking the number of channels
for i,outblob in enumerate(outblobs):
C = outshapes[i][1]
if C==1:
hm_idx=i
if C==4:
box_idx=i
if C==10:
lm_idx=i
    # Prepare input
if (isstream):
        # Camera
cap = cv2.VideoCapture(input_stream)
ret, image = cap.read()
loopflg = cap.isOpened()
else:
        # Read the image file
image = cv2.imread(input_stream)
if image is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
        # Resize while keeping the aspect ratio
img_h, img_w = image.shape[:2]
if (img_w > WINDOW_WIDTH):
height = round(img_h * (WINDOW_WIDTH / img_w))
image = cv2.resize(image, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # single-pass loop
    # Record the processed result, step 1
if (outpath != 'non'):
if (isstream):
fps = int(cap.get(cv2.CAP_PROP_FPS))
out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))
    # Initialize measurements
fpsWithTick = mylib.fpsWithTick()
frame_count = 0
fps_total = 0
    fpsWithTick.get()  # start FPS measurement
    # Main loop
while (loopflg):
if image is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
        # Convert to the model input format
image = cv2.resize(image, (inshapes[0][3], inshapes[0][2]))
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
img = img.transpose((2,0,1))
res = exenet.infer({inblobs[0]:img})
lm = res[outblobs[lm_idx ]] # 1,10,h,w
box = res[outblobs[box_idx]] # 1,4,h,w
hm = res[outblobs[hm_idx ]] # 1,1,h,w
objs = detect(hm=hm, box=box, landmark=lm, threshold=0.4, nms_iou=0.5)
for obj in objs:
drawBBox(image, obj)
        # Calculate FPS
fps = fpsWithTick.get()
st_fps = 'fps: {:>6.2f}'.format(fps)
if (speedflg == 'y'):
cv2.rectangle(image, (10, 38), (95, 55), (90, 90, 90), -1)
cv2.putText(image, st_fps, (15, 50), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)
        # Draw the title
if (titleflg == 'y'):
cv2.putText(image, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(240, 240, 0), lineType=cv2.LINE_AA)
        # Display the image
window_name = title + " (hit 'q' or 'esc' key to exit)"
cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
cv2.imshow(window_name, image)
        # Record the processed result, step 2
if (outpath != 'non'):
if (isstream):
outvideo.write(image)
else:
cv2.imwrite(outpath, image)
        # Exit on 'esc' or 'q' key (or when the window is closed)
breakflg = False
while(True):
key = cv2.waitKey(1)
prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
if key == 27 or key == 113 or (prop_val < 0.0): # 'esc' or 'q'
breakflg = True
break
if (isstream):
break
if ((breakflg == False) and isstream):
            # Read the next frame
ret, image = cap.read()
if ret == False:
break
loopflg = cap.isOpened()
else:
loopflg = False
    # Cleanup
if (isstream):
cap.release()
    # Record the processed result, step 3
if (outpath != 'non'):
if (isstream):
outvideo.release()
cv2.destroyAllWindows()
print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
print('\n Finished.')
# Entry point
if __name__ == '__main__':
sys.exit(main() or 0)
$ python3 dbface-infer2.py selfie.jpg
--- DBFace single-stage face detector 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
usage: dbface-infer2.py [-h] [-i IMAGE_FILE] [-m MODEL] [-d DEVICE] [-t TITLE]
[-s SPEED] [-o IMAGE_OUT]
dbface-infer2.py: error: unrecognized arguments: selfie.jpg
mizutu@ubuntu-vbox:~/workspace/apps3/dbface-on-openvino-master$ python3 dbface-infer2.py -i selfie.jpg
--- DBFace single-stage face detector 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
DBFace single-stage face detector 2: Starting application...
- Image File : selfie.jpg
- model : ./ir-models/dbface-4vga-fp16/dbface.xml
- Device : CPU
- Program Title: y
- Speed flag : y
- Processed out: non
dbface-infer2.py:234: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
inblobs = (list(net.inputs.keys()))
['x'] ['Conv_525', 'Exp_527', 'Sigmoid_526']
[[1, 3, 960, 1280]] [[1, 10, 240, 320], [1, 4, 240, 320], [1, 1, 240, 320]]
FPS average: 0.80
Finished.
Run example 2: "12_Group_Group_12_Group_Group_12_728.jpg"
$ python3 dbface-infer2.py -i 12_Group_Group_12_Group_Group_12_728.jpg
--- DBFace single-stage face detector 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
DBFace single-stage face detector 2: Starting application...
- Image File : 12_Group_Group_12_Group_Group_12_728.jpg
- model : ./ir-models/dbface-4vga-fp16/dbface.xml
- Device : CPU
- Program Title: y
- Speed flag : y
- Processed out: non
dbface-infer2.py:234: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.
inblobs = (list(net.inputs.keys()))
['x'] ['Conv_525', 'Exp_527', 'Sigmoid_526']
[[1, 3, 960, 1280]] [[1, 10, 240, 320], [1, 4, 240, 320], [1, 1, 240, 320]]
FPS average: 3.80
Finished.
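For reference, detect() keeps only the heatmap pixels that survive a 3x3 max pooling unchanged (i.e. local maxima) and then thresholds their scores. A minimal sketch of the same idea on synthetic data (not the DBFace model; the pooling helper below is condensed from the max_pooling() routine above):

import numpy as np
from numpy.lib.stride_tricks import as_strided

def max_pool3(x):
    # 3x3 max pooling, stride 1, zero padding 1 (same as max_pooling(x, 3, 1, 1))
    x = np.pad(x, 1, mode='constant')
    windows = as_strided(x, shape=(x.shape[0]-2, x.shape[1]-2, 3, 3),
                         strides=x.strides + x.strides)
    return windows.max(axis=(2, 3))

hm = np.zeros((8, 8), np.float32)
hm[2, 3] = 0.9                      # two synthetic "face" peaks
hm[5, 6] = 0.7
peaks = (hm == max_pool3(hm)) * hm  # zero out everything but local maxima
ys, xs = np.nonzero(peaks > 0.4)    # confidence threshold, as in detect()
print(list(zip(xs, ys)))            # -> [(3, 2), (6, 5)]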
$ python3 naive-objdet2.py -h
--- Naive Object Detection 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
usage: naive-objdet2.py [-h] [-i IMAGE_FILE] [-m MODEL] [-d DEVICE]
[-o IMAGE_OUT]
optional arguments:
-h, --help show this help message and exit
-i IMAGE_FILE, --image IMAGE_FILE
Absolute path to image file. Default value is
./image.jpg
-m MODEL, --model MODEL
                        Path to an .xml file with a trained detector model.
Default value is /home/mizutu/model/intel/FP16/face-
detection-0200.xml
-d DEVICE, --device DEVICE
Optional. Specify a target device to infer on. CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The demo will
look for a suitable plugin for the device specified.
Default value is CPU
-o IMAGE_OUT, --out IMAGE_OUT
Processed image file path. Default value is 'non'
▼ Modified source file "naive-objdet2.py"
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Naive Object Detection (without adaptive partitioning)
##
## 2021.07.23 Masahiro Izutsu
##------------------------------------------
## https://github.com/yas-sim/object-detection-with-adaptive-partitioning
# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Imports
import os
import sys
import argparse
import cv2
import numpy as np
# Load OpenVINO modules
from openvino.inference_engine import IENetwork, IECore
from openvino.inference_engine import get_version
# Constant definitions
MODEL_DEF = os.path.expanduser('~/model/intel/FP16/face-detection-0200.xml')
WINDOW_WIDTH = 640
# Title and version information
title = 'Naive Object Detection 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)
# Command-line argument parsing
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type=str,
default = 'image.jpg',
help = 'Absolute path to image file. Default value is ./image.jpg')
parser.add_argument('-m', '--model', type=str, default = MODEL_DEF,
        help = 'Path to an .xml file with a trained detector model. '
'Default value is '+MODEL_DEF)
parser.add_argument('-d', '--device', default = 'CPU', type=str,
help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
'acceptable. The demo will look for a suitable plugin for the device specified. '
'Default value is CPU')
parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT',
default = 'non',
help = 'Processed image file path. Default value is \'non\'')
return parser
# Display basic information
def display_info(image, model, device, outpath):
print(YELLOW + title + ': Starting application...' + NOCOLOR)
print(' - ' + YELLOW + 'Image File : ' + NOCOLOR, image)
print(' - ' + YELLOW + 'model : ' + NOCOLOR, model)
print(' - ' + YELLOW + 'Device : ' + NOCOLOR, device)
print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)
# Determine the type of an input file
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
try:
imgtype = imghdr.what(filename)
except FileNotFoundError as e:
imgtype = 'NotFound'
return str(imgtype)
# ----------------------------------------------------------------------------
model_list = [
'face-detection-0200', # 'face-detection-0100'
'face-detection-0202', # 'face-detection-0202'
'face-detection-0204', # 'face-detection-0204'
'face-detection-0205', # 'face-detection-0205'
'face-detection-0206', # 'face-detection-0206'
'face-detection-adas-0001',
'face-detection-retail-0004',
'face-detection-retail-0005'
][0]
def main():
    # Parse command-line arguments
ARGS = parse_args().parse_args()
input_stream = ARGS.image
filetype = is_pict(input_stream)
isstream = filetype == 'None'
if (filetype == 'NotFound'):
        print(RED + "\nInput file not found." + NOCOLOR)
quit()
model = ARGS.model
device = ARGS.device
outpath = ARGS.out
display_info(input_stream, model, device, outpath)
_N = 0
_C = 1
_H = 2
_W = 3
# Load DL model and setup Inference Engine of OpenVINO
ie = IECore()
net = ie.read_network(model, model[:-4] + '.bin')
input_name = next(iter(net.input_info))
input_shape = net.input_info[input_name].tensor_desc.dims
out_name = next(iter(net.outputs))
out_shape = net.outputs[out_name].shape # [ image_id, label, conf, xmin, ymin, xmax, ymax ]
exec_net = ie.load_network(net, device)
img = cv2.imread(input_stream)
objects=[]
inBlob = cv2.resize(img, (input_shape[_W], input_shape[_H]))
inBlob = inBlob.transpose((2, 0, 1))
inBlob = inBlob.reshape(input_shape)
res = exec_net.infer(inputs={input_name: inBlob})
for obj in res[out_name][0][0]: # obj = [ image_id, label, conf, xmin, ymin, xmax, ymax ]
conf = obj[2]
if conf > 0.6: # Confidence > 60%
ROI_shape = img.shape
xmin = abs(int(obj[3] * ROI_shape[1]))
ymin = abs(int(obj[4] * ROI_shape[0]))
xmax = abs(int(obj[5] * ROI_shape[1]))
ymax = abs(int(obj[6] * ROI_shape[0]))
class_id = int(obj[1])
objects.append([xmin, ymin, xmax, ymax, conf, class_id, True])
# Draw detection result
for obj in objects:
img = cv2.rectangle(img, (obj[0], obj[1]), (obj[2], obj[3]), (0,255,0), 2) # Found object
cv2.imshow('result', img)
    # Record the processed result
if (outpath != 'non'):
cv2.imwrite(outpath, img)
print('Displaying detection result for 10 seconds.')
cv2.waitKey(10 * 1000)
if __name__ == '__main__':
main()
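The decoding loop above relies on the SSD-style output layout [image_id, label, conf, xmin, ymin, xmax, ymax], where the box corners are normalized to the 0..1 range. A small sketch of the coordinate conversion with one made-up detection record (hypothetical values, not actual model output):

import numpy as np

obj = np.array([0, 1, 0.87, 0.25, 0.30, 0.45, 0.70])  # hypothetical record
img_h, img_w = 480, 640                                # original image size
xmin = int(obj[3] * img_w)   # 160
ymin = int(obj[4] * img_h)   # 144
xmax = int(obj[5] * img_w)   # 288
ymax = int(obj[6] * img_h)   # 336
print((xmin, ymin, xmax, ymax), 'conf = %.2f' % obj[2])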
$ python3 objdet-adaptive-partitioning2.py -h
--- Object Detection with adaptive partitioning 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
usage: objdet-adaptive-partitioning2.py [-h] [-i IMAGE_FILE] [-m MODEL]
[-d DEVICE] [-o IMAGE_OUT]
optional arguments:
-h, --help show this help message and exit
-i IMAGE_FILE, --image IMAGE_FILE
Absolute path to image file. Default value is
./image.jpg
-m MODEL, --model MODEL
                        Path to an .xml file with a trained detector model.
Default value is /home/mizutu/model/intel/FP16/face-
detection-0200.xml
-d DEVICE, --device DEVICE
Optional. Specify a target device to infer on. CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The demo will
look for a suitable plugin for the device specified.
Default value is CPU
-o IMAGE_OUT, --out IMAGE_OUT
Processed image file path. Default value is 'non'
mizutu@ubuntu-vbox:~/workspace/apps3/object-detection-with-adaptive-partitioning
▼ Modified source file "objdet-adaptive-partitioning2.py"
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Object Detection with adaptive partitioning
##
## 2021.07.23 Masahiro Izutsu
##------------------------------------------
## https://github.com/yas-sim/object-detection-with-adaptive-partitioning
# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Imports
import os
import sys
import argparse
import cv2
import numpy as np
# Load OpenVINO modules
from openvino.inference_engine import IENetwork, IECore
from openvino.inference_engine import get_version
# Constant definitions
MODEL_DEF = os.path.expanduser('~/model/intel/FP16/face-detection-0200.xml')
WINDOW_WIDTH = 640
# Title and version information
title = 'Object Detection with adaptive partitioning 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)
# Command-line argument parsing
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type=str,
default = 'image.jpg',
help = 'Absolute path to image file. Default value is ./image.jpg')
parser.add_argument('-m', '--model', type=str,
default = MODEL_DEF,
        help = 'Path to an .xml file with a trained detector model. '
'Default value is '+MODEL_DEF)
parser.add_argument('-d', '--device', default = 'CPU', type=str,
help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
'acceptable. The demo will look for a suitable plugin for the device specified. '
'Default value is CPU')
parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT',
default = 'non',
help = 'Processed image file path. Default value is \'non\'')
return parser
# Display basic information
def display_info(image, model, device, outpath):
print(YELLOW + title + ': Starting application...' + NOCOLOR)
print(' - ' + YELLOW + 'Image File : ' + NOCOLOR, image)
print(' - ' + YELLOW + 'model : ' + NOCOLOR, model)
print(' - ' + YELLOW + 'Device : ' + NOCOLOR, device)
print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)
# Determine the type of an input file
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
try:
imgtype = imghdr.what(filename)
except FileNotFoundError as e:
imgtype = 'NotFound'
return str(imgtype)
# ----------------------------------------------------------------------------
model_list = [
'face-detection-0200', # 'face-detection-0100'
'face-detection-0202', # 'face-detection-0202'
'face-detection-0204', # 'face-detection-0204'
'face-detection-0205', # 'face-detection-0205'
'face-detection-0206', # 'face-detection-0206'
'face-detection-adas-0001',
'face-detection-retail-0004',
'face-detection-retail-0005'
][0]
# Divide an image into multiple regions for object detection task
# image_shape = (w,h)
# dividers_list = A list of integer numbers. The numbers represent how many columns the row will be divided into, from top row to bottom row respectively.
# overlap_rate = How much the regions overlap each other
def divideImage(image_shape, dividers_list, overlap_rate=0.1):
_W=0
_H=1
rows = len(dividers_list)
region_list = []
baseY = 0
for row, num_divide in enumerate(dividers_list):
region_width = image_shape[_W]/num_divide
overlap = region_width * overlap_rate
for i in range(num_divide):
x1 = i * region_width - overlap
y1 = baseY - overlap
x2 = (i+1) * region_width + overlap
y2 = baseY + region_width + overlap
if x1<0: x1=0
if x1>=image_shape[_W]: x1=image_shape[_W]-1
if y1<0: y1=0
if y1>=image_shape[_H]: y1=image_shape[_H]-1
if x2<0: x2=0
if x2>=image_shape[_W]: x2=image_shape[_W]-1
if y2<0: y2=0
if y2>=image_shape[_H]: y2=image_shape[_H]-1
region_list.append((int(x1),int(y1),int(x2),int(y2)))
baseY+=region_width
return region_list
# Prepare data for object detection task
# - Crop input image based on the region list produced by divideImage()
# - Create a list of task which consists of coordinate of the ROI in the input image, and the image of the ROI
def createObjectDetectionTasks(img, region_list):
task_id = 0
task_list = []
for region in region_list:
ROI = img[region[1]:region[3], region[0]:region[2]]
task_list.append([region, ROI])
return task_list
# Calculate IOU for non-maximum suppression
def iou(a, b):
area_a = (a[2] - a[0]) * (a[3] - a[1])
area_b = (b[2] - b[0]) * (b[3] - b[1])
iou_x1 = np.maximum(a[0], b[0])
iou_y1 = np.maximum(a[1], b[1])
iou_x2 = np.minimum(a[2], b[2])
iou_y2 = np.minimum(a[3], b[3])
iou_w = iou_x2 - iou_x1
iou_h = iou_y2 - iou_y1
if iou_w < 0 or iou_h < 0:
return 0.0
area_iou = iou_w * iou_h
iou = area_iou / (area_a + area_b - area_iou)
return iou
def draw_regions(img, region_list):
colors= [
( 0, 0, 0 ),
( 255, 0, 0 ),
( 0, 0, 255 ),
( 255, 0, 255 ),
( 0, 255, 0 ),
( 255, 255, 0 ),
( 0, 255, 255 )
]
_W=0
_H=1
for i, region in enumerate(region_list):
img = cv2.rectangle(img, (region[0], region[1]), (region[2], region[3]), colors[i%7], 4)
return img
def main():
    # Parse command-line arguments
ARGS = parse_args().parse_args()
input_stream = ARGS.image
filetype = is_pict(input_stream)
isstream = filetype == 'None'
if (filetype == 'NotFound'):
        print(RED + "\nInput file not found." + NOCOLOR)
quit()
model = ARGS.model
device = ARGS.device
outpath = ARGS.out
display_info(input_stream, model, device, outpath)
_N = 0
_C = 1
_H = 2
_W = 3
# Load DL model and setup Inference Engine of OpenVINO
ie = IECore()
net = ie.read_network(model, model[:-4] + '.bin')
input_name = next(iter(net.input_info))
input_shape = net.input_info[input_name].tensor_desc.dims
out_name = next(iter(net.outputs))
out_shape = net.outputs[out_name].shape # [ image_id, label, conf, xmin, ymin, xmax, ymax ]
    exec_net = ie.load_network(net, device)
img = cv2.imread(input_stream)
# Divide an image into multiple regions for object detection task
# (1920,1080) is the input image size
    # [9,6,3] means the image will be divided into 3 rows, and each row will be divided into multiple columns (9, 6, and 3 columns from top to bottom, respectively)
    # [4,3] or [5,3] could be other options for 1920x1080 pictures.
region_list = divideImage((img.shape[1], img.shape[0]), [9,6,3])
    print('Displaying region boundary boxes for 3 seconds.')
img_tmp = img.copy()
img_tmp = draw_regions(img_tmp, region_list)
cv2.imshow('regions', img_tmp)
cv2.waitKey(3 * 1000)
    task_list = createObjectDetectionTasks(img, region_list)
objects=[]
for task in task_list:
inBlob = cv2.resize(task[1], (input_shape[_W], input_shape[_H]))
inBlob = inBlob.transpose((2, 0, 1))
inBlob = inBlob.reshape(input_shape)
res = exec_net.infer(inputs={input_name: inBlob})
for obj in res[out_name][0][0]: # obj = [ image_id, label, conf, xmin, ymin, xmax, ymax ]
conf = obj[2]
if conf > 0.6: # Confidence > 60%
ROI_shape = task[1].shape
xmin = abs(int(obj[3] * ROI_shape[1])) + task[0][0]
ymin = abs(int(obj[4] * ROI_shape[0])) + task[0][1]
xmax = abs(int(obj[5] * ROI_shape[1])) + task[0][0]
ymax = abs(int(obj[6] * ROI_shape[0])) + task[0][1]
class_id = int(obj[1])
objects.append([xmin, ymin, xmax, ymax, conf, class_id, True])
# Do non-maximum suppression to reject the redundant objects on the overlap region
    for obj_id1, obj1 in enumerate(objects[:-1]):
        for obj2 in objects[obj_id1+1:]:
if obj1[6] == True and obj2[6]==True:
IOU = iou(obj1[0:3+1], obj2[0:3+1])
if IOU>0.5:
if obj1[4]<obj2[4]:
obj1[6] = False
else:
obj2[6] = False
img = draw_regions(img, region_list)
# Draw detection result
for obj in objects:
if obj[6]==True:
img = cv2.rectangle(img, (obj[0], obj[1]), (obj[2], obj[3]), (0,255,0), 2) # Found object
        else:
            img = cv2.rectangle(img, (obj[0], obj[1]), (obj[2], obj[3]), (0,0,255), 1) # Object rejected by NMS
cv2.imshow('result', img)
    # Record the processed result
if (outpath != 'non'):
cv2.imwrite(outpath, img)
print('Displaying detection result for 10 seconds.')
cv2.waitKey(10 * 1000)
if __name__ == '__main__':
main()
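A quick sanity check of divideImage(), worked out by hand from the code above: with a 1920x1080 image and dividers_list=[2, 1], the top row is split into two columns and the bottom row is a single region, each padded by the 10% overlap and clamped to the image bounds.

# Assumes divideImage() from the script above is in scope
regions = divideImage((1920, 1080), [2, 1], overlap_rate=0.1)
for r in regions:
    print(r)
# Row 1: region_width = 960, overlap = 96
#   (0, 0, 1056, 1056)     x1 clamped to 0
#   (864, 0, 1919, 1056)   x2 clamped to width-1
# Row 2: region_width = 1920, overlap = 192
#   (0, 768, 1919, 1079)   y2 clamped to height-1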
$ python3 object-detection-and-line-cross2.py -h
--- Object Tracking with Line Crossing 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
usage: object-detection-and-line-cross2.py [-h] [-i IMAGE_FILE] [-d DEVICE]
[-t TITLE] [-s SPEED]
[-o IMAGE_OUT]
optional arguments:
-h, --help show this help message and exit
-i IMAGE_FILE, --image IMAGE_FILE
Absolute path to image file
-d DEVICE, --device DEVICE
Optional. Specify a target device to infer on. CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The demo will
look for a suitable plugin for the device specified.
Default value is CPU
-t TITLE, --title TITLE
                        Program title flag (y/n). Default value is 'y'
-s SPEED, --speed SPEED
                        Speed display flag (y/n). Default value is 'y'
-o IMAGE_OUT, --out IMAGE_OUT
Processed image file path. Default value is 'non'
▼ Modified source file "object-detection-and-line-cross2.py"
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Object Tracking with Line Crossing and Area Intrusion Detection
##
## 2021.07.22 Masahiro Izutsu
##------------------------------------------
## https://github.com/yas-sim/object-tracking-line-crossing-area-intrusion
## pip install munkres
# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
# DL models for pedestrian detection and person re-identification
MODEL_DETE_DEF = '~/model/intel/FP16/pedestrian-detection-adas-0002.xml'
MODEL_REID_DEF = '~/model/intel/FP16/person-reidentification-retail-0277.xml'
WINDOW_WIDTH = 640
# Imports
import os
import sys
import argparse
import math
import time
import numpy as np
from numpy import linalg as LA
import cv2
from scipy.spatial import distance
from munkres import Munkres # Hungarian algorithm for ID assignment
import mylib
# Load OpenVINO modules
from openvino.inference_engine import IENetwork, IECore
from openvino.inference_engine import get_version
# Title and version information
title = 'Object Tracking with Line Crossing 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)
# Command-line argument parsing
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type=str, default = 'people-detection.264',
help = 'Absolute path to image file')
parser.add_argument('-d', '--device', default = 'CPU', type=str,
help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
'acceptable. The demo will look for a suitable plugin for the device specified. '
'Default value is CPU')
parser.add_argument('-t', '--title', metavar = 'TITLE',
default = 'y',
        help = 'Program title flag (y/n). Default value is \'y\'')
parser.add_argument('-s', '--speed', metavar = 'SPEED',
default = 'y',
        help = 'Speed display flag (y/n). Default value is \'y\'')
parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT',
default = 'non',
help = 'Processed image file path. Default value is \'non\'')
return parser
# Display basic information
def display_info(image, device, titleflg, speedflg, outpath):
print(YELLOW + title + ': Starting application...' + NOCOLOR)
print(' - ' + YELLOW + 'Image File : ' + NOCOLOR, image)
print(' - ' + YELLOW + 'Device : ' + NOCOLOR, device)
print(' - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
print(' - ' + YELLOW + 'Speed flag : ' + NOCOLOR, speedflg)
print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)
# Determine the type of an input file
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
try:
imgtype = imghdr.what(filename)
except FileNotFoundError as e:
imgtype = 'NotFound'
return str(imgtype)
# ----------------------------------------------------------------------------
# Checking boundary line crossing detection
def line(p1, p2):
A = (p1[1] - p2[1])
B = (p2[0] - p1[0])
C = (p1[0]*p2[1] - p2[0]*p1[1])
return A, B, -C
# Calculate the coordinates of the intersection point of two line segments
def calcIntersectPoint(line1p1, line1p2, line2p1, line2p2):
L1 = line(line1p1, line1p2)
L2 = line(line2p1, line2p2)
D = L1[0] * L2[1] - L1[1] * L2[0]
Dx = L1[2] * L2[1] - L1[1] * L2[2]
Dy = L1[0] * L2[2] - L1[2] * L2[0]
x = Dx / D
y = Dy / D
return x,y
# Check whether two line segments intersect
def checkIntersect(p1, p2, p3, p4):
tc1 = (p1[0] - p2[0]) * (p3[1] - p1[1]) + (p1[1] - p2[1]) * (p1[0] - p3[0])
tc2 = (p1[0] - p2[0]) * (p4[1] - p1[1]) + (p1[1] - p2[1]) * (p1[0] - p4[0])
td1 = (p3[0] - p4[0]) * (p1[1] - p3[1]) + (p3[1] - p4[1]) * (p3[0] - p1[0])
td2 = (p3[0] - p4[0]) * (p2[1] - p3[1]) + (p3[1] - p4[1]) * (p3[0] - p2[0])
return tc1*tc2<0 and td1*td2<0
# line(point1)-(point2)
# convert a line to a vector
def line_vectorize(point1, point2):
a = point2[0]-point1[0]
b = point2[1]-point1[1]
return [a,b]
# point = (x,y)
# line1(point1)-(point2), line2(point3)-(point4)
# Calculate the angle made by two line segments
def calcVectorAngle( point1, point2, point3, point4 ):
u = np.array(line_vectorize(point1, point2))
v = np.array(line_vectorize(point3, point4))
i = np.inner(u, v)
n = LA.norm(u) * LA.norm(v)
c = i / n
a = np.rad2deg(np.arccos(np.clip(c, -1.0, 1.0)))
if u[0]*v[1]-u[1]*v[0]<0:
return a
else:
return 360-a
class boundaryLine:
def __init__(self, line=(0,0,0,0)):
self.p0 = (line[0], line[1])
self.p1 = (line[2], line[3])
self.color = (0,255,255)
        self.lineThickness = 4
self.textColor = (0,255,255)
self.textSize = 4
        self.textThickness = 2
self.count1 = 0
self.count2 = 0
# Draw single boundary line
def drawBoundaryLine(img, line):
x1, y1 = line.p0
x2, y2 = line.p1
    cv2.line(img, (x1, y1), (x2, y2), line.color, line.lineThickness)
    cv2.putText(img, str(line.count1), (x1, y1), cv2.FONT_HERSHEY_PLAIN, line.textSize, line.textColor, line.textThickness)
    cv2.putText(img, str(line.count2), (x2, y2), cv2.FONT_HERSHEY_PLAIN, line.textSize, line.textColor, line.textThickness)
# Draw multiple boundary lines
def drawBoundaryLines(img, boundaryLines):
for line in boundaryLines:
drawBoundaryLine(img, line)
# in: boundary_line = boundaryLine class object
# trajectory = (x1, y1, x2, y2)
def checkLineCross(boundary_line, trajectory):
traj_p0 = (trajectory[0], trajectory[1]) # Trajectory of an object
traj_p1 = (trajectory[2], trajectory[3])
bLine_p0 = (boundary_line.p0[0], boundary_line.p0[1]) # Boundary line
bLine_p1 = (boundary_line.p1[0], boundary_line.p1[1])
intersect = checkIntersect(traj_p0, traj_p1, bLine_p0, bLine_p1) # Check if intersect or not
if intersect == True:
angle = calcVectorAngle(traj_p0, traj_p1, bLine_p0, bLine_p1) # Calculate angle between trajectory and boundary line
if angle<180:
boundary_line.count1 += 1
else:
boundary_line.count2 += 1
        #cx, cy = calcIntersectPoint(traj_p0, traj_p1, bLine_p0, bLine_p1) # Calculate the intersection coordinates
# Multiple lines cross check
def checkLineCrosses(boundaryLines, objects):
for obj in objects:
traj = obj.trajectory
if len(traj)>1:
p0 = traj[-2]
p1 = traj[-1]
for line in boundaryLines:
checkLineCross(line, [p0[0],p0[1], p1[0],p1[1]])
#------------------------------------
# Area intrusion detection
class area:
def __init__(self, contour):
self.contour = np.array(contour, dtype=np.int32)
self.count = 0
# Area intrusion check
def checkAreaIntrusion(areas, objects):
for area in areas:
area.count = 0
for obj in objects:
p0 = (obj.pos[0]+obj.pos[2])//2
p1 = (obj.pos[1]+obj.pos[3])//2
if cv2.pointPolygonTest(area.contour, (p0, p1), False)>=0:
area.count += 1
# Draw areas (polygons)
def drawAreas(img, areas):
for area in areas:
if area.count>0:
color=(0,0,255)
else:
color=(255,0,0)
cv2.polylines(img, [area.contour], True, color,4)
cv2.putText(img, str(area.count), (area.contour[0][0], area.contour[0][1]), cv2.FONT_HERSHEY_PLAIN, 4, color, 2)
#------------------------------------
# Object tracking
class object:
def __init__(self, pos, feature, id=-1):
self.feature = feature
self.id = id
self.trajectory = []
self.time = time.monotonic()
self.pos = pos
class objectTracker:
def __init__(self):
self.objectid = 0
self.timeout = 3 # sec
self.clearDB()
self.similarityThreshold = 0.4
pass
def clearDB(self):
self.objectDB = []
def evictTimeoutObjectFromDB(self):
# discard time out objects
now = time.monotonic()
for object in self.objectDB:
if object.time + self.timeout < now:
self.objectDB.remove(object) # discard feature vector from DB
print("Discarded : id {}".format(object.id))
# objects = list of object class
def trackObjects(self, objects):
# if no object found, skip the rest of processing
if len(objects) == 0:
return
        # If any object is registered in the DB, assign the registered ID to the most similar object in the current image
if len(self.objectDB)>0:
            # Create a matrix of cosine distances
cos_sim_matrix=[ [ distance.cosine(objects[j].feature, self.objectDB[i].feature)
for j in range(len(objects))] for i in range(len(self.objectDB)) ]
# solve feature matching problem by Hungarian assignment algorithm
            hungarian = Munkres()
            combination = hungarian.compute(cos_sim_matrix)
# assign ID to the object pairs based on assignment matrix
for dbIdx, objIdx in combination:
if distance.cosine(objects[objIdx].feature, self.objectDB[dbIdx].feature)<self.similarityThreshold:
objects[objIdx].id = self.objectDB[dbIdx].id # assign an ID
self.objectDB[dbIdx].feature = objects[objIdx].feature # update the feature vector in DB with the latest vector (to make tracking easier)
self.objectDB[dbIdx].time = time.monotonic() # update last found time
xmin, ymin, xmax, ymax = objects[objIdx].pos
self.objectDB[dbIdx].trajectory.append([(xmin+xmax)//2, (ymin+ymax)//2]) # record position history as trajectory
objects[objIdx].trajectory = self.objectDB[dbIdx].trajectory
        # Register new objects that do not have an ID yet
for obj in objects:
            if obj.id==-1: # no similar object is registered in the feature DB
obj.id = self.objectid
self.objectDB.append(obj) # register a new feature to the db
self.objectDB[-1].time = time.monotonic()
xmin, ymin, xmax, ymax = obj.pos
self.objectDB[-1].trajectory = [[(xmin+xmax)//2, (ymin+ymax)//2]] # position history for trajectory line
obj.trajectory = self.objectDB[-1].trajectory
self.objectid+=1
def drawTrajectory(self, img, objects):
for obj in objects:
if len(obj.trajectory)>1:
cv2.polylines(img, np.array([obj.trajectory], np.int32), False, (0,0,0), 4)
#------------------------------------
# boundary lines
boundaryLines = [
boundaryLine([ 300, 40, 20, 400 ]),
boundaryLine([ 440, 40, 700, 400 ])
]
# Areas
areas = [
area([ [200,200], [500,180], [600,400], [300,300], [100,360] ])
]
_N = 0
_C = 1
_H = 2
_W = 3
def main():
    # Parse command-line arguments
ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if filetype == 'NotFound':
            print(RED + "\nInput file not found." + NOCOLOR)
            quit()
device = ARGS.device
titleflg = ARGS.title
speedflg = ARGS.speed
outpath = ARGS.out
display_info(input_stream, device, titleflg, speedflg, outpath)
ie = IECore()
# Prep for face/pedestrian detection
model_det = os.path.expanduser(MODEL_DETE_DEF)
net_det = ie.read_network(model_det, model_det[:-4] + '.bin')
input_name_det = next(iter(net_det.input_info)) # Input blob name "data"
input_shape_det = net_det.input_info[input_name_det].tensor_desc.dims # [1,3,384,672]
out_name_det = next(iter(net_det.outputs)) # Output blob name "detection_out"
out_shape_det = net_det.outputs[out_name_det].shape # [ image_id, label, conf, xmin, ymin, xmax, ymax ]
exec_net_det = ie.load_network(net_det, device)
# Preparation for face/pedestrian re-identification
model_reid = os.path.expanduser(MODEL_REID_DEF)
net_reid = ie.read_network(model_reid, model_reid[:-4] + '.bin')
input_name_reid = next(iter(net_reid.input_info)) # Input blob name "data"
input_shape_reid = net_reid.input_info[input_name_reid].tensor_desc.dims # [1,3,160,64]
out_name_reid = next(iter(net_reid.outputs)) # Output blob name "embd/dim_red/conv"
out_shape_reid = net_reid.outputs[out_name_reid].shape # [1,256,1,1]
exec_net_reid = ie.load_network(net_reid, device)
    # Prepare input
if (isstream):
        # Camera
cap = cv2.VideoCapture(input_stream)
ret, frame = cap.read()
loopflg = cap.isOpened()
else:
        # Read the image file
frame = cv2.imread(input_stream)
if frame is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
        # Resize while keeping the aspect ratio
img_h, img_w = frame.shape[:2]
if (img_w > WINDOW_WIDTH):
height = round(img_h * (WINDOW_WIDTH / img_w))
frame = cv2.resize(frame, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # single-pass loop
    # Record the processed result, step 1
if (outpath != 'non'):
if (isstream):
fps = int(cap.get(cv2.CAP_PROP_FPS))
out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))
    # Initialize measurements
fpsWithTick = mylib.fpsWithTick()
frame_count = 0
fps_total = 0
    fpsWithTick.get()  # start FPS measurement
tracker = objectTracker()
    # Main loop
while (loopflg):
if frame is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
        # Convert to the model input format
inBlob = cv2.resize(frame, (input_shape_det[_W], input_shape_det[_H]))
inBlob = inBlob.transpose((2, 0, 1))
inBlob = inBlob.reshape(input_shape_det)
detObj = exec_net_det.infer(inputs={input_name_det: inBlob}) # [1,1,200,7]
detObj = detObj[out_name_det][0].reshape((200,7))
objects = []
for obj in detObj: # obj = [ image_id, label, conf, xmin, ymin, xmax, ymax ]
if obj[2] > 0.75: # Confidence > 75%
xmin = abs(int(obj[3] * frame.shape[1]))
ymin = abs(int(obj[4] * frame.shape[0]))
xmax = abs(int(obj[5] * frame.shape[1]))
ymax = abs(int(obj[6] * frame.shape[0]))
class_id = int(obj[1])
obj_img=frame[ymin:ymax,xmin:xmax].copy() # Crop the found object
# Obtain feature vector of the detected object using re-identification model
inBlob = cv2.resize(obj_img, (input_shape_reid[_W], input_shape_reid[_H]))
inBlob = inBlob.transpose((2, 0, 1))
inBlob = inBlob.reshape(input_shape_reid)
featVec = exec_net_reid.infer(inputs={input_name_reid: inBlob})
featVec = featVec[out_name_reid][0].reshape((256))
objects.append(object([xmin,ymin, xmax,ymax], featVec, -1))
outimg = frame.copy()
tracker.trackObjects(objects)
tracker.evictTimeoutObjectFromDB()
tracker.drawTrajectory(outimg, objects)
checkLineCrosses(boundaryLines, objects)
drawBoundaryLines(outimg, boundaryLines)
checkAreaIntrusion(areas, objects)
drawAreas(outimg, areas)
# Draw bounding boxes, IDs and trajectory
for obj in objects:
id = obj.id
color = ( (((~id)<<6) & 0x100)-1, (((~id)<<7) & 0x0100)-1, (((~id)<<8) & 0x0100)-1 )
xmin, ymin, xmax, ymax = obj.pos
cv2.rectangle(outimg, (xmin, ymin), (xmax, ymax), color, 2)
cv2.putText(outimg, 'ID='+str(id), (xmin, ymin - 7), cv2.FONT_HERSHEY_COMPLEX, 1.0, color, 1)
        # Calculate FPS
fps = fpsWithTick.get()
st_fps = 'fps: {:>6.2f}'.format(fps)
if (speedflg == 'y'):
cv2.rectangle(outimg, (10, 38), (95, 55), (90, 90, 90), -1)
cv2.putText(outimg, st_fps, (15, 50), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)
        # Draw the title
if (titleflg == 'y'):
cv2.putText(outimg, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(240, 240, 0), lineType=cv2.LINE_AA)
        # Display the image
window_name = title + " (hit 'q' or 'esc' key to exit)"
cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
cv2.imshow(window_name, outimg)
        # Record the processed result, step 2
if (outpath != 'non'):
if (isstream):
outvideo.write(outimg)
else:
cv2.imwrite(outpath, outimg)
        # Exit on 'esc' or 'q' key (or when the window is closed)
breakflg = False
while(True):
key = cv2.waitKey(1)
prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
if key == 27 or key == 113 or (prop_val < 0.0): # 'esc' or 'q'
breakflg = True
break
if (isstream):
break
if ((breakflg == False) and isstream):
            # Read the next frame
ret, frame = cap.read()
if ret == False:
break
loopflg = cap.isOpened()
else:
loopflg = False
    # Cleanup
if (isstream):
cap.release()
    # Record the processed result, step 3
if (outpath != 'non'):
if (isstream):
outvideo.release()
cv2.destroyAllWindows()
print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
print('\n Finished.')
# Entry point
if __name__ == '__main__':
sys.exit(main() or 0)
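A numeric check of the line-crossing geometry above, assuming line(), calcIntersectPoint() and checkIntersect() from this script are in scope:

p1, p2 = (0, 0), (4, 4)   # object trajectory segment
p3, p4 = (0, 4), (4, 0)   # boundary line segment
print(checkIntersect(p1, p2, p3, p4))      # True (the segments cross)
print(calcIntersectPoint(p1, p2, p3, p4))  # (2.0, 2.0)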
$ python3 object-detection-and-line-cross2.py
--- Object Tracking with Line Crossing 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
Object Tracking with Line Crossing 2: Starting application...
- Image File : people-detection.264
- Device : CPU
- Program Title: y
- Speed flag : y
- Processed out: non
Discarded : id 0
Discarded : id 1
Discarded : id 2
Discarded : id 3
Discarded : id 4
Discarded : id 6
Discarded : id 5
Discarded : id 8
Discarded : id 11
Discarded : id 9
Discarded : id 7
Discarded : id 10
Discarded : id 12
FPS average: 32.50
Finished.
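For reference, trackObjects() matches the current detections to the feature DB with the Hungarian algorithm. A minimal, self-contained sketch of that assignment step with a hand-made cost matrix; in the script the entries are cosine distances between re-identification feature vectors:

from munkres import Munkres

# Rows = objects in the DB, columns = detections in the current frame;
# a smaller cosine distance means more similar.
cost = [[0.1, 0.9],
        [0.8, 0.2]]
for db_idx, det_idx in Munkres().compute(cost):
    print('DB object', db_idx, '-> detection', det_idx)
# -> DB object 0 -> detection 0
#    DB object 1 -> detection 1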
$ python3 gaze-estimation2.py -h
--- Gaze Estimation Demo with Sparking Laser Beam 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
usage: gaze-estimation2.py [-h] [-i IMAGE_FILE] [-d DEVICE] [-t TITLE]
[-s SPEED] [-o IMAGE_OUT]
optional arguments:
-h, --help show this help message and exit
-i IMAGE_FILE, --image IMAGE_FILE
Absolute path to image file. Default value is cam
-d DEVICE, --device DEVICE
Optional. Specify a target device to infer on. CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The demo will
look for a suitable plugin for the device specified.
Default value is CPU
-t TITLE, --title TITLE
                        Program title flag (y/n). Default value is 'y'
-s SPEED, --speed SPEED
                        Speed display flag (y/n). Default value is 'y'
-o IMAGE_OUT, --out IMAGE_OUT
Processed image file path. Default value is 'non'
▼ Modified source file "gaze-estimation2.py"
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## Gaze Estimation Demo with Sparking Laser Beam
##
## 2021.07.22 Masahiro Izutsu
##------------------------------------------
## https://github.com/yas-sim/gaze-estimation-with-laser-sparking
# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
WINDOW_WIDTH = 640
MODEL_DET_DEF = '~/model/intel/FP16/face-detection-adas-0001.xml'
MODEL_HP_DEF = '~/model/intel/FP16/head-pose-estimation-adas-0001.xml'
MODEL_GAZE_DEF = '~/model/intel/FP16/gaze-estimation-adas-0002.xml'
MODEL_LM_DEF = '~/model/intel/FP16/facial-landmarks-35-adas-0002.xml'
# Imports
import os
import sys
import argparse
import math
import time
import random
import numpy as np
from numpy import linalg as LA
import cv2
from scipy.spatial import distance
import mylib
# Load OpenVINO modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
# Title and version information
title = 'Gaze Estimation Demo with Sparking Laser Beam 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)
# Command-line argument parsing
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type=str,
default = 'cam',
help = 'Absolute path to image file. Default value is cam')
parser.add_argument('-d', '--device', default = 'CPU', type=str,
help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
'acceptable. The demo will look for a suitable plugin for the device specified. '
'Default value is CPU')
parser.add_argument('-t', '--title', metavar = 'TITLE',
default = 'y',
        help = 'Program title flag (y/n). Default value is \'y\'')
parser.add_argument('-s', '--speed', metavar = 'SPEED',
default = 'y',
        help = 'Speed display flag (y/n). Default value is \'y\'')
parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT',
default = 'non',
help = 'Processed image file path. Default value is \'non\'')
return parser
# Display basic information
def display_info(image, device, titleflg, speedflg, outpath):
print(YELLOW + title + ': Starting application...' + NOCOLOR)
print(' - ' + YELLOW + 'Image File : ' + NOCOLOR, image)
print(' - ' + YELLOW + 'Device : ' + NOCOLOR, device)
print(' - ' + YELLOW + 'Program Title: ' + NOCOLOR, titleflg)
print(' - ' + YELLOW + 'Speed flag : ' + NOCOLOR, speedflg)
print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)
# Determine the type of an input file
# Return value: 'jpeg', 'png', ...  image file
#               'None'              not an image file (e.g. a video file)
#               'NotFound'          the file does not exist
import imghdr
def is_pict(filename):
try:
imgtype = imghdr.what(filename)
except FileNotFoundError as e:
imgtype = 'NotFound'
return str(imgtype)
# ----------------------------------------------------------------------------
model_det = os.path.expanduser(MODEL_DET_DEF)
model_hp = os.path.expanduser(MODEL_HP_DEF)
model_gaze = os.path.expanduser(MODEL_GAZE_DEF)
model_lm = os.path.expanduser(MODEL_LM_DEF)
_N = 0
_C = 1
_H = 2
_W = 3
def line(p1, p2):
A = (p1[1] - p2[1])
B = (p2[0] - p1[0])
C = (p1[0]*p2[1] - p2[0]*p1[1])
return A, B, -C
def intersection(L1, L2):
D = L1[0] * L2[1] - L1[1] * L2[0]
Dx = L1[2] * L2[1] - L1[1] * L2[2]
Dy = L1[0] * L2[2] - L1[2] * L2[0]
x = Dx / D
y = Dy / D
return x,y
def intersection_check(p1, p2, p3, p4):
tc1 = (p1[0] - p2[0]) * (p3[1] - p1[1]) + (p1[1] - p2[1]) * (p1[0] - p3[0])
tc2 = (p1[0] - p2[0]) * (p4[1] - p1[1]) + (p1[1] - p2[1]) * (p1[0] - p4[0])
td1 = (p3[0] - p4[0]) * (p1[1] - p3[1]) + (p3[1] - p4[1]) * (p3[0] - p1[0])
td2 = (p3[0] - p4[0]) * (p2[1] - p3[1]) + (p3[1] - p4[1]) * (p3[0] - p2[0])
return tc1*tc2<0 and td1*td2<0
def draw_gaze_line(img, coord1, coord2, laser_flag):
if laser_flag == False:
# simple line
cv2.line(img, coord1, coord2, (0, 0, 255),2)
else:
# Laser mode :-)
beam_img = np.zeros(img.shape, np.uint8)
for t in range(20)[::-2]:
cv2.line(beam_img, coord1, coord2, (0, 0, 255-t*10), t*2)
img |= beam_img
def draw_spark(img, coord):
for i in range(20):
angle = random.random()*2*math.pi
dia = random.randrange(10,60)
x = coord[0] + int(math.cos(angle)*dia - math.sin(angle)*dia)
y = coord[1] + int(math.sin(angle)*dia + math.cos(angle)*dia)
cv2.line(img, coord, (x, y), (0, 255, 255), 2)
def usage():
print("""
Gaze estimation demo
'f': Flip image
'l': Laser mode on/off
's': Spark mode on/off
'b': Boundary box on/off
""")
def main():
    # Parse command-line arguments
ARGS = parse_args().parse_args()
    input_stream = ARGS.image
    if ARGS.image.lower() == "cam" or ARGS.image.lower() == "camera":
        input_stream = 0
        isstream = True
    else:
        filetype = is_pict(input_stream)
        isstream = filetype == 'None'
        if filetype == 'NotFound':
            print(RED + "\nInput file not found." + NOCOLOR)
            quit()
device = ARGS.device
titleflg = ARGS.title
speedflg = ARGS.speed
outpath = ARGS.out
display_info(input_stream, device, titleflg, speedflg, outpath)
usage()
boundary_box_flag = True
# Prep for face detection
ie = IECore()
net_det = ie.read_network(model=model_det, weights=model_det[:-4] + '.bin')
input_name_det = next(iter(net_det.input_info)) # Input blob name "data"
input_shape_det = net_det.input_info[input_name_det].tensor_desc.dims # [1,3,384,672]
out_name_det = next(iter(net_det.outputs)) # Output blob name "detection_out"
exec_net_det = ie.load_network(network=net_det, device_name=device, num_requests=1)
del net_det
# Preparation for landmark detection
net_lm = ie.read_network(model=model_lm, weights=model_lm[:-4] + '.bin')
input_name_lm = next(iter(net_lm.input_info)) # Input blob name
input_shape_lm = net_lm.input_info[input_name_lm].tensor_desc.dims # [1,3,60,60]
    out_name_lm = next(iter(net_lm.outputs)) # Output blob name
    out_shape_lm = net_lm.outputs[out_name_lm].shape # [1,70]
exec_net_lm = ie.load_network(network=net_lm, device_name=device, num_requests=1)
del net_lm
# Preparation for headpose detection
net_hp = ie.read_network(model=model_hp, weights=model_hp[:-4] + '.bin')
input_name_hp = next(iter(net_hp.input_info)) # Input blob name
input_shape_hp = net_hp.input_info[input_name_hp].tensor_desc.dims # [1,3,60,60]
out_name_hp = next(iter(net_hp.outputs)) # Output blob name
    out_shape_hp = net_hp.outputs[out_name_hp].shape # [1,1] (the model has three such outputs: yaw, pitch, roll)
exec_net_hp = ie.load_network(network=net_hp, device_name=device, num_requests=1)
del net_hp
# Preparation for gaze estimation
net_gaze = ie.read_network(model=model_gaze, weights=model_gaze[:-4] + '.bin')
input_shape_gaze = [1, 3, 60, 60]
exec_net_gaze = ie.load_network(network=net_gaze, device_name=device)
del net_gaze
    # Prepare input
if (isstream):
        # Camera
cap = cv2.VideoCapture(input_stream)
if (input_stream == 0):
camx, camy = [(1920, 1080), (1280, 720), (800, 600), (480, 480)][1] # Set camera resolution [1]=1280,720
cap.set(cv2.CAP_PROP_FRAME_WIDTH , camx)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camy)
ret, img = cap.read()
loopflg = cap.isOpened()
else:
        # Read the image file
img = cv2.imread(input_stream)
if img is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
        # Resize while keeping the aspect ratio
img_h, img_w = img.shape[:2]
if (img_w > WINDOW_WIDTH):
height = round(img_h * (WINDOW_WIDTH / img_w))
img = cv2.resize(img, dsize = (WINDOW_WIDTH, height))
        loopflg = True  # single-pass loop
    # Record the processed result, step 1
if (outpath != 'non'):
if (isstream):
fps = int(cap.get(cv2.CAP_PROP_FPS))
out_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
out_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
outvideo = cv2.VideoWriter(outpath, fourcc, fps, (out_w, out_h))
    # Initialize measurements
fpsWithTick = mylib.fpsWithTick()
frame_count = 0
fps_total = 0
    fpsWithTick.get()  # start FPS measurement
laser_flag=True
flip_flag =True
spark_flag=True
    # Main loop
while (loopflg):
if img is None:
print(RED + "\nUnable to read the input." + NOCOLOR)
quit()
if flip_flag == True:
img = cv2.flip(img, 1) # flip image
out_img = img.copy() # out_img will be drawn and modified to make an display image
        # Convert to the model input format
img1 = cv2.resize(img, (input_shape_det[_W], input_shape_det[_H]))
img1 = img1.transpose((2, 0, 1)) # Change data layout from HWC to CHW
img1 = img1.reshape(input_shape_det)
res_det = exec_net_det.infer(inputs={input_name_det: img1}) # Detect faces
gaze_lines = []
for obj in res_det[out_name_det][0][0]: # obj = [ image_id, label, conf, xmin, ymin, xmax, ymax ]
if obj[2] > 0.75: # Confidence > 75%
xmin = abs(int(obj[3] * img.shape[1]))
ymin = abs(int(obj[4] * img.shape[0]))
xmax = abs(int(obj[5] * img.shape[1]))
ymax = abs(int(obj[6] * img.shape[0]))
class_id = int(obj[1])
face=img[ymin:ymax,xmin:xmax] # Crop the face image
if boundary_box_flag == True:
cv2.rectangle(out_img, (xmin, ymin), (xmax, ymax), (255,255,0), 2)
# Find facial landmarks (to find eyes)
face1=cv2.resize(face, (input_shape_lm[_W], input_shape_lm[_H]))
face1=face1.transpose((2,0,1))
face1=face1.reshape(input_shape_lm)
res_lm = exec_net_lm.infer(inputs={input_name_lm: face1}) # Run landmark detection
lm=res_lm[out_name_lm][0][:8].reshape(4,2) # [[left0x, left0y], [left1x, left1y], [right0x, right0y], [right1x, right1y] ]
                # Estimate head orientation (yaw=Y, pitch=X, roll=Z)
res_hp = exec_net_hp.infer(inputs={input_name_hp: face1}) # Run head pose estimation
yaw = res_hp['angle_y_fc'][0][0]
pitch = res_hp['angle_p_fc'][0][0]
roll = res_hp['angle_r_fc'][0][0]
_X=0
_Y=1
# Landmark position memo... lm[1] (eye) lm[0] (nose) lm[2] (eye) lm[3]
eye_sizes = [ abs(int((lm[0][_X]-lm[1][_X]) * face.shape[1])), abs(int((lm[3][_X]-lm[2][_X]) * face.shape[1])) ] # eye size in the cropped face image
eye_centers = [ [ int(((lm[0][_X]+lm[1][_X])/2 * face.shape[1])), int(((lm[0][_Y]+lm[1][_Y])/2 * face.shape[0])) ],
[ int(((lm[3][_X]+lm[2][_X])/2 * face.shape[1])), int(((lm[3][_Y]+lm[2][_Y])/2 * face.shape[0])) ] ] # eye center coordinate in the cropped face image
if eye_sizes[0]<4 or eye_sizes[1]<4:
continue
ratio = 0.7
eyes = []
for i in range(2):
# Crop eye images
x1 = int(eye_centers[i][_X]-eye_sizes[i]*ratio)
x2 = int(eye_centers[i][_X]+eye_sizes[i]*ratio)
y1 = int(eye_centers[i][_Y]-eye_sizes[i]*ratio)
y2 = int(eye_centers[i][_Y]+eye_sizes[i]*ratio)
eyes.append(cv2.resize(face[y1:y2, x1:x2].copy(), (input_shape_gaze[_W], input_shape_gaze[_H]))) # crop and resize
# Draw eye boundary boxes
if boundary_box_flag == True:
cv2.rectangle(out_img, (x1+xmin,y1+ymin), (x2+xmin,y2+ymin), (0,255,0), 2)
# rotate eyes around Z axis to keep them level
if roll != 0.:
rotMat = cv2.getRotationMatrix2D((int(input_shape_gaze[_W]/2), int(input_shape_gaze[_H]/2)), roll, 1.0)
eyes[i] = cv2.warpAffine(eyes[i], rotMat, (input_shape_gaze[_W], input_shape_gaze[_H]), flags=cv2.INTER_LINEAR)
eyes[i] = eyes[i].transpose((2, 0, 1)) # Change data layout from HWC to CHW
eyes[i] = eyes[i].reshape((1,3,60,60))
hp_angle = [ yaw, pitch, 0 ] # head pose angle in degree
res_gaze = exec_net_gaze.infer(inputs={'left_eye_image' : eyes[0],
'right_eye_image' : eyes[1],
'head_pose_angles': hp_angle}) # gaze estimation
                gaze_vec = res_gaze['gaze_vector'][0] # result is in an orthogonal coordinate system (x, y, z; not yaw, pitch, roll) and is not normalized
gaze_vec_norm = gaze_vec / np.linalg.norm(gaze_vec) # normalize the gaze vector
vcos = math.cos(math.radians(roll))
vsin = math.sin(math.radians(roll))
tmpx = gaze_vec_norm[0]*vcos + gaze_vec_norm[1]*vsin
tmpy = -gaze_vec_norm[0]*vsin + gaze_vec_norm[1]*vcos
gaze_vec_norm = [tmpx, tmpy]
                # Store gaze line coordinates
for i in range(2):
coord1 = (eye_centers[i][_X]+xmin, eye_centers[i][_Y]+ymin)
coord2 = (eye_centers[i][_X]+xmin+int((gaze_vec_norm[0]+0.)*3000), eye_centers[i][_Y]+ymin-int((gaze_vec_norm[1]+0.)*3000))
gaze_lines.append([coord1, coord2, False]) # line(coord1, coord2); False=spark flag
# Gaze lines intersection check (for sparking)
if spark_flag == True:
for g1 in range(len(gaze_lines)):
for g2 in range(g1+1, len(gaze_lines)):
if gaze_lines[g1][2]==True or gaze_lines[g2][2]==True:
                        continue # Skip if either line has already been marked as crossed
x1 = gaze_lines[g1][0]
y1 = gaze_lines[g1][1]
x2 = gaze_lines[g2][0]
y2 = gaze_lines[g2][1]
if intersection_check(x1, y1, x2, y2) == True:
l1 = line(x1, y1)
l2 = line(x2, y2)
x, y = intersection( l1, l2 ) # calculate crossing coordinate
gaze_lines[g1][1] = [int(x), int(y)]
gaze_lines[g1][2] = True
gaze_lines[g2][1] = [int(x), int(y)]
gaze_lines[g2][2] = True
# Drawing gaze lines and sparks
for gaze_line in gaze_lines:
draw_gaze_line(out_img, (gaze_line[0][0], gaze_line[0][1]), (gaze_line[1][0], gaze_line[1][1]), laser_flag)
if gaze_line[2]: # a crossing was detected on this line
draw_spark(out_img, (gaze_line[1][0], gaze_line[1][1]))
# Calculate FPS
fps = fpsWithTick.get()
st_fps = 'fps: {:>6.2f}'.format(fps)
if (speedflg == 'y'):
cv2.rectangle(out_img, (10, 38), (95, 55), (90, 90, 90), -1)
cv2.putText(out_img, st_fps, (15, 50), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.4, color=(255, 255, 255), lineType=cv2.LINE_AA)
# Draw the title
if (titleflg == 'y'):
cv2.putText(out_img, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(240, 240, 0), lineType=cv2.LINE_AA)
# Display the image
window_name = title + " (hit 'q' or 'esc' key to exit)"
cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)
cv2.imshow(window_name, out_img)
# Record the processed result (step 2)
if (outpath != 'non'):
if (isstream):
outvideo.write(out_img)
else:
cv2.imwrite(outpath, out_img)
# Key handling: exit on 'esc' or 'q', toggle display flags otherwise
breakflg = False
while True:
key = cv2.waitKey(1)
prop_val = cv2.getWindowProperty(window_name, cv2.WND_PROP_ASPECT_RATIO)
if key == 27 or key == ord('q') or prop_val < 0.0: # 'esc'/'q' key, or the window was closed
breakflg = True
break
if key == ord('l'): laser_flag = not laser_flag # toggle laser mode
if key == ord('f'): flip_flag = not flip_flag # toggle image flip
if key == ord('b'): boundary_box_flag = not boundary_box_flag # toggle boundary boxes
if key == ord('s'): spark_flag = not spark_flag # toggle sparks
if (isstream):
break
if not breakflg and isstream:
# Read the next frame
ret, img = cap.read()
if not ret:
break
loopflg = cap.isOpened()
else:
loopflg = False
# Cleanup
if (isstream):
cap.release()
# Record the processed result (step 3)
if (outpath != 'non'):
if (isstream):
outvideo.release()
cv2.destroyAllWindows()
print('\nFPS average: {:>10.2f}'.format(fpsWithTick.get_average()))
print('\n Finished.')
# Entry point of the main function (start of execution)
if __name__ == '__main__':
sys.exit(main() or 0)
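The line(), intersection() and intersection_check() helpers called in the spark-detection loop are defined earlier in this script. For reference, here is a minimal sketch of one common formulation of the first two, based on Cramer's rule over homogeneous line coefficients; this is an illustration under that assumption, not necessarily identical to the actual helpers in the listing:

def line(p1, p2):
    # Coefficients (A, B, C) of the line A*x + B*y = C through points p1 and p2
    A = p1[1] - p2[1]
    B = p2[0] - p1[0]
    C = p1[0] * p2[1] - p2[0] * p1[1]
    return A, B, -C

def intersection(l1, l2):
    # Solve the two line equations with Cramer's rule (assumes non-parallel lines)
    D  = l1[0] * l2[1] - l1[1] * l2[0]
    Dx = l1[2] * l2[1] - l1[1] * l2[2]
    Dy = l1[0] * l2[2] - l1[2] * l2[0]
    return Dx / D, Dy / D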
$ python3 image-inpainting2.py -h
--- image inpainting 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
usage: image-inpainting2.py [-h] [-i IMAGE_FILE] [-d DEVICE] [-o IMAGE_OUT]
optional arguments:
-h, --help show this help message and exit
-i IMAGE_FILE, --image IMAGE_FILE
Absolute path to image file. Default value is
./image.jpg
-d DEVICE, --device DEVICE
Optional. Specify a target device to infer on. CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The demo will
look for a suitable plugin for the device specified.
Default value is CPU
-o IMAGE_OUT, --out IMAGE_OUT
Processed image file path. Default value is 'non'
▼ Revised source file "image-inpainting2.py"
▲ Revised source file "image-inpainting2.py"
# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO™ toolkit
## image inpainting
##
## 2021.07.22 Masahiro Izutsu
##------------------------------------------
## https://github.com/yas-sim/interactive-image-inpainting
# Color Escape Code
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'
YELLOW = '\033[1;33m'
# Constant definitions
MODEL_DEF = '~/model/public/FP16/gmcnn-places2-tf.xml'
# Imports
import os
import sys
import argparse
import cv2
import numpy as np
# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
# Title and version information
title = 'image inpainting 2'
print(GREEN)
print('--- {} ---'.format(title))
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())
print(NOCOLOR)
# Command-line parameter processing
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--image', metavar = 'IMAGE_FILE', type=str,
default = 'image.jpg',
help = 'Absolute path to image file. Default value is ./image.jpg')
parser.add_argument('-d', '--device', default = 'CPU', type=str,
help = 'Optional. Specify a target device to infer on. CPU, GPU, FPGA, HDDL or MYRIAD is '
'acceptable. The demo will look for a suitable plugin for the device specified. '
'Default value is CPU')
parser.add_argument('-o', '--out', metavar = 'IMAGE_OUT',
default = 'non',
help = 'Processed image file path. Default value is \'non\'')
return parser
# Display basic information
def display_info(image, device, outpath):
print(YELLOW + title + ': Starting application...' + NOCOLOR)
print(' - ' + YELLOW + 'Image File : ' + NOCOLOR, image)
print(' - ' + YELLOW + 'Device : ' + NOCOLOR, device)
print(' - ' + YELLOW + 'Processed out: ' + NOCOLOR, outpath)
# Determine the type of an image file
# Return value: 'jpeg', 'png', ... image file
# 'None' not an image file (e.g. a video file)
# 'NotFound' the file does not exist
import imghdr
def is_pict(filename):
try:
imgtype = imghdr.what(filename)
except FileNotFoundError as e:
imgtype = 'NotFound'
return str(imgtype)
# ----------------------------------------------------------------------------
g_mouseX=-1
g_mouseY=-1
g_mouseBtn = -1 # 0=left, 1=right, -1=none
g_UIState = 0 # 0: normal UI, 1: wait for a click
g_clickedFlag = False
g_inpaintFlag = False
g_penSize = 8
g_canvas = []
g_mask = []
def clearMask():
global g_canvas
global g_mask
g_mask = np.full(g_canvas.shape, [0,0,0], np.uint8) # The size of the mask is the same as the canvas
def drawUI(image):
cv2.circle(image, (0 , 0), 100, ( 0, 255, 255), -1)
cv2.circle(image, (image.shape[1]-1, 0), 100, ( 0, 255, 0), -1)
cv2.putText(image, 'INPAINT', (4 ,20), cv2.FONT_HERSHEY_PLAIN, 1, ( 0, 0, 0), 2)
cv2.putText(image, 'CLEAR' , (image.shape[1]-60 ,20), cv2.FONT_HERSHEY_PLAIN, 1, ( 0, 0, 0), 2)
def drawCursor(image):
global g_mouseX, g_mouseY
global g_penSize
cv2.circle(image, (g_mouseX, g_mouseY), g_penSize, (0,0,0), -1)
def dispCanvas():
global g_canvas
global g_mask
canvas = g_canvas.copy()
canvas |= g_mask
drawUI(canvas)
drawCursor(canvas)
cv2.imshow('canvas', canvas)
cv2.waitKey(1)
# Mouse event handler
def onMouse(event, x, y, flags, param):
global g_mouseX, g_mouseY
global g_mouseBtn
global g_inpaintFlag
global g_clickedFlag
global g_UIState
global g_mask
black_pen = lambda x1, y1, x2, y2: cv2.line(g_mask, (x1, y1), (x2, y2), ( 0, 0, 0), thickness=16) # eraser (fixed thickness of 16)
white_pen = lambda x1, y1, x2, y2: cv2.line(g_mask, (x1, y1), (x2, y2), (255,255,255), thickness=g_penSize) # marks the region to inpaint
if g_UIState==0: # Normal UI
if event == cv2.EVENT_LBUTTONDOWN:
p0=np.array([ 0, 0])
p1=np.array([g_canvas.shape[1], 0])
pp=np.array([ g_mouseX, g_mouseY])
if np.linalg.norm(pp-p0, ord=2)<100: # INPAINT button (top-left)
g_inpaintFlag = True
elif np.linalg.norm(pp-p1, ord=2)<100: # CLEAR button (top-right)
clearMask()
else:
g_mouseBtn = 0 # left button
if event == cv2.EVENT_LBUTTONUP:
if g_mouseBtn==0:
white_pen(g_mouseX, g_mouseY, x, y)
g_mouseBtn = -1
if event == cv2.EVENT_RBUTTONDOWN:
g_mouseBtn = 1 # right button
if event == cv2.EVENT_RBUTTONUP:
if g_mouseBtn==1:
black_pen(g_mouseX, g_mouseY, x, y)
g_mouseBtn = -1
if event == cv2.EVENT_MOUSEMOVE:
if g_mouseBtn==0:
white_pen(g_mouseX, g_mouseY, x, y)
elif g_mouseBtn==1:
black_pen(g_mouseX, g_mouseY, x, y)
elif g_UIState==1: # No drawing; waiting for a click
if event == cv2.EVENT_LBUTTONUP:
g_clickedFlag=True
g_mouseX = x
g_mouseY = y
def onTrackbar(x):
global g_penSize
g_penSize = x
# ----------------------------------------------------------------------------
def main():
_H=0
_W=1
_C=2
count=0
global g_canvas, g_mask
global g_UIState
global g_inpaintFlag
global g_clickedFlag
# Parse command-line parameters
ARGS = parse_args().parse_args()
input_stream = ARGS.image
filetype = is_pict(input_stream)
if filetype == 'None' or filetype == 'NotFound':
print(RED + "\nInput file not found." + NOCOLOR)
quit()
device = ARGS.device
outpath = ARGS.out
display_info(input_stream, device, outpath)
g_canvas = cv2.imread(input_stream)
ie = IECore()
model = os.path.expanduser(MODEL_DEF)
net = ie.read_network(model, model[:-4] + '.bin')
input_blob1 = 'Placeholder'
input_blob2 = 'Placeholder_1'
out_blob = 'Minimum'
in_shape1 = net.input_info[input_blob1].tensor_desc.dims # 1,3,512,680
in_shape2 = net.input_info[input_blob2].tensor_desc.dims
out_shape = net.outputs[out_blob].shape # 1,3,512,680
exec_net = ie.load_network(net, device)
clearMask()
cv2.namedWindow('canvas')
cv2.setMouseCallback('canvas', onMouse)
cv2.createTrackbar('Pen size', 'canvas', 8, 32, onTrackbar)
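# Note: OpenCV 4.5.x prints a "'value' pointer is unsafe and deprecated" warning for this call
# (visible in the run log below); it is harmless here because the pen size is read via the callback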
while True:
g_UIState = 0
while not g_inpaintFlag:
dispCanvas()
key=cv2.waitKey(100)
if key==27 or key==ord('q'):
return
if key==ord(' '):
break
print('\n inpainting...')
g_inpaintFlag = False
g_UIState = 1
img = g_canvas | g_mask
img = cv2.resize(img, (in_shape1[3], in_shape1[2]))
img = img.transpose((_C, _H, _W))
img = img.reshape(in_shape1)
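# Build the second model input: a single-channel binary mask (1.0 marks the pixels to re-synthesize)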
msk = cv2.resize(g_mask, (in_shape2[3], in_shape2[2]))
msk = msk.transpose((_C, _H, _W))
msk = msk[0,:,:]
msk = np.where(msk>0., 1., 0.).astype(np.float32)
msk = msk.reshape(in_shape2)
res = exec_net.infer(inputs={input_blob1: img, input_blob2: msk})
out = np.transpose(res[out_blob], (0, 2, 3, 1)).astype(np.uint8) # NCHW -> NHWC for display
cv2.imshow('Result', out[0])
cv2.waitKey(1)
# Record the processed result
if (outpath != 'non'):
if count == 0:
result = outpath
else:
root, ext = os.path.splitext(outpath) # number the 2nd and later outputs: 'out.jpg' -> 'out1.jpg'
result = root + str(count) + ext
cv2.imwrite(result, out[0])
print(' output file', "'" + result + "'")
count += 1
print(' process end !! >> edit start')
return 0
# Entry point of the main function (start of execution)
if __name__ == '__main__':
sys.exit(main())
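The input and output blob names ('Placeholder', 'Placeholder_1', 'Minimum') are hard-coded above for the gmcnn-places2-tf IR. With the same inference_engine API they can also be discovered from the network itself; a minimal sketch, assuming the IR files sit in the working directory (the path is hypothetical):

# Minimal sketch: enumerate blob names and shapes instead of hard-coding them
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network('gmcnn-places2-tf.xml', 'gmcnn-places2-tf.bin')  # hypothetical path
for name, info in net.input_info.items():
    print('input :', name, info.tensor_desc.dims)   # e.g. Placeholder [1, 3, 512, 680]
for name, data in net.outputs.items():
    print('output:', name, data.shape)              # e.g. Minimum [1, 3, 512, 680]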
$ python3 image-inpainting2.py
--- image inpainting 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
image inpainting 2: Starting application...
- Image File : image.jpg
- Device : CPU
- Processed out: non
[ WARN:0] global ../opencv/modules/highgui/src/window.cpp (661) createTrackbar UI/Trackbar(Pen size@canvas): Using 'value' pointer is unsafe and deprecated. Use NULL as value pointer. To fetch trackbar value setup callback.
inpainting...
process end !! >> edit start
$ python3 handwritten-japanese-OCR-touch-panel-demo2.py -h
--- Handwritten Japanese OCR demo 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
Handwritten Japanese OCR Demo
ESC: Quit
Mouse L-Button: Draw
Mouse R-Button: Erase
Threshold = Text area detect threshold
usage: handwritten-japanese-OCR-touch-panel-demo2.py [-h] [-d DEVICE]
[-o IMAGE_OUT]
optional arguments:
-h, --help show this help message and exit
-d DEVICE, --device DEVICE
Optional. Specify a target device to infer on. CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The demo will
look for a suitable plugin for the device specified.
Default value is CPU
-o IMAGE_OUT, --out IMAGE_OUT
Processed image file path. Default value is 'non'
$ python3 handwritten-japanese-OCR-touch-panel-demo2.py
--- Handwritten Japanese OCR demo 2 ---
4.5.3-openvino
OpenVINO inference_engine: 2021.4.0-3839-cd81789d294-releases/2021/4
Handwritten Japanese OCR Demo
ESC: Quit
Mouse L-Button: Draw
Mouse R-Button: Erase
Threshold = Text area detect threshold
Handwritten Japanese OCR demo 2: Starting application...
- Device : CPU
- Processed out: non
[ WARN:0] global ../opencv/modules/highgui/src/window.cpp (661) createTrackbar UI/Trackbar(Threshold@canvas): Using 'value' pointer is unsafe and deprecated. Use NULL as value pointer. To fetch trackbar value setup callback.
text detection...
text detection - completed
OCR result (0): ['日本語入力']
process end !! >> edit start
$ python3 ir-summary.py -h
usage: ir-summary.py [-h] [-m MODEL] [-d DIR] [-v]
optional arguments:
-h, --help show this help message and exit
-m MODEL, --model MODEL
input IR model path
-d DIR, --dir DIR input IR model directory
-v, --verbose output detailed information