Practice deep-learning inference using archives of pre-trained model files (part 3).
| model | input name: shape | output name: shape |
| face-detection-adas-0001 | 'data': [1, 3, 384, 672] - an input image in [BxCxHxW] format; expected color order is BGR | 'detection_out': [1, 1, 200, 7] - format: [image_id, label, conf, x_min, y_min, x_max, y_max] |
| age-gender-recognition-retail-0013 | 'data': [1, 3, 62, 62] | 'age_conv3': [1, 1, 1, 1] - estimated age divided by 100; 'prob': [1, 2, 1, 1] - softmax output across 2 type classes [female, male] |
| emotions-recognition-retail-0003 | 'data': [1, 3, 64, 64] | 'prob_emotion': [1, 5, 1, 1] - softmax output across five emotions ('neutral', 'happy', 'sad', 'surprise', 'anger') |
| head-pose-estimation-adas-0001 | 'data': [1, 3, 60, 60] | 'angle_y_fc': [1, 1], 'angle_p_fc': [1, 1], 'angle_r_fc': [1, 1] (Inference Engine format) - supported ranges: YAW [-90, 90], PITCH [-70, 70], ROLL [-70, 70] |
| facial-landmarks-35-adas-0002 | 'data': [1, 3, 60, 60] | 'align_fc3': [1, 70] - 70 floating point values for 35 landmarks' normed coordinates in the form (x0, y0, x1, y1, ..., x34, y34) |
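These names and shapes can be checked directly against the IR files. Below is a minimal sketch (assuming the .xml/.bin files are already in ./FP16/, as downloaded in the next step) that prints every input and output blob of a model:

from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model='FP16/face-detection-adas-0001.xml',
                      weights='FP16/face-detection-adas-0001.bin')

# Print every input/output blob name together with its shape
for name, info in net.input_info.items():
    print('input :', name, info.input_data.shape)
for name, data in net.outputs.items():
    print('output:', name, data.shape)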
$ cd ~/workspace/FP16
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/face-detection-adas-0001/FP16/face-detection-adas-0001.bin
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/face-detection-adas-0001/FP16/face-detection-adas-0001.xml
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.bin
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.xml
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.bin
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.xml
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001.bin
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001.xml
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/facial-landmarks-35-adas-0002/FP16/facial-landmarks-35-adas-0002.bin
~/workspace/FP16 $ wget --no-check-certificate https://download.01.org/opencv/2021/openvinotoolkit/2021.2/open_model_zoo/models_bin/3/facial-landmarks-35-adas-0002/FP16/facial-landmarks-35-adas-0002.xml
~/workspace/FP16 $ ls
age-gender-recognition-retail-0013.bin  face-detection-retail-0004.bin     head-pose-estimation-adas-0001.bin
age-gender-recognition-retail-0013.xml  face-detection-retail-0004.xml     head-pose-estimation-adas-0001.xml
emotions-recognition-retail-0003.bin    face-detection-retail-0005.bin     landmarks-regression-retail-0009.bin
emotions-recognition-retail-0003.xml    face-detection-retail-0005.xml     landmarks-regression-retail-0009.xml
face-detection-adas-0001.bin            facial-landmarks-35-adas-0002.bin
face-detection-adas-0001.xml            facial-landmarks-35-adas-0002.xml
For testing, use the same image as the reference example, taken from Microsoft's How-Old.net.
# -*- coding: utf-8 -*-
## OpenVINO Interactive face detection demo
## step-1  2021.01.18

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import matplotlib.pyplot as plt

# plot setting
rows = 6
columns = 6
plt.rcParams['figure.figsize'] = (18.0, 18.0)
figsize = (8, 8)

# Read Image
image_url = "https://how-old.net/Images/faces2/main001.jpg"
response = requests.get(image_url)
frame = np.array(Image.open(BytesIO(response.content)))
print("Original Shape:{}".format(frame.shape[:2]))

# resize image with keeping frame width
scale = 640 / frame.shape[1]
frame = cv2.resize(frame, dsize=None, fx=scale, fy=scale)
frame_h, frame_w = frame.shape[:2]
init_frame = frame.copy()
print("frame_h, frame_w:{}".format(frame.shape[:2]))

plt.figure(figsize=figsize)
plt.imshow(frame)
plt.show()

# for an image on local store
# OpenCV uses BGR as its default color (matplotlib uses RGB)
# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# plt.imshow(face_frame)
~/workspace $ vi 01_face_demo1a.py

# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO Interactive face detection demo
##   ** Image Show **  step-1
##   2021.01.18 Masahiro Izutsu
##------------------------------------------

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np

# Title
title = 'Image Show'
print('*** {} ***'.format(title))

# Version info
print(cv2.__version__)

# Function definition
def getResize(img, basePixSize):
    height = img.shape[0]
    width = img.shape[1]
    largeSize = max(height, width)        # size of the longer side
    resizeRate = basePixSize / largeSize  # compute the resize ratio
    img = cv2.resize(img, (int(width * resizeRate), int(height * resizeRate)))
    return img

# Load the image
image_url = "https://how-old.net/Images/faces2/main001.jpg"
response = requests.get(image_url)
img = np.array(Image.open(BytesIO(response.content)))
print("Original Shape:{}".format(img.shape[:2]))

# Resize while keeping the aspect ratio
img = getResize(img, 640)
print("Resize Shape:{}".format(img.shape[:2]))

# Draw the title
cv2.putText(img, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(0, 200, 0), lineType=cv2.LINE_AA)

# Show the image (PIL loads RGB, OpenCV expects BGR, so swap the channels)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
cv2.imshow('Face detection Demo', img)

# Exit when a key is pressed
cv2.waitKey(0)

# Cleanup
cv2.destroyAllWindows()
Perform face detection using the pre-trained model 'face-detection-adas-0001'.
Because the OpenVINO™ Toolkit version has changed, changes such as to the module imports are required.
Inputs
  Name: input, shape: [1x3x384x672] - An input image in the format [BxCxHxW], where:
    B - batch size
    C - number of channels
    H - image height
    W - image width
  Expected color order is BGR.
※ The input Name appears to be wrong: 'input' → 'data'
Outputs
  The net outputs blob with shape: [1, 1, N, 7], where N is the number of detected bounding boxes.
  The results are sorted by confidence in decreasing order.
  Each detection has the format [image_id, label, conf, x_min, y_min, x_max, y_max], where:
    image_id - ID of the image in the batch
    label - predicted class ID (1 - face)
    conf - confidence for the predicted class
    (x_min, y_min) - coordinates of the top left bounding box corner
    (x_max, y_max) - coordinates of the bottom right bounding box corner
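As a minimal post-processing sketch (assuming, as in the scripts below, that out already holds the 'detection_out' blob and frame the original image), each detection row is filtered by confidence and scaled back to pixel coordinates:

import numpy as np

detections = np.squeeze(out)          # [1, 1, 200, 7] -> (200, 7)
for det in detections:
    conf = float(det[2])
    if conf > 0.5:                    # probability threshold
        xmin = int(det[3] * frame.shape[1])
        ymin = int(det[4] * frame.shape[0])
        xmax = int(det[5] * frame.shape[1])
        ymax = int(det[6] * frame.shape[0])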
# -*- coding: utf-8 -*-
## OpenVINO Interactive face detection demo
## step-2  2021.01.18  update 2021.02.10

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import matplotlib.pyplot as plt

# plot setting
rows = 6
columns = 6
plt.rcParams['figure.figsize'] = (18.0, 18.0)
figsize = (8, 8)

# Read Image
image_url = "https://how-old.net/Images/faces2/main001.jpg"
response = requests.get(image_url)
frame = np.array(Image.open(BytesIO(response.content)))
print("Original Shape:{}".format(frame.shape[:2]))

# resize image with keeping frame width
scale = 640 / frame.shape[1]
frame = cv2.resize(frame, dsize=None, fx=scale, fy=scale)
frame_h, frame_w = frame.shape[:2]
init_frame = frame.copy()
print("frame_h, frame_w:{}".format(frame.shape[:2]))

#plt.figure(figsize=figsize)
#plt.imshow(frame)
#plt.show()

# for an image on local store
# OpenCV uses BGR as its default color (matplotlib uses RGB)
# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# plt.imshow(face_frame)

# Face Detection
# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
print("OpenVINO inference_engine:", get_version())

# Load the model
ie = IECore()
fp_path = "./FP16/"
model_xml = fp_path + "face-detection-adas-0001.xml"
model_bin = fp_path + "face-detection-adas-0001.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 3. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 5. Create Async Request
in_frame = cv2.resize(frame, (w, h))
in_frame = in_frame.transpose((2, 0, 1))
in_frame = in_frame.reshape((n, c, h, w))
exec_net.start_async(request_id=0, inputs={input_blob: in_frame})  # res's shape: [1, 1, 200, 7]

# 6. Receive Async Request
if exec_net.requests[0].wait(-1) == 0:
    res = exec_net.requests[0].outputs[out_blob]
    faces = res[0][:, np.where(res[0][0][:, 2] > 0.5)]  # prob threshold : 0.5

# 7. draw faces
frame = init_frame.copy()
for i, face in enumerate(faces[0][0]):
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    area = ((ymax - ymin) * (xmax - xmin))
    print(i, frame_w, frame_h, xmin, ymin, xmax, ymax, np.sqrt(area)/np.sqrt(frame_w*frame_h)*100)
    """
    xmin = int(face[3] * frame_w)
    ymin = int(face[4] * frame_h)
    xmax = int(face[5] * frame_w)
    ymax = int(face[6] * frame_h)
    """
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.putText(frame, str(i), (xmin + 3, ymin + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

plt.figure(figsize=figsize)
plt.imshow(frame)
faces
plt.show()
pi@raspberrypi:~/workspace $ python3 01_face_demo2.py
Original Shape:(1155, 1500)
frame_h, frame_w:(493, 640)
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
input blob: name="data", output blob: name="detection_out"
input.shape:[1, 3, 384, 672]
output.shape:[1, 1, 200, 7]
01_face_demo2.py:73: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  res = exec_net.requests[0].outputs[out_blob]
0 640 493 221 67 305 196 18.531947088212956
1 640 493 431 100 519 207 17.275068505983118
2 640 493 514 250 600 367 17.857831050198797
3 640 493 39 111 142 229 19.62664961865753

※ A deprecation warning remains; it will be looked into at a later date.
~/workspace $ vi 01_face_demo2a.py

# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO Interactive face detection demo
##   ** Face Detection **  step-2
##   2021.01.18 Masahiro Izutsu
##
##   2021.02.10 warning error
##------------------------------------------

import cv2
import numpy as np

# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

# Title
title = 'Face Detection'
print('*** {} ***'.format(title))

# Version info
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())

# Function definition
def getResize(img, basePixSize):
    height = img.shape[0]
    width = img.shape[1]
    largeSize = max(height, width)        # size of the longer side
    resizeRate = basePixSize / largeSize  # compute the resize ratio
    img = cv2.resize(img, (int(width * resizeRate), int(height * resizeRate)))
    return img

# Load the model (face detection) face-detection-adas-0001
ie = IECore()
net = ie.read_network(model='FP16/face-detection-adas-0001.xml', weights='FP16/face-detection-adas-0001.bin')
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# Input/output settings (face detection)
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape

# Load the input image
frame = cv2.imread('./image/main001.jpg')
frame = getResize(frame, 640)

# Convert to the input data format
img = cv2.resize(frame, (w, h))     # resize
img = img.transpose((2, 0, 1))      # HWC > CHW
img = np.expand_dims(img, axis=0)   # adjust dimensions

# Run inference
out = exec_net.infer(inputs={'data': img})

# Extract only the required data from the output
out = out['detection_out']
out = np.squeeze(out)               # remove all dimensions of size 1

# Draw the title
cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(0, 200, 0), lineType=cv2.LINE_AA)

# Process each detected face region one by one
for detection in out:
    # Get the confidence value
    confidence = float(detection[2])

    # Convert the bounding-box coordinates to the input-image scale
    xmin = int(detection[3] * frame.shape[1])
    ymin = int(detection[4] * frame.shape[0])
    xmax = int(detection[5] * frame.shape[1])
    ymax = int(detection[6] * frame.shape[0])

    # Show the bounding box only when confidence > 0.5
    if confidence > 0.5:
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color=(240, 180, 0), thickness=3)

# Show the image
cv2.imshow('frame', frame)

# Exit when a key is pressed
cv2.waitKey(0)
cv2.destroyAllWindows()
pi@raspberrypi:~/workspace $ python3 01_face_demo2a.py
*** Face Detection ***
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
Estimate age and gender using the pre-trained model 'age-gender-recognition-retail-0013'.
The recognizable age range is 18 - 75, and the training set reportedly does not include children.
Inputs
  Name: input, shape: [1x3x62x62] - An input image in [1xCxHxW] format. Expected color order is BGR.
※ The input Name appears to be wrong: 'input' → 'data'
Outputs
  Name: age_conv3, shape: [1, 1, 1, 1] - Estimated age divided by 100.
  Name: prob, shape: [1, 2, 1, 1] - Softmax output across 2 type classes [female, male].
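The post-processing therefore amounts to one multiplication and one argmax; a minimal sketch, assuming out holds the result of an infer() call on this model as in the scripts below:

import numpy as np

label = ('Female', 'Male')
age = out['age_conv3'][0][0][0][0] * 100    # [1, 1, 1, 1] -> scalar, scaled to years
gender = label[np.argmax(out['prob'][0])]   # [1, 2, 1, 1] -> 'Female' or 'Male'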
# -*- coding: utf-8 -*-
## OpenVINO Interactive face detection demo
## step-3  2021.01.18  update 2021.02.10

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import matplotlib.pyplot as plt

# plot setting
rows = 6
columns = 6
plt.rcParams['figure.figsize'] = (18.0, 18.0)
figsize = (12, 12)

# Read Image
image_url = "https://how-old.net/Images/faces2/main001.jpg"
response = requests.get(image_url)
frame = np.array(Image.open(BytesIO(response.content)))
print("Original Shape:{}".format(frame.shape[:2]))

# resize image with keeping frame width
scale = 640 / frame.shape[1]
frame = cv2.resize(frame, dsize=None, fx=scale, fy=scale)
frame_h, frame_w = frame.shape[:2]
init_frame = frame.copy()
print("frame_h, frame_w:{}".format(frame.shape[:2]))

#plt.figure(figsize=figsize)
#plt.imshow(frame)
#plt.show()

# for an image on local store
# OpenCV uses BGR as its default color (matplotlib uses RGB)
# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# plt.imshow(face_frame)

# Face Detection
# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
print("OpenVINO inference_engine:", get_version())

# Load the model
ie = IECore()
fp_path = "./FP16/"
model_xml = fp_path + "face-detection-adas-0001.xml"
model_bin = fp_path + "face-detection-adas-0001.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 3. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 5. Create Async Request
in_frame = cv2.resize(frame, (w, h))
in_frame = in_frame.transpose((2, 0, 1))
in_frame = in_frame.reshape((n, c, h, w))
exec_net.start_async(request_id=0, inputs={input_blob: in_frame})  # res's shape: [1, 1, 200, 7]

# 6. Receive Async Request
if exec_net.requests[0].wait(-1) == 0:
    res = exec_net.requests[0].outputs[out_blob]
    faces = res[0][:, np.where(res[0][0][:, 2] > 0.5)]  # prob threshold : 0.5

# 7. draw faces
frame = init_frame.copy()
for i, face in enumerate(faces[0][0]):
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    area = ((ymax - ymin) * (xmax - xmin))
    print(i, frame_w, frame_h, xmin, ymin, xmax, ymax, np.sqrt(area)/np.sqrt(frame_w*frame_h)*100)
    """
    xmin = int(face[3] * frame_w)
    ymin = int(face[4] * frame_h)
    xmax = int(face[5] * frame_w)
    ymax = int(face[6] * frame_h)
    """
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.putText(frame, str(i), (xmin + 3, ymin + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

plt.figure(figsize=figsize)
plt.imshow(frame)
faces

# Age/Gender
label = ('Female', 'Male')

# 1. Load the model
model_xml = fp_path + "age-gender-recognition-retail-0013.xml"
model_bin = fp_path + "age-gender-recognition-retail-0013.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 2. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 4. Create Async Request per face
frame = init_frame.copy()
face_id = 0
for face in faces[0][0]:
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    face_frame = frame[ymin:ymax, xmin:xmax]
    in_frame = cv2.resize(face_frame, (w, h))
    in_frame = in_frame.transpose((2, 0, 1))
    in_frame = in_frame.reshape((n, c, h, w))
    exec_net.start_async(request_id=0, inputs={input_blob: in_frame})

    # 5. Get response
    if exec_net.requests[0].wait(-1) == 0:
        age = exec_net.requests[0].outputs['age_conv3']
        prob = exec_net.requests[0].outputs['prob']
        age = age[0][0][0][0] * 100
        gender = label[np.argmax(prob[0])]
        ax = plt.subplot(rows, columns, face_id + 1)
        ax.set_title("age:{:.1f} {}".format(age, gender))
        plt.imshow(face_frame)
        face_id += 1

    # sync
    #res = exec_net.infer(inputs={input_blob: in_frame})
    #age = res['age_conv3'][0][0][0][0] * 100
    #gender = self.label[np.argmax(res['prob'][0])]

plt.show()
pi@raspberrypi:~/workspace $ python3 01_face_demo3.py
Original Shape:(1155, 1500)
frame_h, frame_w:(493, 640)
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
input blob: name="data", output blob: name="detection_out"
input.shape:[1, 3, 384, 672]
output.shape:[1, 1, 200, 7]
01_face_demo3.py:77: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  res = exec_net.requests[0].outputs[out_blob]
0 640 493 221 67 305 196 18.531947088212956
1 640 493 431 100 519 207 17.275068505983118
2 640 493 514 250 600 367 17.857831050198797
3 640 493 39 111 142 229 19.62664961865753
input blob: name="data", output blob: name="age_conv3"
input.shape:[1, 3, 62, 62]
output.shape:[1, 1, 1, 1]

※ A deprecation warning remains; it will be looked into at a later date.
~/workspace $ vi 01_face_demo3a.py

# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO Interactive face detection demo
##   ** Age/Gender Recognition **  step-3
##   2021.01.18 Masahiro Izutsu
##
##   2021.02.10 warning error
##------------------------------------------

import cv2
import numpy as np

# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

# Title
title = 'Age/Gender Recognition'
print('*** {} ***'.format(title))

# Version info
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())

# Load the model (face detection) face-detection-adas-0001
ie = IECore()
net = ie.read_network(model='FP16/face-detection-adas-0001.xml', weights='FP16/face-detection-adas-0001.bin')
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# Input/output settings (face detection)
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape

# Load the model (age/gender) age-gender-recognition-retail-0013
net_age = ie.read_network(model='FP16/age-gender-recognition-retail-0013.xml', weights='FP16/age-gender-recognition-retail-0013.bin')
exec_net_age = ie.load_network(network=net_age, device_name="MYRIAD")

# Input/output settings (age/gender) - read from net_age, not net
input_blob_age = net_age.input_info['data'].name
out_blob_age = next(iter(net_age.outputs))
n_age, c_age, h_age, w_age = net_age.input_info[input_blob_age].input_data.shape

# Gender labels
label = ('Female', 'Male')

# Prepare the camera
cap = cv2.VideoCapture(0)

# Main loop
while True:
    ret, frame = cap.read()

    # Reload on error
    if ret == False:
        continue

    # Convert to the input data format
    img = cv2.resize(frame, (w, h))     # resize
    img = img.transpose((2, 0, 1))      # HWC > CHW
    img = np.expand_dims(img, axis=0)   # adjust dimensions

    # Run inference
    out = exec_net.infer(inputs={'data': img})

    # Extract only the required data from the output
    out = out['detection_out']
    out = np.squeeze(out)               # remove all dimensions of size 1

    # Draw the title
    cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(0, 200, 200), lineType=cv2.LINE_AA)

    # Process each detected face region one by one
    for detection in out:
        # Get the confidence value
        confidence = float(detection[2])

        # Convert the bounding-box coordinates to the input-image scale
        xmin = int(detection[3] * frame.shape[1])
        ymin = int(detection[4] * frame.shape[0])
        xmax = int(detection[5] * frame.shape[1])
        ymax = int(detection[6] * frame.shape[0])

        # Show the bounding box only when confidence > 0.5
        if confidence > 0.5:
            # Clip the face region to the camera frame; the min values in particular cause an error if left unclipped
            if xmin < 0:
                xmin = 0
            if ymin < 0:
                ymin = 0
            if xmax > frame.shape[1]:
                xmax = frame.shape[1]
            if ymax > frame.shape[0]:
                ymax = frame.shape[0]

            # Crop only the face region
            frame_face = frame[ymin:ymax, xmin:xmax]

            # Convert to the input data format
            img = cv2.resize(frame_face, (62, 62))  # resize
            img = img.transpose((2, 0, 1))          # HWC > CHW
            img = np.expand_dims(img, axis=0)       # adjust dimensions

            # Run inference
            out = exec_net_age.infer(inputs={'data': img})

            # Extract only the required data from the output
            age = out['age_conv3']
            prob = out['prob']
            age = age[0][0][0][0] * 100
            gender = label[np.argmax(prob[0])]
            if gender == label[0]:
                cor = (0, 0, 255)
            else:
                cor = (0, 255, 0)
            out_str = gender + ':' + '{:>5.1f}'.format(age)

            # Show the bounding box (face region)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), cor, thickness=2)
            cv2.putText(frame, out_str, (xmin, ymin - 4), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=cor, lineType=cv2.LINE_AA)

    # Show the image
    cv2.imshow('frame', frame)

    # Exit when any key is pressed
    key = cv2.waitKey(1)
    if key != -1:
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()
pi@raspberrypi:~/workspace $ python3 01_face_demo3a.py
*** Age/Gender Recognition ***
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
Perform emotion recognition using the pre-trained model 'emotions-recognition-retail-0003'.
Five emotions can be recognized: 'neutral', 'happy', 'sad', 'surprise', 'anger'.
Inputs
  Name: input, shape: [1x3x64x64] - An input image in [1xCxHxW] format. Expected color order is BGR.
※ The input Name appears to be wrong: 'input' → 'data'
Outputs
  name: "prob_emotion", shape: [1, 5, 1, 1] - Softmax output across five emotions ('neutral', 'happy', 'sad', 'surprise', 'anger').
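Post-processing is a single argmax over the five-class softmax; a minimal sketch, assuming out holds the result of an infer() call on this model:

import numpy as np

label = ('neutral', 'happy', 'sad', 'surprise', 'anger')
prob = np.squeeze(out['prob_emotion'])   # [1, 5, 1, 1] -> (5,)
emotion = label[np.argmax(prob)]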
~/workspace $ vi 01_face_demo4.py

# -*- coding: utf-8 -*-
## OpenVINO Interactive face detection demo
## step-4  2021.01.18  update 2021.02.10

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import matplotlib.pyplot as plt

# plot setting
rows = 6
columns = 6
plt.rcParams['figure.figsize'] = (18.0, 18.0)
figsize = (12, 12)

# Read Image
image_url = "https://how-old.net/Images/faces2/main001.jpg"
response = requests.get(image_url)
frame = np.array(Image.open(BytesIO(response.content)))
print("Original Shape:{}".format(frame.shape[:2]))

# resize image with keeping frame width
scale = 640 / frame.shape[1]
frame = cv2.resize(frame, dsize=None, fx=scale, fy=scale)
frame_h, frame_w = frame.shape[:2]
init_frame = frame.copy()
print("frame_h, frame_w:{}".format(frame.shape[:2]))

#plt.figure(figsize=figsize)
#plt.imshow(frame)
#plt.show()

# for an image on local store
# OpenCV uses BGR as its default color (matplotlib uses RGB)
# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# plt.imshow(face_frame)

# Face Detection
# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
print("OpenVINO inference_engine:", get_version())

# Load the model
ie = IECore()
fp_path = "./FP16/"
model_xml = fp_path + "face-detection-adas-0001.xml"
model_bin = fp_path + "face-detection-adas-0001.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 3. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 5. Create Async Request
in_frame = cv2.resize(frame, (w, h))
in_frame = in_frame.transpose((2, 0, 1))
in_frame = in_frame.reshape((n, c, h, w))
exec_net.start_async(request_id=0, inputs={input_blob: in_frame})  # res's shape: [1, 1, 200, 7]

# 6. Receive Async Request
if exec_net.requests[0].wait(-1) == 0:
    res = exec_net.requests[0].outputs[out_blob]
    faces = res[0][:, np.where(res[0][0][:, 2] > 0.5)]  # prob threshold : 0.5

# 7. draw faces
frame = init_frame.copy()
for i, face in enumerate(faces[0][0]):
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    area = ((ymax - ymin) * (xmax - xmin))
    print(i, frame_w, frame_h, xmin, ymin, xmax, ymax, np.sqrt(area)/np.sqrt(frame_w*frame_h)*100)
    """
    xmin = int(face[3] * frame_w)
    ymin = int(face[4] * frame_h)
    xmax = int(face[5] * frame_w)
    ymax = int(face[6] * frame_h)
    """
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.putText(frame, str(i), (xmin + 3, ymin + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

plt.figure(figsize=figsize)
plt.imshow(frame)
faces

# Emotion Recognition
label = ('neutral', 'happy', 'sad', 'surprise', 'anger')

# 1. Load the model
model_xml = fp_path + "emotions-recognition-retail-0003.xml"
model_bin = fp_path + "emotions-recognition-retail-0003.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 2. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 4. Create Async Request
frame = init_frame.copy()
face_id = 0
for face in faces[0][0]:
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    face_frame = frame[ymin:ymax, xmin:xmax]
    in_frame = cv2.resize(face_frame, (w, h))
    in_frame = in_frame.transpose((2, 0, 1))
    in_frame = in_frame.reshape((n, c, h, w))
    exec_net.start_async(request_id=0, inputs={input_blob: in_frame})

    # 5. Get response
    if exec_net.requests[0].wait(-1) == 0:
        res = exec_net.requests[0].outputs[out_blob]
        emotion = label[np.argmax(res[0])]
        ax = plt.subplot(rows, columns, face_id + 1)
        ax.set_title("{}".format(emotion))
        plt.imshow(face_frame)
        face_id += 1

plt.show()
pi@raspberrypi:~/workspace $ python3 01_face_demo4.py
Original Shape:(1155, 1500)
frame_h, frame_w:(493, 640)
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
input blob: name="data", output blob: name="detection_out"
input.shape:[1, 3, 384, 672]
output.shape:[1, 1, 200, 7]
01_face_demo4.py:76: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  res = exec_net.requests[0].outputs[out_blob]
0 640 493 221 67 305 196 18.531947088212956
1 640 493 431 100 519 207 17.275068505983118
2 640 493 514 250 600 367 17.857831050198797
3 640 493 39 111 142 229 19.62664961865753
input blob: name="data", output blob: name="prob_emotion"
input.shape:[1, 3, 64, 64]
output.shape:[1, 5, 1, 1]

※ A deprecation warning remains; it will be looked into at a later date.
~/workspace $ vi 01_face_demo4a.py

# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO Interactive face detection demo
##   ** Emotion Recognition **  step-4
##   2021.01.18 Masahiro Izutsu
##
##   2021.02.10 warning error
##------------------------------------------

import cv2
import numpy as np

# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

# Title
title = 'Emotion Recognition'
print('*** {} ***'.format(title))

# Version info
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())

# Load the model (face detection) face-detection-adas-0001
ie = IECore()
net = ie.read_network(model='FP16/face-detection-adas-0001.xml', weights='FP16/face-detection-adas-0001.bin')
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# Input/output settings (face detection)
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape

# Load the model (emotion recognition) emotions-recognition-retail-0003
net_emo = ie.read_network(model='FP16/emotions-recognition-retail-0003.xml', weights='FP16/emotions-recognition-retail-0003.bin')
exec_net_emo = ie.load_network(network=net_emo, device_name="MYRIAD")

# Input/output settings (emotion) - read from net_emo, not net
input_blob_emo = net_emo.input_info['data'].name
out_blob_emo = next(iter(net_emo.outputs))
n_emo, c_emo, h_emo, w_emo = net_emo.input_info[input_blob_emo].input_data.shape

# Emotion labels
label = ('neutral', 'happy', 'sad', 'surprise', 'anger')

# Prepare the camera
cap = cv2.VideoCapture(0)

# Main loop
while True:
    ret, frame = cap.read()

    # Reload on error
    if ret == False:
        continue

    # Convert to the input data format
    img = cv2.resize(frame, (w, h))     # resize
    img = img.transpose((2, 0, 1))      # HWC > CHW
    img = np.expand_dims(img, axis=0)   # adjust dimensions

    # Run inference
    out = exec_net.infer(inputs={'data': img})

    # Extract only the required data from the output
    out = out['detection_out']
    out = np.squeeze(out)               # remove all dimensions of size 1

    # Draw the title
    cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

    # Process each detected face region one by one
    for detection in out:
        # Get the confidence value
        confidence = float(detection[2])

        # Convert the bounding-box coordinates to the input-image scale
        xmin = int(detection[3] * frame.shape[1])
        ymin = int(detection[4] * frame.shape[0])
        xmax = int(detection[5] * frame.shape[1])
        ymax = int(detection[6] * frame.shape[0])

        # Show the bounding box only when confidence > 0.5
        if confidence > 0.5:
            # Clip the face region to the camera frame; the min values in particular cause an error if left unclipped
            if xmin < 0:
                xmin = 0
            if ymin < 0:
                ymin = 0
            if xmax > frame.shape[1]:
                xmax = frame.shape[1]
            if ymax > frame.shape[0]:
                ymax = frame.shape[0]

            # Crop only the face region
            frame_face = frame[ymin:ymax, xmin:xmax]

            # Convert to the input data format
            img = cv2.resize(frame_face, (64, 64))  # resize
            img = img.transpose((2, 0, 1))          # HWC > CHW
            img = np.expand_dims(img, axis=0)       # adjust dimensions

            # Run inference
            out = exec_net_emo.infer(inputs={'data': img})

            # Extract only the required data from the output
            out = out['prob_emotion']
            out = np.squeeze(out)                   # remove unneeded dimensions

            # Get the index with the largest output value
            emotion = label[np.argmax(out)]

            # Show the bounding box (face region)
            cor = (255, 255, 0)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), cor, thickness=2)
            cv2.putText(frame, emotion, (xmin, ymin - 4), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.6, color=cor, lineType=cv2.LINE_AA)

    # Show the image
    cv2.imshow('frame', frame)

    # Exit when any key is pressed
    key = cv2.waitKey(1)
    if key != -1:
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()
pi@raspberrypi:~/workspace $ python3 01_face_demo4a.py
*** Emotion Recognition ***
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
Extract facial landmarks using the pre-trained model 'facial-landmarks-35-adas-0002'.
The file name of the pre-trained model has changed from the reference example:
'facial-landmarks-35-adas-0001' → 'facial-landmarks-35-adas-0002'
Inputs
  Blob in the format [BxCxHxW] where:
    B - batch size
    C - number of channels
    H - image height
    W - image width
Outputs
  The net outputs a blob with the shape: [1, 70], containing a row-vector of 70 floating point values for 35 landmarks' normed coordinates in the form (x0, y0, x1, y1, ..., x34, y34).
  Output layer name in Inference Engine format: align_fc3
  Output layer name in Caffe* format: align_fc3
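Since the 70 values are normalized to the cropped face region, converting them to pixel coordinates only needs the crop's size and top-left offset; a minimal sketch, assuming normed_landmarks holds the 70 'align_fc3' values and (xmin, ymin) / face_frame describe the face crop as in the scripts below:

for i in range(35):
    x_lm = xmin + face_frame.shape[1] * normed_landmarks[2 * i]       # x0, x1, ...
    y_lm = ymin + face_frame.shape[0] * normed_landmarks[2 * i + 1]   # y0, y1, ...
    cv2.circle(frame, (int(x_lm), int(y_lm)), 2, (255, 255, 0), -1)   # one dot per landmark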
~/workspace $ vi 01_face_demo5.py

# -*- coding: utf-8 -*-
## OpenVINO Interactive face detection demo
## step-5  2021.01.18  update 2021.02.10

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import matplotlib.pyplot as plt

# plot setting
rows = 6
columns = 6
plt.rcParams['figure.figsize'] = (18.0, 18.0)
figsize = (12, 12)

# Read Image
image_url = "https://how-old.net/Images/faces2/main001.jpg"
response = requests.get(image_url)
frame = np.array(Image.open(BytesIO(response.content)))
print("Original Shape:{}".format(frame.shape[:2]))

# resize image with keeping frame width
scale = 640 / frame.shape[1]
frame = cv2.resize(frame, dsize=None, fx=scale, fy=scale)
frame_h, frame_w = frame.shape[:2]
init_frame = frame.copy()
print("frame_h, frame_w:{}".format(frame.shape[:2]))

#plt.figure(figsize=figsize)
#plt.imshow(frame)
#plt.show()

# for an image on local store
# OpenCV uses BGR as its default color (matplotlib uses RGB)
# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# plt.imshow(face_frame)

# Face Detection
# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
print("OpenVINO inference_engine:", get_version())

# Load the model
ie = IECore()
fp_path = "./FP16/"
model_xml = fp_path + "face-detection-adas-0001.xml"
model_bin = fp_path + "face-detection-adas-0001.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 3. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 5. Create Async Request
in_frame = cv2.resize(frame, (w, h))
in_frame = in_frame.transpose((2, 0, 1))
in_frame = in_frame.reshape((n, c, h, w))
exec_net.start_async(request_id=0, inputs={input_blob: in_frame})  # res's shape: [1, 1, 200, 7]

# 6. Receive Async Request
if exec_net.requests[0].wait(-1) == 0:
    res = exec_net.requests[0].outputs[out_blob]
    faces = res[0][:, np.where(res[0][0][:, 2] > 0.5)]  # prob threshold : 0.5

# 7. draw faces
frame = init_frame.copy()
for i, face in enumerate(faces[0][0]):
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    area = ((ymax - ymin) * (xmax - xmin))
    print(i, frame_w, frame_h, xmin, ymin, xmax, ymax, np.sqrt(area)/np.sqrt(frame_w*frame_h)*100)
    """
    xmin = int(face[3] * frame_w)
    ymin = int(face[4] * frame_h)
    xmax = int(face[5] * frame_w)
    ymax = int(face[6] * frame_h)
    """
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.putText(frame, str(i), (xmin + 3, ymin + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

#plt.figure(figsize=figsize)
#plt.imshow(frame)
#faces

# landmark
# 1. Load the model
model_xml = fp_path + "facial-landmarks-35-adas-0002.xml"
model_bin = fp_path + "facial-landmarks-35-adas-0002.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 2. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 4. Create Async Request
frame = init_frame.copy()
for face in faces[0][0]:
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    face_frame = frame[ymin:ymax, xmin:xmax]
    in_frame = cv2.resize(face_frame, (w, h))
    in_frame = in_frame.transpose((2, 0, 1))
    in_frame = in_frame.reshape((n, c, h, w))
    exec_net.start_async(request_id=0, inputs={input_blob: in_frame})

    # 5. Get Response
    if exec_net.requests[0].wait(-1) == 0:
        if net.outputs[out_blob].shape == [1, 10, 1, 1]:
            # for landmarks-regression-retail-0009
            normed_landmarks = exec_net.requests[0].outputs[out_blob].reshape(1, 10)[0]
        else:
            # for facial-landmarks-35-adas-0002
            normed_landmarks = exec_net.requests[0].outputs[out_blob][0]

    # 6. draw Response
    for i in range(int(normed_landmarks.size / 2)):
        normed_x = normed_landmarks[2 * i]
        normed_y = normed_landmarks[2 * i + 1]
        x_lm = xmin + face_frame.shape[1] * normed_x
        y_lm = ymin + face_frame.shape[0] * normed_y
        cv2.circle(frame, (int(x_lm), int(y_lm)), 1 + int(0.03 * face_frame.shape[1]), (255, 255, 0), -1)

plt.figure(figsize=figsize)
plt.imshow(frame)
normed_landmarks
plt.show()
pi@raspberrypi:~/workspace $ python3 01_face_demo5.py
Original Shape:(1155, 1500)
frame_h, frame_w:(493, 640)
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
input blob: name="data", output blob: name="detection_out"
input.shape:[1, 3, 384, 672]
output.shape:[1, 1, 200, 7]
01_face_demo5.py:76: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  res = exec_net.requests[0].outputs[out_blob]
0 640 493 221 67 305 196 18.531947088212956
1 640 493 431 100 519 207 17.275068505983118
2 640 493 514 250 600 367 17.857831050198797
3 640 493 39 111 142 229 19.62664961865753
input blob: name="data", output blob: name="align_fc3"
input.shape:[1, 3, 60, 60]
output.shape:[1, 70]

※ A deprecation warning remains; it will be looked into at a later date.
~/workspace $ vi 01_face_demo5a.py

# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO Interactive face detection demo
##   ** Facial Landmarks Recognition **  step-5
##   2021.01.18 Masahiro Izutsu
##
##   2021.02.10 warning error
##------------------------------------------

import cv2
import numpy as np

# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

# Title
title = 'Facial Landmarks Recognition'
print('*** {} ***'.format(title))

# Version info
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())

# Load the model (face detection) face-detection-adas-0001
ie = IECore()
net = ie.read_network(model='FP16/face-detection-adas-0001.xml', weights='FP16/face-detection-adas-0001.bin')
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# Input/output settings (face detection)
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape

# Load the model (facial landmarks) facial-landmarks-35-adas-0002
net_mark = ie.read_network(model='FP16/facial-landmarks-35-adas-0002.xml', weights='FP16/facial-landmarks-35-adas-0002.bin')
exec_net_mark = ie.load_network(network=net_mark, device_name="MYRIAD")

# Input/output settings (facial landmarks) - read from net_mark, not net
input_blob_mark = net_mark.input_info['data'].name
out_blob_mark = next(iter(net_mark.outputs))
n_mark, c_mark, h_mark, w_mark = net_mark.input_info[input_blob_mark].input_data.shape

# Prepare the camera
cap = cv2.VideoCapture(0)

# Main loop
while True:
    ret, frame = cap.read()

    # Reload on error
    if ret == False:
        continue

    # Convert to the input data format
    img = cv2.resize(frame, (w, h))     # resize
    img = img.transpose((2, 0, 1))      # HWC > CHW
    img = np.expand_dims(img, axis=0)   # adjust dimensions

    # Run inference
    out = exec_net.infer(inputs={'data': img})

    # Extract only the required data from the output
    out = out['detection_out']
    out = np.squeeze(out)               # remove all dimensions of size 1

    # Draw the title
    cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

    # Process each detected face region one by one
    for detection in out:
        # Get the confidence value
        confidence = float(detection[2])

        # Convert the bounding-box coordinates to the input-image scale
        xmin = int(detection[3] * frame.shape[1])
        ymin = int(detection[4] * frame.shape[0])
        xmax = int(detection[5] * frame.shape[1])
        ymax = int(detection[6] * frame.shape[0])

        # Show the bounding box only when confidence > 0.5
        if confidence > 0.5:
            # Clip the face region to the camera frame; the min values in particular cause an error if left unclipped
            if xmin < 0:
                xmin = 0
            if ymin < 0:
                ymin = 0
            if xmax > frame.shape[1]:
                xmax = frame.shape[1]
            if ymax > frame.shape[0]:
                ymax = frame.shape[0]

            # Crop only the face region
            frame_face = frame[ymin:ymax, xmin:xmax]

            # Convert to the input data format
            img = cv2.resize(frame_face, (60, 60))  # resize
            img = img.transpose((2, 0, 1))          # HWC > CHW
            img = np.expand_dims(img, axis=0)       # adjust dimensions

            # Run inference
            out = exec_net_mark.infer(inputs={'data': img})

            # Extract only the required data from the output
            out = out['align_fc3']
            normed_landmarks = out[0]

            # Draw the landmarks
            for i in range(int(normed_landmarks.size / 2)):
                normed_x = normed_landmarks[2 * i]
                normed_y = normed_landmarks[2 * i + 1]
                x_lm = xmin + frame_face.shape[1] * normed_x
                y_lm = ymin + frame_face.shape[0] * normed_y
                cv2.circle(frame, (int(x_lm), int(y_lm)), 1 + int(0.03 * frame_face.shape[1]), (255, 255, 0), -1)

    # Show the image
    cv2.imshow('frame', frame)

    # Exit when any key is pressed
    key = cv2.waitKey(1)
    if key != -1:
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()
pi@raspberrypi:~/workspace $ python3 01_face_demo5a.py
*** Facial Landmarks Recognition ***
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
Estimate the head pose using the pre-trained model 'head-pose-estimation-adas-0001'.
Inputs
  name: "data", shape: [1x3x60x60] - An input image in [1xCxHxW] format. Expected color order is BGR.
Outputs
  Output layer names in Inference Engine format:
    name: "angle_y_fc", shape: [1, 1] - Estimated yaw (in degrees).
    name: "angle_p_fc", shape: [1, 1] - Estimated pitch (in degrees).
    name: "angle_r_fc", shape: [1, 1] - Estimated roll (in degrees).
  Output layer names in Caffe* format:
    name: "fc_y", shape: [1, 1] - Estimated yaw (in degrees).
    name: "fc_p", shape: [1, 1] - Estimated pitch (in degrees).
    name: "fc_r", shape: [1, 1] - Estimated roll (in degrees).
  Each output contains one float value that represents value in Tait-Bryan angles (yaw, pitch or roll).
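Reading the three angles is then straightforward; a minimal sketch, assuming out holds the result of an infer() call on this model (each output blob has shape [1, 1]):

yaw   = out['angle_y_fc'][0][0]   # rotation around the y axis, in degrees
pitch = out['angle_p_fc'][0][0]   # rotation around the x axis, in degrees
roll  = out['angle_r_fc'][0][0]   # rotation around the z axis, in degrees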
~/workspace $ vi 01_face_demo6.py

# -*- coding: utf-8 -*-
## OpenVINO Interactive face detection demo
## step-6  2021.01.18  update 2021.02.10

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import math
import matplotlib.pyplot as plt

# plot setting
rows = 6
columns = 6
plt.rcParams['figure.figsize'] = (18.0, 18.0)
figsize = (12, 12)

# Read Image
image_url = "https://how-old.net/Images/faces2/main001.jpg"
response = requests.get(image_url)
frame = np.array(Image.open(BytesIO(response.content)))
print("Original Shape:{}".format(frame.shape[:2]))

# resize image with keeping frame width
scale = 640 / frame.shape[1]
frame = cv2.resize(frame, dsize=None, fx=scale, fy=scale)
frame_h, frame_w = frame.shape[:2]
init_frame = frame.copy()
print("frame_h, frame_w:{}".format(frame.shape[:2]))

#plt.figure(figsize=figsize)
#plt.imshow(frame)
#plt.show()

# for an image on local store
# OpenCV uses BGR as its default color (matplotlib uses RGB)
# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# plt.imshow(face_frame)

# Face Detection
# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version
print("OpenVINO inference_engine:", get_version())

# Load the model
ie = IECore()
fp_path = "./FP16/"
model_xml = fp_path + "face-detection-adas-0001.xml"
model_bin = fp_path + "face-detection-adas-0001.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 3. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 5. Create Async Request
in_frame = cv2.resize(frame, (w, h))
in_frame = in_frame.transpose((2, 0, 1))
in_frame = in_frame.reshape((n, c, h, w))
exec_net.start_async(request_id=0, inputs={input_blob: in_frame})  # res's shape: [1, 1, 200, 7]

# 6. Receive Async Request
if exec_net.requests[0].wait(-1) == 0:
    res = exec_net.requests[0].outputs[out_blob]
    faces = res[0][:, np.where(res[0][0][:, 2] > 0.5)]  # prob threshold : 0.5

# 7. draw faces
frame = init_frame.copy()
for i, face in enumerate(faces[0][0]):
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    area = ((ymax - ymin) * (xmax - xmin))
    print(i, frame_w, frame_h, xmin, ymin, xmax, ymax, np.sqrt(area)/np.sqrt(frame_w*frame_h)*100)
    """
    xmin = int(face[3] * frame_w)
    ymin = int(face[4] * frame_h)
    xmax = int(face[5] * frame_w)
    ymax = int(face[6] * frame_h)
    """
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.putText(frame, str(i), (xmin + 3, ymin + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

#plt.figure(figsize=figsize)
#plt.imshow(frame)
#faces

# Head Pose Estimation
# head pose
# 0. define functions
def build_camera_matrix(center_of_face, focal_length):
    cx = int(center_of_face[0])
    cy = int(center_of_face[1])
    camera_matrix = np.zeros((3, 3), dtype='float32')
    camera_matrix[0][0] = focal_length
    camera_matrix[0][2] = cx
    camera_matrix[1][1] = focal_length
    camera_matrix[1][2] = cy
    camera_matrix[2][2] = 1
    return camera_matrix

def draw_axes(frame, center_of_face, yaw, pitch, roll, scale, focal_length):
    yaw *= np.pi / 180.0
    pitch *= np.pi / 180.0
    roll *= np.pi / 180.0
    cx = int(center_of_face[0])
    cy = int(center_of_face[1])
    Rx = np.array([[1, 0, 0],
                   [0, math.cos(pitch), -math.sin(pitch)],
                   [0, math.sin(pitch), math.cos(pitch)]])
    Ry = np.array([[math.cos(yaw), 0, -math.sin(yaw)],
                   [0, 1, 0],
                   [math.sin(yaw), 0, math.cos(yaw)]])
    Rz = np.array([[math.cos(roll), -math.sin(roll), 0],
                   [math.sin(roll), math.cos(roll), 0],
                   [0, 0, 1]])
    #R = np.dot(Rz, Ry, Rx)
    #ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
    #R = np.dot(Rz, np.dot(Ry, Rx))
    R = Rz @ Ry @ Rx
    print(R)
    camera_matrix = build_camera_matrix(center_of_face, focal_length)
    xaxis = np.array(([1 * scale, 0, 0]), dtype='float32').reshape(3, 1)
    yaxis = np.array(([0, -1 * scale, 0]), dtype='float32').reshape(3, 1)
    zaxis = np.array(([0, 0, -1 * scale]), dtype='float32').reshape(3, 1)
    zaxis1 = np.array(([0, 0, 1 * scale]), dtype='float32').reshape(3, 1)
    o = np.array(([0, 0, 0]), dtype='float32').reshape(3, 1)
    o[2] = camera_matrix[0][0]
    xaxis = np.matmul(R, xaxis) + o
    yaxis = np.matmul(R, yaxis) + o
    zaxis = np.matmul(R, zaxis) + o
    zaxis1 = np.matmul(R, zaxis1) + o
    xp2 = (xaxis[0] / xaxis[2] * camera_matrix[0][0]) + cx
    yp2 = (xaxis[1] / xaxis[2] * camera_matrix[1][1]) + cy
    p2 = (int(xp2), int(yp2))
    cv2.line(frame, (cx, cy), p2, (0, 0, 255), 2)
    xp2 = (yaxis[0] / yaxis[2] * camera_matrix[0][0]) + cx
    yp2 = (yaxis[1] / yaxis[2] * camera_matrix[1][1]) + cy
    p2 = (int(xp2), int(yp2))
    cv2.line(frame, (cx, cy), p2, (0, 255, 0), 2)
    xp1 = (zaxis1[0] / zaxis1[2] * camera_matrix[0][0]) + cx
    yp1 = (zaxis1[1] / zaxis1[2] * camera_matrix[1][1]) + cy
    p1 = (int(xp1), int(yp1))
    xp2 = (zaxis[0] / zaxis[2] * camera_matrix[0][0]) + cx
    yp2 = (zaxis[1] / zaxis[2] * camera_matrix[1][1]) + cy
    p2 = (int(xp2), int(yp2))
    cv2.line(frame, p1, p2, (255, 0, 0), 2)
    cv2.circle(frame, p2, 3, (255, 0, 0), 2)
    return frame

# 1. Load the model
model_xml = fp_path + "head-pose-estimation-adas-0001.xml"
model_bin = fp_path + "head-pose-estimation-adas-0001.bin"
net = ie.read_network(model=model_xml, weights=model_bin)
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# 2. Configure input & output
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape
print('input blob: name="{}", output blob: name="{}"'.format(input_blob, out_blob))
print("input.shape:{}\noutput.shape:{}".format(net.input_info[input_blob].input_data.shape, net.outputs[out_blob].shape))

# 4. Create Async Request
scale = 50
focal_length = 950.0
frame = init_frame.copy()
for face in faces[0][0]:
    box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
    (xmin, ymin, xmax, ymax) = box.astype("int")
    face_frame = frame[ymin:ymax, xmin:xmax]
    in_frame = cv2.resize(face_frame, (w, h))
    in_frame = in_frame.transpose((2, 0, 1))
    in_frame = in_frame.reshape((n, c, h, w))
    exec_net.start_async(request_id=0, inputs={input_blob: in_frame})

    if exec_net.requests[0].wait(-1) == 0:
        yaw = .0    # Axis of rotation: y
        pitch = .0  # Axis of rotation: x
        roll = .0   # Axis of rotation: z
        # Each output contains one float value that represents value in Tait-Bryan angles (yaw, pitch or roll).
        yaw = exec_net.requests[0].outputs['angle_y_fc'][0][0]
        pitch = exec_net.requests[0].outputs['angle_p_fc'][0][0]
        roll = exec_net.requests[0].outputs['angle_r_fc'][0][0]
        print("yaw:{:f}, pitch:{:f}, roll:{:f}".format(yaw, pitch, roll))
        center_of_face = (xmin + face_frame.shape[1] / 2, ymin + face_frame.shape[0] / 2, 0)
        draw_axes(frame, center_of_face, yaw, pitch, roll, scale, focal_length)

plt.figure(figsize=figsize)
plt.imshow(frame)
plt.show()
pi@raspberrypi:~/workspace $ python3 01_face_demo6.py
Original Shape:(1155, 1500)
frame_h, frame_w:(493, 640)
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
input blob: name="data", output blob: name="detection_out"
input.shape:[1, 3, 384, 672]
output.shape:[1, 1, 200, 7]
01_face_demo6.py:77: DeprecationWarning: 'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.
  res = exec_net.requests[0].outputs[out_blob]
0 640 493 221 67 305 196 18.531947088212956
1 640 493 431 100 519 207 17.275068505983118
2 640 493 514 250 600 367 17.857831050198797
3 640 493 39 111 142 229 19.62664961865753
input blob: name="data", output blob: name="angle_p_fc"
input.shape:[1, 3, 60, 60]
output.shape:[1, 1]
yaw:-5.628906, pitch:22.250000, roll:-1.697266
[[ 0.99474143  0.06453661  0.07952678]
 [-0.02947572  0.92403442 -0.38117131]
 [-0.09808499  0.37682279  0.92107759]]
yaw:-12.828125, pitch:8.781250, roll:-11.359375
[[ 0.95594049  0.22788476  0.1850576 ]
 [-0.19204616  0.96224301 -0.1928903 ]
 [-0.22202715  0.14885205  0.96361144]]
yaw:-5.777344, pitch:18.953125, roll:-3.296875
[[ 0.99327395  0.08703231  0.07636905]
 [-0.05721746  0.94233904 -0.32973217]
 [-0.10066289  0.32314473  0.94098059]]
yaw:4.722656, pitch:25.234375, roll:15.250000
[[ 0.96151178 -0.27179476  0.04028278]
 [ 0.2621382   0.86348661 -0.43089957]
 [ 0.0823326   0.42487467  0.90150034]]

※ A deprecation warning remains; it will be looked into at a later date.
~/workspace $ vi 01_face_demo6a.py

# -*- coding: utf-8 -*-
##------------------------------------------
## OpenVINO Interactive face detection demo
##   ** Head Pose Estimation **  step-6
##   2021.01.18 Masahiro Izutsu
##
##   2021.02.10 warning error
##------------------------------------------

import cv2
import numpy as np
import math

# Load modules
from openvino.inference_engine import IECore
from openvino.inference_engine import get_version

# Title
title = 'Head Pose Estimation'
print('*** {} ***'.format(title))

# Function definitions
def build_camera_matrix(center_of_face, focal_length):
    cx = int(center_of_face[0])
    cy = int(center_of_face[1])
    camera_matrix = np.zeros((3, 3), dtype='float32')
    camera_matrix[0][0] = focal_length
    camera_matrix[0][2] = cx
    camera_matrix[1][1] = focal_length
    camera_matrix[1][2] = cy
    camera_matrix[2][2] = 1
    return camera_matrix

def draw_axes(frame, center_of_face, yaw, pitch, roll, scale, focal_length):
    yaw *= np.pi / 180.0
    pitch *= np.pi / 180.0
    roll *= np.pi / 180.0
    cx = int(center_of_face[0])
    cy = int(center_of_face[1])
    Rx = np.array([[1, 0, 0],
                   [0, math.cos(pitch), -math.sin(pitch)],
                   [0, math.sin(pitch), math.cos(pitch)]])
    Ry = np.array([[math.cos(yaw), 0, -math.sin(yaw)],
                   [0, 1, 0],
                   [math.sin(yaw), 0, math.cos(yaw)]])
    Rz = np.array([[math.cos(roll), -math.sin(roll), 0],
                   [math.sin(roll), math.cos(roll), 0],
                   [0, 0, 1]])
    #R = np.dot(Rz, Ry, Rx)
    #ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
    #R = np.dot(Rz, np.dot(Ry, Rx))
    R = Rz @ Ry @ Rx
    print(R)
    camera_matrix = build_camera_matrix(center_of_face, focal_length)
    xaxis = np.array(([1 * scale, 0, 0]), dtype='float32').reshape(3, 1)
    yaxis = np.array(([0, -1 * scale, 0]), dtype='float32').reshape(3, 1)
    zaxis = np.array(([0, 0, -1 * scale]), dtype='float32').reshape(3, 1)
    zaxis1 = np.array(([0, 0, 1 * scale]), dtype='float32').reshape(3, 1)
    o = np.array(([0, 0, 0]), dtype='float32').reshape(3, 1)
    o[2] = camera_matrix[0][0]
    xaxis = np.matmul(R, xaxis) + o
    yaxis = np.matmul(R, yaxis) + o
    zaxis = np.matmul(R, zaxis) + o
    zaxis1 = np.matmul(R, zaxis1) + o
    xp2 = (xaxis[0] / xaxis[2] * camera_matrix[0][0]) + cx
    yp2 = (xaxis[1] / xaxis[2] * camera_matrix[1][1]) + cy
    p2 = (int(xp2), int(yp2))
    cv2.line(frame, (cx, cy), p2, (0, 0, 255), 2)
    xp2 = (yaxis[0] / yaxis[2] * camera_matrix[0][0]) + cx
    yp2 = (yaxis[1] / yaxis[2] * camera_matrix[1][1]) + cy
    p2 = (int(xp2), int(yp2))
    cv2.line(frame, (cx, cy), p2, (0, 255, 0), 2)
    xp1 = (zaxis1[0] / zaxis1[2] * camera_matrix[0][0]) + cx
    yp1 = (zaxis1[1] / zaxis1[2] * camera_matrix[1][1]) + cy
    p1 = (int(xp1), int(yp1))
    xp2 = (zaxis[0] / zaxis[2] * camera_matrix[0][0]) + cx
    yp2 = (zaxis[1] / zaxis[2] * camera_matrix[1][1]) + cy
    p2 = (int(xp2), int(yp2))
    cv2.line(frame, p1, p2, (255, 0, 0), 2)
    cv2.circle(frame, p2, 3, (255, 0, 0), 2)
    return frame

# Version info
print(cv2.__version__)
print("OpenVINO inference_engine:", get_version())

# Load the model (face detection) face-detection-adas-0001
ie = IECore()
net = ie.read_network(model='FP16/face-detection-adas-0001.xml', weights='FP16/face-detection-adas-0001.bin')
exec_net = ie.load_network(network=net, device_name="MYRIAD")

# Input/output settings (face detection)
input_blob = net.input_info['data'].name
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape

# Load the model (head pose estimation) head-pose-estimation-adas-0001
net_pose = ie.read_network(model='FP16/head-pose-estimation-adas-0001.xml', weights='FP16/head-pose-estimation-adas-0001.bin')
exec_net_pose = ie.load_network(network=net_pose, device_name="MYRIAD")

# Input/output settings (head pose estimation) - read from net_pose, not net
input_blob_pose = net_pose.input_info['data'].name
out_blob_pose = next(iter(net_pose.outputs))
n_pose, c_pose, h_pose, w_pose = net_pose.input_info[input_blob_pose].input_data.shape

# Prepare the camera
cap = cv2.VideoCapture(0)
scale = 50
focal_length = 950.0

# Main loop
while True:
    ret, frame = cap.read()

    # Reload on error
    if ret == False:
        continue

    # Convert to the input data format
    img = cv2.resize(frame, (w, h))     # resize
    img = img.transpose((2, 0, 1))      # HWC > CHW
    img = np.expand_dims(img, axis=0)   # adjust dimensions

    # Run inference
    out = exec_net.infer(inputs={'data': img})

    # Extract only the required data from the output
    out = out['detection_out']
    out = np.squeeze(out)               # remove all dimensions of size 1

    # Draw the title
    cv2.putText(frame, title, (10, 30), cv2.FONT_HERSHEY_DUPLEX, fontScale=0.8, color=(200, 200, 0), lineType=cv2.LINE_AA)

    # Process each detected face region one by one
    for detection in out:
        # Get the confidence value
        confidence = float(detection[2])

        # Convert the bounding-box coordinates to the input-image scale
        xmin = int(detection[3] * frame.shape[1])
        ymin = int(detection[4] * frame.shape[0])
        xmax = int(detection[5] * frame.shape[1])
        ymax = int(detection[6] * frame.shape[0])

        # Show the bounding box only when confidence > 0.5
        if confidence > 0.5:
            # Clip the face region to the camera frame; the min values in particular cause an error if left unclipped
            if xmin < 0:
                xmin = 0
            if ymin < 0:
                ymin = 0
            if xmax > frame.shape[1]:
                xmax = frame.shape[1]
            if ymax > frame.shape[0]:
                ymax = frame.shape[0]

            # Crop only the face region
            frame_face = frame[ymin:ymax, xmin:xmax]

            # Convert to the input data format
            img = cv2.resize(frame_face, (60, 60))  # resize
            img = img.transpose((2, 0, 1))          # HWC > CHW
            img = np.expand_dims(img, axis=0)       # adjust dimensions

            # Run inference
            out = exec_net_pose.infer(inputs={'data': img})

            # Extract only the required data from the output
            yaw = out['angle_y_fc'][0][0]
            pitch = out['angle_p_fc'][0][0]
            roll = out['angle_r_fc'][0][0]
            print("yaw:{:f}, pitch:{:f}, roll:{:f}".format(yaw, pitch, roll))
            center_of_face = (xmin + frame_face.shape[1] / 2, ymin + frame_face.shape[0] / 2, 0)
            draw_axes(frame, center_of_face, yaw, pitch, roll, scale, focal_length)

    # Show the image
    cv2.imshow('frame', frame)

    # Exit when any key is pressed
    key = cv2.waitKey(1)
    if key != -1:
        break

# Cleanup
cap.release()
cv2.destroyAllWindows()
pi@raspberrypi:~/workspace $ python3 01_face_demo6a.py
*** Head Pose Estimation ***
4.5.1-openvino
OpenVINO inference_engine: 2.1.2021.2.0-1877-176bdf51370-releases/2021/2
yaw:-1.632812, pitch:-14.703125, roll:-6.597656
[[ 0.99297412  0.10394989  0.05654047]
 [-0.11484986  0.96167918  0.24896316]
 [-0.0284941  -0.25370764  0.96686117]]
yaw:13.187500, pitch:-26.187500, roll:-4.718750
[[ 0.9703286   0.17415912 -0.167723  ]
 [-0.08009523  0.88603071  0.45665559]
 [ 0.22813846 -0.42967217  0.87369026]]
classification3.py:15: DeprecationWarning: 'inputs' property of IENetwork class is deprecated. To access DataPtrs user need to use 'input_data' property of InputInfoPtr objects which can be accessed by 'input_info' property.

That is, the 'inputs' property of the IENetwork class is deprecated; to access the DataPtrs, use the 'input_data' property of the InputInfoPtr objects, which are reachable through the 'input_info' property:
Before (old API, now deprecated):

# Get the key of the input data
input_blob = next(iter(net.inputs))
After (new API):

# Get the key of the input data
input_blob = net.input_info['data'].name
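If you prefer not to hard-code the blob name 'data', the first key of input_info can also be taken generically; a minimal sketch of the same setup used throughout these scripts:

# Works for any single-input model: take the first input name instead of 'data'
input_blob = next(iter(net.input_info))
out_blob = next(iter(net.outputs))
n, c, h, w = net.input_info[input_blob].input_data.shape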