# -*- coding: utf-8 -*-
##------------------------------------------
## StyleGAN3 class Ver 0.01
##
## 2024.09.14 Masahiro Izutsu
##------------------------------------------
## _stylegan3.py
import warnings
warnings.simplefilter('ignore')
import os
import shutil
from tqdm import tqdm
import dlib
import matplotlib.pyplot as plt
from skimage.transform import resize
import torch
from PIL import Image
import numpy as np
import cv2
import time
import torchvision.transforms as transforms
from torch.cuda import is_available
from utils.common import tensor2im
from utils.inference_utils import run_on_batch, load_encoder, get_average_image
from editing.interfacegan.face_editor import FaceEditor
from editing.styleclip_global_directions import edit as styleclip_edit
from utils.alignment_utils import align_face, crop_face, get_stylegan_transform
import my_logging
import my_imagetool
class StyleGAN3:
    gpu_d = is_available()  # True when a CUDA GPU is available
SHAPE_PREDICTOR = './pretrained_models/shape_predictor_68_face_landmarks.dat'
DEF_IMAGE = './edit/pic/001.jpg'
RESULT_PATH = './results'
PIC_DIR = './edit/pic'
ALIGN_DIR = './edit/align'
CROP_DIR = './edit/crop'
INVERT_DIR = './edit/invert'
LATENTS_DIR = './edit/latents'
TMP_IMAGE_DIR = './tmpimg'
OUT_MOVIE = './tmpimg/output.mp4'
img_transforms = transforms.Compose([
transforms.Resize((256, 256)),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
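    # img_transforms maps a PIL image to a (3, 256, 256) float tensor normalized
    # to [-1, 1], the input format the ReStyle encoders expect.
    # A minimal usage sketch (the path is illustrative only):
    #   tensor = StyleGAN3.img_transforms(Image.open('./edit/pic/001.jpg').convert('RGB'))
    #   batch = tensor.unsqueeze(0)   # -> shape (1, 3, 256, 256)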
    # Initialization
    #   src:    source image path
    #   result: output directory path
    #   model:  encoder selection ('psp' / 'e4e')
    #   logsel: log output selection
def __init__(self, src, result, model, logsel = 3):
self.logger = my_logging.get_module_logger_sel(__name__, logsel)
self.align_dir = self.ALIGN_DIR
self.crop_dir = self.CROP_DIR
self.result_path = result
self.pic_dir = ''
self.source_image = ''
self.model_sel = ''
self.set_param(src, model)
self.reset_folder(self.TMP_IMAGE_DIR)
self.reset_folder(self.TMP_IMAGE_DIR + '/img')
os.makedirs(self.result_path, exist_ok = True)
def set_param(self, src, model):
if self.model_sel != model:
self.model_sel = model
self.net = None
self.opts = None
self.invert_dir = self.INVERT_DIR + '_' + self.model_sel
            self.latents_dir = self.LATENTS_DIR + '_' + self.model_sel
self.experiment_type = 'restyle_pSp_ffhq' if self.model_sel == 'psp' else 'restyle_e4e_ffhq'
base_dir_pair = os.path.split(src)
if self.source_image != base_dir_pair[1] or self.pic_dir != base_dir_pair[0]:
self.pic_dir = base_dir_pair[0]
self.source_image = base_dir_pair[1]
def run_alignment(self, image_path):
predictor = dlib.shape_predictor(self.SHAPE_PREDICTOR)
detector = dlib.get_frontal_face_detector()
# self.logger.debug("Aligning image...")
aligned_image = align_face(filepath=str(image_path), detector=detector, predictor=predictor)
# self.logger.debug(f"Finished aligning image: {image_path}")
return aligned_image
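    # Note: run_alignment() and crop_image() require dlib's 68-point landmark
    # model at SHAPE_PREDICTOR; it is distributed separately from this
    # repository and must be downloaded in advance.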
def crop_image(self, image_path):
predictor = dlib.shape_predictor(self.SHAPE_PREDICTOR)
detector = dlib.get_frontal_face_detector()
# self.logger.debug("Cropping image...")
cropped_image = crop_face(filepath=str(image_path), detector=detector, predictor=predictor)
# self.logger.debug(f"Finished cropping image: {image_path}")
return cropped_image
def compute_transforms(self, aligned_path, cropped_path):
predictor = dlib.shape_predictor(self.SHAPE_PREDICTOR)
detector = dlib.get_frontal_face_detector()
# self.logger.debug("Computing landmarks-based transforms...")
res = get_stylegan_transform(str(cropped_path), str(aligned_path), detector, predictor)
# self.logger.debug("transforms Done!")
if res is None:
self.logger.error(f"Failed computing transforms on: {cropped_path}")
return
else:
rotation_angle, translation, transform, inverse_transform = res
return inverse_transform
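    # compute_transforms() returns the inverse landmarks-based transform that
    # maps the aligned face back into the cropped (unaligned) frame; it is fed
    # to run_on_batch() / editor.edit() so edited faces can be rendered in the
    # original crop's pose.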
def reset_folder(self, path):
if os.path.isdir(path):
shutil.rmtree(path)
        os.makedirs(path, exist_ok = True)
    # Build a contact-sheet image of all images in a folder
def folder_image(self, folder, save_path='', pixel_size=(256,256), dpi=64, xn=10):
files = os.listdir(folder)
files.sort()
        n = len(files)
        yn = (n + xn - 1) // xn     # number of rows (ceiling division)
        if n < xn:
            xn = n
        # convert pixels to inches
        x_inch = pixel_size[0] / dpi
        y_inch = pixel_size[1] / dpi
fig = plt.figure(figsize = (x_inch * xn, y_inch * yn + 0.2), dpi = dpi)
fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
for i, file in enumerate(files):
img = Image.open(folder+'/'+file)
images = np.asarray(img)
            # pad to a square
img_h, img_w = images.shape[:2]
if img_h != img_w:
images = my_imagetool.frame_square(images)
images = resize(images, pixel_size)[..., :3]
ax = fig.add_subplot(yn, xn, i+1, xticks=[], yticks=[])
image_plt = np.array(images)
ax.imshow(image_plt)
ax.set_xlabel(folder+'/'+file, fontsize=15)
if len(save_path) > 0:
plt.savefig(save_path)
plt.close()
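    # A minimal usage sketch (paths are illustrative): build a 10-column
    # contact sheet of ./edit/pic and write it to a temporary file.
    #   gan.folder_image('./edit/pic', save_path = './tmpimg/tmp_pic.jpg')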
    # Check that every source image already has a counterpart in target_path
def check_target_image(self, img_path, target_path):
files_image = [
f for f in os.listdir(img_path) if os.path.isfile(os.path.join(img_path, f))
]
self.logger.debug(f'{img_path} = {files_image}')
if not os.path.isdir(target_path):
return False
files_target = [
f for f in os.listdir(target_path) if os.path.isfile(os.path.join(target_path, f))
]
self.logger.debug(f'{target_path} = {files_target}')
        flag = True
        for file in files_image:    # do not shadow the img_path argument
            name = os.path.splitext(file)[0]
            path = target_path + '/' + name + '.jpg'    # outputs are always saved as .jpg
            if not os.path.isfile(path):
                flag = False
self.logger.info(f' check_target_image = {flag} \'{target_path}\'')
return flag
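    # A minimal usage sketch (directories are the class defaults): returns True
    # when every image in ./edit/pic already has an aligned .jpg, in which case
    # the expensive alignment step can be skipped.
    #   cached = gan.check_target_image('./edit/pic', './edit/align')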
    # Convert the frame images into a movie
def make_movie(self, movie_path, rate, disp_f = True):
        s_path = self.TMP_IMAGE_DIR + '/img/%03d.jpg'    # frames are saved zero-padded: 000.jpg, 001.jpg, ...
command = f'ffmpeg -r {rate} -i {s_path} -vcodec libx264 -pix_fmt yuv420p {self.OUT_MOVIE} -loglevel quiet -y'
self.logger.info(f' {command}')
os.system(command)
        # copy to the output folder under the requested name
self.logger.info(f' making movie... → {movie_path}')
shutil.copy(self.OUT_MOVIE, movie_path)
if disp_f:
my_imagetool.image2disp(movie_path)
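    # Note: make_movie() shells out to ffmpeg, which must be on the PATH.
    # The generated command is equivalent to (rate = 5 shown as an example):
    #   ffmpeg -r 5 -i ./tmpimg/img/%03d.jpg -vcodec libx264 -pix_fmt yuv420p \
    #          ./tmpimg/output.mp4 -loglevel quiet -y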
    ## Get the file name for the align contact sheet
def get_align_filename(self):
base_dir_pair = os.path.split(self.pic_dir)
path = self.result_path + '/' + base_dir_pair[1] + '_align_crop.jpg'
msg = 'pic - align - crop'
return path, msg
    ## Get the file name for the invert contact sheet
def get_invert_filename(self):
base_dir_pair = os.path.split(self.pic_dir)
path = self.result_path + '/' + self.model_sel + '-' + base_dir_pair[1] + '_crop_invert.jpg'
msg = 'crop - invert'
return path, msg
    ## Create aligned & cropped images
def align_images(self, image_dir):
self.pic_dir = image_dir
self.reset_folder(self.align_dir)
self.reset_folder(self.crop_dir)
files = sorted(os.listdir(self.pic_dir))
for i, file in enumerate(tqdm(files)):
input_image = self.run_alignment(self.pic_dir + '/' + file)
cropped_image = self.crop_image(self.pic_dir + '/' + file)
name = os.path.splitext(file)[0]
input_image.save(self.align_dir + '/' + name + '.jpg')
cropped_image.save(self.crop_dir + '/' + name + '.jpg')
    # Create inverted (reconstructed) images
def invert_images(self):
self.load_encoder()
self.reset_folder(self.invert_dir)
        self.reset_folder(self.latents_dir)
avg_image = get_average_image(self.net)
files = sorted(os.listdir(self.align_dir))
for file in tqdm(files):
input_image = Image.open(self.align_dir+ '/' + file)
aligned_path = self.align_dir + '/' + file
cropped_path = self.crop_dir + '/' + file
landmarks_transform = self.compute_transforms(aligned_path = aligned_path, cropped_path = cropped_path)
self.opts.n_iters_per_batch = 3
self.opts.resize_outputs = False # generate outputs at full resolution
transformed_image = self.img_transforms(input_image)
with torch.no_grad():
tic = time.time()
result_batch, result_latents = run_on_batch(
inputs = transformed_image.unsqueeze(0).cuda().float(),
net = self.net,
opts = self.opts,
avg_image = avg_image,
landmarks_transform = torch.from_numpy(landmarks_transform).cuda().float())
                toc = time.time()
                self.logger.debug('Inference took {:.4f} seconds.'.format(toc - tic))
result_tensors = result_batch[0]
final_rec = tensor2im(result_tensors[-1]) #.resize(resize_amount)
final_rec.save(self.invert_dir + '/' + file)
name = os.path.splitext(file)[0]
            np.save(self.latents_dir + '/' + name, result_latents[0][-1])
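    # After invert_images(), each source image has a reconstruction in
    # invert_dir and its final latent code in latents_dir. A minimal loading
    # sketch (the shape is typical of the FFHQ StyleGAN3 W+ space, not
    # guaranteed):
    #   w = np.load('./edit/latents_psp/001.npy')   # e.g. shape (16, 512)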
    ## Build the align contact sheet
def make_align(self, disp_f=True):
path, msg = self.get_align_filename()
path0 = self.TMP_IMAGE_DIR + '/tmp_pic.jpg'
path1 = self.TMP_IMAGE_DIR + '/tmp_align.jpg'
path2 = self.TMP_IMAGE_DIR + '/tmp_crop.jpg'
        # contact sheets of each folder
self.folder_image(self.pic_dir, save_path = path0)
self.folder_image(self.align_dir, save_path = path1)
self.folder_image(self.crop_dir, save_path = path2)
images = []
images.append(cv2.imread(path0))
images.append(cv2.imread(path1))
images.append(cv2.imread(path2))
h, w = images[0].shape[:2]
ds_image = my_imagetool.make_tileimage(images, xmax = w, ymax = h * 3)
my_imagetool.image_disp(ds_image, winname = msg, dispf = disp_f, save_path = path, maxsize = 1024, wait_s = 2)
    ## Build the invert contact sheet
def make_invert(self, disp_f=True):
path, msg = self.get_invert_filename()
path0 = self.TMP_IMAGE_DIR + '/tmp_crop.jpg'
path1 = self.TMP_IMAGE_DIR + '/tmp_invert.jpg'
        # contact sheets of each folder
self.folder_image(self.crop_dir, save_path = path0)
self.folder_image(self.invert_dir, save_path = path1)
images = []
images.append(cv2.imread(path0))
images.append(cv2.imread(path1))
h, w = images[0].shape[:2]
ds_image = my_imagetool.make_tileimage(images, xmax = w, ymax = h * 2)
my_imagetool.image_disp(ds_image, winname = msg, dispf = disp_f, save_path = path, maxsize = 1024, wait_s = 2)
    ## Load the encoder
def load_encoder(self):
        if self.net is None:
model_path = f'./pretrained_models/{self.experiment_type}.pt'
self.net, self.opts = load_encoder(checkpoint_path=model_path)
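    # load_encoder() lazily loads the ReStyle checkpoint on first use; the .pt
    # file is expected at ./pretrained_models/restyle_pSp_ffhq.pt (model 'psp')
    # or ./pretrained_models/restyle_e4e_ffhq.pt (model 'e4e').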
    ## Build the InterFaceGAN result paths
def get_infer_path(self,direction, min_value, max_value):
infer_path = f'{self.result_path}/{self.model_sel}-gan_{direction}_{min_value}_{max_value}_{self.source_image}'
msg = f'interFaceGAN> {infer_path}'
movie_path = infer_path[:-3] + 'mp4'
return infer_path, movie_path, msg
    ## Build the StyleCLIP result paths
def get_infer_clip_path(self, neutral_text, target_text, alpha, beta):
neutral = neutral_text.replace(' ', '~')
target = target_text.replace(' ', '~')
infer_path = f'{self.result_path}/{self.model_sel}-clip_{neutral}_{target}_{int(alpha)}_{int(beta)}_{self.source_image}'
msg = f'StyleCLIP> {infer_path}'
infer_path_a = infer_path[:-4] + '_a' + infer_path[-4:]
return infer_path, infer_path_a, msg
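    # Example of the names generated by get_infer_clip_path('a face',
    # 'a smiling face', 40, 13) with model 'psp' and source '001.jpg':
    #   ./results/psp-clip_a~face_a~smiling~face_40_13_001.jpg     (edited only)
    #   ./results/psp-clip_a~face_a~smiling~face_40_13_001_a.jpg   (input | edited)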
    ## Edit with InterFaceGAN
    ##   edit_direction: 'age', 'smile', 'pose', 'Male'
    ##   min_value: min:-10, max:10, step:1
    ##   max_value: min:-10, max:10, step:1
def edit_interface_gan(self, edit_direction, min_value, max_value, disp_f=True):
self.load_encoder()
self.reset_folder(self.TMP_IMAGE_DIR)
self.reset_folder(self.TMP_IMAGE_DIR + '/img')
name = os.path.splitext(self.source_image)[0] + '.npy'
        result_latents_ = np.load(self.latents_dir + '/' + name)
aligned_path = self.align_dir + '/' + self.source_image
cropped_path = self.crop_dir + '/' + self.source_image
landmarks_transform = self.compute_transforms(aligned_path = aligned_path, cropped_path = cropped_path)
editor = FaceEditor(stylegan_generator = self.net.decoder, generator_type = "aligned")
self.logger.info(f"<interFaceGAN> Performing edit for {edit_direction}...")
input_latent = torch.from_numpy(result_latents_).unsqueeze(0).cuda()
edit_images, edit_latents = editor.edit(latents = input_latent,
direction = edit_direction,
factor_range = (min_value, max_value),
user_transforms = landmarks_transform,
apply_user_transformations = True)
        # output the results
        def prepare_edited_result(edit_images):
            if isinstance(edit_images[0], list):
                edit_images = [image[0] for image in edit_images]
            for i, image in enumerate(edit_images):
                o_path = self.TMP_IMAGE_DIR + '/img/' + str(i).zfill(3) + '.jpg'
                self.logger.debug(f' Image out ... {o_path}')
                image.resize((512, 512)).save(o_path)
res = np.array(edit_images[0].resize((512, 512)))
for image in edit_images[1:]:
res = np.concatenate([res, image.resize((512, 512))], axis=1)
res = Image.fromarray(res).convert("RGB")
return res
infer_path, movie_path, msg = self.get_infer_path(edit_direction, min_value, max_value)
res = prepare_edited_result(edit_images)
res.save(infer_path)
self.logger.info(f'<interFaceGAN> result image → {infer_path}')
if disp_f:
my_imagetool.image2disp(infer_path, winname = msg, maxsize = 1000)
self.make_movie(movie_path, rate = 5, disp_f = disp_f)
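    # A minimal usage sketch (requires align_images() and invert_images() to
    # have been run first so the latent for the source image exists):
    #   gan.edit_interface_gan('age', -5, 5, disp_f = False)
    # This renders one frame per factor step and also writes a short movie of
    # the sweep next to the result image.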
    ## Edit with StyleCLIP
    ##   neutral_text: text string
    ##   target_text: text string
    ##   alpha: min:-5, max:5, step:0.5 (passed as x10)
    ##   beta: min:-1, max:1, step:0.1 (passed as x100)
def edit_styleclip(self, neutral_text, target_text, alpha, beta, disp_f=True):
self.load_encoder()
styleclip_args = styleclip_edit.EditConfig()
global_direction_calculator = styleclip_edit.load_direction_calculator(stylegan_model = self.net.decoder, opts = styleclip_args)
opts = styleclip_edit.EditConfig()
opts.alpha_min = alpha / 10
opts.alpha_max = alpha / 10
opts.num_alphas = 1
opts.beta_min = beta / 100
opts.beta_max = beta / 100
opts.num_betas = 1
opts.neutral_text = neutral_text
opts.target_text = target_text
        # run the inference
name = os.path.splitext(self.source_image)[0] + '.npy'
        result_latents_ = np.load(self.latents_dir + '/' + name)
aligned_path = self.align_dir + '/' + self.source_image
cropped_path = self.crop_dir + '/' + self.source_image
landmarks_transform = self.compute_transforms(aligned_path = aligned_path, cropped_path = cropped_path)
        input_transforms = landmarks_transform    # compute_transforms() already returns a NumPy array
self.logger.info(f'<StyleCLIP> Performing edit for: "{opts.target_text}"...')
edit_res, edit_latent = styleclip_edit.edit_image(latent = result_latents_,
landmarks_transform = input_transforms,
stylegan_model = self.net.decoder,
global_direction_calculator = global_direction_calculator,
opts = opts,
image_name = None,
save = False)
input_image = Image.open(self.invert_dir + '/' + self.source_image)
transformed_image = self.img_transforms(input_image)
        # output the results
infer_clip_path, infer_clip_path_a, msg = self.get_infer_clip_path(neutral_text, target_text, alpha, beta)
input_im = tensor2im(transformed_image).resize((512, 512))
edited_im = tensor2im(edit_res[0]).resize((512, 512))
edit_coupled = np.concatenate([np.array(input_im), np.array(edited_im)], axis=1)
edit_coupled = Image.fromarray(edit_coupled)
edited_im.save(infer_clip_path)
self.logger.info(f'<StyleCLIP> result image → {infer_clip_path}')
edit_coupled.save(infer_clip_path_a)
self.logger.info(f'<StyleCLIP> result image → {infer_clip_path_a}')
if disp_f:
my_imagetool.image2disp(infer_clip_path, winname = msg, maxsize = 1000)
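    # A minimal usage sketch: alpha and beta are passed pre-scaled, so alpha = 40
    # becomes a manipulation strength of 4.0 and beta = 13 a disentanglement
    # threshold of 0.13 inside EditConfig (the target text is illustrative):
    #   gan.edit_styleclip('a face', 'a face with makeup', 40, 13, disp_f = False)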
#-----Test routine-----
# $ python _stylegan3.py
#
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--model', type = str, default='psp', choices=['psp', 'e4e'], help = 'encoder type \'psp / e4e\'')
parser.add_argument("--source_image", default=StyleGAN3.DEF_IMAGE, help="path to source image")
parser.add_argument("--result_path", default=StyleGAN3.RESULT_PATH, help="path to output")
parser.add_argument("--align", action="store_false", help="make align image flag")
parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
    parser.add_argument('--log', type = int, metavar = 'LOG', default = 3, help = 'log level (-1/0/1/2/3/4/5), default 3')
opt = parser.parse_args()
gan = StyleGAN3(opt.source_image, opt.result_path, opt.model, logsel = opt.log)
if opt.cpu:
gan.gpu_d = False
print(f' GPU mode: {gan.gpu_d}')
    print(f' instance variable:\t gan.pic_dir = {gan.pic_dir}')
    print(f' instance variable:\t gan.source_image = {gan.source_image}')
    print(f' class variable:\t\t gan.OUT_MOVIE = {gan.OUT_MOVIE}')
    print(f' instance variable:\t gan.invert_dir = {gan.invert_dir}')
    # create aligned & cropped images
flg = gan.check_target_image(gan.pic_dir, gan.align_dir) and opt.align
if not flg and gan.gpu_d:
gan.align_images(gan.pic_dir)
gan.make_align(disp_f = False)
path, msg = gan.get_align_filename()
my_imagetool.image2disp(path, winname = msg, maxsize = 1024)
    # create inverted images
flg = gan.check_target_image(gan.pic_dir, gan.invert_dir) and opt.align
if not flg and gan.gpu_d:
gan.invert_images()
gan.make_invert(disp_f = False)
path, msg = gan.get_invert_filename()
my_imagetool.image2disp(path, winname = msg, maxsize = 1000)
    ## edit with InterFaceGAN
edit_direction = 'age'
min_value = -5
max_value = 5
if gan.gpu_d:
gan.edit_interface_gan(edit_direction, min_value, max_value, disp_f = False)
infer_path, movie_path, msg = gan.get_infer_path(edit_direction, min_value, max_value)
my_imagetool.image2disp(infer_path, winname = msg, maxsize = 1000)
my_imagetool.image2disp(movie_path)
    ## edit with StyleCLIP
neutral_text = "a face"
target_text = "a smiling face"
alpha = 40
beta = 13
if gan.gpu_d:
gan.edit_styleclip(neutral_text, target_text, alpha, beta, disp_f = False)
infer_clip_path, infer_clip_path_a, msg = gan.get_infer_clip_path(neutral_text, target_text, alpha, beta)
my_imagetool.image2disp(infer_clip_path, winname = msg, maxsize = 1000)
my_imagetool.image2disp(infer_clip_path_a, winname = msg, maxsize = 1000)
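#-----Programmatic usage (a minimal sketch; paths and texts are illustrative)-----
# from _stylegan3 import StyleGAN3
# gan = StyleGAN3('./edit/pic/001.jpg', './results', 'e4e', logsel = 3)
# gan.align_images(gan.pic_dir)    # detect, align and crop every source image
# gan.invert_images()              # encode to latents and save reconstructions
# gan.edit_interface_gan('smile', -5, 5, disp_f = False)
# gan.edit_styleclip('a face', 'a face with glasses', 40, 13, disp_f = False)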