# -*- coding: utf-8 -*-
##--------------------------------------------------
## Stable Diffusion with diffusers(050) Ver 0.06
##
## 2025.07.31 Masahiro Izutsu
##--------------------------------------------------
## sd_050.py 顔の崩れを修正する
## Ver 0.06 2025.07.31 sd_081 IP-Adapter 対応
# Title string; the script entry point passes it to sdt.display_info().
title = 'Stable Diffusion with diffusers(050) Ver 0.06'
import warnings
warnings.simplefilter('ignore')
# インポート&初期設定
import os
import torch
from PIL import Image
from PIL import ImageDraw, ImageFilter
import face_recognition
from diffusers import StableDiffusionUpscalePipeline
from diffusers import StableDiffusionImg2ImgPipeline
from diffusers import logging
import my_logging
import sd_tools as sdt
logging.set_verbosity_error()
# Constant definitions
DEF_MODEL_CNTL = 'control_v11p_sd15_inpaint_fp16.safetensors'
DEF_MODEL_BASE = 'SD1.5/beautifulRealistic_brav5.safetensors'
DEF_IMAGE_PATH = 'images/sd_050_test.jpg'
DEF_PROMPT = 'masterpiece, high quality, very_high_resolution, large_filesize, full color, an extremely cute face, woman, symmetrical, HDR, real, realistic'
DEF_NEG_PROMPT = 'lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name, multiple legs, malformation'
# Detector model handed to face_recognition.face_locations().
# FACE_RECOGNITION_MODEL_ID = "hog"   # alternative: prefer speed
FACE_RECOGNITION_MODEL_ID = "cnn"     # prefer accuracy
UPSCALE_MODEL_ID = "stabilityai/stable-diffusion-x4-upscaler"

# Command-line option table handed to sdt.parse_args().
# NOTE(review): each row looks like [option name, default value or argparse
# action, help text] -- confirm against sd_tools.parse_args.
# The trailing number on each row is its index in the list.
opt_list = [
    ['pros_sel','','sd_050'],                                                       # 0
    ['result_image', 'results/image_050.png', 'path to output image file'],         # 1
    ['cpu', 'store_true', 'cpu mode'],                                              # 2
    ['log', '3', 'Log level(-1/0/1/2/3/4/5) Default value is \'3\''],               # 3
    ['model_dir', '/StabilityMatrix/Data/Models/StableDiffusion', 'Model directory'],  # 4
    ['model_path', DEF_MODEL_BASE, 'Model Path'],                                   # 5
    ['image_path', DEF_IMAGE_PATH, 'Source image file path'],                       # 6
    ['max_size', 0, 'image max size (0=source)'],                                   # 7
    ['prompt', DEF_PROMPT, 'Prompt text'],                                          # 8
    ['seed', 12345678, 'Seed parameter (-1 = random)'],                             # 9
    ['width', 512, 'image size width'],                                             # 10
    ['height', 512, 'image size height'],                                           # 11
    ['step', 20, 'infer step'],                                                     # 12
    ['scale', 8.5, 'guidance scale'],                                               # 13
    ['strength', 0.4, 'strength value'],                                            # 14
    ['neg_prompt', DEF_NEG_PROMPT, 'Negative Prompt text'],                         # 15
]
# 画像確認
def image_log(pil_image, wait_s = -1):
    """Optionally show an intermediate image for visual checking.

    Args:
        pil_image: Image forwarded to ``sdt.image_save2``.
        wait_s: Forwarded to ``sdt.image_save2`` as ``wait_s``. Unless it is
            greater than or equal to 0 (the default is -1) nothing is done.

    Returns:
        None.
    """
    if not (wait_s >= 0):
        return
    # An empty save path is passed, with the display name 'Check image'.
    sdt.image_save2(pil_image, save_path = '', dispname = 'Check image', maxsize = 800, wait_s = wait_s)
# 画像を 512x512 アップスケール
def upscale(image, prompt, device):
    """Upscale an image with the pipeline identified by UPSCALE_MODEL_ID.

    The input is converted to RGB and resized to 128x128 before it is handed
    to the upscaling pipeline.

    Args:
        image: PIL image to upscale.
        prompt: Text prompt passed to the pipeline.
        device: Device the pipeline is moved to; 'cpu' loads the pipeline
            with default weights, any other value requests float16.

    Returns:
        The first image produced by the pipeline.
    """
    # Half precision is requested for every device except the CPU.
    load_options = {} if device == 'cpu' else {'torch_dtype': torch.float16}
    upscaler = StableDiffusionUpscalePipeline.from_pretrained(UPSCALE_MODEL_ID, **load_options)
    upscaler.to(device)

    small_rgb = image.convert("RGB").resize((128, 128))
    result = upscaler(prompt = prompt, image = small_rgb)
    return result.images[0]
# 顔検出
def face_detection(file_name, offset=20):
    """Detect faces in an image file and return their bounding rectangles.

    Args:
        file_name: Path of the image file to analyse.
        offset: Number of pixels by which each detected rectangle is widened
            on every side before it is squared.

    Returns:
        A tuple ``(face_rects, face_org_rects)`` of two lists holding
        ``(left, top, right, bottom)`` tuples, one entry per detected face:
        ``face_rects`` are the widened, squared rectangles and
        ``face_org_rects`` are the rectangles exactly as detected. Both lists
        are empty when no face is found. The widened rectangles are not
        clamped to the image bounds.
    """
    pixels = face_recognition.load_image_file(file_name)
    detections = face_recognition.face_locations(pixels, number_of_times_to_upsample = 1, model = FACE_RECOGNITION_MODEL_ID)

    # face_recognition reports (top, right, bottom, left); reorder to (left, top, right, bottom).
    face_org_rects = [(left, top, right, bottom) for top, right, bottom, left in detections]

    face_rects = []
    for left, top, right, bottom in face_org_rects:
        # A tight box is sometimes too small for the model to recognise the
        # face, so widen the detected box by `offset` on every side.
        left, top = left - offset, top - offset
        right, bottom = right + offset, bottom + offset
        # Make the box square by growing the bottom or the right edge.
        box_w, box_h = right - left, bottom - top
        if box_w > box_h:
            bottom += box_w - box_h
        else:
            right += box_h - box_w
        face_rects.append((left, top, right, bottom))
    return face_rects, face_org_rects
# 顔のスタイル変換
def style_change(model_path, image, prompt, neg_prompt, guidance_scale = 9.5, strength = 0.4, seed = 0, device = 'cpu'):
    """Regenerate an image with a Stable Diffusion img2img pipeline.

    Args:
        model_path: Single-file checkpoint loaded with ``from_single_file``.
        image: Input image passed to the pipeline.
        prompt: Positive prompt.
        neg_prompt: Negative prompt.
        guidance_scale: Guidance scale passed to the pipeline.
        strength: img2img strength passed to the pipeline.
        seed: Seed for the ``torch.Generator`` used by the pipeline.
        device: Device for both the pipeline and the generator; 'cpu' loads
            with default weights, any other value requests float16.

    Returns:
        The first image produced by the pipeline.
    """
    # Half precision is requested for every device except the CPU.
    load_options = {} if device == 'cpu' else {'torch_dtype': torch.float16}
    img2img = StableDiffusionImg2ImgPipeline.from_single_file(model_path, **load_options)
    img2img.to(device)

    rng = torch.Generator(device).manual_seed(seed)
    call_args = {
        'prompt': prompt,
        'negative_prompt': neg_prompt,
        'image': image,
        'guidance_scale': guidance_scale,
        'strength': strength,
        'generator': rng,
    }
    return img2img(**call_args).images[0]
# マスク作成
def create_mask(image_width, image_height, rect_width, rect_height, rect_x, rect_y, offset = 10):
    """Build a ring-shaped mask that follows the border of a rectangle.

    Args:
        image_width: Width of the mask image.
        image_height: Height of the mask image.
        rect_width: Width of the rectangle whose border is masked.
        rect_height: Height of the rectangle whose border is masked.
        rect_x: Left coordinate of the rectangle.
        rect_y: Top coordinate of the rectangle.
        offset: Distance the ring extends outward and inward from the
            rectangle border.

    Returns:
        An 8-bit grayscale ('L') image that is black except for a white
        band between the rectangle grown by ``offset`` and the rectangle
        shrunk by ``offset``.
    """
    mask = Image.new('L', (image_width, image_height), 'black')
    pen = ImageDraw.Draw(mask)

    x0, y0 = rect_x, rect_y
    x1, y1 = rect_x + rect_width, rect_y + rect_height
    grown = [x0 - offset, y0 - offset, x1 + offset, y1 + offset]
    shrunk = [x0 + offset, y0 + offset, x1 - offset, y1 - offset]

    # Paint the enlarged box white, then punch the reduced box back to black.
    pen.rectangle(grown, fill = 'white')
    pen.rectangle(shrunk, fill = 'black')
    return mask
# 画像の顔修正する
def face_style_change(model_path, file_name, prompt, neg_prompt, guidance_scale = 9.5, strength = 0.3, seed = 0, device = 'cpu', bUp = False):
    """Regenerate the first detected face of an image and paste it back.

    Args:
        model_path: Checkpoint passed to ``style_change``.
        file_name: Path of the source image.
        prompt: Positive prompt for the face regeneration.
        neg_prompt: Negative prompt for the face regeneration.
        guidance_scale: Guidance scale passed to ``style_change``.
        strength: img2img strength passed to ``style_change``.
        seed: Seed passed to ``style_change``.
        device: Device passed to ``upscale`` / ``style_change``.
        bUp: When true the cropped face is enlarged with ``upscale``;
            otherwise it is simply resized to 512x512.

    Returns:
        ``(init_img, new_img, mask)``:
        ``init_img`` is the source image with a blue outline drawn around
        the face as detected, ``new_img`` is a copy of the source image with
        the regenerated face pasted in, and ``mask`` is a ring mask along the
        border of the pasted area (see ``create_mask``).
        ``(None, None, None)`` is returned when no face is detected.
    """
    face_rects, face_org_rects = face_detection(file_name, offset = 30)
    # Both lists must be non-empty (the original tested face_rects twice).
    if not face_rects or not face_org_rects:
        return None, None, None             # no face detected

    # Only the first detected face is processed.
    face_rect = face_rects[0]
    face_org_rect = face_org_rects[0]
    left, top, right, bottom = face_rect
    left_org, top_org, right_org, bottom_org = face_org_rect
    w = right - left
    h = bottom - top

    # Cut the (widened, squared) face area out of the original image.
    init_img = Image.open(file_name)
    new_img = init_img.copy()
    face = new_img.crop(face_rect)

    # Enlarge the face to the working resolution.
    if bUp:
        upscaled_face = upscale(face, prompt='face', device = device)
    else:
        upscaled_face = face.resize((512, 512))
    image_log(upscaled_face, 1)

    # Regenerate the face.
    new_face = style_change(model_path, upscaled_face, prompt, neg_prompt, guidance_scale = guidance_scale, strength = strength, seed = seed, device = device)
    image_log(new_face, 1)

    # Paste the regenerated face back at its original size and position.
    new_img.paste(new_face.resize((w, h)), (left, top))

    # Outline the face as originally detected on the untouched image.
    draw = ImageDraw.Draw(init_img)
    rectcolor = (0, 0, 255)                 # outline colour (RGB)
    linewidth = 2                           # outline thickness
    draw.rectangle([(left_org, top_org), (right_org, bottom_org)], outline=rectcolor, width=linewidth)

    # Mask used by the caller to repair the seam around the pasted face.
    # create_mask expects (rect_width, rect_height); the original call passed
    # them swapped, which only worked because face_detection returns squares.
    image_width, image_height = new_img.size
    mask = create_mask(image_width, image_height, w, h, left, top, offset=8)
    return init_img, new_img, mask
# 画像生成
def image_generation(model_path, image_path, prompt, seed, num_inference_steps=20, width=512, height=512, guidance_scale=8.5, strength=0.4, neg_prompt = '', device='cpu'):
    """Repair the face in an image and blend the seam with the original.

    Args:
        model_path: Checkpoint passed to ``face_style_change``.
        image_path: Path of the source image.
        prompt: Positive prompt.
        seed: Seed passed to ``face_style_change``.
        num_inference_steps: Accepted but not used by this function.
        width: Accepted but not used by this function.
        height: Accepted but not used by this function.
        guidance_scale: Guidance scale passed to ``face_style_change``.
        strength: img2img strength passed to ``face_style_change``.
        neg_prompt: Negative prompt.
        device: Device passed to ``face_style_change``.

    Returns:
        The composited image, or None when ``face_style_change`` reports
        that no face was found.
    """
    # Make sure the working folder exists.
    work_dir = sdt.get_work_path(logger = None)
    os.makedirs(work_dir, exist_ok = True)
    # Paths under which the source and mask images are saved below.
    src_path, mask_path = sdt.get_source_mask_path(image_path, logger = None)

    outlined, retouched, ring_mask = face_style_change(model_path, image_path, prompt, neg_prompt, guidance_scale = guidance_scale, strength = strength, seed = seed, device = device, bUp = False)
    if any(part is None for part in (outlined, retouched, ring_mask)):
        return None                         # error: nothing to composite

    # Soften the ring mask and use it to blend the original image back in
    # along the border of the pasted face, hiding the seam.
    soft_mask = ring_mask.filter(ImageFilter.GaussianBlur(10))
    blended = Image.composite(outlined, retouched, soft_mask)

    sdt.image_save2(outlined, save_path = src_path, dispname = src_path, maxsize = 800, wait_s = 1)
    sdt.image_save2(soft_mask, save_path = mask_path, dispname = '', maxsize = 800, wait_s = 1)
    return blended
# ** main関数 **
def main(opt, logger = None):
    """Run the face-repair workflow for the parsed command-line options.

    Args:
        opt: Parsed options; read through the ``sdt._get_*`` helpers.
        logger: Logger passed to the ``sdt`` helpers and used for the result
            messages. May be None: the helpers still receive None, and the
            messages of this function then go to a standard-library logger
            instead of raising AttributeError.

    Returns:
        None. The result image is written to the configured output path
        when a face was found.
    """
    # ``logging`` at module level is diffusers' logging module, so the
    # standard-library module is imported locally under another name.
    if logger is None:
        import logging as std_logging
        report = std_logging.getLogger(__name__)
    else:
        report = logger

    # Parameter setup
    device = sdt._get_device(opt, logger)
    result_image_path = sdt._get_result_image_path(opt, logger)
    result_path = sdt._get_result_path(opt, logger)
    prompt = sdt._get_prompt(opt, logger)
    # Result unused here; the call is kept in case the helper has side
    # effects -- TODO confirm against sd_tools.
    src_image = sdt._get_source_image(opt, logger)
    model_path = sdt._get_model_path(opt, logger)
    # NOTE(review): unpacked as (height, width) -- confirm the helper's order.
    height, width = sdt._get_image_size(opt, logger)
    seed = sdt._get_seed_value(opt, logger)
    num_inference_steps = sdt._get_inference_steps(opt, logger)
    guidance_scale = sdt._get_guidance_scale(opt, logger)
    strength = sdt._get_strength(opt, logger)
    neg_prompt = sdt._get_negative_prompt(opt, logger)
    image_path = sdt._get_source_image_path(opt, logger)

    # Output folder
    os.makedirs(result_path, exist_ok = True)

    # Image generation
    image = image_generation(model_path, image_path, prompt, seed, num_inference_steps, width, height, guidance_scale, strength, neg_prompt = neg_prompt, device = device)
    if image is None:
        report.info(f'{sdt.RED}There is no face in the image !!{sdt.NOCOLOR}')
    else:
        sdt.image_save2(image, result_image_path, result_image_path)
        report.info(f'result_file: {result_image_path}')
# main関数エントリーポイント(実行開始)
if __name__ == "__main__":
    # Parse the command line according to the option table.
    opt = sdt.parse_args(None, opt_list).parse_args()
    sdt._get_device(opt)
    sdt.display_info(opt, title)

    # Application logger, named after this script file without its extension.
    script_file = os.path.basename(__file__)
    module_name = os.path.splitext(script_file)[0]
    logger = my_logging.get_module_logger_sel(module_name, int(opt.log))

    main(opt, logger)
    logger.info('\nFinished.\n')
# Note: for display reasons, half-width '}' characters in the source code above may have been rendered as full-width '}'.