# -*- coding: utf-8 -*-
##--------------------------------------------------
## Stable Diffusion with diffusers(050) Ver 0.06
##
## 2025.07.31 Masahiro Izutsu
##--------------------------------------------------
## sd_050.py 顔の崩れを修正する
## Ver 0.06 2025.07.31 sd_081 IP-Adapter 対応
# Title (passed to sdt.display_info in the script entry point)
title = 'Stable Diffusion with diffusers(050) Ver 0.06'
import warnings
# Silence every Python warning for the rest of the run.
warnings.simplefilter('ignore')
# Imports & initial setup
import os
import torch
from PIL import Image
from PIL import ImageDraw
import face_recognition
from diffusers import StableDiffusionUpscalePipeline
from diffusers import StableDiffusionImg2ImgPipeline
from diffusers import StableDiffusionInpaintPipeline
from diffusers import logging
import my_logging
import sd_tools as sdt
# NOTE: `logging` here is diffusers.logging (imported above), not the stdlib module.
# Set the diffusers log verbosity to the ERROR level.
logging.set_verbosity_error()
# Constant definitions
# ControlNet inpaint model file name (not referenced in the visible part of this file).
DEF_MODEL_CNTL = 'control_v11p_sd15_inpaint_fp16.safetensors'
# Default value of the 'model_path' command-line option.
DEF_MODEL_BASE = 'SD1.5/beautifulRealistic_brav5.safetensors'
# Default value of the 'image_path' command-line option.
DEF_IMAGE_PATH = 'images/sd_050_test.jpg'
# Default value of the 'prompt' command-line option.
DEF_PROMPT = 'masterpiece, high quality, very_high_resolution, large_filesize, full color, an extremely cute face, woman, symmetrical, HDR, real, realistic'
# Default value of the 'neg_prompt' command-line option.
DEF_NEG_PROMPT = 'lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name, multiple legs, malformation'
# Passed as the `model` argument of face_recognition.face_locations.
FACE_RECOGNITION_MODEL_ID = "hog"
# Model id passed to StableDiffusionUpscalePipeline.from_pretrained.
UPSCALE_MODEL_ID = "stabilityai/stable-diffusion-x4-upscaler"
# Command-line option definitions (consumed by sdt.parse_args in the entry point).
# Each entry appears to be [option name, default value (or 'store_true' action), help text]
# -- presumably; confirm against sdt.parse_args. The trailing number is the list index.
# NOTE(review): some help strings contain typos ('Sourcs', 'rundom', 'gaidanse'); they are
# runtime text and are left untouched here.
opt_list = [
['pros_sel','','sd_050'], # 0
['result_image', 'results/image_050.png', 'path to output image file'], # 1
['cpu', 'store_true', 'cpu mode'], # 2
['log', '3', 'Log level(-1/0/1/2/3/4/5) Default value is \'3\''], # 3
['model_dir', '/StabilityMatrix/Data/Models/StableDiffusion', 'Model directory'], # 4
['model_path', DEF_MODEL_BASE, 'Model Path'], # 5
['image_path', DEF_IMAGE_PATH, 'Sourcs image file path'], # 6
['max_size', 0, 'image max size (0=source)'], # 7
['prompt', DEF_PROMPT, 'Prompt text'], # 8
['seed', 12345678, 'Seed parameter (-1 = rundom)'], # 9
['width', 512, 'image size width'], # 10
['height', 512, 'image size height'], # 11
['step', 20, 'infer step'], # 12
['scale', 8.5, 'gaidanse scale'], # 13
['strength', 0.4, 'strength value'], # 14
['neg_prompt', DEF_NEG_PROMPT, 'Negative Prompt text'], # 15
]
# Image check
def image_log(pil_image, wait_s = -1):
    """Show an intermediate image for visual inspection.

    Does nothing unless ``wait_s`` is zero or greater. Otherwise the image is
    handed to ``sdt.image_save2`` with an empty ``save_path`` and the display
    name 'Check image', capped at ``maxsize = 800``.

    Args:
        pil_image: Image to show.
        wait_s: Value forwarded as ``wait_s`` to ``sdt.image_save2``; a
            negative value (the default) disables the check entirely.
    """
    if not (wait_s >= 0):
        return
    sdt.image_save2(
        pil_image,
        save_path = '',
        dispname = 'Check image',
        maxsize = 800,
        wait_s = wait_s,
    )
# Upscale an image (128x128 input fed to the x4 upscaler model)
def upscale(image, prompt, device):
    """Upscale ``image`` with the Stable Diffusion upscale pipeline.

    The image is converted to RGB and resized to 128x128 before being passed
    to the pipeline loaded from ``UPSCALE_MODEL_ID``.

    Args:
        image: PIL image to upscale.
        prompt: Text prompt given to the upscale pipeline.
        device: Device string; 'cpu' loads the pipeline with its default
            dtype, anything else loads it with ``torch.float16``.

    Returns:
        The first image produced by the pipeline.
    """
    # Half precision is requested only when not running on the CPU.
    load_kwargs = {} if device == 'cpu' else {'torch_dtype': torch.float16}
    upscaler = StableDiffusionUpscalePipeline.from_pretrained(UPSCALE_MODEL_ID, **load_kwargs)
    upscaler.to(device)

    small_rgb = image.convert("RGB").resize((128, 128))
    output = upscaler(prompt = prompt, image = small_rgb)
    return output.images[0]
# Face detection
def face_detection(file_name, offset=20):
    """Detect faces in an image file and return their bounding boxes.

    Args:
        file_name: Path of the image file to scan.
        offset: Number of pixels by which each detected box is grown on
            every side before it is squared.

    Returns:
        A pair ``(face_rects, face_org_rects)`` of equally long lists of
        ``(left, top, right, bottom)`` tuples. ``face_org_rects`` holds the
        boxes exactly as detected; ``face_rects`` holds the enlarged, square
        boxes. Both lists are empty when no face is found. The enlarged boxes
        are not clipped to the image bounds.
    """
    pixels = face_recognition.load_image_file(file_name)
    detections = face_recognition.face_locations(
        pixels,
        number_of_times_to_upsample = 1,
        model = FACE_RECOGNITION_MODEL_ID,
    )

    square_boxes = []
    detected_boxes = []
    # face_recognition reports each location as (top, right, bottom, left).
    for top, right, bottom, left in detections:
        detected_boxes.append((left, top, right, bottom))

        # A tight box sometimes keeps the model from recognising the face,
        # so widen the detected box by `offset` on every side.
        x0 = left - offset
        y0 = top - offset
        grown_w = (right + offset) - x0
        grown_h = (bottom + offset) - y0

        # Make the box square by extending the shorter side to the right / bottom.
        side = max(grown_w, grown_h)
        square_boxes.append((x0, y0, x0 + side, y0 + side))

    return square_boxes, detected_boxes
# Face style conversion
def style_change(model_path, image, prompt, neg_prompt, guidance_scale = 9.5, strength = 0.4, seed = 0, device = 'cpu'):
    """Regenerate ``image`` with a Stable Diffusion img2img pipeline.

    Args:
        model_path: Single-file checkpoint passed to
            ``StableDiffusionImg2ImgPipeline.from_single_file``.
        image: Input image for the img2img pass.
        prompt: Positive prompt.
        neg_prompt: Negative prompt.
        guidance_scale: Forwarded to the pipeline call.
        strength: Forwarded to the pipeline call.
        seed: Seed for the ``torch.Generator`` created on ``device``.
        device: Device string; 'cpu' loads the pipeline with its default
            dtype, anything else loads it with ``torch.float16``.

    Returns:
        The first image produced by the pipeline.
    """
    # Half precision is requested only when not running on the CPU.
    load_kwargs = {} if device == 'cpu' else {'torch_dtype': torch.float16}
    img2img = StableDiffusionImg2ImgPipeline.from_single_file(model_path, **load_kwargs)
    img2img.to(device)

    rng = torch.Generator(device).manual_seed(seed)
    call_kwargs = {
        'prompt': prompt,
        'negative_prompt': neg_prompt,
        'image': image,
        'guidance_scale': guidance_scale,
        'strength': strength,
        'generator': rng,
    }
    with torch.autocast(device):
        output = img2img(**call_kwargs)
    return output.images[0]
# Mask creation
def create_mask(image_width, image_height, rect_width, rect_height, rect_x, rect_y, offset = 10):
    """Build a black RGB mask with a white frame around a rectangle's edge.

    A white rectangle ``offset`` pixels larger than the given rectangle is
    drawn first, then a black rectangle ``offset`` pixels smaller is drawn on
    top of it, leaving a white band that straddles the rectangle's border.

    Args:
        image_width: Width of the mask image.
        image_height: Height of the mask image.
        rect_width: Width of the reference rectangle.
        rect_height: Height of the reference rectangle.
        rect_x: Left coordinate of the reference rectangle.
        rect_y: Top coordinate of the reference rectangle.
        offset: Half-width of the white band.

    Returns:
        The mask as a new 'RGB' PIL image.
    """
    mask = Image.new('RGB', (image_width, image_height), 'black')
    pen = ImageDraw.Draw(mask)

    far_x = rect_x + rect_width
    far_y = rect_y + rect_height
    outer_box = [rect_x - offset, rect_y - offset, far_x + offset, far_y + offset]
    inner_box = [rect_x + offset, rect_y + offset, far_x - offset, far_y - offset]

    # White outside, black inside: only the border band stays white.
    pen.rectangle(outer_box, fill = 'white')
    pen.rectangle(inner_box, fill = 'black')
    return mask
# Fix the face in an image
def face_style_change(model_path, file_name, prompt, neg_prompt, guidance_scale = 9.5, strength = 0.3, seed = 0, device = 'cpu', bUp = False):
    """Regenerate the first detected face of an image and paste it back.

    The face region (detected with a 30 px margin and squared) is cropped,
    brought to the working size (512x512 resize, or the upscale pipeline when
    ``bUp`` is true), re-rendered by ``style_change`` and pasted back at its
    original position and size.

    Args:
        model_path: Checkpoint path forwarded to ``style_change``.
        file_name: Path of the source image.
        prompt: Positive prompt for the face re-render.
        neg_prompt: Negative prompt for the face re-render.
        guidance_scale: Forwarded to ``style_change``.
        strength: Forwarded to ``style_change``.
        seed: Forwarded to ``style_change``.
        device: Device string forwarded to ``upscale`` / ``style_change``.
        bUp: When true, enlarge the face with ``upscale`` instead of a plain
            resize.

    Returns:
        A tuple ``(init_img, new_img, mask)``: the source image with a blue
        outline drawn around the originally detected face, a copy of the
        source with the regenerated face pasted in, and the edge mask built
        by ``create_mask`` around the pasted region.

    Raises:
        IndexError: If no face is detected in ``file_name``. The type matches
            what the previous unguarded ``face_rects[0]`` raised, so existing
            handlers keep working; only the message is now descriptive.
    """
    face_rects, face_org_rects = face_detection(file_name, offset = 30)
    if not face_rects:
        raise IndexError(f'no face detected in image: {file_name}')

    # Only the first detected face is processed.
    face_rect = face_rects[0]
    left, top, right, bottom = face_rect
    left_org, top_org, right_org, bottom_org = face_org_rects[0]
    w = right - left
    h = bottom - top

    # Crop the face region out of (a copy of) the original image.
    init_img = Image.open(file_name)
    new_img = init_img.copy()
    face = new_img.crop(face_rect)

    # Bring the face to the working resolution.
    if bUp:
        upscaled_face = upscale(face, prompt = 'face', device = device)
    else:
        upscaled_face = face.resize((512, 512))
    image_log(upscaled_face, 1)

    # Re-render the face.
    new_face = style_change(model_path, upscaled_face, prompt, neg_prompt, guidance_scale = guidance_scale, strength = strength, seed = seed, device = device)
    image_log(new_face, 1)

    # Paste the regenerated face back at its original size and position.
    new_img.paste(new_face.resize((w, h)), (left, top))

    # Outline the originally detected face area on the source image.
    draw = ImageDraw.Draw(init_img)
    rectcolor = (0, 0, 255)  # outline colour (RGB)
    linewidth = 2            # outline thickness
    draw.rectangle([(left_org, top_org), (right_org, bottom_org)], outline = rectcolor, width = linewidth)

    # Build the mask used to repair the edge of the pasted region.
    # create_mask expects (rect_width, rect_height): pass w before h. The
    # rect is square today, but the order must not rely on that.
    image_width, image_height = new_img.size
    mask = create_mask(image_width, image_height, w, h, left, top, offset = 30)

    return init_img, new_img, mask
# Image generation
def image_generation(model_path, image_path, prompt, seed, num_inference_steps=20, width=512, height=512, guidance_scale=8.5, strength=0.4, neg_prompt = '', device='cpu', logger=None):
    """Run the face-fix pass on ``image_path`` and save the helper images.

    The work folder reported by ``sdt.get_work_path`` is created, the face is
    regenerated with ``face_style_change`` and the annotated source image and
    the edge mask are written to the paths returned by
    ``sdt.get_source_mask_path``.

    Args:
        model_path: Checkpoint path forwarded to ``face_style_change``.
        image_path: Path of the source image.
        prompt: Positive prompt.
        seed: Seed forwarded to ``face_style_change``.
        num_inference_steps: Accepted for interface compatibility; currently
            not used by this function.
        width: Accepted for interface compatibility; currently not used.
        height: Accepted for interface compatibility; currently not used.
        guidance_scale: Forwarded to ``face_style_change``.
        strength: Forwarded to ``face_style_change``.
        neg_prompt: Negative prompt.
        device: Device string forwarded to ``face_style_change``.
        logger: Logger handed to the ``sdt`` helpers. When omitted, the
            module-level ``logger`` created by the script entry point is used
            if it exists, otherwise ``None`` is passed on
            (NOTE(review): confirm the sdt helpers accept ``None``).

    Returns:
        The image with the regenerated face pasted in.
    """
    if logger is None:
        # Earlier versions read the global `logger` directly, which raised
        # NameError when this module was imported instead of run as a script.
        logger = globals().get('logger')

    work_path = sdt.get_work_path(logger)
    os.makedirs(work_path, exist_ok = True)  # create the work folder
    src_path, mask_path = sdt.get_source_mask_path(image_path, logger)  # source / mask image paths

    image, new_img, mask = face_style_change(model_path, image_path, prompt, neg_prompt, guidance_scale = guidance_scale, strength = strength, seed = seed, device = device, bUp = False)
    sdt.image_save2(image, save_path = src_path, dispname = src_path, maxsize = 800, wait_s = 1)
    sdt.image_save2(mask, save_path = mask_path, dispname = '', maxsize = 800, wait_s = 1)
    return new_img


# ** main function **
def main(opt, logger = None):
    """Read the run parameters from ``opt``, fix the face and save the result.

    Args:
        opt: Parsed command-line options (the namespace produced by the
            parser built from ``opt_list``).
        logger: Optional logger; passed to the ``sdt`` helpers and to
            ``image_generation``, and used for the final result message when
            it is not ``None``.
    """
    # Parameter setup
    device = sdt._get_device(opt, logger)
    result_image_path = sdt._get_result_image_path(opt, logger)
    result_path = sdt._get_result_path(opt, logger)
    prompt = sdt._get_prompt(opt, logger)
    # The return value is not used below; the call is kept in case the helper
    # has side effects.
    src_image = sdt._get_source_image(opt, logger)
    model_path = sdt._get_model_path(opt, logger)
    height, width = sdt._get_image_size(opt, logger)
    seed = sdt._get_seed_value(opt, logger)
    num_inference_steps = sdt._get_inference_steps(opt, logger)
    guidance_scale = sdt._get_guidance_scale(opt, logger)
    strength = sdt._get_strength(opt, logger)
    neg_prompt = sdt._get_negative_prompt(opt, logger)
    image_path = sdt._get_source_image_path(opt, logger)

    # Output folder
    os.makedirs(result_path, exist_ok = True)

    # Image generation (hand the logger over explicitly instead of relying on
    # a module-level global)
    image = image_generation(model_path, image_path, prompt, seed, num_inference_steps, width, height, guidance_scale, strength, neg_prompt = neg_prompt, device = device, logger = logger)
    sdt.image_save2(image, result_image_path, result_image_path)
    if logger is not None:
        logger.info(f'result_file: {result_image_path}')
# Entry point for main() (execution starts here)
if __name__ == "__main__":
    # Build the parser from opt_list and read the command line.
    opt = sdt.parse_args(None, opt_list).parse_args()
    sdt._get_device(opt)
    sdt.display_info(opt, title)

    # Application log setup: the logger is named after this script's file
    # name without its extension; the level comes from the 'log' option.
    module_name, _ext = os.path.splitext(os.path.basename(__file__))
    logger = my_logging.get_module_logger_sel(module_name, int(opt.log))

    main(opt, logger)
    logger.info('\nFinished.\n')
# NOTE: In the source listing above, half-width '}' characters may appear as full-width '}' because of how the page was rendered.