AI_Program4 のバックアップ(No.4) - PukiWiki

[ トップ ] [ 一覧 | 検索 | 履歴 | ログイン ]

私的AI研究会 > AI_Program4

生成 AI プログラミング４ == 編集中 == †

　これまで検証してきた結果をもとに、Python で生成 AI プログラムを書く

▲　目　次

生成 AI プログラミング４ == 編集中 ==
参考資料

※ 最終更新:2025/07/31　

diffusersではじめる Stable Diffusion （応用編３） †

　画像生成のプログラムを書く

概要 †

動作環境 †

このプロジェクトは以下の Anaconda 仮想環境とプロジェクト・フォルダで動作する
```
(base) PS > conda activate sd_test
(sd_test) PS > cd workspace_3/sd_test
```

Step 50：顔の崩れを修正する †

はじめに
・全身の画像などの顔の面積が小さいときの画像生成では顔が崩れてしまうことが多い
・「Stable Diffusion」では拡張機能「ADetailer」が有効だが、「Diffusers」ではうまくいかない
・顔認識を利用して顔を抽出して拡大再生成した画像を埋め込む方法を実践してみる

元画像 ｜ 顔の抽出 ｜ 顔の修正 ｜ 修正画像

追加のパッケージ・インストール

 conda install dlib -c conda-forge

 pip install face_recognition

プログラムを実行する（実行時間：約 2秒 RTX 4070 Ti 12GB）

 python sd_050.py

(sd_test) PS > python sd_050.py

Stable Diffusion with diffusers(050)  Ver 0.06: Starting application...

 --result_image             :   results/image_050.png
 --cpu                      :   False
 --log                      :   3
 --model_dir                :   /StabilityMatrix/Data/Models/StableDiffusion
 --model_path               :   SD1.5/beautifulRealistic_brav5.safetensors
 --image_path               :   images/sd_050_test.jpg
 --max_size                 :   0
 --prompt                   :   masterpiece, high quality, very_high_resolution, large_filesize, full color, an extremely cute face, woman, symmetrical, HDR, real, realistic
 --seed                     :   12345678
 --width                    :   512
 --height                   :   512
 --step                     :   20
 --scale                    :   8.5
 --strength                 :   0.4
 --neg_prompt               :   lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name, multiple legs, malformation

Fetching 11 files: 100%|███████████████████████████████| 11/11 [00:00<?, ?it/s]
Loading pipeline components...: 100%|████████████| 6/6 [00:00<00:00, 19.56it/s]
100%|██████████████████████████████████████████| 20/20 [00:02<00:00,  9.50it/s]
result_file: results/image_050.png

Finished.

画像ファイル「image_050.png」が生成される
実行例

元画像 ｜ 顔の抽出 ｜ 顔の修正 ｜ 修正画像

モジュール・ソースコード

▼「sd_050.py」

# -*- coding: utf-8 -*-
##--------------------------------------------------
##  Stable Diffusion with diffusers(050)   Ver 0.06
##
##               2025.07.31 Masahiro Izutsu
##--------------------------------------------------
## sd_050.py    顔の崩れを修正する
##  Ver 0.06    2025.07.31  sd_081 IP-Adapter 対応

# Title string; handed to sdt.display_info() in the entry point below
title = 'Stable Diffusion with diffusers(050)  Ver 0.06'

# Silence every Python warning from this point on (set before the heavy imports that follow)
import warnings
warnings.simplefilter('ignore')

# インポート＆初期設定
import os
import torch
from PIL import Image
from PIL import ImageDraw 
import face_recognition
from diffusers import StableDiffusionUpscalePipeline
from diffusers import StableDiffusionImg2ImgPipeline
from diffusers import StableDiffusionInpaintPipeline
from diffusers import logging

import my_logging
import sd_tools as sdt

# `logging` here is diffusers.logging: only report errors
logging.set_verbosity_error()

# Constant definitions
# NOTE(review): DEF_MODEL_CNTL is not referenced anywhere in the code shown on this page
DEF_MODEL_CNTL = 'control_v11p_sd15_inpaint_fp16.safetensors'
# Default for the --model_path option
DEF_MODEL_BASE = 'SD1.5/beautifulRealistic_brav5.safetensors'
# Default for the --image_path option
DEF_IMAGE_PATH = 'images/sd_050_test.jpg'
# Default positive / negative prompts (--prompt / --neg_prompt)
DEF_PROMPT = 'masterpiece, high quality, very_high_resolution, large_filesize, full color, an extremely cute face, woman, symmetrical, HDR, real, realistic'
DEF_NEG_PROMPT = 'lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name, multiple legs, malformation'
# `model` argument passed to face_recognition.face_locations() in face_detection()
FACE_RECOGNITION_MODEL_ID = "hog"
# Model id loaded by upscale() via StableDiffusionUpscalePipeline.from_pretrained()
UPSCALE_MODEL_ID = "stabilityai/stable-diffusion-x4-upscaler"

# Command-line option definitions, consumed by sdt.parse_args() in the entry point.
# Each entry looks like [option name, default value (or 'store_true'), help text]
# -- presumably interpreted that way by sd_tools; confirm against sdt.parse_args.
# The trailing number on each line is the entry's index in this list.
opt_list = [
            ['pros_sel','','sd_050'],                                                                       #  0
            ['result_image', 'results/image_050.png', 'path to output image file'],                         #  1
            ['cpu', 'store_true', 'cpu mode'],                                                              #  2
            ['log', '3', 'Log level(-1/0/1/2/3/4/5) Default value is \'3\''],                               #  3
            ['model_dir', '/StabilityMatrix/Data/Models/StableDiffusion', 'Model directory'],               #  4
            ['model_path', DEF_MODEL_BASE, 'Model Path'],                                                   #  5
            ['image_path', DEF_IMAGE_PATH, 'Sourcs image file path'],                                       #  6
            ['max_size', 0, 'image max size (0=source)'],                                                   #  7
            ['prompt', DEF_PROMPT, 'Prompt text'],                                                          #  8
            ['seed', 12345678, 'Seed parameter (-1 = rundom)'],                                             #  9
            ['width', 512, 'image size width'],                                                             # 10
            ['height', 512, 'image size height'],                                                           # 11
            ['step', 20, 'infer step'],                                                                     # 12
            ['scale', 8.5, 'gaidanse scale'],                                                               # 13
            ['strength', 0.4, 'strength value'],                                                            # 14
            ['neg_prompt', DEF_NEG_PROMPT, 'Negative Prompt text'],                                         # 15
           ]
# Intermediate image check
def image_log(pil_image, wait_s = -1):
    """Show ``pil_image`` for inspection when ``wait_s`` is zero or positive.

    With the default ``wait_s = -1`` (or any negative value) nothing happens.
    Otherwise the image is forwarded to ``sdt.image_save2`` with an empty
    ``save_path``, the display name ``'Check image'``, ``maxsize = 800`` and
    the given ``wait_s``.

    Args:
        pil_image: image object to hand to ``sdt.image_save2``.
        wait_s: forwarded as ``wait_s``; presumably a display wait in
            seconds -- confirm against ``sd_tools``.

    Returns:
        None.
    """
    # Guard clause: a negative wait disables the check entirely.
    if not wait_s >= 0:
        return
    sdt.image_save2(pil_image, save_path = '', dispname = 'Check image', maxsize = 800, wait_s = wait_s)

# Upscale an image with the Stable Diffusion upscaler
def upscale(image, prompt, device):
    """Run ``image`` through the upscale pipeline identified by ``UPSCALE_MODEL_ID``.

    The input is converted to RGB and shrunk to 128x128 before being passed to
    the pipeline. The model id names an x4 upscaler, so the result is
    presumably 512x512 -- confirm against the model card.

    Args:
        image: PIL image to upscale.
        prompt: text prompt passed to the pipeline.
        device: target device string; ``'cpu'`` loads the pipeline with its
            default dtype, anything else loads it with ``torch.float16``.

    Returns:
        The first image produced by the pipeline.
    """
    # Half precision is requested only when not running on the CPU.
    load_kwargs = {} if device == 'cpu' else {'torch_dtype': torch.float16}
    pipeline = StableDiffusionUpscalePipeline.from_pretrained(UPSCALE_MODEL_ID, **load_kwargs)
    pipeline.to(device)

    small_rgb = image.convert("RGB").resize((128, 128))
    return pipeline(prompt = prompt, image = small_rgb).images[0]

# Face detection
def face_detection(file_name, offset=20):
    """Locate faces in an image file and return padded, squared crop boxes.

    Detection uses ``face_recognition.face_locations`` with
    ``number_of_times_to_upsample = 1`` and ``FACE_RECOGNITION_MODEL_ID``.

    Args:
        file_name: path of the image file to analyse.
        offset: number of pixels by which each detected box is widened on
            every side.

    Returns:
        A pair ``(face_rects, face_org_rects)`` of equal-length lists, both
        empty when no face is found. Every element is a
        ``(left, top, right, bottom)`` tuple: ``face_org_rects`` holds the
        boxes exactly as detected, ``face_rects`` the widened and squared
        ones. The widened boxes are not clipped to the image bounds.
    """
    pixels = face_recognition.load_image_file(file_name)

    # Detect the face regions; each hit unpacks as (top, right, bottom, left).
    detections = face_recognition.face_locations(pixels, number_of_times_to_upsample = 1, model = FACE_RECOGNITION_MODEL_ID)

    padded_boxes = []
    raw_boxes = []
    for det_top, det_right, det_bottom, det_left in detections:
        raw_boxes.append((det_left, det_top, det_right, det_bottom))

        # A tight box sometimes keeps the model from recognising the face,
        # so widen the detected rectangle by `offset` on every side.
        x0, y0 = det_left - offset, det_top - offset
        x1, y1 = det_right + offset, det_bottom + offset

        # Make the box square by extending its shorter side (bottom or right edge).
        excess_width = (x1 - x0) - (y1 - y0)
        if excess_width > 0:
            y1 += excess_width
        else:
            x1 -= excess_width

        padded_boxes.append((x0, y0, x1, y1))

    return padded_boxes, raw_boxes

# Face style conversion (image-to-image)
def style_change(model_path, image, prompt, neg_prompt, guidance_scale = 9.5, strength = 0.4, seed = 0, device = 'cpu'):
    """Regenerate ``image`` with an img2img pipeline loaded from a single model file.

    Args:
        model_path: path of the model file given to ``from_single_file``.
        image: source image passed to the pipeline.
        prompt: positive prompt.
        neg_prompt: negative prompt.
        guidance_scale: forwarded to the pipeline as ``guidance_scale``.
        strength: forwarded to the pipeline as ``strength``.
        seed: seed for the ``torch.Generator`` created on ``device``.
        device: target device string; ``'cpu'`` loads the pipeline with its
            default dtype, anything else loads it with ``torch.float16``.

    Returns:
        The first image produced by the pipeline.

    Note:
        No step count is passed, so the pipeline's own default applies.
    """
    # Half precision is requested only when not running on the CPU.
    load_kwargs = {} if device == 'cpu' else {'torch_dtype': torch.float16}
    pipeline = StableDiffusionImg2ImgPipeline.from_single_file(model_path, **load_kwargs)
    pipeline.to(device)

    rng = torch.Generator(device).manual_seed(seed)
    call_kwargs = {
        'prompt': prompt,
        'negative_prompt': neg_prompt,
        'image': image,
        'guidance_scale': guidance_scale,
        'strength': strength,
        'generator': rng,
    }
    with torch.autocast(device):
        restyled = pipeline(**call_kwargs).images[0]

    return restyled

# Mask creation
def create_mask(image_width, image_height, rect_width, rect_height, rect_x, rect_y, offset = 10):
    """Build a black RGB image with a white frame around the given rectangle.

    The frame is drawn as a white rectangle enlarged by ``offset`` on every
    side, followed by a black rectangle shrunk by ``offset`` on every side,
    leaving a white band that straddles the rectangle's border.

    Args:
        image_width: width of the mask image.
        image_height: height of the mask image.
        rect_width: width of the rectangle.
        rect_height: height of the rectangle.
        rect_x: left coordinate of the rectangle.
        rect_y: top coordinate of the rectangle.
        offset: half-width of the white band.

    Returns:
        The mask as a new PIL image of size ``(image_width, image_height)``.
    """
    canvas = Image.new('RGB', (image_width, image_height), 'black')             # start from an all-black image
    pen = ImageDraw.Draw(canvas)

    far_x = rect_x + rect_width
    far_y = rect_y + rect_height

    # First pass: white box grown by `offset`; second pass: black box shrunk by `offset`.
    for grow, colour in ((offset, 'white'), (-offset, 'black')):
        pen.rectangle([rect_x - grow, rect_y - grow, far_x + grow, far_y + grow], fill = colour)

    return canvas

# Fix the face in an image
def face_style_change(model_path, file_name, prompt, neg_prompt, guidance_scale = 9.5, strength = 0.3, seed = 0, device = 'cpu', bUp = False):
    """Regenerate the first detected face of an image and paste it back.

    The face is located with ``face_detection`` (offset 30), cropped, enlarged
    to 512x512 (plain resize, or ``upscale`` when ``bUp`` is true), re-rendered
    with ``style_change``, shrunk back to the crop size and pasted over a copy
    of the original image. Only the first detected face is processed.

    Args:
        model_path: model file forwarded to ``style_change``.
        file_name: path of the source image.
        prompt: positive prompt for ``style_change``.
        neg_prompt: negative prompt for ``style_change``.
        guidance_scale: forwarded to ``style_change``.
        strength: forwarded to ``style_change``.
        seed: forwarded to ``style_change``.
        device: device string forwarded to ``upscale`` / ``style_change``.
        bUp: when true, enlarge the crop with ``upscale`` instead of ``resize``.

    Returns:
        A tuple ``(init_img, new_img, mask)``: the source image with a blue
        rectangle drawn around the face as originally detected, the copy with
        the regenerated face pasted in, and the edge mask from ``create_mask``.

    Raises:
        IndexError: if no face is detected in ``file_name``. The type matches
            what the unguarded ``face_rects[0]`` lookup used to raise; only
            the message is new.
    """
    face_rects, face_org_rects = face_detection(file_name, offset = 30)
    if not face_rects:
        # Fail with a clear message instead of a bare "list index out of range".
        raise IndexError(f'no face detected in image: {file_name}')
    face_rect = face_rects[0]
    face_org_rect = face_org_rects[0]

    left, top, right, bottom = face_rect
    left_org, top_org, right_org, bottom_org = face_org_rect
    w = right - left
    h = bottom - top

    # Cut the face region out of a copy of the original image
    init_img = Image.open(file_name)
    new_img = init_img.copy()
    face = new_img.crop(face_rect)

    # Enlarge the face
    if bUp:
        upscaled_face = upscale(face, prompt='face', device = device)           # upscale
    else:
        upscaled_face = face.resize((512, 512))                                 # resize
    image_log(upscaled_face, 1)

    # Style change
    new_face = style_change(model_path, upscaled_face, prompt, neg_prompt, guidance_scale = guidance_scale, strength = strength, seed = seed, device = device)
    image_log(new_face, 1)

    # Paste back into the image at the crop position
    new_img.paste(new_face.resize((w, h)), (left, top))

    # Mark the originally detected face region on the source image
    draw = ImageDraw.Draw(init_img)
    rectcolor = (0, 0, 255)                                                     # rectangle colour (RGB)
    linewidth = 2                                                               # line thickness
    draw.rectangle([(left_org, top_org), (right_org, bottom_org)], outline=rectcolor, width=linewidth)

    # Build the mask used to repair the pasted edge.
    # create_mask expects (rect_width, rect_height) in that order, so pass w before h.
    image_width, image_height = new_img.size
    mask = create_mask(image_width, image_height, w, h, left, top, offset=30)

    return init_img, new_img, mask

# Image generation
def image_generation(model_path, image_path, prompt, seed, num_inference_steps=20, width=512, height=512, guidance_scale=8.5, strength=0.4, neg_prompt = '', device='cpu'):
    """Run the face fix on ``image_path`` and save the marked source image and the mask.

    The working folder from ``sdt.get_work_path`` is created if missing, the
    output paths come from ``sdt.get_source_mask_path``, and the actual work is
    delegated to ``face_style_change`` with ``bUp = False``.

    Args:
        model_path: model file forwarded to ``face_style_change``.
        image_path: path of the source image.
        prompt: positive prompt.
        seed: random seed.
        num_inference_steps: accepted but not used in this function.
        width: accepted but not used in this function.
        height: accepted but not used in this function.
        guidance_scale: forwarded to ``face_style_change``.
        strength: forwarded to ``face_style_change``.
        neg_prompt: negative prompt.
        device: device string forwarded to ``face_style_change``.

    Returns:
        The image with the regenerated face pasted in.

    Note:
        ``logger`` is not a parameter; it is read from module scope.
        NOTE(review): it is only assigned in the ``__main__`` block, so
        importing this module and calling the function would fail -- confirm.
    """
    os.makedirs(sdt.get_work_path(logger), exist_ok = True)                     # create the working folder
    src_path, mask_path = sdt.get_source_mask_path(image_path, logger)          # source / mask image paths

    marked_img, fixed_img, edge_mask = face_style_change(
        model_path, image_path, prompt, neg_prompt,
        guidance_scale = guidance_scale, strength = strength,
        seed = seed, device = device, bUp = False)

    # Save the marked source first, then the mask (the mask gets an empty display name).
    for picture, out_path, caption in ((marked_img, src_path, src_path), (edge_mask, mask_path, '')):
        sdt.image_save2(picture, save_path = out_path, dispname = caption, maxsize = 800, wait_s = 1)

    return fixed_img

# ** main function **
def main(opt, logger = None):
    """Read the run parameters from ``opt``, generate the fixed image and save it.

    Args:
        opt: parsed command-line options, passed to the ``sdt._get_*`` helpers.
        logger: logger passed to the helpers; when ``None`` the final
            result-file message is skipped instead of failing on ``None.info``.

    Returns:
        None.
    """
    # Parameter setup
    device = sdt._get_device(opt, logger)
    result_image_path = sdt._get_result_image_path(opt, logger)
    result_path = sdt._get_result_path(opt, logger)
    prompt = sdt._get_prompt(opt, logger)
    # Not used below; the call is kept in case the helper has side effects -- TODO confirm
    src_image = sdt._get_source_image(opt, logger)
    model_path = sdt._get_model_path(opt, logger)
    height, width = sdt._get_image_size(opt, logger)
    seed = sdt._get_seed_value(opt, logger)
    num_inference_steps = sdt._get_inference_steps(opt, logger)
    guidance_scale = sdt._get_guidance_scale(opt, logger)
    strength = sdt._get_strength(opt, logger)
    neg_prompt = sdt._get_negative_prompt(opt, logger)
    image_path = sdt._get_source_image_path(opt, logger)

    # Output folder
    os.makedirs(result_path, exist_ok = True)

    # Image generation
    image = image_generation(model_path, image_path, prompt, seed, num_inference_steps, width, height, guidance_scale, strength, neg_prompt = neg_prompt, device = device)

    sdt.image_save2(image, result_image_path, result_image_path)
    # The closing brace must be the ASCII '}'; a full-width one is a syntax error.
    if logger is not None:
        logger.info(f'result_file: {result_image_path}')


# Entry point of the script (execution starts here)
if __name__ == "__main__":
    # Build the argument parser from opt_list and parse the command line
    parser = sdt.parse_args(None, opt_list)
    opt = parser.parse_args()
    # Return value is discarded here; main() calls the helper again with the logger
    sdt._get_device(opt)
    sdt.display_info(opt, title)

    # Application log setup: the logger is named after this file without its extension.
    # `logger` becomes a module-level name, which image_generation() reads directly.
    module = os.path.basename(__file__)
    module_name = os.path.splitext(module)[0]
    logger = my_logging.get_module_logger_sel(module_name, int(opt.log))

    main(opt, logger)

    logger.info('\nFinished.\n')

　※ 上記ソースコードは表示の都合上、半角コード '}' が全角 '｝'になっていることに注意

忘備録 †

更新履歴 †

2025/07/26 初版

参考資料 †

Image-to-Image/ControlNet/IP-Adapter

Diffusers

Inpainting
- GitHub: Inpainting
- ドキュメント版 Inpainting に沿って試してみる

face-recognition
- Pip: face-recognition 1.3.0
- GitHub: Face Recognition

Programming

書籍など
- 日経ソフトウエア 2025年7月号「ローカル生成AIプログラミング」
- Interface 2025年3月号「画像による異常検出＆ローカルLLM作り - 仕事のための生成AI」