# -*- coding: utf-8 -*-
##--------------------------------------------------
## sd_test basic tools Ver 0.05
##
## 2025.07.17 Masahiro Izutsu
##--------------------------------------------------
## sd_tools.py
## Ver 0.00 2025.07.17 Trial version
## Ver 0.05 2025.07.20 sd_100.py 統合版対応
# Title string (the test routine passes it to display_info for the banner)
title = 'sd_test basic tools Ver 0.05'
# ANSI color escape codes for terminal output
GREEN = '\033[1;32m'
RED = '\033[1;31m'
NOCOLOR = '\033[0m'  # resets color/attributes
YELLOW = '\033[1;33m'
CYAN = '\033[1;36m'
BLUE = '\033[1;34m'
# インポート&初期設定
import os
import re
import glob
import random
import argparse
import numpy as np
import cv2
from PIL import Image
import torch
import my_imagetool
# Constant definitions
IMAGES_WORK_DIR = 'images_work'  # folder name appended to the current directory by get_work_path()
## ----- Command-line input --------------
# Default values
def_result_image = ''
def_cpu = 'store_true'  # argparse action name rather than a value; parse_args() registers it as a flag
def_log = '3'
def_model_dir = ''
def_model_path = ''
def_ctrl_model_dir = ''
def_ctrl_model_path = ''
def_image_path = ''
def_control_image_path = ''
def_max_size = 0
def_prompt = ''
def_seed = -1
def_width = 512
def_height = 512
def_step = 30
def_scale = 7.0
def_image_scale = 1.5
def_cc_scale = 1.0
def_strength = 0.5
def_neg_prompt = ' '  # a single space, not empty: is_option() treats only '' as unset
# Command-line options (argparse): [name, default value or action, help text]
opt_list = [
    ['pros_sel','','sd_tools'], # 0
    ['result_image', def_result_image, 'path to output image file'], # 1
    ['cpu', def_cpu, 'cpu mode'], # 2
    ['log', def_log, 'Log level(-1/0/1/2/3/4/5) Default value is \'3\''], # 3
    ['model_dir', def_model_dir, 'Model directory'], # 4
    ['model_path', def_model_path, 'Model Path'], # 5
    ['ctrl_model_dir', def_ctrl_model_dir, 'ControlNet Model directory'], # 6
    ['ctrl_model_path', def_ctrl_model_path, 'ControlNet Model Path'], # 7
    ['image_path', def_image_path, 'Sourcs image file path'], # 8
    ['ctrl_image_path', '', 'Control image file path'], # 9
    ['max_size', def_max_size, 'image max size (0=source)'], # 10
    ['prompt', def_prompt, 'Prompt text'], # 11
    ['seed', def_seed, 'Seed parameter (-1 = rundom)'], # 12
    ['width', def_width, 'image size width'], # 13
    ['height', def_height, 'image size height'], # 14
    ['step', def_step, 'infer step'], # 15
    ['scale', def_scale, 'gaidanse scale'], # 16
    ['image_scale', def_image_scale, 'image gaidanse scale'], # 17
    ['cc_scale', def_cc_scale, 'controlnet conditioning scale'], # 18
    ['strength', def_strength, 'strength value'], # 19
    ['neg_prompt', def_neg_prompt, 'Negative Prompt text'], # 20
]
# Register command-line options on an argparse parser
def parse_args(parser, pars_list):
    """Add every option described in ``pars_list`` to an argparse parser.

    Args:
        parser: Existing ``argparse.ArgumentParser``, or ``None`` to have a
            new one created.
        pars_list: Iterable of ``[name, default, help]`` entries.  When the
            second element is ``'store_true'`` or ``'store_false'`` it is
            used as the argparse action instead of as a default value.

    Returns:
        The parser, with one ``--name`` option added per entry.
    """
    if parser is None:
        parser = argparse.ArgumentParser()
    flag_actions = ('store_true', 'store_false')
    for entry in pars_list:
        name, default = entry[0], entry[1]
        option = '--' + name
        if default in flag_actions:
            # Boolean switch: the "default" slot holds the action name
            parser.add_argument(option, dest=name, action=default, help=entry[2])
        else:
            parser.add_argument(option, default=default, help=entry[2])
    return parser
# Change a value in the option list
def change_option(opt_list, key, val):
    """Replace the default value of every entry whose name equals ``key``.

    The entries of ``opt_list`` are modified in place.

    Args:
        opt_list: List of ``[name, default, help]`` entries.
        key: Option name to look for.
        val: New value stored in the entry's second slot.

    Returns:
        ``True`` if at least one entry matched, otherwise ``False``.
    """
    matching = [entry for entry in opt_list if entry[0] == key]
    for entry in matching:
        entry[1] = val
    return bool(matching)
def _get_process_name(opt_list):
    """Return the third field of the first entry in ``opt_list``."""
    first_entry = opt_list[0]
    return first_entry[2]
def _show_opt_list(opt_list):
    """Print every entry of ``opt_list`` on its own line."""
    for entry in opt_list:
        print(entry)
# Display basic information
def display_info(opt, title):
    """Print an optional start-up banner followed by the non-empty options.

    Args:
        opt: Parsed options (``argparse.Namespace`` or any object accepted
            by ``vars``).
        title: Banner text.  Pass ``''`` to suppress both the banner and
            the trailing blank line.
    """
    show_banner = title != ''
    if show_banner:
        print('\n' + GREEN + title + ': Starting application...' + '\n' + NOCOLOR)
    # vars() turns the Namespace into a plain dict of option name -> value
    for name, value in vars(opt).items():
        if value != '':
            # Pad/truncate the option name to a fixed 24-character column
            label = name.ljust(24)[:24]
            print(f' --{YELLOW}{label} : {NOCOLOR} {value}')
    if show_banner:
        print(' ')
# Check a command-line option
def is_option(opt, key):
    """Return the value of option ``key``, or ``None`` when it is unset.

    An option counts as unset when ``opt`` has no such attribute or when
    its value equals the empty string.

    Args:
        opt: Parsed options (``argparse.Namespace`` or any object accepted
            by ``vars``).
        key: Option name.
    """
    value = vars(opt).get(key)
    return None if value == '' else value
## ----- 画像関連 ----------------------
# Image conversion: PIL -> OpenCV
def pil2cv(image):
    """Convert a PIL image to a uint8 array in OpenCV channel order.

    A 2-D (monochrome) array is returned as is, a 3-channel array is
    converted RGB -> BGR and a 4-channel array RGBA -> BGRA.  Any other
    channel count is returned unchanged.
    """
    pixels = np.array(image, dtype=np.uint8)
    if pixels.ndim == 2:  # monochrome
        return pixels
    channels = pixels.shape[2]
    if channels == 3:  # color
        return cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
    if channels == 4:  # with alpha
        return cv2.cvtColor(pixels, cv2.COLOR_RGBA2BGRA)
    return pixels
def pil2cv_np(image):
    """Convert a PIL image to OpenCV channel order using NumPy indexing only.

    A 2-D (monochrome) array is returned as is, a 3-channel array has its
    channel axis reversed (RGB -> BGR) and a 4-channel array is reordered
    RGBA -> BGRA.  Any other channel count is returned unchanged.
    """
    pixels = np.array(image, dtype=np.uint8)
    if pixels.ndim != 2:
        channels = pixels.shape[2]
        if channels == 3:  # color
            pixels = pixels[:, :, ::-1]
        elif channels == 4:  # with alpha: swap R and B, keep A last
            pixels = pixels[:, :, [2, 1, 0, 3]]
    return pixels
# Image conversion: OpenCV -> PIL
def cv2pil(image):
    """Convert an OpenCV array to a PIL image.

    The input is copied first.  A 2-D (monochrome) array is used as is, a
    3-channel array is converted BGR -> RGB and a 4-channel array
    BGRA -> RGBA; any other channel count is passed through unchanged.
    """
    pixels = image.copy()
    if pixels.ndim != 2:
        channels = pixels.shape[2]
        if channels == 3:  # color
            pixels = cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)
        elif channels == 4:  # with alpha
            pixels = cv2.cvtColor(pixels, cv2.COLOR_BGRA2RGBA)
    return Image.fromarray(pixels)
def cv2pil_np(image):
    """Convert an OpenCV array to a PIL image using NumPy indexing only.

    This is the counterpart of ``pil2cv_np``: channels are reordered with
    NumPy instead of ``cv2.cvtColor``.  A 2-D (monochrome) array is copied
    as is, a 3-channel array is reordered BGR -> RGB and a 4-channel array
    BGRA -> RGBA; any other channel count is copied unchanged.

    Args:
        image: NumPy array in OpenCV layout (H x W or H x W x C).

    Returns:
        A ``PIL.Image.Image`` built from a fresh copy of the pixel data.
    """
    if image.ndim == 2:  # monochrome
        pixels = image.copy()
    elif image.shape[2] == 3:  # color
        # The reversed view has negative strides; materialize a contiguous copy
        pixels = np.ascontiguousarray(image[:, :, ::-1])
    elif image.shape[2] == 4:  # with alpha: swap B and R, keep A last
        pixels = np.ascontiguousarray(image[:, :, [2, 1, 0, 3]])
    else:
        pixels = image.copy()
    return Image.fromarray(pixels)
# Display an image file
def image_disp(image_path = '', dispname = '', maxsize = 800, wait_s = 0):
    """Read ``image_path`` with OpenCV and pass it to ``my_imagetool.image_disp``.

    ``dispf`` is set only when ``dispname`` is non-empty, and ``save_path``
    is always ``''``.

    Args:
        image_path: Path of the image file to read.
        dispname: Window name; ``''`` turns the display flag off.
        maxsize: Forwarded to ``my_imagetool.image_disp``.
        wait_s: Forwarded to ``my_imagetool.image_disp``.
    """
    my_imagetool.image_disp(
        cv2.imread(image_path),
        winname = dispname,
        dispf = (dispname != ''),
        save_path = '',
        maxsize = maxsize,
        wait_s = wait_s,
    )
# OpenCV image: save and display the result
def image_save(image, save_path = '', dispname = '', maxsize = 800, wait_s = 0):
    """Pass an OpenCV image to ``my_imagetool.image_disp`` with a save path.

    ``dispf`` is set only when ``dispname`` is non-empty.

    Args:
        image: OpenCV image array.
        save_path: Forwarded as ``save_path``.
        dispname: Window name; ``''`` turns the display flag off.
        maxsize: Forwarded to ``my_imagetool.image_disp``.
        wait_s: Forwarded to ``my_imagetool.image_disp``.
    """
    my_imagetool.image_disp(
        image,
        winname = dispname,
        dispf = (dispname != ''),
        save_path = save_path,
        maxsize = maxsize,
        wait_s = wait_s,
    )
# PIL image: save and display the result
def image_save2(image, save_path = '', dispname = '', maxsize = 800, wait_s = 0):
    """Convert a PIL image with ``pil2cv`` and pass it to ``my_imagetool.image_disp``.

    ``dispf`` is set only when ``dispname`` is non-empty.

    Args:
        image: PIL image.
        save_path: Forwarded as ``save_path``.
        dispname: Window name; ``''`` turns the display flag off.
        maxsize: Forwarded to ``my_imagetool.image_disp``.
        wait_s: Forwarded to ``my_imagetool.image_disp``.
    """
    cv_image = pil2cv(image)
    my_imagetool.image_disp(
        cv_image,
        winname = dispname,
        dispf = (dispname != ''),
        save_path = save_path,
        maxsize = maxsize,
        wait_s = wait_s,
    )
# Build the control image used for inpainting
def make_inpaint_condition(image, image_mask):
    """Build the inpaint conditioning tensor from an image and its mask.

    Args:
        image: Source image; ``convert("RGB")`` is called on it.
        image_mask: Mask image; ``convert("L")`` is called on it.  Pixels
            whose value exceeds 0.5 after scaling to [0, 1] are masked.

    Returns:
        A float32 ``torch.Tensor`` of shape ``(1, 3, H, W)`` holding the
        image scaled to [0, 1], with every masked pixel set to ``-1.0``.

    Raises:
        ValueError: If the image and the mask differ in height or width.
    """
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
    # Compare height AND width; slicing [0:1] would only check the height and
    # let a width mismatch fail later inside the boolean indexing below.
    if image.shape[0:2] != image_mask.shape[0:2]:
        raise ValueError("image and image_mask must have the same image size")
    image[image_mask > 0.5] = -1.0  # set as masked pixel
    # HWC -> NCHW with a leading batch axis of 1
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
    return torch.from_numpy(image)
## -------------------------------------
# Identify the model family (assumes SD1.5 models live inside an "SD1.5/" folder)
# in:  model  model name
# out: bool   True = SD1.5, False = SDXL
def is_sd15(model):
    """Return ``True`` when ``model`` contains the text ``'SD1.5'``."""
    marker = 'SD1.5'
    return marker in model
# Check whether a path names a pose file
def is_pose(filepath):
    """Return ``True`` when the path, minus its extension, ends in ``'_pose'``."""
    suffix = '_pose'
    stem, _ext = os.path.splitext(filepath)
    return stem[-len(suffix):] == suffix
# Get a sequentially numbered file name within a folder (searched as header + sequence number)
# in:  filename   header name + extension
#      seq_digit  number of digits in the sequence number
#      ex         extra string appended after the sequence number
# out: new file name
def make_filename_by_seq(dirname, filename, seq_digit = 3, ex = ''):
    """Return the next free ``<header>_<seq>_<ex><ext>`` name for ``dirname``.

    Existing files named ``<header>_<digits>...<ext>`` are scanned, and the
    first ``seq_digit`` characters after the underscore are read as the
    sequence number.  The result uses the largest number found plus one,
    or 0 when no file matches.

    Args:
        dirname: Folder to scan.
        filename: Header name plus extension, e.g. ``'sd.png'``.
        seq_digit: Zero-padded width of the sequence number.
        ex: Extra text placed after the sequence number.  The separating
            underscore is emitted even when ``ex`` is empty.

    Returns:
        The new file name (without the directory part).
    """
    stem, ext = os.path.splitext(filename)
    # Escape the name parts so characters such as '+', '(' or '.' are matched
    # literally, and require at least one digit so int() never sees ''.
    seq_pattern = re.compile(re.escape(stem) + '_([0-9]+)' + re.escape(ext))
    candidates = glob.glob(
        os.path.join(
            glob.escape(dirname),
            f"{glob.escape(stem)}_[0-9]*{glob.escape(ext)}",
        )
    )
    head_len = len(stem) + seq_digit + 1  # header + '_' + sequence digits
    max_seq = -1
    for path in candidates:
        base, _ = os.path.splitext(os.path.basename(path))
        # Compare only the header + sequence part; anything after it is ignored
        found = seq_pattern.match(base[:head_len] + ext)
        if found:
            max_seq = max(max_seq, int(found.group(1)))
    return f"{stem}_{max_seq + 1:0{seq_digit}}_{ex}{ext}"
# Get the seed value from the file name of a generated image
def path2seed(filepath):
    """Return the integer after the last ``'_'`` of the file's base name.

    The extension is dropped first.  When the name contains no underscore
    the whole name is converted.

    Raises:
        ValueError: If the extracted text is not an integer.
    """
    stem = os.path.splitext(os.path.basename(filepath))[0]
    seed_text = stem.rpartition('_')[2]
    return int(seed_text)
# 経過時間(秒)を hh:mm:ssフォーマットに変換
def elapsed_time_str(seconds):
seconds = int(seconds + 0.5) # 秒数を四捨五入
h = seconds // 3600 # 時の取得
m = (seconds - h * 3600) // 60 # 分の取得
s = seconds - h * 3600 - m * 60 # 秒の取得
return f"{h:02}:{m:02}:{s:02}" # hh:mm:ss形式の文字列で返す
# Get a random seed value
def get_random_seed_value(n):
    """Return ``int(n)``, or a random 32-bit seed when that value is ``-1``.

    The random seed is drawn from ``0 .. 2**32 - 1`` inclusive.
    """
    requested = int(n)
    if requested != -1:
        return requested
    return random.randint(0, 2**32-1)
# Translate Japanese to English
def trans_jp2en(str):
    """Translate ``str`` from Japanese to English when it is not pure ASCII.

    Pure-ASCII text is returned unchanged and does not need the
    third-party ``translate`` package.

    Args:
        str: Text to translate.  The name shadows the builtin but is kept
            so existing keyword callers continue to work.

    Returns:
        The translated text, or the input itself when it is ASCII only.
    """
    # ASCII-only text has the same length in characters and in UTF-8 bytes
    if len(str) == len(str.encode('utf-8')):
        return str
    # Imported lazily so ASCII prompts work without the package installed
    from translate import Translator
    return Translator('en', 'ja').translate(str)  # Japanese -> English
# Release cached device memory
def device_empty_cache(device):
    """Empty the torch cache for ``'cuda'`` or ``'mps'``; do nothing otherwise."""
    if device == 'cuda':
        torch.cuda.empty_cache()
        return
    if device == 'mps':
        torch.mps.empty_cache()
# Get the work folder name
def get_work_path(logger = None):
    """Return ``<current directory>/<IMAGES_WORK_DIR>`` with ``/`` separators.

    Args:
        logger: Optional logger; the path is logged at debug level.
    """
    current_dir = os.getcwd().replace(os.sep, '/')
    work_path = current_dir + '/' + IMAGES_WORK_DIR
    log_debug(f'work_path: {work_path}', logger)
    return work_path
# Get the source and mask image file names
def get_source_mask_path(image_path, logger = None):
    """Return the ``_src`` and ``_mask`` file paths inside the work folder.

    Both names keep the base name and extension of ``image_path``.  The
    work folder lookup is done without the logger, so only the two
    resulting paths are logged.

    Args:
        image_path: Path whose base name is reused.
        logger: Optional logger; both paths are logged at debug level.

    Returns:
        Tuple ``(src_path, mask_path)``.
    """
    stem, ext = os.path.splitext(get_work_path() + '/' + os.path.basename(image_path))
    src_path = stem + '_src' + ext
    mask_path = stem + '_mask' + ext
    for label, path in (('src_path', src_path), ('mask_path', mask_path)):
        log_debug(f'{label}: {path}', logger)
    return src_path, mask_path
# Get the pose image file name
def get_pose_path(image_path, logger = None):
    """Return the ``_pose`` file path inside the work folder.

    The name keeps the base name and extension of ``image_path``.  The
    work folder lookup is done without the logger.

    Args:
        image_path: Path whose base name is reused.
        logger: Optional logger; the path is logged at debug level.
    """
    stem, ext = os.path.splitext(get_work_path() + '/' + os.path.basename(image_path))
    pose_path = stem + '_pose' + ext
    log_debug(f'pose_path: {pose_path}', logger)
    return pose_path
## ----- diffusers parameter -----------
# Log output
def log_debug(msg, logger):
    """Send ``msg`` to ``logger.debug``; do nothing when ``logger`` is ``None``."""
    if logger is None:
        return
    logger.debug(msg)
def log_info(msg, logger):
    """Send ``msg`` to ``logger.info``; do nothing when ``logger`` is ``None``."""
    if logger is None:
        return
    logger.info(msg)
# -- device --
def _get_device(opt, logger = None):
    """Return ``'cpu'`` or ``'cuda'`` for the current run.

    When CPU mode was not requested but CUDA is unavailable, ``opt.cpu``
    is set to ``True`` so the fallback is recorded on ``opt``.

    Args:
        opt: Parsed options with a ``cpu`` attribute.
        logger: Optional logger; the device is logged at debug level.
    """
    cuda_available = torch.cuda.is_available()  # check for a GPU
    if not (opt.cpu or cuda_available):
        opt.cpu = True
    if opt.cpu:
        device = 'cpu'
    else:
        device = 'cuda'
    log_debug(f'device: {device}', logger)
    return device
# -- result_image_path --
def _get_result_image_path(opt, logger = None):
    """Return ``opt.result_image`` unchanged, logging it at debug level."""
    path = opt.result_image
    log_debug(f'result_image_path: {path}', logger)
    return path
# -- result_path --
def _get_result_path(opt, logger = None):
    """Return the directory part of ``opt.result_image``, logging it at debug level."""
    directory = os.path.dirname(opt.result_image)
    log_debug(f'result_path: {directory}', logger)
    return directory
# -- result_file --
def _get_result_file(opt, logger = None):
    """Return the file-name part of ``opt.result_image``, logging it at debug level."""
    file_name = os.path.basename(opt.result_image)
    log_debug(f'result_file: {file_name}', logger)
    return file_name
# -- prompt --
def _get_prompt(opt, logger = None):
    """Return the prompt, passed through ``trans_jp2en`` when it is set.

    Falls back to ``def_prompt`` when the option is unset.  The result is
    logged at info level.
    """
    if is_option(opt, 'prompt') is None:
        prompt = def_prompt
    else:
        prompt = trans_jp2en(opt.prompt)
    log_info(f'prompt: {prompt}', logger)
    return prompt
# -- negative prompt --
def _get_negative_prompt(opt, logger = None):
    """Return the negative prompt, passed through ``trans_jp2en`` when it is set.

    Falls back to ``def_neg_prompt`` when the option is unset.  The result
    is logged at info level.
    """
    if is_option(opt, 'neg_prompt') is None:
        neg_prompt = def_neg_prompt
    else:
        neg_prompt = trans_jp2en(opt.neg_prompt)
    log_info(f'neg_prompt: {neg_prompt}', logger)
    return neg_prompt
# -- model_dir --
def _get_model_dir(opt, logger = None):
    """Return the model directory option, or ``def_model_dir`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'model_dir') is None:
        model_dir = def_model_dir
    else:
        model_dir = opt.model_dir
    log_debug(f'model_dir: {model_dir}', logger)
    return model_dir
# -- model_path --
def _get_model_path(opt, logger = None):
    """Return the model path, prefixed with the model directory when one is set.

    The path option falls back to ``def_model_path`` when unset.  With an
    empty model directory the path is returned as is; otherwise the two
    are joined with ``'/'``.  The result is logged at debug level.
    """
    model_dir = _get_model_dir(opt, logger)
    if is_option(opt, 'model_path') is None:
        relative = def_model_path
    else:
        relative = opt.model_path
    if model_dir == '':
        model_path = relative
    else:
        model_path = model_dir + '/' + relative
    log_debug(f'model_path: {model_path}', logger)
    return model_path
# -- controlnet model_dir --
def _get_controlnet_model_dir(opt, logger = None):
    """Return the ControlNet model directory, or ``def_ctrl_model_dir`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'ctrl_model_dir') is None:
        ctrl_model_dir = def_ctrl_model_dir
    else:
        ctrl_model_dir = opt.ctrl_model_dir
    log_debug(f'ctrl_model_dir: {ctrl_model_dir}', logger)
    return ctrl_model_dir
# -- controlnet model_path --
def _get_controlnet_model_path(opt, logger = None):
    """Return the ControlNet model path, prefixed with its directory when set.

    The path option falls back to ``def_ctrl_model_path`` when unset.
    With an empty directory the path is returned as is; otherwise the two
    are joined with ``'/'``.  The result is logged at debug level.
    """
    ctrl_model_dir = _get_controlnet_model_dir(opt, logger)
    if is_option(opt, 'ctrl_model_path') is None:
        relative = def_ctrl_model_path
    else:
        relative = opt.ctrl_model_path
    if ctrl_model_dir == '':
        ctrl_model_path = relative
    else:
        ctrl_model_path = ctrl_model_dir + '/' + relative
    log_debug(f'controlnet model_path: {ctrl_model_path}', logger)
    return ctrl_model_path
# -- source image path --
def _get_source_image_path(opt, logger = None):
    """Return the source image path option, or ``def_image_path`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'image_path') is None:
        image_path = def_image_path
    else:
        image_path = opt.image_path
    log_debug(f'image_path: {image_path}', logger)
    return image_path
# -- source image --
def _get_source_image(opt, logger = None):
    """Load the source image via ``_get_resize_image`` and return it.

    The path falls back to ``def_image_path`` and the size limit to
    ``def_max_size`` when the respective option is unset.  The path is
    logged at debug level after the image has been loaded.
    """
    if is_option(opt, 'image_path') is None:
        image_path = def_image_path
    else:
        image_path = opt.image_path
    if is_option(opt, 'max_size') is None:
        max_size = def_max_size
    else:
        max_size = int(opt.max_size)
    image = _get_resize_image(image_path, max_size, logger)
    log_debug(f'image_path: {image_path}', logger)
    return image
# -- control image path --
def _get_control_image_path(opt, logger = None):
    """Return the control image path option, or ``def_control_image_path`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'ctrl_image_path') is None:
        ctrl_image_path = def_control_image_path
    else:
        ctrl_image_path = opt.ctrl_image_path
    log_debug(f'ctrl_image_path: {ctrl_image_path}', logger)
    return ctrl_image_path
# -- control source image --
def _get_control_image(opt, logger = None):
    """Load the control image via ``_get_resize_image`` and return it.

    The path falls back to ``def_control_image_path`` and the size limit
    to ``def_max_size`` when the respective option is unset.  The path is
    logged at debug level after the image has been loaded.

    Args:
        opt: Parsed options (``ctrl_image_path`` and ``max_size`` are read).
        logger: Optional logger.

    Returns:
        The image returned by ``_get_resize_image``.
    """
    # The module-level default is named def_control_image_path; the former
    # spelling def_ctrl_image_path is undefined and raised NameError here.
    if is_option(opt, 'ctrl_image_path') is None:
        ctrl_image_path = def_control_image_path
    else:
        ctrl_image_path = opt.ctrl_image_path
    if is_option(opt, 'max_size') is None:
        max_size = def_max_size
    else:
        max_size = int(opt.max_size)
    image = _get_resize_image(ctrl_image_path, max_size, logger)
    log_debug(f'ctrl_image_path: {ctrl_image_path}', logger)
    return image
# -- resize image --
def _get_resize_image(image_path, max_size, logger = None):
    """Open an image and resize it when ``my_imagetool.check_size`` says so.

    Args:
        image_path: File opened with ``PIL.Image.open``.
        max_size: Passed to ``my_imagetool.check_size`` as ``maxsize``.
        logger: Optional logger; the final width and height are logged at
            debug level.

    Returns:
        The opened image, resized with bicubic resampling when the flag
        returned by ``check_size`` is true.
    """
    image = Image.open(image_path)
    width, height = image.size
    # NOTE(review): check_size presumably returns (resize needed, target
    # height, target width) - confirm against my_imagetool.
    needs_resize, height, width = my_imagetool.check_size(height, width, maxsize = max_size)
    if needs_resize:
        image = image.resize((width, height), resample=Image.BICUBIC)
    log_debug(f'image size: width = {width}, height = {height}', logger)
    return image
# -- height, width --
def _get_image_size(opt, logger = None):
    """Return the output size as ``(height, width)`` integers.

    Each value falls back to ``def_height`` / ``def_width`` when its
    option is unset.  Both are logged at info level.
    """
    if is_option(opt, 'width') is None:
        width = def_width
    else:
        width = int(opt.width)
    if is_option(opt, 'height') is None:
        height = def_height
    else:
        height = int(opt.height)
    log_info(f'width: {width}, height: {height}', logger)
    return height, width
# -- max_size --
def _get_max_size(opt, logger = None):
    """Return the ``max_size`` option as an int, or ``def_max_size`` when unset.

    The result is logged at info level.
    """
    if is_option(opt, 'max_size') is None:
        max_size = def_max_size
    else:
        max_size = int(opt.max_size)
    log_info(f'max_size: {max_size}', logger)
    return max_size
# -- seed --
def _get_seed_value(opt, logger = None):
    """Return the seed to use, drawing a random one for ``-1``.

    When the option is unset the default ``def_seed`` is used.  The
    default also goes through ``get_random_seed_value``, so a default of
    ``-1`` is randomized exactly like an explicit ``--seed -1`` instead of
    being returned as the literal ``-1``.

    Args:
        opt: Parsed options (``seed`` is read).
        logger: Optional logger; the seed is logged at info level.

    Returns:
        The integer seed.
    """
    requested = is_option(opt, 'seed')
    if requested is None:
        requested = def_seed
    seed = get_random_seed_value(requested)
    log_info(f'seed: {seed}', logger)
    return seed
# -- num_inference_steps --
def _get_inference_steps(opt, logger = None):
    """Return the ``step`` option as an int, or ``def_step`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'step') is None:
        steps = def_step
    else:
        steps = int(opt.step)
    log_debug(f'step: {steps}', logger)
    return steps
# -- guidance_scale --
def _get_guidance_scale(opt, logger = None):
    """Return the ``scale`` option as a float, or ``def_scale`` when unset.

    Args:
        opt: Parsed options (``scale`` is read).
        logger: Optional logger; the value is logged at debug level.

    Returns:
        The guidance scale.
    """
    # The option is registered as 'scale'; the previous lookup of
    # 'def_scale' never matched, so --scale was silently ignored.
    if is_option(opt, 'scale') is None:
        guidance_scale = def_scale
    else:
        guidance_scale = float(opt.scale)
    log_debug(f'scale: {guidance_scale}', logger)
    return guidance_scale
# -- image guidance_scale --
def _get_image_guidance_scale(opt, logger = None):
    """Return the ``image_scale`` option as a float, or ``def_image_scale`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'image_scale') is None:
        image_guidance_scale = def_image_scale
    else:
        image_guidance_scale = float(opt.image_scale)
    log_debug(f'image guidance scale: {image_guidance_scale}', logger)
    return image_guidance_scale
# -- strength --
def _get_strength(opt, logger = None):
    """Return the ``strength`` option as a float, or ``def_strength`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'strength') is None:
        strength = def_strength
    else:
        strength = float(opt.strength)
    log_debug(f'strength: {strength}', logger)
    return strength
# -- controlnet conditioning scale --
def _get_controlnet_conditioning_scale(opt, logger = None):
    """Return the ``cc_scale`` option as a float, or ``def_cc_scale`` when unset.

    The result is logged at debug level.
    """
    if is_option(opt, 'cc_scale') is None:
        cc_scale = def_cc_scale
    else:
        cc_scale = float(opt.cc_scale)
    log_debug(f'controlnet conditioning scale: {cc_scale}', logger)
    return cc_scale
## -------------------------------------
#-----Test routine-----
if __name__ == "__main__":
    # Image file shown by the display test below
    source_path = './images/kamo.jpg'
    # Option table for this test run; this rebinds the module-level opt_list name
    opt_list = [
        ['result_image', './sd_results/sd.png', 'path to output image file'],
        ['cpu', 'store_true', 'cpu mode'],
        ['log', '3', 'Log level(-1/0/1/2/3/4/5) Default value is \'3\''],
        ['model_dir', '/StabilityMatrix/Data/Models/StableDiffusion', 'Model directory'],
        ['model_path', 'SD1.5/v1-5-pruned-emaonly.safetensors', 'Model Path'],
        ['prompt', '満開の欄', 'Prompt text'],
        ['seed', -1, 'Seed parameter (-1 = rundom)'],
        ['width', 512, 'image size width'],
        ['height', 512, 'image size height'],
        ['step', 30, 'infer step'],
        ['scale', 7.0, 'gaidanse scale'],
    ]
    # Build a fresh parser from the table and parse the real command line
    parser = parse_args(None, opt_list)
    opt = parser.parse_args()
    display_info(opt, title)
    #------------
    print('*** 画像表示 test ***')
    # The file path doubles as the window name, so the display flag is on
    image_disp(source_path, source_path)
    # Disabled sample code, kept as a bare string literal (never executed)
    '''
# OpenCV 保存と表示
img = cv2.imread(source_path)
image_save(img, 'test1.png', dispname = title)
# PIL 保存と表示
img = Image.open(source_path)
image_save2(img, 'test2.png', dispname = title)
'''
    #------------
    print('*** diffusers parameter test ***')
    print(f'device : {_get_device(opt)}')
#------------
# NOTE: For display reasons, the half-width '}' in the source code above may appear as the full-width '}'.