caption tab modernui support

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/3755/head
Vladimir Mandic 2025-02-17 10:59:22 -05:00
parent 41426c7516
commit b6990151c4
7 changed files with 38 additions and 33 deletions

View File

@ -1,13 +1,13 @@
# Change Log for SD.Next
## Update for 2025-02-16
## Update for 2025-02-17
### TODO
- VLM ModernUI support
- CogView4
### Highlight for 2025-02-16
### Highlight for 2025-02-17
We're back with another update with over 50 commits!
- Starting with massive UI update with full [localization](https://vladmandic.github.io/sdnext-docs/Locale/) for 8 languages
@ -24,7 +24,7 @@ We're back with another update with over 50 commits!
*...and more* - see [changelog](https://github.com/vladmandic/sdnext/blob/dev/CHANGELOG.md) for full details!
### Details for 2025-02-16
### Details for 2025-02-17
- **User Interface**
- **Hints**
@ -72,6 +72,7 @@ We're back with another update with over 50 commits!
- Batch processing: VLM and CLiP
for example, can be used to caption your training dataset in one go
add option to append to captions file, can be used to run multiple captioning models in sequence
add option to run recursively on all subfolders
add progress bar
- Add additional VLM models:
[JoyTag](https://huggingface.co/fancyfeast/joytag)

@ -1 +1 @@
Subproject commit e0f6c7f8a8efc95d5013702275b8aac496c5a6fc
Subproject commit aa24200db2a42fc369adbf6fe372420c65f7500d

View File

@ -227,6 +227,8 @@ class BatchWriter:
def add(self, file, prompt):
    """Write *prompt* into a sidecar ``.txt`` caption file named after *file*.

    The caption file lives in ``self.folder`` and shares *file*'s base name.
    In append mode (``self.mode == 'a'``) a leading newline is prefixed so a
    second captioning pass lands on its own line instead of gluing onto the
    previous caption; in write mode (``'w'``) the file is overwritten.
    NOTE(review): indentation here is flattened by the diff rendering; the
    original method body is indented inside class BatchWriter.
    """
    txt_file = os.path.splitext(file)[0] + ".txt"
    if self.mode == 'a':
        # append mode: separate this caption from whatever is already in the file
        prompt = '\n' + prompt
    with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f:
        f.write(prompt)
@ -339,14 +341,15 @@ def interrogate_image(image, clip_model, blip_model, mode):
return prompt
def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append):
def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append, recursive):
files = []
if batch_files is not None:
files += [f.name for f in batch_files]
if batch_folder is not None:
files += [f.name for f in batch_folder]
if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str):
files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
from modules.files_cache import list_files
files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive))
if len(files) == 0:
shared.log.warning('Interrogate batch: type=clip no images')
return ''
@ -358,7 +361,7 @@ def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_mod
file_mode = 'w' if not append else 'a'
writer = BatchWriter(os.path.dirname(files[0]), mode=file_mode)
import rich.progress as rp
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
with pbar:
task = pbar.add_task(total=len(files), description='starting...')
for file in files:

View File

@ -457,7 +457,7 @@ def interrogate(question, prompt, image, model_name, quiet:bool=False):
return answer
def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, write, append):
def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, write, append, recursive):
class BatchWriter:
def __init__(self, folder, mode='w'):
self.folder = folder
@ -467,6 +467,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
def add(self, file, prompt):
    """Write *prompt* into a sidecar ``.txt`` caption file named after *file*.

    Duplicate of the openclip BatchWriter.add change: in append mode
    (``self.mode == 'a'``) a newline is prefixed so captions produced by a
    later model run are appended on a new line rather than concatenated;
    write mode (``'w'``) overwrites the file.
    NOTE(review): indentation here is flattened by the diff rendering; the
    original method body is indented inside the nested BatchWriter class.
    """
    txt_file = os.path.splitext(file)[0] + ".txt"
    if self.mode == 'a':
        # append mode: separate this caption from the existing file contents
        prompt = '\n' + prompt
    with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f:
        f.write(prompt)
@ -480,7 +482,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
if batch_folder is not None:
files += [f.name for f in batch_folder]
if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str):
files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
from modules.files_cache import list_files
files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive))
if len(files) == 0:
shared.log.warning('Interrogate batch: type=vlm no images')
return ''
@ -492,7 +495,7 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
orig_offload = shared.opts.interrogate_offload
shared.opts.interrogate_offload = False
import rich.progress as rp
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
with pbar:
task = pbar.add_task(total=len(files), description='starting...')
for file in files:

View File

@ -5,19 +5,16 @@ from modules.interrogate import openclip
def update_vlm_params(*args):
    """Persist the VLM caption options from the UI widgets into shared.opts.

    NOTE(review): this span is a unified diff with the +/- markers stripped —
    the first six bare assignments below are the REMOVED lines and the six
    int()/float()/bool()-casting assignments after them are their
    REPLACEMENTS; only the casting versions exist in the resulting file.
    The casts presumably normalize whatever value types the UI widgets
    deliver before the options are saved — confirm against the Gradio
    component definitions.
    """
    # unpack in the exact order the UI wires the inputs
    vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p = args
    # --- removed (pre-change) assignments: stored raw widget values ---
    shared.opts.interrogate_vlm_max_length = vlm_max_tokens
    shared.opts.interrogate_vlm_num_beams = vlm_num_beams
    shared.opts.interrogate_vlm_temperature = vlm_temperature
    shared.opts.interrogate_vlm_do_sample = vlm_do_sample
    shared.opts.interrogate_vlm_top_k = vlm_top_k
    shared.opts.interrogate_vlm_top_p = vlm_top_p
    # --- added (post-change) assignments: cast to the expected option types ---
    shared.opts.interrogate_vlm_max_length = int(vlm_max_tokens)
    shared.opts.interrogate_vlm_num_beams = int(vlm_num_beams)
    shared.opts.interrogate_vlm_temperature = float(vlm_temperature)
    shared.opts.interrogate_vlm_do_sample = bool(vlm_do_sample)
    shared.opts.interrogate_vlm_top_k = int(vlm_top_k)
    shared.opts.interrogate_vlm_top_p = float(vlm_top_p)
    # write the updated options back to the config file immediately
    shared.opts.save(shared.config_filename)
def update_clip_params(*args):
"""
"interrogate_clip_num_beams": OptionInfo(1, "CLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
"""
clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams = args
shared.opts.interrogate_clip_min_length = int(clip_min_length)
shared.opts.interrogate_clip_max_length = int(clip_max_length)
@ -31,12 +28,12 @@ def update_clip_params(*args):
def create_ui():
with gr.Row(equal_height=False, variant='compact', elem_classes="caption"):
with gr.Column(variant='compact'):
with gr.Row(equal_height=False, variant='compact', elem_classes="caption", elem_id="caption_tab"):
with gr.Column(variant='compact', elem_id='interrogate_input'):
with gr.Row():
image = gr.Image(type='pil', label="Image")
image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='interrogate_image')
with gr.Tabs(elem_id="mode_caption"):
with gr.Tab("VLM Caption"):
with gr.Tab("VLM Caption", elem_id="tab_vlm_caption"):
from modules.interrogate import vqa
with gr.Row():
vlm_question = gr.Dropdown(label="Predefined question", allow_custom_value=False, choices=vqa.vlm_prompts, value=vqa.vlm_prompts[2], elem_id='vlm_question')
@ -70,15 +67,16 @@ def create_ui():
with gr.Row():
vlm_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="vlm_save_output")
vlm_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="vlm_save_append")
vlm_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="vlm_folder_recursive")
with gr.Row(elem_id='interrogate_buttons_batch'):
btn_vlm_caption_batch = gr.Button("Batch caption", variant='primary', elem_id="btn_vlm_caption_batch")
with gr.Row():
btn_vlm_caption = gr.Button("Caption", variant='primary', elem_id="btn_vlm_caption")
with gr.Tab("CLiP Interrogate"):
with gr.Tab("CLiP Interrogate", elem_id='tab_clip_interrogate'):
with gr.Row():
clip_model = gr.Dropdown([], value=shared.opts.interrogate_clip_model, label='CLiP model', elem_id='clip_clip_model')
ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'refresh_interrogate_models')
blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='clip_blip_model')
ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'clip_refresh_models')
blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='clip_blip_model')
clip_mode = gr.Dropdown(openclip.caption_types, label='Mode', value='fast', elem_id='clip_clip_mode')
with gr.Accordion(label='Advanced options', open=False, visible=True):
with gr.Row():
@ -108,15 +106,16 @@ def create_ui():
with gr.Row():
clip_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="clip_save_output")
clip_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="clip_save_append")
clip_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="clip_folder_recursive")
with gr.Row():
btn_clip_interrogate_batch = gr.Button("Batch interrogate", variant='primary', elem_id="btn_clip_interrogate_batch")
with gr.Row():
btn_clip_interrogate_img = gr.Button("Interrogate", variant='primary', elem_id="btn_clip_interrogate_img")
btn_clip_analyze_img = gr.Button("Analyze", variant='primary', elem_id="btn_clip_analyze_img")
with gr.Column(variant='compact'):
with gr.Row():
with gr.Column(variant='compact', elem_id='interrogate_output'):
with gr.Row(elem_id='interrogate_output_prompt'):
prompt = gr.Textbox(label="Answer", lines=8, placeholder="ai generated image description")
with gr.Row():
with gr.Row(elem_id='interrogate_output_classes'):
medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5, visible=False)
artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5, visible=False)
movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5, visible=False)
@ -127,9 +126,9 @@ def create_ui():
btn_clip_interrogate_img.click(openclip.interrogate_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt])
btn_clip_analyze_img.click(openclip.analyze_image, inputs=[image, clip_model, blip_model], outputs=[medium, artist, movement, trending, flavor])
btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append], outputs=[prompt])
btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt])
btn_vlm_caption.click(fn=vqa.interrogate, inputs=[vlm_question, vlm_prompt, image, vlm_model], outputs=[prompt])
btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append], outputs=[prompt])
btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append, vlm_folder_recursive], outputs=[prompt])
for tabname, button in copy_interrogate_buttons.items():
generation_parameters_copypaste.register_paste_params_button(generation_parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,))

View File

@ -20,7 +20,6 @@ extra_ui = []
def create_ui():
dummy_component = gr.Label(visible=False)
with gr.Row(elem_id="models_tab"):
with gr.Column(elem_id='models_output_container', scale=1):
# models_output = gr.Text(elem_id="models_output", value="", show_label=False)

View File

@ -17,7 +17,7 @@ def submit_process(tab_index, extras_image, image_batch, extras_batch_input_dir,
def create_ui():
tab_index = gr.State(value=0) # pylint: disable=abstract-class-instantiated
with gr.Row(equal_height=False, variant='compact', elem_classes="extras"):
with gr.Row(equal_height=False, variant='compact', elem_classes="extras", elem_id="extras_tab"):
with gr.Column(variant='compact'):
with gr.Tabs(elem_id="mode_extras"):
with gr.Tab('Process Image', id="single_image", elem_id="extras_single_tab") as tab_single: