mirror of https://github.com/vladmandic/automatic
caption tab modernui support
Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/3755/head
parent
41426c7516
commit
b6990151c4
|
|
@ -1,13 +1,13 @@
|
|||
# Change Log for SD.Next
|
||||
|
||||
## Update for 2025-02-16
|
||||
## Update for 2025-02-17
|
||||
|
||||
### TODO
|
||||
|
||||
- VLM ModernUI support
|
||||
- CogView4
|
||||
|
||||
### Highlight for 2025-02-16
|
||||
### Highlight for 2025-02-17
|
||||
|
||||
We're back with another update with over 50 commits!
|
||||
- Starting with a massive UI update with full [localization](https://vladmandic.github.io/sdnext-docs/Locale/) for 8 languages
|
||||
|
|
@ -24,7 +24,7 @@ We're back with another update with over 50 commits!
|
|||
|
||||
*...and more* - see [changelog](https://github.com/vladmandic/sdnext/blob/dev/CHANGELOG.md) for full details!
|
||||
|
||||
### Details for 2025-02-16
|
||||
### Details for 2025-02-17
|
||||
|
||||
- **User Interface**
|
||||
- **Hints**
|
||||
|
|
@ -72,6 +72,7 @@ We're back with another update with over 50 commits!
|
|||
- Batch processing: VLM and CLiP
|
||||
for example, can be used to caption your training dataset in one go
|
||||
add option to append to captions file, can be used to run multiple captioning models in sequence
|
||||
add option to run recursively on all subfolders
|
||||
add progress bar
|
||||
- Add additional VLM models:
|
||||
[JoyTag](https://huggingface.co/fancyfeast/joytag)
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
Subproject commit e0f6c7f8a8efc95d5013702275b8aac496c5a6fc
|
||||
Subproject commit aa24200db2a42fc369adbf6fe372420c65f7500d
|
||||
|
|
@ -227,6 +227,8 @@ class BatchWriter:
|
|||
|
||||
def add(self, file, prompt):
|
||||
txt_file = os.path.splitext(file)[0] + ".txt"
|
||||
if self.mode == 'a':
|
||||
prompt = '\n' + prompt
|
||||
with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f:
|
||||
f.write(prompt)
|
||||
|
||||
|
|
@ -339,14 +341,15 @@ def interrogate_image(image, clip_model, blip_model, mode):
|
|||
return prompt
|
||||
|
||||
|
||||
def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append):
|
||||
def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append, recursive):
|
||||
files = []
|
||||
if batch_files is not None:
|
||||
files += [f.name for f in batch_files]
|
||||
if batch_folder is not None:
|
||||
files += [f.name for f in batch_folder]
|
||||
if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str):
|
||||
files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
|
||||
from modules.files_cache import list_files
|
||||
files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive))
|
||||
if len(files) == 0:
|
||||
shared.log.warning('Interrogate batch: type=clip no images')
|
||||
return ''
|
||||
|
|
@ -358,7 +361,7 @@ def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_mod
|
|||
file_mode = 'w' if not append else 'a'
|
||||
writer = BatchWriter(os.path.dirname(files[0]), mode=file_mode)
|
||||
import rich.progress as rp
|
||||
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
|
||||
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
|
||||
with pbar:
|
||||
task = pbar.add_task(total=len(files), description='starting...')
|
||||
for file in files:
|
||||
|
|
|
|||
|
|
@ -457,7 +457,7 @@ def interrogate(question, prompt, image, model_name, quiet:bool=False):
|
|||
return answer
|
||||
|
||||
|
||||
def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, write, append):
|
||||
def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, write, append, recursive):
|
||||
class BatchWriter:
|
||||
def __init__(self, folder, mode='w'):
|
||||
self.folder = folder
|
||||
|
|
@ -467,6 +467,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
|
|||
|
||||
def add(self, file, prompt):
|
||||
txt_file = os.path.splitext(file)[0] + ".txt"
|
||||
if self.mode == 'a':
|
||||
prompt = '\n' + prompt
|
||||
with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f:
|
||||
f.write(prompt)
|
||||
|
||||
|
|
@ -480,7 +482,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
|
|||
if batch_folder is not None:
|
||||
files += [f.name for f in batch_folder]
|
||||
if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str):
|
||||
files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
|
||||
from modules.files_cache import list_files
|
||||
files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive))
|
||||
if len(files) == 0:
|
||||
shared.log.warning('Interrogate batch: type=vlm no images')
|
||||
return ''
|
||||
|
|
@ -492,7 +495,7 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
|
|||
orig_offload = shared.opts.interrogate_offload
|
||||
shared.opts.interrogate_offload = False
|
||||
import rich.progress as rp
|
||||
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
|
||||
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
|
||||
with pbar:
|
||||
task = pbar.add_task(total=len(files), description='starting...')
|
||||
for file in files:
|
||||
|
|
|
|||
|
|
@ -5,19 +5,16 @@ from modules.interrogate import openclip
|
|||
|
||||
def update_vlm_params(*args):
|
||||
vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p = args
|
||||
shared.opts.interrogate_vlm_max_length = vlm_max_tokens
|
||||
shared.opts.interrogate_vlm_num_beams = vlm_num_beams
|
||||
shared.opts.interrogate_vlm_temperature = vlm_temperature
|
||||
shared.opts.interrogate_vlm_do_sample = vlm_do_sample
|
||||
shared.opts.interrogate_vlm_top_k = vlm_top_k
|
||||
shared.opts.interrogate_vlm_top_p = vlm_top_p
|
||||
shared.opts.interrogate_vlm_max_length = int(vlm_max_tokens)
|
||||
shared.opts.interrogate_vlm_num_beams = int(vlm_num_beams)
|
||||
shared.opts.interrogate_vlm_temperature = float(vlm_temperature)
|
||||
shared.opts.interrogate_vlm_do_sample = bool(vlm_do_sample)
|
||||
shared.opts.interrogate_vlm_top_k = int(vlm_top_k)
|
||||
shared.opts.interrogate_vlm_top_p = float(vlm_top_p)
|
||||
shared.opts.save(shared.config_filename)
|
||||
|
||||
|
||||
def update_clip_params(*args):
|
||||
"""
|
||||
"interrogate_clip_num_beams": OptionInfo(1, "CLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
|
||||
"""
|
||||
clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams = args
|
||||
shared.opts.interrogate_clip_min_length = int(clip_min_length)
|
||||
shared.opts.interrogate_clip_max_length = int(clip_max_length)
|
||||
|
|
@ -31,12 +28,12 @@ def update_clip_params(*args):
|
|||
|
||||
|
||||
def create_ui():
|
||||
with gr.Row(equal_height=False, variant='compact', elem_classes="caption"):
|
||||
with gr.Column(variant='compact'):
|
||||
with gr.Row(equal_height=False, variant='compact', elem_classes="caption", elem_id="caption_tab"):
|
||||
with gr.Column(variant='compact', elem_id='interrogate_input'):
|
||||
with gr.Row():
|
||||
image = gr.Image(type='pil', label="Image")
|
||||
image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='interrogate_image')
|
||||
with gr.Tabs(elem_id="mode_caption"):
|
||||
with gr.Tab("VLM Caption"):
|
||||
with gr.Tab("VLM Caption", elem_id="tab_vlm_caption"):
|
||||
from modules.interrogate import vqa
|
||||
with gr.Row():
|
||||
vlm_question = gr.Dropdown(label="Predefined question", allow_custom_value=False, choices=vqa.vlm_prompts, value=vqa.vlm_prompts[2], elem_id='vlm_question')
|
||||
|
|
@ -70,15 +67,16 @@ def create_ui():
|
|||
with gr.Row():
|
||||
vlm_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="vlm_save_output")
|
||||
vlm_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="vlm_save_append")
|
||||
vlm_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="vlm_folder_recursive")
|
||||
with gr.Row(elem_id='interrogate_buttons_batch'):
|
||||
btn_vlm_caption_batch = gr.Button("Batch caption", variant='primary', elem_id="btn_vlm_caption_batch")
|
||||
with gr.Row():
|
||||
btn_vlm_caption = gr.Button("Caption", variant='primary', elem_id="btn_vlm_caption")
|
||||
with gr.Tab("CLiP Interrogate"):
|
||||
with gr.Tab("CLiP Interrogate", elem_id='tab_clip_interrogate'):
|
||||
with gr.Row():
|
||||
clip_model = gr.Dropdown([], value=shared.opts.interrogate_clip_model, label='CLiP model', elem_id='clip_clip_model')
|
||||
ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'refresh_interrogate_models')
|
||||
blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='clip_blip_model')
|
||||
ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'clip_refresh_models')
|
||||
blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='btN_clip_blip_model')
|
||||
clip_mode = gr.Dropdown(openclip.caption_types, label='Mode', value='fast', elem_id='clip_clip_mode')
|
||||
with gr.Accordion(label='Advanced options', open=False, visible=True):
|
||||
with gr.Row():
|
||||
|
|
@ -108,15 +106,16 @@ def create_ui():
|
|||
with gr.Row():
|
||||
clip_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="clip_save_output")
|
||||
clip_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="clip_save_append")
|
||||
clip_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="clip_folder_recursive")
|
||||
with gr.Row():
|
||||
btn_clip_interrogate_batch = gr.Button("Batch interrogate", variant='primary', elem_id="btn_clip_interrogate_batch")
|
||||
with gr.Row():
|
||||
btn_clip_interrogate_img = gr.Button("Interrogate", variant='primary', elem_id="btn_clip_interrogate_img")
|
||||
btn_clip_analyze_img = gr.Button("Analyze", variant='primary', elem_id="btn_clip_analyze_img")
|
||||
with gr.Column(variant='compact'):
|
||||
with gr.Row():
|
||||
with gr.Column(variant='compact', elem_id='interrogate_output'):
|
||||
with gr.Row(elem_id='interrogate_output_prompt'):
|
||||
prompt = gr.Textbox(label="Answer", lines=8, placeholder="ai generated image description")
|
||||
with gr.Row():
|
||||
with gr.Row(elem_id='interrogate_output_classes'):
|
||||
medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5, visible=False)
|
||||
artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5, visible=False)
|
||||
movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5, visible=False)
|
||||
|
|
@ -127,9 +126,9 @@ def create_ui():
|
|||
|
||||
btn_clip_interrogate_img.click(openclip.interrogate_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt])
|
||||
btn_clip_analyze_img.click(openclip.analyze_image, inputs=[image, clip_model, blip_model], outputs=[medium, artist, movement, trending, flavor])
|
||||
btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append], outputs=[prompt])
|
||||
btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt])
|
||||
btn_vlm_caption.click(fn=vqa.interrogate, inputs=[vlm_question, vlm_prompt, image, vlm_model], outputs=[prompt])
|
||||
btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append], outputs=[prompt])
|
||||
btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append, vlm_folder_recursive], outputs=[prompt])
|
||||
|
||||
for tabname, button in copy_interrogate_buttons.items():
|
||||
generation_parameters_copypaste.register_paste_params_button(generation_parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,))
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ extra_ui = []
|
|||
|
||||
def create_ui():
|
||||
dummy_component = gr.Label(visible=False)
|
||||
|
||||
with gr.Row(elem_id="models_tab"):
|
||||
with gr.Column(elem_id='models_output_container', scale=1):
|
||||
# models_output = gr.Text(elem_id="models_output", value="", show_label=False)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ def submit_process(tab_index, extras_image, image_batch, extras_batch_input_dir,
|
|||
|
||||
def create_ui():
|
||||
tab_index = gr.State(value=0) # pylint: disable=abstract-class-instantiated
|
||||
with gr.Row(equal_height=False, variant='compact', elem_classes="extras"):
|
||||
with gr.Row(equal_height=False, variant='compact', elem_classes="extras", elem_id="extras_tab"):
|
||||
with gr.Column(variant='compact'):
|
||||
with gr.Tabs(elem_id="mode_extras"):
|
||||
with gr.Tab('Process Image', id="single_image", elem_id="extras_single_tab") as tab_single:
|
||||
|
|
|
|||
Loading…
Reference in New Issue