diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f0e8def9..0659f5a8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,13 @@ # Change Log for SD.Next -## Update for 2025-02-16 +## Update for 2025-02-17 ### TODO - VLM ModernUI support - CogView4 -### Highlight for 2025-02-16 +### Highlight for 2025-02-17 We're back with another update with over 50 commits! - Starting with massive UI update with full [localization](https://vladmandic.github.io/sdnext-docs/Locale/) for 8 languages @@ -24,7 +24,7 @@ We're back with another update with over 50 commits! *...and more* - see [changelog](https://github.com/vladmandic/sdnext/blob/dev/CHANGELOG.md) for full details! -### Details for 2025-02-16 +### Details for 2025-02-17 - **User Interface** - **Hints** @@ -72,6 +72,7 @@ We're back with another update with over 50 commits! - Batch processing: VLM and CLiP for example, can be used to caption your training dataset in one go add option to append to captions file, can be used to run multiple captioning models in sequence + add option to run recursively on all subfolders add progress bar - Add additional VLM models: [JoyTag](https://huggingface.co/fancyfeast/joytag) diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index e0f6c7f8a..aa24200db 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit e0f6c7f8a8efc95d5013702275b8aac496c5a6fc +Subproject commit aa24200db2a42fc369adbf6fe372420c65f7500d diff --git a/modules/interrogate/openclip.py b/modules/interrogate/openclip.py index 68bd90b46..b8b43092c 100644 --- a/modules/interrogate/openclip.py +++ b/modules/interrogate/openclip.py @@ -227,6 +227,8 @@ class BatchWriter: def add(self, file, prompt): txt_file = os.path.splitext(file)[0] + ".txt" + if self.mode == 'a': + prompt = '\n' + prompt with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f: f.write(prompt) @@ -339,14 +341,15 @@ def 
interrogate_image(image, clip_model, blip_model, mode): return prompt -def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append): +def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append, recursive): files = [] if batch_files is not None: files += [f.name for f in batch_files] if batch_folder is not None: files += [f.name for f in batch_folder] if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str): - files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))] + from modules.files_cache import list_files + files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive)) if len(files) == 0: shared.log.warning('Interrogate batch: type=clip no images') return '' @@ -358,7 +361,7 @@ def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_mod file_mode = 'w' if not append else 'a' writer = BatchWriter(os.path.dirname(files[0]), mode=file_mode) import rich.progress as rp - pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console) + pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console) with pbar: task = pbar.add_task(total=len(files), description='starting...') for file in files: diff --git a/modules/interrogate/vqa.py b/modules/interrogate/vqa.py index 38f3d4723..c12067014 100644 --- a/modules/interrogate/vqa.py +++ b/modules/interrogate/vqa.py @@ -457,7 +457,7 @@ def interrogate(question, prompt, image, model_name, quiet:bool=False): return answer -def batch(model_name, 
batch_files, batch_folder, batch_str, question, prompt, write, append): +def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, write, append, recursive): class BatchWriter: def __init__(self, folder, mode='w'): self.folder = folder @@ -467,6 +467,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr def add(self, file, prompt): txt_file = os.path.splitext(file)[0] + ".txt" + if self.mode == 'a': + prompt = '\n' + prompt with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f: f.write(prompt) @@ -480,7 +482,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr if batch_folder is not None: files += [f.name for f in batch_folder] if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str): - files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))] + from modules.files_cache import list_files + files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive)) if len(files) == 0: shared.log.warning('Interrogate batch: type=vlm no images') return '' @@ -492,7 +495,7 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr orig_offload = shared.opts.interrogate_offload shared.opts.interrogate_offload = False import rich.progress as rp - pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console) + pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console) with pbar: task = pbar.add_task(total=len(files), description='starting...') for file in files: diff --git 
a/modules/ui_caption.py b/modules/ui_caption.py index a13d2f846..04a16702c 100644 --- a/modules/ui_caption.py +++ b/modules/ui_caption.py @@ -5,19 +5,16 @@ from modules.interrogate import openclip def update_vlm_params(*args): vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p = args - shared.opts.interrogate_vlm_max_length = vlm_max_tokens - shared.opts.interrogate_vlm_num_beams = vlm_num_beams - shared.opts.interrogate_vlm_temperature = vlm_temperature - shared.opts.interrogate_vlm_do_sample = vlm_do_sample - shared.opts.interrogate_vlm_top_k = vlm_top_k - shared.opts.interrogate_vlm_top_p = vlm_top_p + shared.opts.interrogate_vlm_max_length = int(vlm_max_tokens) + shared.opts.interrogate_vlm_num_beams = int(vlm_num_beams) + shared.opts.interrogate_vlm_temperature = float(vlm_temperature) + shared.opts.interrogate_vlm_do_sample = bool(vlm_do_sample) + shared.opts.interrogate_vlm_top_k = int(vlm_top_k) + shared.opts.interrogate_vlm_top_p = float(vlm_top_p) shared.opts.save(shared.config_filename) def update_clip_params(*args): - """ - "interrogate_clip_num_beams": OptionInfo(1, "CLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}), - """ clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams = args shared.opts.interrogate_clip_min_length = int(clip_min_length) shared.opts.interrogate_clip_max_length = int(clip_max_length) @@ -31,12 +28,12 @@ def update_clip_params(*args): def create_ui(): - with gr.Row(equal_height=False, variant='compact', elem_classes="caption"): - with gr.Column(variant='compact'): + with gr.Row(equal_height=False, variant='compact', elem_classes="caption", elem_id="caption_tab"): + with gr.Column(variant='compact', elem_id='interrogate_input'): with gr.Row(): - image = gr.Image(type='pil', label="Image") + image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', 
elem_id='interrogate_image') with gr.Tabs(elem_id="mode_caption"): - with gr.Tab("VLM Caption"): + with gr.Tab("VLM Caption", elem_id="tab_vlm_caption"): from modules.interrogate import vqa with gr.Row(): vlm_question = gr.Dropdown(label="Predefined question", allow_custom_value=False, choices=vqa.vlm_prompts, value=vqa.vlm_prompts[2], elem_id='vlm_question') @@ -70,15 +67,16 @@ def create_ui(): with gr.Row(): vlm_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="vlm_save_output") vlm_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="vlm_save_append") + vlm_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="vlm_folder_recursive") with gr.Row(elem_id='interrogate_buttons_batch'): btn_vlm_caption_batch = gr.Button("Batch caption", variant='primary', elem_id="btn_vlm_caption_batch") with gr.Row(): btn_vlm_caption = gr.Button("Caption", variant='primary', elem_id="btn_vlm_caption") - with gr.Tab("CLiP Interrogate"): + with gr.Tab("CLiP Interrogate", elem_id='tab_clip_interrogate'): with gr.Row(): clip_model = gr.Dropdown([], value=shared.opts.interrogate_clip_model, label='CLiP model', elem_id='clip_clip_model') - ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'refresh_interrogate_models') - blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='clip_blip_model') + ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'clip_refresh_models') + blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='clip_blip_model') clip_mode = gr.Dropdown(openclip.caption_types, label='Mode', value='fast', elem_id='clip_clip_mode') with gr.Accordion(label='Advanced options', open=False, visible=True): with
gr.Row(): @@ -108,15 +106,16 @@ def create_ui(): with gr.Row(): clip_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="clip_save_output") clip_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="clip_save_append") + clip_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="clip_folder_recursive") with gr.Row(): btn_clip_interrogate_batch = gr.Button("Batch interrogate", variant='primary', elem_id="btn_clip_interrogate_batch") with gr.Row(): btn_clip_interrogate_img = gr.Button("Interrogate", variant='primary', elem_id="btn_clip_interrogate_img") btn_clip_analyze_img = gr.Button("Analyze", variant='primary', elem_id="btn_clip_analyze_img") - with gr.Column(variant='compact'): - with gr.Row(): + with gr.Column(variant='compact', elem_id='interrogate_output'): + with gr.Row(elem_id='interrogate_output_prompt'): prompt = gr.Textbox(label="Answer", lines=8, placeholder="ai generated image description") - with gr.Row(): + with gr.Row(elem_id='interrogate_output_classes'): medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5, visible=False) artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5, visible=False) movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5, visible=False) @@ -127,9 +126,9 @@ def create_ui(): btn_clip_interrogate_img.click(openclip.interrogate_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt]) btn_clip_analyze_img.click(openclip.analyze_image, inputs=[image, clip_model, blip_model], outputs=[medium, artist, movement, trending, flavor]) - btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append], outputs=[prompt]) + btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, 
clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt]) btn_vlm_caption.click(fn=vqa.interrogate, inputs=[vlm_question, vlm_prompt, image, vlm_model], outputs=[prompt]) - btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append], outputs=[prompt]) + btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append, vlm_folder_recursive], outputs=[prompt]) for tabname, button in copy_interrogate_buttons.items(): generation_parameters_copypaste.register_paste_params_button(generation_parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,)) diff --git a/modules/ui_models.py b/modules/ui_models.py index 8c172b9cd..4f6355a9a 100644 --- a/modules/ui_models.py +++ b/modules/ui_models.py @@ -20,7 +20,6 @@ extra_ui = [] def create_ui(): dummy_component = gr.Label(visible=False) - with gr.Row(elem_id="models_tab"): with gr.Column(elem_id='models_output_container', scale=1): # models_output = gr.Text(elem_id="models_output", value="", show_label=False) diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py index 27806e35c..411fd2e4e 100644 --- a/modules/ui_postprocessing.py +++ b/modules/ui_postprocessing.py @@ -17,7 +17,7 @@ def submit_process(tab_index, extras_image, image_batch, extras_batch_input_dir, def create_ui(): tab_index = gr.State(value=0) # pylint: disable=abstract-class-instantiated - with gr.Row(equal_height=False, variant='compact', elem_classes="extras"): + with gr.Row(equal_height=False, variant='compact', elem_classes="extras", elem_id="extras_tab"): with gr.Column(variant='compact'): with gr.Tabs(elem_id="mode_extras"): with gr.Tab('Process Image', 
id="single_image", elem_id="extras_single_tab") as tab_single: