caption tab modernui support

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/3755/head
Vladimir Mandic 2025-02-17 10:59:22 -05:00
parent 41426c7516
commit b6990151c4
7 changed files with 38 additions and 33 deletions

View File

@ -1,13 +1,13 @@
# Change Log for SD.Next
## Update for 2025-02-16
## Update for 2025-02-17
### TODO
- VLM ModernUI support
- CogView4
### Highlight for 2025-02-16
### Highlight for 2025-02-17
We're back with another update with over 50 commits!
- Starting with massive UI update with full [localization](https://vladmandic.github.io/sdnext-docs/Locale/) for 8 languages
@ -24,7 +24,7 @@ We're back with another update with over 50 commits!
*...and more* - see [changelog](https://github.com/vladmandic/sdnext/blob/dev/CHANGELOG.md) for full details!
### Details for 2025-02-16
### Details for 2025-02-17
- **User Interface**
- **Hints**
@ -72,6 +72,7 @@ We're back with another update with over 50 commits!
- Batch processing: VLM and CLiP
for example, can be used to caption your training dataset in one go
add option to append to captions file, can be used to run multiple captioning models in sequence
add option to run recursively on all subfolders
add progress bar
- Add additional VLM models:
[JoyTag](https://huggingface.co/fancyfeast/joytag)

@ -1 +1 @@
Subproject commit e0f6c7f8a8efc95d5013702275b8aac496c5a6fc
Subproject commit aa24200db2a42fc369adbf6fe372420c65f7500d

View File

@ -227,6 +227,8 @@ class BatchWriter:
def add(self, file, prompt):
    """Write *prompt* into a sidecar ``.txt`` caption file named after *file*.

    The caption file lives in ``self.folder`` and shares *file*'s base name.
    In append mode (``self.mode == 'a'``) a leading newline is prefixed so a
    second captioning pass lands on its own line instead of gluing onto the
    previous caption; in write mode (``'w'``) the file is overwritten.
    NOTE(review): indentation here is flattened by the diff rendering; the
    original method body is indented inside class BatchWriter.
    """
    txt_file = os.path.splitext(file)[0] + ".txt"
    if self.mode == 'a':
        # append mode: separate this caption from whatever is already in the file
        prompt = '\n' + prompt
    with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f:
        f.write(prompt)
@ -339,14 +341,15 @@ def interrogate_image(image, clip_model, blip_model, mode):
return prompt
def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append):
def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_model, mode, write, append, recursive):
files = []
if batch_files is not None:
files += [f.name for f in batch_files]
if batch_folder is not None:
files += [f.name for f in batch_folder]
if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str):
files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
from modules.files_cache import list_files
files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive))
if len(files) == 0:
shared.log.warning('Interrogate batch: type=clip no images')
return ''
@ -358,7 +361,7 @@ def interrogate_batch(batch_files, batch_folder, batch_str, clip_model, blip_mod
file_mode = 'w' if not append else 'a'
writer = BatchWriter(os.path.dirname(files[0]), mode=file_mode)
import rich.progress as rp
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
with pbar:
task = pbar.add_task(total=len(files), description='starting...')
for file in files:

View File

@ -457,7 +457,7 @@ def interrogate(question, prompt, image, model_name, quiet:bool=False):
return answer
def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, write, append):
def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, write, append, recursive):
class BatchWriter:
def __init__(self, folder, mode='w'):
self.folder = folder
@ -467,6 +467,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
def add(self, file, prompt):
    """Write *prompt* into a sidecar ``.txt`` caption file named after *file*.

    Duplicate of the openclip BatchWriter.add change: in append mode
    (``self.mode == 'a'``) a newline is prefixed so captions produced by a
    later model run are appended on a new line rather than concatenated;
    write mode (``'w'``) overwrites the file.
    NOTE(review): indentation here is flattened by the diff rendering; the
    original method body is indented inside the nested BatchWriter class.
    """
    txt_file = os.path.splitext(file)[0] + ".txt"
    if self.mode == 'a':
        # append mode: separate this caption from the existing file contents
        prompt = '\n' + prompt
    with open(os.path.join(self.folder, txt_file), self.mode, encoding='utf-8') as f:
        f.write(prompt)
@ -480,7 +482,8 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
if batch_folder is not None:
files += [f.name for f in batch_folder]
if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str):
files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
from modules.files_cache import list_files
files += list(list_files(batch_str, ext_filter=['.png', '.jpg', '.jpeg', '.webp'], recursive=recursive))
if len(files) == 0:
shared.log.warning('Interrogate batch: type=vlm no images')
return ''
@ -492,7 +495,7 @@ def batch(model_name, batch_files, batch_folder, batch_str, question, prompt, wr
orig_offload = shared.opts.interrogate_offload
shared.opts.interrogate_offload = False
import rich.progress as rp
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
pbar = rp.Progress(rp.TextColumn('[cyan]Caption:'), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
with pbar:
task = pbar.add_task(total=len(files), description='starting...')
for file in files:

View File

@ -5,19 +5,16 @@ from modules.interrogate import openclip
def update_vlm_params(*args):
    """Persist the VLM caption options from the UI widgets into shared.opts.

    NOTE(review): this span is a unified diff with the +/- markers stripped —
    the first six bare assignments below are the REMOVED lines and the six
    int()/float()/bool()-casting assignments after them are their
    REPLACEMENTS; only the casting versions exist in the resulting file.
    The casts presumably normalize whatever value types the UI widgets
    deliver before the options are saved — confirm against the Gradio
    component definitions.
    """
    # unpack in the exact order the UI wires the inputs
    vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p = args
    # --- removed (pre-change) assignments: stored raw widget values ---
    shared.opts.interrogate_vlm_max_length = vlm_max_tokens
    shared.opts.interrogate_vlm_num_beams = vlm_num_beams
    shared.opts.interrogate_vlm_temperature = vlm_temperature
    shared.opts.interrogate_vlm_do_sample = vlm_do_sample
    shared.opts.interrogate_vlm_top_k = vlm_top_k
    shared.opts.interrogate_vlm_top_p = vlm_top_p
    # --- added (post-change) assignments: cast to the expected option types ---
    shared.opts.interrogate_vlm_max_length = int(vlm_max_tokens)
    shared.opts.interrogate_vlm_num_beams = int(vlm_num_beams)
    shared.opts.interrogate_vlm_temperature = float(vlm_temperature)
    shared.opts.interrogate_vlm_do_sample = bool(vlm_do_sample)
    shared.opts.interrogate_vlm_top_k = int(vlm_top_k)
    shared.opts.interrogate_vlm_top_p = float(vlm_top_p)
    # write the updated options back to the config file immediately
    shared.opts.save(shared.config_filename)
def update_clip_params(*args):
"""
"interrogate_clip_num_beams": OptionInfo(1, "CLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
"""
clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams = args
shared.opts.interrogate_clip_min_length = int(clip_min_length)
shared.opts.interrogate_clip_max_length = int(clip_max_length)
@ -31,12 +28,12 @@ def update_clip_params(*args):
def create_ui():
with gr.Row(equal_height=False, variant='compact', elem_classes="caption"):
with gr.Column(variant='compact'):
with gr.Row(equal_height=False, variant='compact', elem_classes="caption", elem_id="caption_tab"):
with gr.Column(variant='compact', elem_id='interrogate_input'):
with gr.Row():
image = gr.Image(type='pil', label="Image")
image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='interrogate_image')
with gr.Tabs(elem_id="mode_caption"):
with gr.Tab("VLM Caption"):
with gr.Tab("VLM Caption", elem_id="tab_vlm_caption"):
from modules.interrogate import vqa
with gr.Row():
vlm_question = gr.Dropdown(label="Predefined question", allow_custom_value=False, choices=vqa.vlm_prompts, value=vqa.vlm_prompts[2], elem_id='vlm_question')
@ -70,15 +67,16 @@ def create_ui():
with gr.Row():
vlm_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="vlm_save_output")
vlm_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="vlm_save_append")
vlm_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="vlm_folder_recursive")
with gr.Row(elem_id='interrogate_buttons_batch'):
btn_vlm_caption_batch = gr.Button("Batch caption", variant='primary', elem_id="btn_vlm_caption_batch")
with gr.Row():
btn_vlm_caption = gr.Button("Caption", variant='primary', elem_id="btn_vlm_caption")
with gr.Tab("CLiP Interrogate"):
with gr.Tab("CLiP Interrogate", elem_id='tab_clip_interrogate'):
with gr.Row():
clip_model = gr.Dropdown([], value=shared.opts.interrogate_clip_model, label='CLiP model', elem_id='clip_clip_model')
ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'refresh_interrogate_models')
blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='clip_blip_model')
ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'clip_refresh_models')
blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption model', elem_id='clip_blip_model')
clip_mode = gr.Dropdown(openclip.caption_types, label='Mode', value='fast', elem_id='clip_clip_mode')
with gr.Accordion(label='Advanced options', open=False, visible=True):
with gr.Row():
@ -108,15 +106,16 @@ def create_ui():
with gr.Row():
clip_save_output = gr.Checkbox(label='Save caption files', value=True, elem_id="clip_save_output")
clip_save_append = gr.Checkbox(label='Append caption files', value=False, elem_id="clip_save_append")
clip_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="clip_folder_recursive")
with gr.Row():
btn_clip_interrogate_batch = gr.Button("Batch interrogate", variant='primary', elem_id="btn_clip_interrogate_batch")
with gr.Row():
btn_clip_interrogate_img = gr.Button("Interrogate", variant='primary', elem_id="btn_clip_interrogate_img")
btn_clip_analyze_img = gr.Button("Analyze", variant='primary', elem_id="btn_clip_analyze_img")
with gr.Column(variant='compact'):
with gr.Row():
with gr.Column(variant='compact', elem_id='interrogate_output'):
with gr.Row(elem_id='interrogate_output_prompt'):
prompt = gr.Textbox(label="Answer", lines=8, placeholder="ai generated image description")
with gr.Row():
with gr.Row(elem_id='interrogate_output_classes'):
medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5, visible=False)
artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5, visible=False)
movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5, visible=False)
@ -127,9 +126,9 @@ def create_ui():
btn_clip_interrogate_img.click(openclip.interrogate_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt])
btn_clip_analyze_img.click(openclip.analyze_image, inputs=[image, clip_model, blip_model], outputs=[medium, artist, movement, trending, flavor])
btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append], outputs=[prompt])
btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt])
btn_vlm_caption.click(fn=vqa.interrogate, inputs=[vlm_question, vlm_prompt, image, vlm_model], outputs=[prompt])
btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append], outputs=[prompt])
btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append, vlm_folder_recursive], outputs=[prompt])
for tabname, button in copy_interrogate_buttons.items():
generation_parameters_copypaste.register_paste_params_button(generation_parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,))

View File

@ -20,7 +20,6 @@ extra_ui = []
def create_ui():
dummy_component = gr.Label(visible=False)
with gr.Row(elem_id="models_tab"):
with gr.Column(elem_id='models_output_container', scale=1):
# models_output = gr.Text(elem_id="models_output", value="", show_label=False)

View File

@ -17,7 +17,7 @@ def submit_process(tab_index, extras_image, image_batch, extras_batch_input_dir,
def create_ui():
tab_index = gr.State(value=0) # pylint: disable=abstract-class-instantiated
with gr.Row(equal_height=False, variant='compact', elem_classes="extras"):
with gr.Row(equal_height=False, variant='compact', elem_classes="extras", elem_id="extras_tab"):
with gr.Column(variant='compact'):
with gr.Tabs(elem_id="mode_extras"):
with gr.Tab('Process Image', id="single_image", elem_id="extras_single_tab") as tab_single: