From 61b031ada5f2230f38e3671718247931096610a9 Mon Sep 17 00:00:00 2001 From: CalamitousFelicitousness Date: Mon, 26 Jan 2026 01:15:42 +0000 Subject: [PATCH] refactor: update imports for caption module rename Update all imports from modules.interrogate to modules.caption across: - modules/shared.py, modules/shared_legacy.py - modules/ui_caption.py, modules/ui_common.py - modules/ui_control.py, modules/ui_control_helpers.py - modules/ui_img2img.py, modules/ui_sections.py - modules/ui_symbols.py, modules/ui_video_vlm.py --- modules/shared.py | 54 ++++++------ modules/shared_legacy.py | 2 - modules/ui_caption.py | 160 +++++++++++++++++----------------- modules/ui_common.py | 2 +- modules/ui_control.py | 6 +- modules/ui_control_helpers.py | 10 +-- modules/ui_img2img.py | 18 ++-- modules/ui_sections.py | 10 +-- modules/ui_symbols.py | 2 +- modules/ui_video_vlm.py | 4 +- 10 files changed, 133 insertions(+), 135 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index d549b9a7a..7e0c4a4a9 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -22,8 +22,8 @@ from modules.memstats import memory_stats, ram_stats # pylint: disable=unused-im log.debug('Initializing: pipelines') from modules import shared_items -from modules.interrogate.openclip import caption_models, caption_types, get_clip_models, refresh_clip_models -from modules.interrogate.vqa import vlm_models, vlm_prompts, vlm_system, vlm_default +from modules.caption.openclip import caption_models, caption_types, get_clip_models, refresh_clip_models +from modules.caption.vqa import vlm_models, vlm_prompts, vlm_system, vlm_default if TYPE_CHECKING: @@ -207,7 +207,7 @@ options_templates.update(options_section(('offload', "Model Offloading"), { "offload_sep": OptionInfo("

<h2>Model Offloading</h2>

", "", gr.HTML), "diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'group', 'model', 'sequential']}), "diffusers_offload_nonblocking": OptionInfo(False, "Non-blocking move operations"), - "interrogate_offload": OptionInfo(True, "Offload caption models"), + "caption_offload": OptionInfo(True, "Offload caption models"), "offload_balanced_sep": OptionInfo("

<h2>Balanced Offload</h2>

", "", gr.HTML), "diffusers_offload_pre": OptionInfo(True, "Offload during pre-forward"), "diffusers_offload_streams": OptionInfo(False, "Offload using streams"), @@ -742,31 +742,31 @@ options_templates.update(options_section(('hidden_options', "Hidden options"), { "sd_checkpoint_hash": OptionInfo("", "SHA256 hash of the current checkpoint", gr.Textbox, {"visible": False}), "tooltips": OptionInfo("UI Tooltips", "UI tooltips", gr.Radio, {"choices": ["None", "Browser default", "UI tooltips"], "visible": False}), - # Caption/Interrogate settings (controlled via Caption Tab UI) - "interrogate_default_type": OptionInfo("VLM", "Default caption type", gr.Radio, {"choices": ["VLM", "OpenCLiP", "Tagger"], "visible": False}), + # Caption settings (controlled via Caption Tab UI) + "caption_default_type": OptionInfo("VLM", "Default caption type", gr.Radio, {"choices": ["VLM", "OpenCLiP", "Tagger"], "visible": False}), "tagger_show_scores": OptionInfo(False, "Tagger: show confidence scores in results", gr.Checkbox, {"visible": False}), - "interrogate_clip_model": OptionInfo("ViT-L-14/openai", "OpenCLiP: default model", gr.Dropdown, lambda: {"choices": get_clip_models(), "visible": False}, refresh=refresh_clip_models), - "interrogate_clip_mode": OptionInfo(caption_types[0], "OpenCLiP: default mode", gr.Dropdown, {"choices": caption_types, "visible": False}), - "interrogate_blip_model": OptionInfo(list(caption_models)[0], "OpenCLiP: default captioner", gr.Dropdown, {"choices": list(caption_models), "visible": False}), - "interrogate_clip_num_beams": OptionInfo(1, "OpenCLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}), - "interrogate_clip_min_length": OptionInfo(32, "OpenCLiP: min length", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1, "visible": False}), - "interrogate_clip_max_length": OptionInfo(74, "OpenCLiP: max length", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}), - "interrogate_clip_min_flavors": 
OptionInfo(2, "OpenCLiP: min flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}), - "interrogate_clip_max_flavors": OptionInfo(16, "OpenCLiP: max flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}), - "interrogate_clip_flavor_count": OptionInfo(1024, "OpenCLiP: intermediate flavors", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}), - "interrogate_clip_chunk_size": OptionInfo(1024, "OpenCLiP: chunk size", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}), - "interrogate_vlm_model": OptionInfo(vlm_default, "VLM: default model", gr.Dropdown, {"choices": list(vlm_models), "visible": False}), - "interrogate_vlm_prompt": OptionInfo(vlm_prompts[2], "VLM: default prompt", DropdownEditable, {"choices": vlm_prompts, "visible": False}), - "interrogate_vlm_system": OptionInfo(vlm_system, "VLM: system prompt", gr.Textbox, {"visible": False}), - "interrogate_vlm_num_beams": OptionInfo(1, "VLM: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}), - "interrogate_vlm_max_length": OptionInfo(512, "VLM: max length", gr.Slider, {"minimum": 1, "maximum": 4096, "step": 1, "visible": False}), - "interrogate_vlm_do_sample": OptionInfo(True, "VLM: use sample method", gr.Checkbox, {"visible": False}), - "interrogate_vlm_temperature": OptionInfo(0.8, "VLM: temperature", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}), - "interrogate_vlm_top_k": OptionInfo(0, "VLM: top-k", gr.Slider, {"minimum": 0, "maximum": 99, "step": 1, "visible": False}), - "interrogate_vlm_top_p": OptionInfo(0, "VLM: top-p", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}), - "interrogate_vlm_keep_prefill": OptionInfo(False, "VLM: keep prefill text in output", gr.Checkbox, {"visible": False}), - "interrogate_vlm_keep_thinking": OptionInfo(False, "VLM: keep reasoning trace in output", gr.Checkbox, {"visible": False}), - 
"interrogate_vlm_thinking_mode": OptionInfo(False, "VLM: enable thinking/reasoning mode", gr.Checkbox, {"visible": False}), + "caption_openclip_model": OptionInfo("ViT-L-14/openai", "OpenCLiP: default model", gr.Dropdown, lambda: {"choices": get_clip_models(), "visible": False}, refresh=refresh_clip_models), + "caption_openclip_mode": OptionInfo(caption_types[0], "OpenCLiP: default mode", gr.Dropdown, {"choices": caption_types, "visible": False}), + "caption_openclip_blip_model": OptionInfo(list(caption_models)[0], "OpenCLiP: default captioner", gr.Dropdown, {"choices": list(caption_models), "visible": False}), + "caption_openclip_num_beams": OptionInfo(1, "OpenCLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}), + "caption_openclip_min_length": OptionInfo(32, "OpenCLiP: min length", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1, "visible": False}), + "caption_openclip_max_length": OptionInfo(74, "OpenCLiP: max length", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}), + "caption_openclip_min_flavors": OptionInfo(2, "OpenCLiP: min flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}), + "caption_openclip_max_flavors": OptionInfo(16, "OpenCLiP: max flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}), + "caption_openclip_flavor_count": OptionInfo(1024, "OpenCLiP: intermediate flavors", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}), + "caption_openclip_chunk_size": OptionInfo(1024, "OpenCLiP: chunk size", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}), + "caption_vlm_model": OptionInfo(vlm_default, "VLM: default model", gr.Dropdown, {"choices": list(vlm_models), "visible": False}), + "caption_vlm_prompt": OptionInfo(vlm_prompts[2], "VLM: default prompt", DropdownEditable, {"choices": vlm_prompts, "visible": False}), + "caption_vlm_system": OptionInfo(vlm_system, "VLM: system prompt", 
gr.Textbox, {"visible": False}), + "caption_vlm_num_beams": OptionInfo(1, "VLM: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}), + "caption_vlm_max_length": OptionInfo(512, "VLM: max length", gr.Slider, {"minimum": 1, "maximum": 4096, "step": 1, "visible": False}), + "caption_vlm_do_sample": OptionInfo(True, "VLM: use sample method", gr.Checkbox, {"visible": False}), + "caption_vlm_temperature": OptionInfo(0.8, "VLM: temperature", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}), + "caption_vlm_top_k": OptionInfo(0, "VLM: top-k", gr.Slider, {"minimum": 0, "maximum": 99, "step": 1, "visible": False}), + "caption_vlm_top_p": OptionInfo(0, "VLM: top-p", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}), + "caption_vlm_keep_prefill": OptionInfo(False, "VLM: keep prefill text in output", gr.Checkbox, {"visible": False}), + "caption_vlm_keep_thinking": OptionInfo(False, "VLM: keep reasoning trace in output", gr.Checkbox, {"visible": False}), + "caption_vlm_thinking_mode": OptionInfo(False, "VLM: enable thinking/reasoning mode", gr.Checkbox, {"visible": False}), "tagger_threshold": OptionInfo(0.50, "Tagger: general tag threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False}), "tagger_include_rating": OptionInfo(False, "Tagger: include rating tags", gr.Checkbox, {"visible": False}), "tagger_max_tags": OptionInfo(74, "Tagger: max tags", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}), diff --git a/modules/shared_legacy.py b/modules/shared_legacy.py index f96b29fa1..af08da28b 100644 --- a/modules/shared_legacy.py +++ b/modules/shared_legacy.py @@ -11,7 +11,6 @@ class LegacyOption(OptionInfo): legacy_options = options_section(('legacy_options', "Legacy options"), { "ldsr_models_path": LegacyOption(os.path.join(paths.models_path, 'LDSR'), "LDSR Path", gr.Textbox, { "visible": False}), - "interrogate_clip_skip_categories": 
LegacyOption(["artists", "movements", "flavors"], "CLiP: skip categories", gr.CheckboxGroup, {"choices": [], "visible":False}), "lora_legacy": LegacyOption(False, "LoRA load using legacy method", gr.Checkbox, {"visible": False}), "lora_preferred_name": LegacyOption("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}), "img2img_extra_noise": LegacyOption(0.0, "Extra noise multiplier for img2img", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": False}), @@ -43,7 +42,6 @@ legacy_options = options_section(('legacy_options', "Legacy options"), { "grid_save_to_dirs": LegacyOption(False, "Save grids to a subdirectory", gr.Checkbox, {"visible": False}), "hypernetwork_enabled": LegacyOption(False, "Enable Hypernetwork support", gr.Checkbox, {"visible": False}), "img2img_fix_steps": LegacyOption(False, "For image processing do exact number of steps as specified", gr.Checkbox, { "visible": False }), - "interrogate_clip_dict_limit": LegacyOption(2048, "CLIP: maximum number of lines in text file", gr.Slider, { "visible": False }), "keyedit_delimiters": LegacyOption(r".,\/!?%^*;:{}=`~()", "Ctrl+up/down word delimiters", gr.Textbox, { "visible": False }), "keyedit_precision_attention": LegacyOption(0.1, "Ctrl+up/down precision when editing (attention:1.1)", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001, "visible": False}), "keyedit_precision_extra": LegacyOption(0.05, "Ctrl+up/down precision when editing ", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001, "visible": False}), diff --git a/modules/ui_caption.py b/modules/ui_caption.py index b2eef1d3b..5867edd79 100644 --- a/modules/ui_caption.py +++ b/modules/ui_caption.py @@ -1,14 +1,14 @@ import gradio as gr from modules import shared, ui_common, generation_parameters_copypaste -from modules.interrogate import openclip +from modules.caption import openclip default_task = "Short Caption" def vlm_caption_wrapper(question, system_prompt, 
prompt, image, model_name, prefill, thinking_mode): - """Wrapper for vqa.interrogate that handles annotated image display.""" - from modules.interrogate import vqa - answer = vqa.interrogate(question, system_prompt, prompt, image, model_name, prefill, thinking_mode) + """Wrapper for vqa.caption that handles annotated image display.""" + from modules.caption import vqa + answer = vqa.caption(question, system_prompt, prompt, image, model_name, prefill, thinking_mode) annotated_image = vqa.get_last_annotated_image() if annotated_image is not None: return answer, gr.update(value=annotated_image, visible=True) @@ -17,35 +17,35 @@ def vlm_caption_wrapper(question, system_prompt, prompt, image, model_name, pref def update_vlm_prompts_for_model(model_name): """Update the task dropdown choices based on selected model.""" - from modules.interrogate import vqa + from modules.caption import vqa prompts = vqa.get_prompts_for_model(model_name) return gr.update(choices=prompts, value=prompts[0] if prompts else default_task) def update_vlm_prompt_placeholder(question): """Update the prompt field placeholder based on selected task.""" - from modules.interrogate import vqa + from modules.caption import vqa placeholder = vqa.get_prompt_placeholder(question) return gr.update(placeholder=placeholder) def update_vlm_params(*args): vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p, vlm_keep_prefill, vlm_keep_thinking, vlm_thinking_mode = args - shared.opts.interrogate_vlm_max_length = int(vlm_max_tokens) - shared.opts.interrogate_vlm_num_beams = int(vlm_num_beams) - shared.opts.interrogate_vlm_temperature = float(vlm_temperature) - shared.opts.interrogate_vlm_do_sample = bool(vlm_do_sample) - shared.opts.interrogate_vlm_top_k = int(vlm_top_k) - shared.opts.interrogate_vlm_top_p = float(vlm_top_p) - shared.opts.interrogate_vlm_keep_prefill = bool(vlm_keep_prefill) - shared.opts.interrogate_vlm_keep_thinking = bool(vlm_keep_thinking) - 
shared.opts.interrogate_vlm_thinking_mode = bool(vlm_thinking_mode) + shared.opts.caption_vlm_max_length = int(vlm_max_tokens) + shared.opts.caption_vlm_num_beams = int(vlm_num_beams) + shared.opts.caption_vlm_temperature = float(vlm_temperature) + shared.opts.caption_vlm_do_sample = bool(vlm_do_sample) + shared.opts.caption_vlm_top_k = int(vlm_top_k) + shared.opts.caption_vlm_top_p = float(vlm_top_p) + shared.opts.caption_vlm_keep_prefill = bool(vlm_keep_prefill) + shared.opts.caption_vlm_keep_thinking = bool(vlm_keep_thinking) + shared.opts.caption_vlm_thinking_mode = bool(vlm_thinking_mode) shared.opts.save() def tagger_tag_wrapper(image, model_name, general_threshold, character_threshold, include_rating, exclude_tags, max_tags, sort_alpha, use_spaces, escape_brackets): """Wrapper for tagger.tag that maps UI inputs to function parameters.""" - from modules.interrogate import tagger + from modules.caption import tagger return tagger.tag( image=image, model_name=model_name, @@ -62,7 +62,7 @@ def tagger_tag_wrapper(image, model_name, general_threshold, character_threshold def tagger_batch_wrapper(model_name, batch_files, batch_folder, batch_str, save_output, save_append, recursive, general_threshold, character_threshold, include_rating, exclude_tags, max_tags, sort_alpha, use_spaces, escape_brackets): """Wrapper for tagger.batch that maps UI inputs to function parameters.""" - from modules.interrogate import tagger + from modules.caption import tagger return tagger.batch( model_name=model_name, batch_files=batch_files, @@ -88,7 +88,7 @@ def update_tagger_ui(model_name): When DeepBooru is selected, character_threshold is disabled since DeepBooru doesn't support separate character threshold. 
""" - from modules.interrogate import tagger + from modules.caption import tagger is_db = tagger.is_deepbooru(model_name) return [ gr.update(interactive=not is_db), # character_threshold @@ -113,48 +113,48 @@ def update_tagger_params(model_name, general_threshold, character_threshold, inc def update_clip_params(*args): clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams = args - shared.opts.interrogate_clip_min_length = int(clip_min_length) - shared.opts.interrogate_clip_max_length = int(clip_max_length) - shared.opts.interrogate_clip_min_flavors = int(clip_min_flavors) - shared.opts.interrogate_clip_max_flavors = int(clip_max_flavors) - shared.opts.interrogate_clip_num_beams = int(clip_num_beams) - shared.opts.interrogate_clip_flavor_count = int(clip_flavor_count) - shared.opts.interrogate_clip_chunk_size = int(clip_chunk_size) + shared.opts.caption_openclip_min_length = int(clip_min_length) + shared.opts.caption_openclip_max_length = int(clip_max_length) + shared.opts.caption_openclip_min_flavors = int(clip_min_flavors) + shared.opts.caption_openclip_max_flavors = int(clip_max_flavors) + shared.opts.caption_openclip_num_beams = int(clip_num_beams) + shared.opts.caption_openclip_flavor_count = int(clip_flavor_count) + shared.opts.caption_openclip_chunk_size = int(clip_chunk_size) shared.opts.save() - openclip.update_interrogate_params() + openclip.update_caption_params() def update_clip_model_params(clip_model, blip_model, clip_mode): """Save CLiP model settings to shared.opts when UI controls change.""" - shared.opts.interrogate_clip_model = str(clip_model) - shared.opts.interrogate_blip_model = str(blip_model) - shared.opts.interrogate_clip_mode = str(clip_mode) + shared.opts.caption_openclip_model = str(clip_model) + shared.opts.caption_openclip_blip_model = str(blip_model) + shared.opts.caption_openclip_mode = str(clip_mode) shared.opts.save() def update_vlm_model_params(vlm_model, 
vlm_system): """Save VLM model settings to shared.opts when UI controls change.""" - shared.opts.interrogate_vlm_model = str(vlm_model) - shared.opts.interrogate_vlm_system = str(vlm_system) + shared.opts.caption_vlm_model = str(vlm_model) + shared.opts.caption_vlm_system = str(vlm_system) shared.opts.save() def update_default_caption_type(caption_type): """Save the default caption type to shared.opts.""" - shared.opts.interrogate_default_type = str(caption_type) + shared.opts.caption_default_type = str(caption_type) shared.opts.save() def create_ui(): shared.log.debug('UI initialize: tab=caption') with gr.Row(equal_height=False, variant='compact', elem_classes="caption", elem_id="caption_tab"): - with gr.Column(variant='compact', elem_id='interrogate_input'): + with gr.Column(variant='compact', elem_id='caption_input'): with gr.Row(): - image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='interrogate_image') + image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='caption_image') with gr.Tabs(elem_id="mode_caption"): with gr.Tab("VLM Caption", elem_id="tab_vlm_caption"): - from modules.interrogate import vqa - current_vlm_model = shared.opts.interrogate_vlm_model or vqa.vlm_default + from modules.caption import vqa + current_vlm_model = shared.opts.caption_vlm_model or vqa.vlm_default initial_prompts = vqa.get_prompts_for_model(current_vlm_model) with gr.Row(): vlm_system = gr.Textbox(label="System Prompt", value=vqa.vlm_system, lines=1, elem_id='vlm_system') @@ -162,25 +162,25 @@ def create_ui(): vlm_question = gr.Dropdown(label="Task", allow_custom_value=False, choices=initial_prompts, value=default_task, elem_id='vlm_question') with gr.Row(): vlm_prompt = gr.Textbox(label="Prompt", placeholder=vqa.get_prompt_placeholder(initial_prompts[0]), lines=2, elem_id='vlm_prompt') - with gr.Row(elem_id='interrogate_buttons_query'): + with gr.Row(elem_id='caption_buttons_query'): 
vlm_model = gr.Dropdown(list(vqa.vlm_models), value=current_vlm_model, label='VLM Model', elem_id='vlm_model') with gr.Row(): vlm_load_btn = gr.Button(value='Load', elem_id='vlm_load', variant='secondary') vlm_unload_btn = gr.Button(value='Unload', elem_id='vlm_unload', variant='secondary') with gr.Accordion(label='Caption: Advanced Options', open=False, visible=True): with gr.Row(): - vlm_max_tokens = gr.Slider(label='VLM Max Tokens', value=shared.opts.interrogate_vlm_max_length, minimum=16, maximum=4096, step=1, elem_id='vlm_max_tokens') - vlm_num_beams = gr.Slider(label='VLM Num Beams', value=shared.opts.interrogate_vlm_num_beams, minimum=1, maximum=16, step=1, elem_id='vlm_num_beams') - vlm_temperature = gr.Slider(label='VLM Temperature', value=shared.opts.interrogate_vlm_temperature, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_temperature') + vlm_max_tokens = gr.Slider(label='VLM Max Tokens', value=shared.opts.caption_vlm_max_length, minimum=16, maximum=4096, step=1, elem_id='vlm_max_tokens') + vlm_num_beams = gr.Slider(label='VLM Num Beams', value=shared.opts.caption_vlm_num_beams, minimum=1, maximum=16, step=1, elem_id='vlm_num_beams') + vlm_temperature = gr.Slider(label='VLM Temperature', value=shared.opts.caption_vlm_temperature, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_temperature') with gr.Row(): - vlm_top_k = gr.Slider(label='Top-K', value=shared.opts.interrogate_vlm_top_k, minimum=0, maximum=99, step=1, elem_id='vlm_top_k') - vlm_top_p = gr.Slider(label='Top-P', value=shared.opts.interrogate_vlm_top_p, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_top_p') + vlm_top_k = gr.Slider(label='Top-K', value=shared.opts.caption_vlm_top_k, minimum=0, maximum=99, step=1, elem_id='vlm_top_k') + vlm_top_p = gr.Slider(label='Top-P', value=shared.opts.caption_vlm_top_p, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_top_p') with gr.Row(): - vlm_do_sample = gr.Checkbox(label='Use Samplers', value=shared.opts.interrogate_vlm_do_sample, 
elem_id='vlm_do_sample') - vlm_thinking_mode = gr.Checkbox(label='Thinking Mode', value=shared.opts.interrogate_vlm_thinking_mode, elem_id='vlm_thinking_mode') + vlm_do_sample = gr.Checkbox(label='Use Samplers', value=shared.opts.caption_vlm_do_sample, elem_id='vlm_do_sample') + vlm_thinking_mode = gr.Checkbox(label='Thinking Mode', value=shared.opts.caption_vlm_thinking_mode, elem_id='vlm_thinking_mode') with gr.Row(): - vlm_keep_thinking = gr.Checkbox(label='Keep Thinking Trace', value=shared.opts.interrogate_vlm_keep_thinking, elem_id='vlm_keep_thinking') - vlm_keep_prefill = gr.Checkbox(label='Keep Prefill', value=shared.opts.interrogate_vlm_keep_prefill, elem_id='vlm_keep_prefill') + vlm_keep_thinking = gr.Checkbox(label='Keep Thinking Trace', value=shared.opts.caption_vlm_keep_thinking, elem_id='vlm_keep_thinking') + vlm_keep_prefill = gr.Checkbox(label='Keep Prefill', value=shared.opts.caption_vlm_keep_prefill, elem_id='vlm_keep_prefill') with gr.Row(): vlm_prefill = gr.Textbox(label='Prefill Text', value='', lines=1, elem_id='vlm_prefill', placeholder='Optional prefill text for model to continue from') vlm_max_tokens.change(fn=update_vlm_params, inputs=[vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p, vlm_keep_prefill, vlm_keep_thinking, vlm_thinking_mode], outputs=[]) @@ -203,27 +203,27 @@ def create_ui(): vlm_save_output = gr.Checkbox(label='Save Caption Files', value=True, elem_id="vlm_save_output") vlm_save_append = gr.Checkbox(label='Append Caption Files', value=False, elem_id="vlm_save_append") vlm_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="vlm_folder_recursive") - with gr.Row(elem_id='interrogate_buttons_batch'): + with gr.Row(elem_id='caption_buttons_batch'): btn_vlm_caption_batch = gr.Button("Batch Caption", variant='primary', elem_id="btn_vlm_caption_batch") with gr.Row(): btn_vlm_caption = gr.Button("Caption", variant='primary', elem_id="btn_vlm_caption") - with gr.Tab("OpenCLiP", 
elem_id='tab_clip_interrogate'): + with gr.Tab("OpenCLiP", elem_id='tab_openclip'): with gr.Row(): - clip_model = gr.Dropdown([], value=shared.opts.interrogate_clip_model, label='CLiP Model', elem_id='clip_clip_model') + clip_model = gr.Dropdown([], value=shared.opts.caption_openclip_model, label='CLiP Model', elem_id='clip_clip_model') ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'clip_models_refresh') - blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption Model', elem_id='btN_clip_blip_model') + blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.caption_openclip_blip_model, label='Caption Model', elem_id='btN_clip_blip_model') clip_mode = gr.Dropdown(openclip.caption_types, label='Mode', value='fast', elem_id='clip_clip_mode') with gr.Accordion(label='Caption: Advanced Options', open=False, visible=True): with gr.Row(): - clip_min_length = gr.Slider(label='clip: min length', value=shared.opts.interrogate_clip_min_length, minimum=8, maximum=75, step=1, elem_id='clip_caption_min_length') - clip_max_length = gr.Slider(label='clip: max length', value=shared.opts.interrogate_clip_max_length, minimum=16, maximum=1024, step=1, elem_id='clip_caption_max_length') - clip_chunk_size = gr.Slider(label='clip: chunk size', value=shared.opts.interrogate_clip_chunk_size, minimum=256, maximum=4096, step=8, elem_id='clip_chunk_size') + clip_min_length = gr.Slider(label='clip: min length', value=shared.opts.caption_openclip_min_length, minimum=8, maximum=75, step=1, elem_id='clip_caption_min_length') + clip_max_length = gr.Slider(label='clip: max length', value=shared.opts.caption_openclip_max_length, minimum=16, maximum=1024, step=1, elem_id='clip_caption_max_length') + clip_chunk_size = gr.Slider(label='clip: chunk size', value=shared.opts.caption_openclip_chunk_size, minimum=256, maximum=4096, step=8, 
elem_id='clip_chunk_size') with gr.Row(): - clip_min_flavors = gr.Slider(label='clip: min flavors', value=shared.opts.interrogate_clip_min_flavors, minimum=1, maximum=16, step=1, elem_id='clip_min_flavors') - clip_max_flavors = gr.Slider(label='clip: max flavors', value=shared.opts.interrogate_clip_max_flavors, minimum=1, maximum=64, step=1, elem_id='clip_max_flavors') - clip_flavor_count = gr.Slider(label='clip: intermediates', value=shared.opts.interrogate_clip_flavor_count, minimum=256, maximum=4096, step=8, elem_id='clip_flavor_intermediate_count') + clip_min_flavors = gr.Slider(label='clip: min flavors', value=shared.opts.caption_openclip_min_flavors, minimum=1, maximum=16, step=1, elem_id='clip_min_flavors') + clip_max_flavors = gr.Slider(label='clip: max flavors', value=shared.opts.caption_openclip_max_flavors, minimum=1, maximum=64, step=1, elem_id='clip_max_flavors') + clip_flavor_count = gr.Slider(label='clip: intermediates', value=shared.opts.caption_openclip_flavor_count, minimum=256, maximum=4096, step=8, elem_id='clip_flavor_intermediate_count') with gr.Row(): - clip_num_beams = gr.Slider(label='clip: num beams', value=shared.opts.interrogate_clip_num_beams, minimum=1, maximum=16, step=1, elem_id='clip_num_beams') + clip_num_beams = gr.Slider(label='clip: num beams', value=shared.opts.caption_openclip_num_beams, minimum=1, maximum=16, step=1, elem_id='clip_num_beams') clip_min_length.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[]) clip_max_length.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[]) clip_chunk_size.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[]) @@ -243,12 +243,12 @@ def 
create_ui(): clip_save_append = gr.Checkbox(label='Append Caption Files', value=False, elem_id="clip_save_append") clip_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="clip_folder_recursive") with gr.Row(): - btn_clip_interrogate_batch = gr.Button("Batch Interrogate", variant='primary', elem_id="btn_clip_interrogate_batch") + btn_clip_caption_batch = gr.Button("Batch Caption", variant='primary', elem_id="btn_clip_caption_batch") with gr.Row(): - btn_clip_interrogate_img = gr.Button("Interrogate", variant='primary', elem_id="btn_clip_interrogate_img") + btn_clip_caption_img = gr.Button("Caption", variant='primary', elem_id="btn_clip_caption_img") btn_clip_analyze_img = gr.Button("Analyze", variant='primary', elem_id="btn_clip_analyze_img") with gr.Tab("Tagger", elem_id='tab_tagger'): - from modules.interrogate import tagger + from modules.caption import tagger with gr.Row(): wd_model = gr.Dropdown(tagger.get_models(), value=shared.opts.waifudiffusion_model, label='Tagger Model', elem_id='wd_model') ui_common.create_refresh_button(wd_model, tagger.refresh_models, lambda: {"choices": tagger.get_models()}, 'wd_models_refresh') @@ -286,32 +286,32 @@ def create_ui(): btn_wd_tag_batch = gr.Button("Batch Tag", variant='primary', elem_id="btn_wd_tag_batch") with gr.Row(): btn_wd_tag = gr.Button("Tag", variant='primary', elem_id="btn_wd_tag") - with gr.Tab("Interrogate", elem_id='tab_interrogate'): + with gr.Tab("Default", elem_id='tab_caption_default'): with gr.Row(): default_caption_type = gr.Radio( choices=["VLM", "OpenCLiP", "Tagger"], - value=shared.opts.interrogate_default_type, + value=shared.opts.caption_default_type, label="Default Caption Type", elem_id="default_caption_type" ) - with gr.Column(variant='compact', elem_id='interrogate_output'): - with gr.Row(elem_id='interrogate_output_prompt'): + with gr.Column(variant='compact', elem_id='caption_output'): + with gr.Row(elem_id='caption_output_prompt'): prompt = gr.Textbox(label="Answer", 
lines=12, placeholder="ai generated image description") - with gr.Row(elem_id='interrogate_output_image'): - output_image = gr.Image(type='pil', label="Annotated Image", interactive=False, visible=False, elem_id='interrogate_output_image_display') - with gr.Row(elem_id='interrogate_output_classes'): - medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5, visible=False) - artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5, visible=False) - movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5, visible=False) - trending = gr.Label(elem_id="interrogate_label_trending", label="Trending", num_top_classes=5, visible=False) - flavor = gr.Label(elem_id="interrogate_label_flavor", label="Flavor", num_top_classes=5, visible=False) - clip_labels_text = gr.Textbox(elem_id="interrogate_clip_labels_text", label="CLIP Analysis", lines=15, interactive=False, visible=False, show_label=False) - with gr.Row(elem_id='copy_buttons_interrogate'): - copy_interrogate_buttons = generation_parameters_copypaste.create_buttons(["txt2img", "img2img", "control", "extras"]) + with gr.Row(elem_id='caption_output_image'): + output_image = gr.Image(type='pil', label="Annotated Image", interactive=False, visible=False, elem_id='caption_output_image_display') + with gr.Row(elem_id='caption_output_classes'): + medium = gr.Label(elem_id="caption_label_medium", label="Medium", num_top_classes=5, visible=False) + artist = gr.Label(elem_id="caption_label_artist", label="Artist", num_top_classes=5, visible=False) + movement = gr.Label(elem_id="caption_label_movement", label="Movement", num_top_classes=5, visible=False) + trending = gr.Label(elem_id="caption_label_trending", label="Trending", num_top_classes=5, visible=False) + flavor = gr.Label(elem_id="caption_label_flavor", label="Flavor", num_top_classes=5, visible=False) + clip_labels_text = gr.Textbox(elem_id="caption_clip_labels_text", 
label="CLIP Analysis", lines=15, interactive=False, visible=False, show_label=False) + with gr.Row(elem_id='copy_buttons_caption'): + copy_caption_buttons = generation_parameters_copypaste.create_buttons(["txt2img", "img2img", "control", "extras"]) - btn_clip_interrogate_img.click(openclip.interrogate_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image]) + btn_clip_caption_img.click(openclip.caption_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image]) btn_clip_analyze_img.click(openclip.analyze_image, inputs=[image, clip_model, blip_model], outputs=[medium, artist, movement, trending, flavor, clip_labels_text]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image]) - btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image]) + btn_clip_caption_batch.click(fn=openclip.caption_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image]) btn_vlm_caption.click(fn=vlm_caption_wrapper, inputs=[vlm_question, vlm_system, vlm_prompt, image, vlm_model, vlm_prefill, vlm_thinking_mode], outputs=[prompt, output_image]) btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_system, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append, vlm_folder_recursive, vlm_prefill, vlm_thinking_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), 
inputs=[], outputs=[output_image]) btn_wd_tag.click(fn=tagger_tag_wrapper, inputs=[image, wd_model, wd_general_threshold, wd_character_threshold, wd_include_rating, wd_exclude_tags, wd_max_tags, wd_sort_alpha, wd_use_spaces, wd_escape], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image]) @@ -325,10 +325,10 @@ def create_ui(): vlm_load_btn.click(fn=vqa.load_model, inputs=[vlm_model], outputs=[]) vlm_unload_btn.click(fn=vqa.unload_model, inputs=[], outputs=[]) def tagger_load_wrapper(model_name): - from modules.interrogate import tagger + from modules.caption import tagger return tagger.load_model(model_name) def tagger_unload_wrapper(): - from modules.interrogate import tagger + from modules.caption import tagger return tagger.unload_model() wd_load_btn.click(fn=tagger_load_wrapper, inputs=[wd_model], outputs=[]) wd_unload_btn.click(fn=tagger_unload_wrapper, inputs=[], outputs=[]) @@ -363,6 +363,6 @@ def create_ui(): # Save default caption type to shared.opts when UI control changes default_caption_type.change(fn=update_default_caption_type, inputs=[default_caption_type], outputs=[], show_progress=False) - for tabname, button in copy_interrogate_buttons.items(): + for tabname, button in copy_caption_buttons.items(): generation_parameters_copypaste.register_paste_params_button(generation_parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,)) generation_parameters_copypaste.add_paste_fields("caption", image, None) diff --git a/modules/ui_common.py b/modules/ui_common.py index 5a6299afe..89dd2038a 100644 --- a/modules/ui_common.py +++ b/modules/ui_common.py @@ -273,7 +273,7 @@ def create_output_panel(tabname, preview=True, prompt=None, height=None, transfe elem_classes=["gallery_main"], ) if prompt is not None: - ui_sections.create_interrogate_button(tab=tabname, inputs=result_gallery, outputs=prompt, what='output') + 
ui_sections.create_caption_button(tab=tabname, inputs=result_gallery, outputs=prompt, what='output') button_image_fit = gr.Button(ui_symbols.resize, elem_id=f"{tabname}_image_fit", elem_classes=['image-fit']) button_image_fit.click(fn=None, _js="cycleImageFit", inputs=[], outputs=[]) diff --git a/modules/ui_control.py b/modules/ui_control.py index dc31db6c2..1049b5f54 100644 --- a/modules/ui_control.py +++ b/modules/ui_control.py @@ -226,7 +226,7 @@ def create_ui(_blocks: gr.Blocks=None): else: input_image = gr.HTML(value='

Kanvas not initialized

', elem_id='kanvas-container') input_changed = gr.Button('Kanvas change', elem_id='kanvas-change-button', visible=False) - btn_interrogate = ui_sections.create_interrogate_button('control', what='input') + btn_caption = ui_sections.create_caption_button('control', what='input') with gr.Tab('Video', id='in-video') as tab_video: input_video = gr.Video(label="Input", show_label=False, interactive=True, height=gr_height, elem_classes=['control-image']) with gr.Tab('Batch', id='in-batch') as tab_batch: @@ -303,8 +303,8 @@ def create_ui(_blocks: gr.Blocks=None): ) input_changed.click(**select_dict) - btn_interrogate.click(**select_dict) # need to fetch input first - btn_interrogate.click(fn=helpers.interrogate, inputs=[], outputs=[prompt]) + btn_caption.click(**select_dict) # need to fetch input first + btn_caption.click(fn=helpers.caption, inputs=[], outputs=[prompt]) prompt.submit(**select_dict) negative.submit(**select_dict) diff --git a/modules/ui_control_helpers.py b/modules/ui_control_helpers.py index f1545dd07..cbe3c4df8 100644 --- a/modules/ui_control_helpers.py +++ b/modules/ui_control_helpers.py @@ -48,16 +48,16 @@ def initialize(): scripts_manager.scripts_control.initialize_scripts(is_img2img=False, is_control=True) -def interrogate(): +def caption(): prompt = None if input_source is None or len(input_source) == 0: - shared.log.warning('Interrogate: no input source') + shared.log.warning('Caption: no input source') return prompt try: - from modules.interrogate.interrogate import interrogate as interrogate_fn - prompt = interrogate_fn(input_source[0]) + from modules.caption.caption import caption as caption_fn + prompt = caption_fn(input_source[0]) except Exception as e: - shared.log.error(f'Interrogate: {e}') + shared.log.error(f'Caption: {e}') return prompt diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py index d6e1591a2..5e651de91 100644 --- a/modules/ui_img2img.py +++ b/modules/ui_img2img.py @@ -3,21 +3,21 @@ from modules import timer, shared, 
call_queue, generation_parameters_copypaste, from modules import ui_common, ui_sections, ui_guidance -def process_interrogate(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_singles): +def process_caption(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_singles): import os from PIL import Image - from modules.interrogate.interrogate import interrogate + from modules.caption.caption import caption mode = int(mode) if mode in {0, 1, 3, 4}: - return [interrogate(ii_singles[mode]), None] + return [caption(ii_singles[mode]), None] if mode == 2: - return [interrogate(ii_singles[mode]["image"]), None] + return [caption(ii_singles[mode]["image"]), None] if mode == 5: if len(ii_input_files) > 0: images = [f.name for f in ii_input_files] else: if not os.path.isdir(ii_input_dir): - shared.log.error(f"Interrogate: Input directory not found: {ii_input_dir}") + shared.log.error(f"Caption: Input directory not found: {ii_input_dir}") return [gr.update(), None] images = os.listdir(ii_input_dir) if ii_output_dir != "": @@ -28,7 +28,7 @@ def process_interrogate(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_s img = Image.open(image) filename = os.path.basename(image) left, _ = os.path.splitext(filename) - print(interrogate(img), file=open(os.path.join(ii_output_dir, f"{left}.txt"), 'a', encoding='utf-8')) # pylint: disable=consider-using-with + print(caption(img), file=open(os.path.join(ii_output_dir, f"{left}.txt"), 'a', encoding='utf-8')) # pylint: disable=consider-using-with return [gr.update(), None] @@ -70,7 +70,7 @@ def create_ui(): state = gr.Textbox(value='', visible=False) with gr.TabItem('Image', id='img2img_image', elem_id="img2img_image_tab") as tab_img2img: img_init = gr.Image(label="", elem_id="img2img_image", show_label=False, interactive=True, type="pil", tool="editor", image_mode="RGBA", height=512) - interrogate_btn = ui_sections.create_interrogate_button(tab='img2img', what='input') + caption_btn = 
ui_sections.create_caption_button(tab='img2img', what='input') add_copy_image_controls('img2img', img_init) with gr.TabItem('Inpaint', id='img2img_inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint: @@ -215,7 +215,7 @@ def create_ui(): img2img_reprocess[2].click(**img2img_dict) # hires-refine img2img_reprocess[3].click(**img2img_dict) # face-restore - interrogate_args = dict( + caption_args = dict( _js="get_img2img_tab_index", inputs=[ dummy_component, @@ -227,7 +227,7 @@ def create_ui(): ], outputs=[img2img_prompt, dummy_component], ) - interrogate_btn.click(fn=lambda *args: process_interrogate(*args), **interrogate_args) + caption_btn.click(fn=lambda *args: process_caption(*args), **caption_args) img2img_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_prompt], outputs=[img2img_token_counter], show_progress = 'hidden') img2img_negative_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_negative_prompt], outputs=[img2img_negative_token_counter], show_progress = 'hidden') diff --git a/modules/ui_sections.py b/modules/ui_sections.py index 2c4e11b78..4930446f2 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -1,7 +1,7 @@ import gradio as gr from modules import shared, modelloader, ui_symbols, ui_common, sd_samplers from modules.ui_components import ToolButton -from modules.interrogate import interrogate +from modules.caption import caption def create_toprow(is_img2img: bool = False, id_part: str = None, generate_visible: bool = True, negative_visible: bool = True, reprocess_visible: bool = True): @@ -91,11 +91,11 @@ def create_resolution_inputs(tab, default_width=1024, default_height=1024): return width, height -def create_interrogate_button(tab: str, inputs: list = None, outputs: str = None, what: str = ''): - button_interrogate = gr.Button(ui_symbols.interrogate, elem_id=f"{tab}_interrogate_{what}", elem_classes=['interrogate']) +def 
create_caption_button(tab: str, inputs: list = None, outputs: str = None, what: str = ''): + button_caption = gr.Button(ui_symbols.caption, elem_id=f"{tab}_caption_{what}", elem_classes=['caption']) if inputs is not None and outputs is not None: - button_interrogate.click(fn=interrogate.interrogate, inputs=inputs, outputs=[outputs]) - return button_interrogate + button_caption.click(fn=caption.caption, inputs=inputs, outputs=[outputs]) + return button_caption def create_batch_inputs(tab, accordion=True): diff --git a/modules/ui_symbols.py b/modules/ui_symbols.py index c880a4a8f..e379be27a 100644 --- a/modules/ui_symbols.py +++ b/modules/ui_symbols.py @@ -33,7 +33,7 @@ search = '🔍' preview = '🖼️' image = '🖌️' resize = '⁜' -interrogate = '\uf46b' # Telescope icon in Noto Sans. Previously '♻' +caption = '\uf46b' # Telescope icon in Noto Sans. Previously '♻' bullet = '⃝' vision = '\uf06e' # Font Awesome eye icon (more minimalistic) reasoning = '\uf0eb' # Font Awesome lightbulb icon (represents thinking/reasoning) diff --git a/modules/ui_video_vlm.py b/modules/ui_video_vlm.py index 96cf8933b..b39ba4fe3 100644 --- a/modules/ui_video_vlm.py +++ b/modules/ui_video_vlm.py @@ -22,7 +22,7 @@ system_prompts = { def enhance_prompt(enable:bool, model:str=None, image=None, prompt:str='', system_prompt:str='', nsfw:bool=True): - from modules.interrogate import vqa + from modules.caption import vqa if not enable: return prompt if model is None or len(model) < 4: @@ -46,7 +46,7 @@ def enhance_prompt(enable:bool, model:str=None, image=None, prompt:str='', syste system_prompt += system_prompts['nsfw_ok'] if nsfw else system_prompts['nsfw_no'] system_prompt += f" {system_prompts['suffix']} {system_prompts['example']}" shared.log.debug(f'Video prompt enhance: model="{model}" image={image} nsfw={nsfw} prompt="{prompt}"') - answer = vqa.interrogate(question='', prompt=prompt, system_prompt=system_prompt, image=image, model_name=model, quiet=False) + answer = vqa.caption(question='', 
prompt=prompt, system_prompt=system_prompt, image=image, model_name=model, quiet=False) shared.log.debug(f'Video prompt enhance: answer="{answer}"') return answer