From 61b031ada5f2230f38e3671718247931096610a9 Mon Sep 17 00:00:00 2001
From: CalamitousFelicitousness <iowasovereign@gmail.com>
Date: Mon, 26 Jan 2026 01:15:42 +0000
Subject: [PATCH] refactor: rename interrogate to caption in imports, options and UI

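Update all imports from modules.interrogate to modules.caption, and rename
the identifiers that referred to "interrogate", across:
- modules/shared.py, modules/shared_legacy.py
- modules/ui_caption.py, modules/ui_common.py
- modules/ui_control.py, modules/ui_control_helpers.py
- modules/ui_img2img.py, modules/ui_sections.py
- modules/ui_symbols.py, modules/ui_video_vlm.py

Besides the import paths, this patch also:
- renames the interrogate_* option keys in modules/shared.py to caption_*
  (interrogate_clip_* becomes caption_openclip_*, and
  interrogate_blip_model becomes caption_openclip_blip_model)
- removes the interrogate_clip_skip_categories and
  interrogate_clip_dict_limit entries from modules/shared_legacy.py
- switches call sites to the renamed functions (vqa.interrogate to
  vqa.caption, openclip.update_interrogate_params to
  openclip.update_caption_params)
- renames Gradio elem_id values and the tab/button labels in
  modules/ui_caption.py from "interrogate" to "caption"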
---
 modules/shared.py             |  54 ++++++------
 modules/shared_legacy.py      |   2 -
 modules/ui_caption.py         | 160 +++++++++++++++++-----------------
 modules/ui_common.py          |   2 +-
 modules/ui_control.py         |   6 +-
 modules/ui_control_helpers.py |  10 +--
 modules/ui_img2img.py         |  18 ++--
 modules/ui_sections.py        |  10 +--
 modules/ui_symbols.py         |   2 +-
 modules/ui_video_vlm.py       |   4 +-
 10 files changed, 133 insertions(+), 135 deletions(-)
diff --git a/modules/shared.py b/modules/shared.py
index d549b9a7a..7e0c4a4a9 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -22,8 +22,8 @@ from modules.memstats import memory_stats, ram_stats # pylint: disable=unused-im
 
 log.debug('Initializing: pipelines')
 from modules import shared_items
-from modules.interrogate.openclip import caption_models, caption_types, get_clip_models, refresh_clip_models
-from modules.interrogate.vqa import vlm_models, vlm_prompts, vlm_system, vlm_default
+from modules.caption.openclip import caption_models, caption_types, get_clip_models, refresh_clip_models
+from modules.caption.vqa import vlm_models, vlm_prompts, vlm_system, vlm_default
 
 
 if TYPE_CHECKING:
@@ -207,7 +207,7 @@ options_templates.update(options_section(('offload', "Model Offloading"), {
     "offload_sep": OptionInfo("<h2>Model Offloading</h2>", "", gr.HTML),
     "diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'group', 'model', 'sequential']}),
     "diffusers_offload_nonblocking": OptionInfo(False, "Non-blocking move operations"),
-    "interrogate_offload": OptionInfo(True, "Offload caption models"),
+    "caption_offload": OptionInfo(True, "Offload caption models"),
     "offload_balanced_sep": OptionInfo("<h2>Balanced Offload</h2>", "", gr.HTML),
     "diffusers_offload_pre": OptionInfo(True, "Offload during pre-forward"),
     "diffusers_offload_streams": OptionInfo(False, "Offload using streams"),
@@ -742,31 +742,31 @@ options_templates.update(options_section(('hidden_options', "Hidden options"), {
     "sd_checkpoint_hash": OptionInfo("", "SHA256 hash of the current checkpoint", gr.Textbox, {"visible": False}),
     "tooltips": OptionInfo("UI Tooltips", "UI tooltips", gr.Radio, {"choices": ["None", "Browser default", "UI tooltips"], "visible": False}),
 
-    # Caption/Interrogate settings (controlled via Caption Tab UI)
-    "interrogate_default_type": OptionInfo("VLM", "Default caption type", gr.Radio, {"choices": ["VLM", "OpenCLiP", "Tagger"], "visible": False}),
+    # Caption settings (controlled via Caption Tab UI)
+    "caption_default_type": OptionInfo("VLM", "Default caption type", gr.Radio, {"choices": ["VLM", "OpenCLiP", "Tagger"], "visible": False}),
     "tagger_show_scores": OptionInfo(False, "Tagger: show confidence scores in results", gr.Checkbox, {"visible": False}),
-    "interrogate_clip_model": OptionInfo("ViT-L-14/openai", "OpenCLiP: default model", gr.Dropdown, lambda: {"choices": get_clip_models(), "visible": False}, refresh=refresh_clip_models),
-    "interrogate_clip_mode": OptionInfo(caption_types[0], "OpenCLiP: default mode", gr.Dropdown, {"choices": caption_types, "visible": False}),
-    "interrogate_blip_model": OptionInfo(list(caption_models)[0], "OpenCLiP: default captioner", gr.Dropdown, {"choices": list(caption_models), "visible": False}),
-    "interrogate_clip_num_beams": OptionInfo(1, "OpenCLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
-    "interrogate_clip_min_length": OptionInfo(32, "OpenCLiP: min length", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1, "visible": False}),
-    "interrogate_clip_max_length": OptionInfo(74, "OpenCLiP: max length", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}),
-    "interrogate_clip_min_flavors": OptionInfo(2, "OpenCLiP: min flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
-    "interrogate_clip_max_flavors": OptionInfo(16, "OpenCLiP: max flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
-    "interrogate_clip_flavor_count": OptionInfo(1024, "OpenCLiP: intermediate flavors", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
-    "interrogate_clip_chunk_size": OptionInfo(1024, "OpenCLiP: chunk size", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
-    "interrogate_vlm_model": OptionInfo(vlm_default, "VLM: default model", gr.Dropdown, {"choices": list(vlm_models), "visible": False}),
-    "interrogate_vlm_prompt": OptionInfo(vlm_prompts[2], "VLM: default prompt", DropdownEditable, {"choices": vlm_prompts, "visible": False}),
-    "interrogate_vlm_system": OptionInfo(vlm_system, "VLM: system prompt", gr.Textbox, {"visible": False}),
-    "interrogate_vlm_num_beams": OptionInfo(1, "VLM: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
-    "interrogate_vlm_max_length": OptionInfo(512, "VLM: max length", gr.Slider, {"minimum": 1, "maximum": 4096, "step": 1, "visible": False}),
-    "interrogate_vlm_do_sample": OptionInfo(True, "VLM: use sample method", gr.Checkbox, {"visible": False}),
-    "interrogate_vlm_temperature": OptionInfo(0.8, "VLM: temperature", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
-    "interrogate_vlm_top_k": OptionInfo(0, "VLM: top-k", gr.Slider, {"minimum": 0, "maximum": 99, "step": 1, "visible": False}),
-    "interrogate_vlm_top_p": OptionInfo(0, "VLM: top-p", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
-    "interrogate_vlm_keep_prefill": OptionInfo(False, "VLM: keep prefill text in output", gr.Checkbox, {"visible": False}),
-    "interrogate_vlm_keep_thinking": OptionInfo(False, "VLM: keep reasoning trace in output", gr.Checkbox, {"visible": False}),
-    "interrogate_vlm_thinking_mode": OptionInfo(False, "VLM: enable thinking/reasoning mode", gr.Checkbox, {"visible": False}),
+    "caption_openclip_model": OptionInfo("ViT-L-14/openai", "OpenCLiP: default model", gr.Dropdown, lambda: {"choices": get_clip_models(), "visible": False}, refresh=refresh_clip_models),
+    "caption_openclip_mode": OptionInfo(caption_types[0], "OpenCLiP: default mode", gr.Dropdown, {"choices": caption_types, "visible": False}),
+    "caption_openclip_blip_model": OptionInfo(list(caption_models)[0], "OpenCLiP: default captioner", gr.Dropdown, {"choices": list(caption_models), "visible": False}),
+    "caption_openclip_num_beams": OptionInfo(1, "OpenCLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
+    "caption_openclip_min_length": OptionInfo(32, "OpenCLiP: min length", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1, "visible": False}),
+    "caption_openclip_max_length": OptionInfo(74, "OpenCLiP: max length", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}),
+    "caption_openclip_min_flavors": OptionInfo(2, "OpenCLiP: min flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
+    "caption_openclip_max_flavors": OptionInfo(16, "OpenCLiP: max flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
+    "caption_openclip_flavor_count": OptionInfo(1024, "OpenCLiP: intermediate flavors", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
+    "caption_openclip_chunk_size": OptionInfo(1024, "OpenCLiP: chunk size", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
+    "caption_vlm_model": OptionInfo(vlm_default, "VLM: default model", gr.Dropdown, {"choices": list(vlm_models), "visible": False}),
+    "caption_vlm_prompt": OptionInfo(vlm_prompts[2], "VLM: default prompt", DropdownEditable, {"choices": vlm_prompts, "visible": False}),
+    "caption_vlm_system": OptionInfo(vlm_system, "VLM: system prompt", gr.Textbox, {"visible": False}),
+    "caption_vlm_num_beams": OptionInfo(1, "VLM: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
+    "caption_vlm_max_length": OptionInfo(512, "VLM: max length", gr.Slider, {"minimum": 1, "maximum": 4096, "step": 1, "visible": False}),
+    "caption_vlm_do_sample": OptionInfo(True, "VLM: use sample method", gr.Checkbox, {"visible": False}),
+    "caption_vlm_temperature": OptionInfo(0.8, "VLM: temperature", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
+    "caption_vlm_top_k": OptionInfo(0, "VLM: top-k", gr.Slider, {"minimum": 0, "maximum": 99, "step": 1, "visible": False}),
+    "caption_vlm_top_p": OptionInfo(0, "VLM: top-p", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
+    "caption_vlm_keep_prefill": OptionInfo(False, "VLM: keep prefill text in output", gr.Checkbox, {"visible": False}),
+    "caption_vlm_keep_thinking": OptionInfo(False, "VLM: keep reasoning trace in output", gr.Checkbox, {"visible": False}),
+    "caption_vlm_thinking_mode": OptionInfo(False, "VLM: enable thinking/reasoning mode", gr.Checkbox, {"visible": False}),
     "tagger_threshold": OptionInfo(0.50, "Tagger: general tag threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False}),
     "tagger_include_rating": OptionInfo(False, "Tagger: include rating tags", gr.Checkbox, {"visible": False}),
     "tagger_max_tags": OptionInfo(74, "Tagger: max tags", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}),
diff --git a/modules/shared_legacy.py b/modules/shared_legacy.py
index f96b29fa1..af08da28b 100644
--- a/modules/shared_legacy.py
+++ b/modules/shared_legacy.py
@@ -11,7 +11,6 @@ class LegacyOption(OptionInfo):
 
 legacy_options = options_section(('legacy_options', "Legacy options"), {
     "ldsr_models_path": LegacyOption(os.path.join(paths.models_path, 'LDSR'), "LDSR Path", gr.Textbox, { "visible": False}),
-    "interrogate_clip_skip_categories": LegacyOption(["artists", "movements", "flavors"], "CLiP: skip categories", gr.CheckboxGroup, {"choices": [], "visible":False}),
     "lora_legacy": LegacyOption(False, "LoRA load using legacy method", gr.Checkbox, {"visible": False}),
     "lora_preferred_name": LegacyOption("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
     "img2img_extra_noise": LegacyOption(0.0, "Extra noise multiplier for img2img", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": False}),
@@ -43,7 +42,6 @@ legacy_options = options_section(('legacy_options', "Legacy options"), {
     "grid_save_to_dirs": LegacyOption(False, "Save grids to a subdirectory", gr.Checkbox, {"visible": False}),
     "hypernetwork_enabled": LegacyOption(False, "Enable Hypernetwork support", gr.Checkbox, {"visible": False}),
     "img2img_fix_steps": LegacyOption(False, "For image processing do exact number of steps as specified", gr.Checkbox, { "visible": False }),
-    "interrogate_clip_dict_limit": LegacyOption(2048, "CLIP: maximum number of lines in text file", gr.Slider, { "visible": False }),
     "keyedit_delimiters": LegacyOption(r".,\/!?%^*;:{}=`~()", "Ctrl+up/down word delimiters", gr.Textbox, { "visible": False }),
     "keyedit_precision_attention": LegacyOption(0.1, "Ctrl+up/down precision when editing (attention:1.1)", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001, "visible": False}),
     "keyedit_precision_extra": LegacyOption(0.05, "Ctrl+up/down precision when editing <extra networks:0.9>", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001, "visible": False}),
diff --git a/modules/ui_caption.py b/modules/ui_caption.py
index b2eef1d3b..5867edd79 100644
--- a/modules/ui_caption.py
+++ b/modules/ui_caption.py
@@ -1,14 +1,14 @@
 import gradio as gr
 from modules import shared, ui_common, generation_parameters_copypaste
-from modules.interrogate import openclip
+from modules.caption import openclip
 
 
 default_task = "Short Caption"
 
 def vlm_caption_wrapper(question, system_prompt, prompt, image, model_name, prefill, thinking_mode):
-    """Wrapper for vqa.interrogate that handles annotated image display."""
-    from modules.interrogate import vqa
-    answer = vqa.interrogate(question, system_prompt, prompt, image, model_name, prefill, thinking_mode)
+    """Wrapper for vqa.caption that handles annotated image display."""
+    from modules.caption import vqa
+    answer = vqa.caption(question, system_prompt, prompt, image, model_name, prefill, thinking_mode)
     annotated_image = vqa.get_last_annotated_image()
     if annotated_image is not None:
         return answer, gr.update(value=annotated_image, visible=True)
@@ -17,35 +17,35 @@ def vlm_caption_wrapper(question, system_prompt, prompt, image, model_name, pref
 
 def update_vlm_prompts_for_model(model_name):
     """Update the task dropdown choices based on selected model."""
-    from modules.interrogate import vqa
+    from modules.caption import vqa
     prompts = vqa.get_prompts_for_model(model_name)
     return gr.update(choices=prompts, value=prompts[0] if prompts else default_task)
 
 
 def update_vlm_prompt_placeholder(question):
     """Update the prompt field placeholder based on selected task."""
-    from modules.interrogate import vqa
+    from modules.caption import vqa
     placeholder = vqa.get_prompt_placeholder(question)
     return gr.update(placeholder=placeholder)
 
 
 def update_vlm_params(*args):
     vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p, vlm_keep_prefill, vlm_keep_thinking, vlm_thinking_mode = args
-    shared.opts.interrogate_vlm_max_length = int(vlm_max_tokens)
-    shared.opts.interrogate_vlm_num_beams = int(vlm_num_beams)
-    shared.opts.interrogate_vlm_temperature = float(vlm_temperature)
-    shared.opts.interrogate_vlm_do_sample = bool(vlm_do_sample)
-    shared.opts.interrogate_vlm_top_k = int(vlm_top_k)
-    shared.opts.interrogate_vlm_top_p = float(vlm_top_p)
-    shared.opts.interrogate_vlm_keep_prefill = bool(vlm_keep_prefill)
-    shared.opts.interrogate_vlm_keep_thinking = bool(vlm_keep_thinking)
-    shared.opts.interrogate_vlm_thinking_mode = bool(vlm_thinking_mode)
+    shared.opts.caption_vlm_max_length = int(vlm_max_tokens)
+    shared.opts.caption_vlm_num_beams = int(vlm_num_beams)
+    shared.opts.caption_vlm_temperature = float(vlm_temperature)
+    shared.opts.caption_vlm_do_sample = bool(vlm_do_sample)
+    shared.opts.caption_vlm_top_k = int(vlm_top_k)
+    shared.opts.caption_vlm_top_p = float(vlm_top_p)
+    shared.opts.caption_vlm_keep_prefill = bool(vlm_keep_prefill)
+    shared.opts.caption_vlm_keep_thinking = bool(vlm_keep_thinking)
+    shared.opts.caption_vlm_thinking_mode = bool(vlm_thinking_mode)
     shared.opts.save()
 
 
 def tagger_tag_wrapper(image, model_name, general_threshold, character_threshold, include_rating, exclude_tags, max_tags, sort_alpha, use_spaces, escape_brackets):
     """Wrapper for tagger.tag that maps UI inputs to function parameters."""
-    from modules.interrogate import tagger
+    from modules.caption import tagger
     return tagger.tag(
         image=image,
         model_name=model_name,
@@ -62,7 +62,7 @@ def tagger_tag_wrapper(image, model_name, general_threshold, character_threshold
 
 def tagger_batch_wrapper(model_name, batch_files, batch_folder, batch_str, save_output, save_append, recursive, general_threshold, character_threshold, include_rating, exclude_tags, max_tags, sort_alpha, use_spaces, escape_brackets):
     """Wrapper for tagger.batch that maps UI inputs to function parameters."""
-    from modules.interrogate import tagger
+    from modules.caption import tagger
     return tagger.batch(
         model_name=model_name,
         batch_files=batch_files,
@@ -88,7 +88,7 @@ def update_tagger_ui(model_name):
     When DeepBooru is selected, character_threshold is disabled since DeepBooru
     doesn't support separate character threshold.
     """
-    from modules.interrogate import tagger
+    from modules.caption import tagger
     is_db = tagger.is_deepbooru(model_name)
     return [
         gr.update(interactive=not is_db),  # character_threshold
@@ -113,48 +113,48 @@ def update_tagger_params(model_name, general_threshold, character_threshold, inc
 
 def update_clip_params(*args):
     clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams = args
-    shared.opts.interrogate_clip_min_length = int(clip_min_length)
-    shared.opts.interrogate_clip_max_length = int(clip_max_length)
-    shared.opts.interrogate_clip_min_flavors = int(clip_min_flavors)
-    shared.opts.interrogate_clip_max_flavors = int(clip_max_flavors)
-    shared.opts.interrogate_clip_num_beams = int(clip_num_beams)
-    shared.opts.interrogate_clip_flavor_count = int(clip_flavor_count)
-    shared.opts.interrogate_clip_chunk_size = int(clip_chunk_size)
+    shared.opts.caption_openclip_min_length = int(clip_min_length)
+    shared.opts.caption_openclip_max_length = int(clip_max_length)
+    shared.opts.caption_openclip_min_flavors = int(clip_min_flavors)
+    shared.opts.caption_openclip_max_flavors = int(clip_max_flavors)
+    shared.opts.caption_openclip_num_beams = int(clip_num_beams)
+    shared.opts.caption_openclip_flavor_count = int(clip_flavor_count)
+    shared.opts.caption_openclip_chunk_size = int(clip_chunk_size)
     shared.opts.save()
-    openclip.update_interrogate_params()
+    openclip.update_caption_params()
 
 
 def update_clip_model_params(clip_model, blip_model, clip_mode):
     """Save CLiP model settings to shared.opts when UI controls change."""
-    shared.opts.interrogate_clip_model = str(clip_model)
-    shared.opts.interrogate_blip_model = str(blip_model)
-    shared.opts.interrogate_clip_mode = str(clip_mode)
+    shared.opts.caption_openclip_model = str(clip_model)
+    shared.opts.caption_openclip_blip_model = str(blip_model)
+    shared.opts.caption_openclip_mode = str(clip_mode)
     shared.opts.save()
 
 
 def update_vlm_model_params(vlm_model, vlm_system):
     """Save VLM model settings to shared.opts when UI controls change."""
-    shared.opts.interrogate_vlm_model = str(vlm_model)
-    shared.opts.interrogate_vlm_system = str(vlm_system)
+    shared.opts.caption_vlm_model = str(vlm_model)
+    shared.opts.caption_vlm_system = str(vlm_system)
     shared.opts.save()
 
 
 def update_default_caption_type(caption_type):
     """Save the default caption type to shared.opts."""
-    shared.opts.interrogate_default_type = str(caption_type)
+    shared.opts.caption_default_type = str(caption_type)
     shared.opts.save()
 
 
 def create_ui():
     shared.log.debug('UI initialize: tab=caption')
     with gr.Row(equal_height=False, variant='compact', elem_classes="caption", elem_id="caption_tab"):
-        with gr.Column(variant='compact', elem_id='interrogate_input'):
+        with gr.Column(variant='compact', elem_id='caption_input'):
             with gr.Row():
-                image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='interrogate_image')
+                image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='caption_image')
             with gr.Tabs(elem_id="mode_caption"):
                 with gr.Tab("VLM Caption", elem_id="tab_vlm_caption"):
-                    from modules.interrogate import vqa
-                    current_vlm_model = shared.opts.interrogate_vlm_model or vqa.vlm_default
+                    from modules.caption import vqa
+                    current_vlm_model = shared.opts.caption_vlm_model or vqa.vlm_default
                     initial_prompts = vqa.get_prompts_for_model(current_vlm_model)
                     with gr.Row():
                         vlm_system = gr.Textbox(label="System Prompt", value=vqa.vlm_system, lines=1, elem_id='vlm_system')
@@ -162,25 +162,25 @@ def create_ui():
                         vlm_question = gr.Dropdown(label="Task", allow_custom_value=False, choices=initial_prompts, value=default_task, elem_id='vlm_question')
                     with gr.Row():
                         vlm_prompt = gr.Textbox(label="Prompt", placeholder=vqa.get_prompt_placeholder(initial_prompts[0]), lines=2, elem_id='vlm_prompt')
-                    with gr.Row(elem_id='interrogate_buttons_query'):
+                    with gr.Row(elem_id='caption_buttons_query'):
                         vlm_model = gr.Dropdown(list(vqa.vlm_models), value=current_vlm_model, label='VLM Model', elem_id='vlm_model')
                     with gr.Row():
                         vlm_load_btn = gr.Button(value='Load', elem_id='vlm_load', variant='secondary')
                         vlm_unload_btn = gr.Button(value='Unload', elem_id='vlm_unload', variant='secondary')
                     with gr.Accordion(label='Caption: Advanced Options', open=False, visible=True):
                         with gr.Row():
-                            vlm_max_tokens = gr.Slider(label='VLM Max Tokens', value=shared.opts.interrogate_vlm_max_length, minimum=16, maximum=4096, step=1, elem_id='vlm_max_tokens')
-                            vlm_num_beams = gr.Slider(label='VLM Num Beams', value=shared.opts.interrogate_vlm_num_beams, minimum=1, maximum=16, step=1, elem_id='vlm_num_beams')
-                            vlm_temperature = gr.Slider(label='VLM Temperature', value=shared.opts.interrogate_vlm_temperature, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_temperature')
+                            vlm_max_tokens = gr.Slider(label='VLM Max Tokens', value=shared.opts.caption_vlm_max_length, minimum=16, maximum=4096, step=1, elem_id='vlm_max_tokens')
+                            vlm_num_beams = gr.Slider(label='VLM Num Beams', value=shared.opts.caption_vlm_num_beams, minimum=1, maximum=16, step=1, elem_id='vlm_num_beams')
+                            vlm_temperature = gr.Slider(label='VLM Temperature', value=shared.opts.caption_vlm_temperature, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_temperature')
                         with gr.Row():
-                            vlm_top_k = gr.Slider(label='Top-K', value=shared.opts.interrogate_vlm_top_k, minimum=0, maximum=99, step=1, elem_id='vlm_top_k')
-                            vlm_top_p = gr.Slider(label='Top-P', value=shared.opts.interrogate_vlm_top_p, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_top_p')
+                            vlm_top_k = gr.Slider(label='Top-K', value=shared.opts.caption_vlm_top_k, minimum=0, maximum=99, step=1, elem_id='vlm_top_k')
+                            vlm_top_p = gr.Slider(label='Top-P', value=shared.opts.caption_vlm_top_p, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_top_p')
                         with gr.Row():
-                            vlm_do_sample = gr.Checkbox(label='Use Samplers', value=shared.opts.interrogate_vlm_do_sample, elem_id='vlm_do_sample')
-                            vlm_thinking_mode = gr.Checkbox(label='Thinking Mode', value=shared.opts.interrogate_vlm_thinking_mode, elem_id='vlm_thinking_mode')
+                            vlm_do_sample = gr.Checkbox(label='Use Samplers', value=shared.opts.caption_vlm_do_sample, elem_id='vlm_do_sample')
+                            vlm_thinking_mode = gr.Checkbox(label='Thinking Mode', value=shared.opts.caption_vlm_thinking_mode, elem_id='vlm_thinking_mode')
                         with gr.Row():
-                            vlm_keep_thinking = gr.Checkbox(label='Keep Thinking Trace', value=shared.opts.interrogate_vlm_keep_thinking, elem_id='vlm_keep_thinking')
-                            vlm_keep_prefill = gr.Checkbox(label='Keep Prefill', value=shared.opts.interrogate_vlm_keep_prefill, elem_id='vlm_keep_prefill')
+                            vlm_keep_thinking = gr.Checkbox(label='Keep Thinking Trace', value=shared.opts.caption_vlm_keep_thinking, elem_id='vlm_keep_thinking')
+                            vlm_keep_prefill = gr.Checkbox(label='Keep Prefill', value=shared.opts.caption_vlm_keep_prefill, elem_id='vlm_keep_prefill')
                         with gr.Row():
                             vlm_prefill = gr.Textbox(label='Prefill Text', value='', lines=1, elem_id='vlm_prefill', placeholder='Optional prefill text for model to continue from')
                         vlm_max_tokens.change(fn=update_vlm_params, inputs=[vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p, vlm_keep_prefill, vlm_keep_thinking, vlm_thinking_mode], outputs=[])
@@ -203,27 +203,27 @@ def create_ui():
                             vlm_save_output = gr.Checkbox(label='Save Caption Files', value=True, elem_id="vlm_save_output")
                             vlm_save_append = gr.Checkbox(label='Append Caption Files', value=False, elem_id="vlm_save_append")
                             vlm_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="vlm_folder_recursive")
-                        with gr.Row(elem_id='interrogate_buttons_batch'):
+                        with gr.Row(elem_id='caption_buttons_batch'):
                             btn_vlm_caption_batch = gr.Button("Batch Caption", variant='primary', elem_id="btn_vlm_caption_batch")
                     with gr.Row():
                         btn_vlm_caption = gr.Button("Caption", variant='primary', elem_id="btn_vlm_caption")
-                with gr.Tab("OpenCLiP", elem_id='tab_clip_interrogate'):
+                with gr.Tab("OpenCLiP", elem_id='tab_openclip'):
                     with gr.Row():
-                        clip_model = gr.Dropdown([], value=shared.opts.interrogate_clip_model, label='CLiP Model', elem_id='clip_clip_model')
+                        clip_model = gr.Dropdown([], value=shared.opts.caption_openclip_model, label='CLiP Model', elem_id='clip_clip_model')
                         ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'clip_models_refresh')
-                        blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption Model', elem_id='btN_clip_blip_model')
+                        blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.caption_openclip_blip_model, label='Caption Model', elem_id='btN_clip_blip_model')
                         clip_mode = gr.Dropdown(openclip.caption_types, label='Mode', value='fast', elem_id='clip_clip_mode')
                     with gr.Accordion(label='Caption: Advanced Options', open=False, visible=True):
                         with gr.Row():
-                            clip_min_length = gr.Slider(label='clip: min length', value=shared.opts.interrogate_clip_min_length, minimum=8, maximum=75, step=1, elem_id='clip_caption_min_length')
-                            clip_max_length = gr.Slider(label='clip: max length', value=shared.opts.interrogate_clip_max_length, minimum=16, maximum=1024, step=1, elem_id='clip_caption_max_length')
-                            clip_chunk_size = gr.Slider(label='clip: chunk size', value=shared.opts.interrogate_clip_chunk_size, minimum=256, maximum=4096, step=8, elem_id='clip_chunk_size')
+                            clip_min_length = gr.Slider(label='clip: min length', value=shared.opts.caption_openclip_min_length, minimum=8, maximum=75, step=1, elem_id='clip_caption_min_length')
+                            clip_max_length = gr.Slider(label='clip: max length', value=shared.opts.caption_openclip_max_length, minimum=16, maximum=1024, step=1, elem_id='clip_caption_max_length')
+                            clip_chunk_size = gr.Slider(label='clip: chunk size', value=shared.opts.caption_openclip_chunk_size, minimum=256, maximum=4096, step=8, elem_id='clip_chunk_size')
                         with gr.Row():
-                            clip_min_flavors = gr.Slider(label='clip: min flavors', value=shared.opts.interrogate_clip_min_flavors, minimum=1, maximum=16, step=1, elem_id='clip_min_flavors')
-                            clip_max_flavors = gr.Slider(label='clip: max flavors', value=shared.opts.interrogate_clip_max_flavors, minimum=1, maximum=64, step=1, elem_id='clip_max_flavors')
-                            clip_flavor_count = gr.Slider(label='clip: intermediates', value=shared.opts.interrogate_clip_flavor_count, minimum=256, maximum=4096, step=8, elem_id='clip_flavor_intermediate_count')
+                            clip_min_flavors = gr.Slider(label='clip: min flavors', value=shared.opts.caption_openclip_min_flavors, minimum=1, maximum=16, step=1, elem_id='clip_min_flavors')
+                            clip_max_flavors = gr.Slider(label='clip: max flavors', value=shared.opts.caption_openclip_max_flavors, minimum=1, maximum=64, step=1, elem_id='clip_max_flavors')
+                            clip_flavor_count = gr.Slider(label='clip: intermediates', value=shared.opts.caption_openclip_flavor_count, minimum=256, maximum=4096, step=8, elem_id='clip_flavor_intermediate_count')
                         with gr.Row():
-                            clip_num_beams = gr.Slider(label='clip: num beams', value=shared.opts.interrogate_clip_num_beams, minimum=1, maximum=16, step=1, elem_id='clip_num_beams')
+                            clip_num_beams = gr.Slider(label='clip: num beams', value=shared.opts.caption_openclip_num_beams, minimum=1, maximum=16, step=1, elem_id='clip_num_beams')
                         clip_min_length.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[])
                         clip_max_length.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[])
                         clip_chunk_size.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[])
@@ -243,12 +243,12 @@ def create_ui():
                             clip_save_append = gr.Checkbox(label='Append Caption Files', value=False, elem_id="clip_save_append")
                             clip_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="clip_folder_recursive")
                         with gr.Row():
-                            btn_clip_interrogate_batch = gr.Button("Batch Interrogate", variant='primary', elem_id="btn_clip_interrogate_batch")
+                            btn_clip_caption_batch = gr.Button("Batch Caption", variant='primary', elem_id="btn_clip_caption_batch")
                     with gr.Row():
-                        btn_clip_interrogate_img = gr.Button("Interrogate", variant='primary', elem_id="btn_clip_interrogate_img")
+                        btn_clip_caption_img = gr.Button("Caption", variant='primary', elem_id="btn_clip_caption_img")
                         btn_clip_analyze_img = gr.Button("Analyze", variant='primary', elem_id="btn_clip_analyze_img")
                 with gr.Tab("Tagger", elem_id='tab_tagger'):
-                    from modules.interrogate import tagger
+                    from modules.caption import tagger
                     with gr.Row():
                         wd_model = gr.Dropdown(tagger.get_models(), value=shared.opts.waifudiffusion_model, label='Tagger Model', elem_id='wd_model')
                         ui_common.create_refresh_button(wd_model, tagger.refresh_models, lambda: {"choices": tagger.get_models()}, 'wd_models_refresh')
@@ -286,32 +286,32 @@ def create_ui():
                             btn_wd_tag_batch = gr.Button("Batch Tag", variant='primary', elem_id="btn_wd_tag_batch")
                     with gr.Row():
                         btn_wd_tag = gr.Button("Tag", variant='primary', elem_id="btn_wd_tag")
-                with gr.Tab("Interrogate", elem_id='tab_interrogate'):
+                with gr.Tab("Default", elem_id='tab_caption_default'):
                     with gr.Row():
                         default_caption_type = gr.Radio(
                             choices=["VLM", "OpenCLiP", "Tagger"],
-                            value=shared.opts.interrogate_default_type,
+                            value=shared.opts.caption_default_type,
                             label="Default Caption Type",
                             elem_id="default_caption_type"
                         )
-        with gr.Column(variant='compact', elem_id='interrogate_output'):
-            with gr.Row(elem_id='interrogate_output_prompt'):
+        with gr.Column(variant='compact', elem_id='caption_output'):
+            with gr.Row(elem_id='caption_output_prompt'):
                 prompt = gr.Textbox(label="Answer", lines=12, placeholder="ai generated image description")
-            with gr.Row(elem_id='interrogate_output_image'):
-                output_image = gr.Image(type='pil', label="Annotated Image", interactive=False, visible=False, elem_id='interrogate_output_image_display')
-            with gr.Row(elem_id='interrogate_output_classes'):
-                medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5, visible=False)
-                artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5, visible=False)
-                movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5, visible=False)
-                trending = gr.Label(elem_id="interrogate_label_trending", label="Trending", num_top_classes=5, visible=False)
-                flavor = gr.Label(elem_id="interrogate_label_flavor", label="Flavor", num_top_classes=5, visible=False)
-                clip_labels_text = gr.Textbox(elem_id="interrogate_clip_labels_text", label="CLIP Analysis", lines=15, interactive=False, visible=False, show_label=False)
-            with gr.Row(elem_id='copy_buttons_interrogate'):
-                copy_interrogate_buttons = generation_parameters_copypaste.create_buttons(["txt2img", "img2img", "control", "extras"])
+            with gr.Row(elem_id='caption_output_image'):
+                output_image = gr.Image(type='pil', label="Annotated Image", interactive=False, visible=False, elem_id='caption_output_image_display')
+            with gr.Row(elem_id='caption_output_classes'):
+                medium = gr.Label(elem_id="caption_label_medium", label="Medium", num_top_classes=5, visible=False)
+                artist = gr.Label(elem_id="caption_label_artist", label="Artist", num_top_classes=5, visible=False)
+                movement = gr.Label(elem_id="caption_label_movement", label="Movement", num_top_classes=5, visible=False)
+                trending = gr.Label(elem_id="caption_label_trending", label="Trending", num_top_classes=5, visible=False)
+                flavor = gr.Label(elem_id="caption_label_flavor", label="Flavor", num_top_classes=5, visible=False)
+                clip_labels_text = gr.Textbox(elem_id="caption_clip_labels_text", label="CLIP Analysis", lines=15, interactive=False, visible=False, show_label=False)
+            with gr.Row(elem_id='copy_buttons_caption'):
+                copy_caption_buttons = generation_parameters_copypaste.create_buttons(["txt2img", "img2img", "control", "extras"])
 
-    btn_clip_interrogate_img.click(openclip.interrogate_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
+    btn_clip_caption_img.click(openclip.caption_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
     btn_clip_analyze_img.click(openclip.analyze_image, inputs=[image, clip_model, blip_model], outputs=[medium, artist, movement, trending, flavor, clip_labels_text]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
-    btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
+    btn_clip_caption_batch.click(fn=openclip.caption_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
     btn_vlm_caption.click(fn=vlm_caption_wrapper, inputs=[vlm_question, vlm_system, vlm_prompt, image, vlm_model, vlm_prefill, vlm_thinking_mode], outputs=[prompt, output_image])
     btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_system, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append, vlm_folder_recursive, vlm_prefill, vlm_thinking_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
     btn_wd_tag.click(fn=tagger_tag_wrapper, inputs=[image, wd_model, wd_general_threshold, wd_character_threshold, wd_include_rating, wd_exclude_tags, wd_max_tags, wd_sort_alpha, wd_use_spaces, wd_escape], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
@@ -325,10 +325,10 @@ def create_ui():
     vlm_load_btn.click(fn=vqa.load_model, inputs=[vlm_model], outputs=[])
     vlm_unload_btn.click(fn=vqa.unload_model, inputs=[], outputs=[])
     def tagger_load_wrapper(model_name):
-        from modules.interrogate import tagger
+        from modules.caption import tagger
         return tagger.load_model(model_name)
     def tagger_unload_wrapper():
-        from modules.interrogate import tagger
+        from modules.caption import tagger
         return tagger.unload_model()
     wd_load_btn.click(fn=tagger_load_wrapper, inputs=[wd_model], outputs=[])
     wd_unload_btn.click(fn=tagger_unload_wrapper, inputs=[], outputs=[])
@@ -363,6 +363,6 @@ def create_ui():
     # Save default caption type to shared.opts when UI control changes
     default_caption_type.change(fn=update_default_caption_type, inputs=[default_caption_type], outputs=[], show_progress=False)
 
-    for tabname, button in copy_interrogate_buttons.items():
+    for tabname, button in copy_caption_buttons.items():
         generation_parameters_copypaste.register_paste_params_button(generation_parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,))
     generation_parameters_copypaste.add_paste_fields("caption", image, None)
diff --git a/modules/ui_common.py b/modules/ui_common.py
index 5a6299afe..89dd2038a 100644
--- a/modules/ui_common.py
+++ b/modules/ui_common.py
@@ -273,7 +273,7 @@ def create_output_panel(tabname, preview=True, prompt=None, height=None, transfe
                                         elem_classes=["gallery_main"],
                                        )
             if prompt is not None:
-                ui_sections.create_interrogate_button(tab=tabname, inputs=result_gallery, outputs=prompt, what='output')
+                ui_sections.create_caption_button(tab=tabname, inputs=result_gallery, outputs=prompt, what='output')
             button_image_fit = gr.Button(ui_symbols.resize, elem_id=f"{tabname}_image_fit", elem_classes=['image-fit'])
             button_image_fit.click(fn=None, _js="cycleImageFit", inputs=[], outputs=[])
 
diff --git a/modules/ui_control.py b/modules/ui_control.py
index dc31db6c2..1049b5f54 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -226,7 +226,7 @@ def create_ui(_blocks: gr.Blocks=None):
                             else:
                                 input_image = gr.HTML(value='<h1 style="text-align:center;color:var(--color-error);margin:1em;">Kanvas not initialized</h1>', elem_id='kanvas-container')
                             input_changed = gr.Button('Kanvas change', elem_id='kanvas-change-button', visible=False)
-                            btn_interrogate = ui_sections.create_interrogate_button('control', what='input')
+                            btn_caption = ui_sections.create_caption_button('control', what='input')
                         with gr.Tab('Video', id='in-video') as tab_video:
                             input_video = gr.Video(label="Input", show_label=False, interactive=True, height=gr_height, elem_classes=['control-image'])
                         with gr.Tab('Batch', id='in-batch') as tab_batch:
@@ -303,8 +303,8 @@ def create_ui(_blocks: gr.Blocks=None):
             )
 
             input_changed.click(**select_dict)
-            btn_interrogate.click(**select_dict) # need to fetch input first
-            btn_interrogate.click(fn=helpers.interrogate, inputs=[], outputs=[prompt])
+            btn_caption.click(**select_dict) # need to fetch input first
+            btn_caption.click(fn=helpers.caption, inputs=[], outputs=[prompt])
 
             prompt.submit(**select_dict)
             negative.submit(**select_dict)
diff --git a/modules/ui_control_helpers.py b/modules/ui_control_helpers.py
index f1545dd07..cbe3c4df8 100644
--- a/modules/ui_control_helpers.py
+++ b/modules/ui_control_helpers.py
@@ -48,16 +48,16 @@ def initialize():
     scripts_manager.scripts_control.initialize_scripts(is_img2img=False, is_control=True)
 
 
-def interrogate():
+def caption():
     prompt = None
     if input_source is None or len(input_source) == 0:
-        shared.log.warning('Interrogate: no input source')
+        shared.log.warning('Caption: no input source')
         return prompt
     try:
-        from modules.interrogate.interrogate import interrogate as interrogate_fn
-        prompt = interrogate_fn(input_source[0])
+        from modules.caption.caption import caption as caption_fn
+        prompt = caption_fn(input_source[0])
     except Exception as e:
-        shared.log.error(f'Interrogate: {e}')
+        shared.log.error(f'Caption: {e}')
     return prompt
 
 
diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py
index d6e1591a2..5e651de91 100644
--- a/modules/ui_img2img.py
+++ b/modules/ui_img2img.py
@@ -3,21 +3,21 @@ from modules import timer, shared, call_queue, generation_parameters_copypaste,
 from modules import ui_common, ui_sections, ui_guidance
 
 
-def process_interrogate(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_singles):
+def process_caption(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_singles):
     import os
     from PIL import Image
-    from modules.interrogate.interrogate import interrogate
+    from modules.caption.caption import caption
     mode = int(mode)
     if mode in {0, 1, 3, 4}:
-        return [interrogate(ii_singles[mode]), None]
+        return [caption(ii_singles[mode]), None]
     if mode == 2:
-        return [interrogate(ii_singles[mode]["image"]), None]
+        return [caption(ii_singles[mode]["image"]), None]
     if mode == 5:
         if len(ii_input_files) > 0:
             images = [f.name for f in ii_input_files]
         else:
             if not os.path.isdir(ii_input_dir):
-                shared.log.error(f"Interrogate: Input directory not found: {ii_input_dir}")
+                shared.log.error(f"Caption: Input directory not found: {ii_input_dir}")
                 return [gr.update(), None]
             images = os.listdir(ii_input_dir)
         if ii_output_dir != "":
@@ -28,7 +28,7 @@ def process_interrogate(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_s
             img = Image.open(image)
             filename = os.path.basename(image)
             left, _ = os.path.splitext(filename)
-            print(interrogate(img), file=open(os.path.join(ii_output_dir, f"{left}.txt"), 'a', encoding='utf-8')) # pylint: disable=consider-using-with
+            print(caption(img), file=open(os.path.join(ii_output_dir, f"{left}.txt"), 'a', encoding='utf-8')) # pylint: disable=consider-using-with
     return [gr.update(), None]
 
 
@@ -70,7 +70,7 @@ def create_ui():
                     state = gr.Textbox(value='', visible=False)
                     with gr.TabItem('Image', id='img2img_image', elem_id="img2img_image_tab") as tab_img2img:
                         img_init = gr.Image(label="", elem_id="img2img_image", show_label=False, interactive=True, type="pil", tool="editor", image_mode="RGBA", height=512)
-                        interrogate_btn = ui_sections.create_interrogate_button(tab='img2img', what='input')
+                        caption_btn = ui_sections.create_caption_button(tab='img2img', what='input')
                         add_copy_image_controls('img2img', img_init)
 
                     with gr.TabItem('Inpaint', id='img2img_inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
@@ -215,7 +215,7 @@ def create_ui():
             img2img_reprocess[2].click(**img2img_dict) # hires-refine
             img2img_reprocess[3].click(**img2img_dict) # face-restore
 
-            interrogate_args = dict(
+            caption_args = dict(
                 _js="get_img2img_tab_index",
                 inputs=[
                     dummy_component,
@@ -227,7 +227,7 @@ def create_ui():
                 ],
                 outputs=[img2img_prompt, dummy_component],
             )
-            interrogate_btn.click(fn=lambda *args: process_interrogate(*args), **interrogate_args)
+            caption_btn.click(fn=lambda *args: process_caption(*args), **caption_args)
 
             img2img_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_prompt], outputs=[img2img_token_counter], show_progress = 'hidden')
             img2img_negative_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_negative_prompt], outputs=[img2img_negative_token_counter], show_progress = 'hidden')
diff --git a/modules/ui_sections.py b/modules/ui_sections.py
index 2c4e11b78..4930446f2 100644
--- a/modules/ui_sections.py
+++ b/modules/ui_sections.py
@@ -1,7 +1,7 @@
 import gradio as gr
 from modules import shared, modelloader, ui_symbols, ui_common, sd_samplers
 from modules.ui_components import ToolButton
-from modules.interrogate import interrogate
+from modules.caption import caption
 
 
 def create_toprow(is_img2img: bool = False, id_part: str = None, generate_visible: bool = True, negative_visible: bool = True, reprocess_visible: bool = True):
@@ -91,11 +91,11 @@ def create_resolution_inputs(tab, default_width=1024, default_height=1024):
     return width, height
 
 
-def create_interrogate_button(tab: str, inputs: list = None, outputs: str = None, what: str = ''):
-    button_interrogate = gr.Button(ui_symbols.interrogate, elem_id=f"{tab}_interrogate_{what}", elem_classes=['interrogate'])
+def create_caption_button(tab: str, inputs: list = None, outputs: str = None, what: str = ''):
+    button_caption = gr.Button(ui_symbols.caption, elem_id=f"{tab}_caption_{what}", elem_classes=['caption'])
     if inputs is not None and outputs is not None:
-        button_interrogate.click(fn=interrogate.interrogate, inputs=inputs, outputs=[outputs])
-    return button_interrogate
+        button_caption.click(fn=caption.caption, inputs=inputs, outputs=[outputs])
+    return button_caption
 
 
 def create_batch_inputs(tab, accordion=True):
diff --git a/modules/ui_symbols.py b/modules/ui_symbols.py
index c880a4a8f..e379be27a 100644
--- a/modules/ui_symbols.py
+++ b/modules/ui_symbols.py
@@ -33,7 +33,7 @@ search = '🔍'
 preview = '🖼️'
 image = '🖌️'
 resize = '⁜'
-interrogate = '\uf46b' # Telescope icon in Noto Sans. Previously '♻'
+caption = '\uf46b' # Telescope icon in Noto Sans. Previously '♻'
 bullet = '⃝'
 vision = '\uf06e'  # Font Awesome eye icon (more minimalistic)
 reasoning = '\uf0eb'  # Font Awesome lightbulb icon (represents thinking/reasoning)
diff --git a/modules/ui_video_vlm.py b/modules/ui_video_vlm.py
index 96cf8933b..b39ba4fe3 100644
--- a/modules/ui_video_vlm.py
+++ b/modules/ui_video_vlm.py
@@ -22,7 +22,7 @@ system_prompts = {
 
 
 def enhance_prompt(enable:bool, model:str=None, image=None, prompt:str='', system_prompt:str='', nsfw:bool=True):
-    from modules.interrogate import vqa
+    from modules.caption import vqa
     if not enable:
         return prompt
     if model is None or len(model) < 4:
@@ -46,7 +46,7 @@ def enhance_prompt(enable:bool, model:str=None, image=None, prompt:str='', syste
         system_prompt += system_prompts['nsfw_ok'] if nsfw else system_prompts['nsfw_no']
         system_prompt += f" {system_prompts['suffix']} {system_prompts['example']}"
     shared.log.debug(f'Video prompt enhance: model="{model}" image={image} nsfw={nsfw} prompt="{prompt}"')
-    answer = vqa.interrogate(question='', prompt=prompt, system_prompt=system_prompt, image=image, model_name=model, quiet=False)
+    answer = vqa.caption(question='', prompt=prompt, system_prompt=system_prompt, image=image, model_name=model, quiet=False)
     shared.log.debug(f'Video prompt enhance: answer="{answer}"')
     return answer