diff --git a/modules/shared.py b/modules/shared.py
index d549b9a7a..7e0c4a4a9 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -22,8 +22,8 @@ from modules.memstats import memory_stats, ram_stats # pylint: disable=unused-import
log.debug('Initializing: pipelines')
from modules import shared_items
-from modules.interrogate.openclip import caption_models, caption_types, get_clip_models, refresh_clip_models
-from modules.interrogate.vqa import vlm_models, vlm_prompts, vlm_system, vlm_default
+from modules.caption.openclip import caption_models, caption_types, get_clip_models, refresh_clip_models
+from modules.caption.vqa import vlm_models, vlm_prompts, vlm_system, vlm_default
if TYPE_CHECKING:
@@ -207,7 +207,7 @@ options_templates.update(options_section(('offload', "Model Offloading"), {
    "offload_sep": OptionInfo("<h2>Model Offloading</h2>", "", gr.HTML),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'group', 'model', 'sequential']}),
"diffusers_offload_nonblocking": OptionInfo(False, "Non-blocking move operations"),
- "interrogate_offload": OptionInfo(True, "Offload caption models"),
+ "caption_offload": OptionInfo(True, "Offload caption models"),
    "offload_balanced_sep": OptionInfo("<h2>Balanced Offload</h2>", "", gr.HTML),
"diffusers_offload_pre": OptionInfo(True, "Offload during pre-forward"),
"diffusers_offload_streams": OptionInfo(False, "Offload using streams"),
@@ -742,31 +742,31 @@ options_templates.update(options_section(('hidden_options', "Hidden options"), {
"sd_checkpoint_hash": OptionInfo("", "SHA256 hash of the current checkpoint", gr.Textbox, {"visible": False}),
"tooltips": OptionInfo("UI Tooltips", "UI tooltips", gr.Radio, {"choices": ["None", "Browser default", "UI tooltips"], "visible": False}),
- # Caption/Interrogate settings (controlled via Caption Tab UI)
- "interrogate_default_type": OptionInfo("VLM", "Default caption type", gr.Radio, {"choices": ["VLM", "OpenCLiP", "Tagger"], "visible": False}),
+ # Caption settings (controlled via Caption Tab UI)
+ "caption_default_type": OptionInfo("VLM", "Default caption type", gr.Radio, {"choices": ["VLM", "OpenCLiP", "Tagger"], "visible": False}),
"tagger_show_scores": OptionInfo(False, "Tagger: show confidence scores in results", gr.Checkbox, {"visible": False}),
- "interrogate_clip_model": OptionInfo("ViT-L-14/openai", "OpenCLiP: default model", gr.Dropdown, lambda: {"choices": get_clip_models(), "visible": False}, refresh=refresh_clip_models),
- "interrogate_clip_mode": OptionInfo(caption_types[0], "OpenCLiP: default mode", gr.Dropdown, {"choices": caption_types, "visible": False}),
- "interrogate_blip_model": OptionInfo(list(caption_models)[0], "OpenCLiP: default captioner", gr.Dropdown, {"choices": list(caption_models), "visible": False}),
- "interrogate_clip_num_beams": OptionInfo(1, "OpenCLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
- "interrogate_clip_min_length": OptionInfo(32, "OpenCLiP: min length", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1, "visible": False}),
- "interrogate_clip_max_length": OptionInfo(74, "OpenCLiP: max length", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}),
- "interrogate_clip_min_flavors": OptionInfo(2, "OpenCLiP: min flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
- "interrogate_clip_max_flavors": OptionInfo(16, "OpenCLiP: max flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
- "interrogate_clip_flavor_count": OptionInfo(1024, "OpenCLiP: intermediate flavors", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
- "interrogate_clip_chunk_size": OptionInfo(1024, "OpenCLiP: chunk size", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
- "interrogate_vlm_model": OptionInfo(vlm_default, "VLM: default model", gr.Dropdown, {"choices": list(vlm_models), "visible": False}),
- "interrogate_vlm_prompt": OptionInfo(vlm_prompts[2], "VLM: default prompt", DropdownEditable, {"choices": vlm_prompts, "visible": False}),
- "interrogate_vlm_system": OptionInfo(vlm_system, "VLM: system prompt", gr.Textbox, {"visible": False}),
- "interrogate_vlm_num_beams": OptionInfo(1, "VLM: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
- "interrogate_vlm_max_length": OptionInfo(512, "VLM: max length", gr.Slider, {"minimum": 1, "maximum": 4096, "step": 1, "visible": False}),
- "interrogate_vlm_do_sample": OptionInfo(True, "VLM: use sample method", gr.Checkbox, {"visible": False}),
- "interrogate_vlm_temperature": OptionInfo(0.8, "VLM: temperature", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
- "interrogate_vlm_top_k": OptionInfo(0, "VLM: top-k", gr.Slider, {"minimum": 0, "maximum": 99, "step": 1, "visible": False}),
- "interrogate_vlm_top_p": OptionInfo(0, "VLM: top-p", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
- "interrogate_vlm_keep_prefill": OptionInfo(False, "VLM: keep prefill text in output", gr.Checkbox, {"visible": False}),
- "interrogate_vlm_keep_thinking": OptionInfo(False, "VLM: keep reasoning trace in output", gr.Checkbox, {"visible": False}),
- "interrogate_vlm_thinking_mode": OptionInfo(False, "VLM: enable thinking/reasoning mode", gr.Checkbox, {"visible": False}),
+ "caption_openclip_model": OptionInfo("ViT-L-14/openai", "OpenCLiP: default model", gr.Dropdown, lambda: {"choices": get_clip_models(), "visible": False}, refresh=refresh_clip_models),
+ "caption_openclip_mode": OptionInfo(caption_types[0], "OpenCLiP: default mode", gr.Dropdown, {"choices": caption_types, "visible": False}),
+ "caption_openclip_blip_model": OptionInfo(list(caption_models)[0], "OpenCLiP: default captioner", gr.Dropdown, {"choices": list(caption_models), "visible": False}),
+ "caption_openclip_num_beams": OptionInfo(1, "OpenCLiP: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
+ "caption_openclip_min_length": OptionInfo(32, "OpenCLiP: min length", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1, "visible": False}),
+ "caption_openclip_max_length": OptionInfo(74, "OpenCLiP: max length", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}),
+ "caption_openclip_min_flavors": OptionInfo(2, "OpenCLiP: min flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
+ "caption_openclip_max_flavors": OptionInfo(16, "OpenCLiP: max flavors", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1, "visible": False}),
+ "caption_openclip_flavor_count": OptionInfo(1024, "OpenCLiP: intermediate flavors", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
+ "caption_openclip_chunk_size": OptionInfo(1024, "OpenCLiP: chunk size", gr.Slider, {"minimum": 256, "maximum": 4096, "step": 64, "visible": False}),
+ "caption_vlm_model": OptionInfo(vlm_default, "VLM: default model", gr.Dropdown, {"choices": list(vlm_models), "visible": False}),
+ "caption_vlm_prompt": OptionInfo(vlm_prompts[2], "VLM: default prompt", DropdownEditable, {"choices": vlm_prompts, "visible": False}),
+ "caption_vlm_system": OptionInfo(vlm_system, "VLM: system prompt", gr.Textbox, {"visible": False}),
+ "caption_vlm_num_beams": OptionInfo(1, "VLM: num beams", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1, "visible": False}),
+ "caption_vlm_max_length": OptionInfo(512, "VLM: max length", gr.Slider, {"minimum": 1, "maximum": 4096, "step": 1, "visible": False}),
+ "caption_vlm_do_sample": OptionInfo(True, "VLM: use sample method", gr.Checkbox, {"visible": False}),
+ "caption_vlm_temperature": OptionInfo(0.8, "VLM: temperature", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
+ "caption_vlm_top_k": OptionInfo(0, "VLM: top-k", gr.Slider, {"minimum": 0, "maximum": 99, "step": 1, "visible": False}),
+ "caption_vlm_top_p": OptionInfo(0, "VLM: top-p", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.01, "visible": False}),
+ "caption_vlm_keep_prefill": OptionInfo(False, "VLM: keep prefill text in output", gr.Checkbox, {"visible": False}),
+ "caption_vlm_keep_thinking": OptionInfo(False, "VLM: keep reasoning trace in output", gr.Checkbox, {"visible": False}),
+ "caption_vlm_thinking_mode": OptionInfo(False, "VLM: enable thinking/reasoning mode", gr.Checkbox, {"visible": False}),
"tagger_threshold": OptionInfo(0.50, "Tagger: general tag threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False}),
"tagger_include_rating": OptionInfo(False, "Tagger: include rating tags", gr.Checkbox, {"visible": False}),
"tagger_max_tags": OptionInfo(74, "Tagger: max tags", gr.Slider, {"minimum": 1, "maximum": 512, "step": 1, "visible": False}),
diff --git a/modules/shared_legacy.py b/modules/shared_legacy.py
index f96b29fa1..af08da28b 100644
--- a/modules/shared_legacy.py
+++ b/modules/shared_legacy.py
@@ -11,7 +11,6 @@ class LegacyOption(OptionInfo):
legacy_options = options_section(('legacy_options', "Legacy options"), {
"ldsr_models_path": LegacyOption(os.path.join(paths.models_path, 'LDSR'), "LDSR Path", gr.Textbox, { "visible": False}),
- "interrogate_clip_skip_categories": LegacyOption(["artists", "movements", "flavors"], "CLiP: skip categories", gr.CheckboxGroup, {"choices": [], "visible":False}),
"lora_legacy": LegacyOption(False, "LoRA load using legacy method", gr.Checkbox, {"visible": False}),
"lora_preferred_name": LegacyOption("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
"img2img_extra_noise": LegacyOption(0.0, "Extra noise multiplier for img2img", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": False}),
@@ -43,7 +42,6 @@ legacy_options = options_section(('legacy_options', "Legacy options"), {
"grid_save_to_dirs": LegacyOption(False, "Save grids to a subdirectory", gr.Checkbox, {"visible": False}),
"hypernetwork_enabled": LegacyOption(False, "Enable Hypernetwork support", gr.Checkbox, {"visible": False}),
"img2img_fix_steps": LegacyOption(False, "For image processing do exact number of steps as specified", gr.Checkbox, { "visible": False }),
- "interrogate_clip_dict_limit": LegacyOption(2048, "CLIP: maximum number of lines in text file", gr.Slider, { "visible": False }),
"keyedit_delimiters": LegacyOption(r".,\/!?%^*;:{}=`~()", "Ctrl+up/down word delimiters", gr.Textbox, { "visible": False }),
"keyedit_precision_attention": LegacyOption(0.1, "Ctrl+up/down precision when editing (attention:1.1)", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001, "visible": False}),
"keyedit_precision_extra": LegacyOption(0.05, "Ctrl+up/down precision when editing ", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001, "visible": False}),
diff --git a/modules/ui_caption.py b/modules/ui_caption.py
index b2eef1d3b..5867edd79 100644
--- a/modules/ui_caption.py
+++ b/modules/ui_caption.py
@@ -1,14 +1,14 @@
import gradio as gr
from modules import shared, ui_common, generation_parameters_copypaste
-from modules.interrogate import openclip
+from modules.caption import openclip
default_task = "Short Caption"
def vlm_caption_wrapper(question, system_prompt, prompt, image, model_name, prefill, thinking_mode):
- """Wrapper for vqa.interrogate that handles annotated image display."""
- from modules.interrogate import vqa
- answer = vqa.interrogate(question, system_prompt, prompt, image, model_name, prefill, thinking_mode)
+ """Wrapper for vqa.caption that handles annotated image display."""
+ from modules.caption import vqa
+ answer = vqa.caption(question, system_prompt, prompt, image, model_name, prefill, thinking_mode)
annotated_image = vqa.get_last_annotated_image()
if annotated_image is not None:
return answer, gr.update(value=annotated_image, visible=True)
@@ -17,35 +17,35 @@ def vlm_caption_wrapper(question, system_prompt, prompt, image, model_name, pref
def update_vlm_prompts_for_model(model_name):
"""Update the task dropdown choices based on selected model."""
- from modules.interrogate import vqa
+ from modules.caption import vqa
prompts = vqa.get_prompts_for_model(model_name)
return gr.update(choices=prompts, value=prompts[0] if prompts else default_task)
def update_vlm_prompt_placeholder(question):
"""Update the prompt field placeholder based on selected task."""
- from modules.interrogate import vqa
+ from modules.caption import vqa
placeholder = vqa.get_prompt_placeholder(question)
return gr.update(placeholder=placeholder)
def update_vlm_params(*args):
vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p, vlm_keep_prefill, vlm_keep_thinking, vlm_thinking_mode = args
- shared.opts.interrogate_vlm_max_length = int(vlm_max_tokens)
- shared.opts.interrogate_vlm_num_beams = int(vlm_num_beams)
- shared.opts.interrogate_vlm_temperature = float(vlm_temperature)
- shared.opts.interrogate_vlm_do_sample = bool(vlm_do_sample)
- shared.opts.interrogate_vlm_top_k = int(vlm_top_k)
- shared.opts.interrogate_vlm_top_p = float(vlm_top_p)
- shared.opts.interrogate_vlm_keep_prefill = bool(vlm_keep_prefill)
- shared.opts.interrogate_vlm_keep_thinking = bool(vlm_keep_thinking)
- shared.opts.interrogate_vlm_thinking_mode = bool(vlm_thinking_mode)
+ shared.opts.caption_vlm_max_length = int(vlm_max_tokens)
+ shared.opts.caption_vlm_num_beams = int(vlm_num_beams)
+ shared.opts.caption_vlm_temperature = float(vlm_temperature)
+ shared.opts.caption_vlm_do_sample = bool(vlm_do_sample)
+ shared.opts.caption_vlm_top_k = int(vlm_top_k)
+ shared.opts.caption_vlm_top_p = float(vlm_top_p)
+ shared.opts.caption_vlm_keep_prefill = bool(vlm_keep_prefill)
+ shared.opts.caption_vlm_keep_thinking = bool(vlm_keep_thinking)
+ shared.opts.caption_vlm_thinking_mode = bool(vlm_thinking_mode)
shared.opts.save()
def tagger_tag_wrapper(image, model_name, general_threshold, character_threshold, include_rating, exclude_tags, max_tags, sort_alpha, use_spaces, escape_brackets):
"""Wrapper for tagger.tag that maps UI inputs to function parameters."""
- from modules.interrogate import tagger
+ from modules.caption import tagger
return tagger.tag(
image=image,
model_name=model_name,
@@ -62,7 +62,7 @@ def tagger_tag_wrapper(image, model_name, general_threshold, character_threshold
def tagger_batch_wrapper(model_name, batch_files, batch_folder, batch_str, save_output, save_append, recursive, general_threshold, character_threshold, include_rating, exclude_tags, max_tags, sort_alpha, use_spaces, escape_brackets):
"""Wrapper for tagger.batch that maps UI inputs to function parameters."""
- from modules.interrogate import tagger
+ from modules.caption import tagger
return tagger.batch(
model_name=model_name,
batch_files=batch_files,
@@ -88,7 +88,7 @@ def update_tagger_ui(model_name):
When DeepBooru is selected, character_threshold is disabled since DeepBooru
doesn't support separate character threshold.
"""
- from modules.interrogate import tagger
+ from modules.caption import tagger
is_db = tagger.is_deepbooru(model_name)
return [
gr.update(interactive=not is_db), # character_threshold
@@ -113,48 +113,48 @@ def update_tagger_params(model_name, general_threshold, character_threshold, inc
def update_clip_params(*args):
clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams = args
- shared.opts.interrogate_clip_min_length = int(clip_min_length)
- shared.opts.interrogate_clip_max_length = int(clip_max_length)
- shared.opts.interrogate_clip_min_flavors = int(clip_min_flavors)
- shared.opts.interrogate_clip_max_flavors = int(clip_max_flavors)
- shared.opts.interrogate_clip_num_beams = int(clip_num_beams)
- shared.opts.interrogate_clip_flavor_count = int(clip_flavor_count)
- shared.opts.interrogate_clip_chunk_size = int(clip_chunk_size)
+ shared.opts.caption_openclip_min_length = int(clip_min_length)
+ shared.opts.caption_openclip_max_length = int(clip_max_length)
+ shared.opts.caption_openclip_min_flavors = int(clip_min_flavors)
+ shared.opts.caption_openclip_max_flavors = int(clip_max_flavors)
+ shared.opts.caption_openclip_num_beams = int(clip_num_beams)
+ shared.opts.caption_openclip_flavor_count = int(clip_flavor_count)
+ shared.opts.caption_openclip_chunk_size = int(clip_chunk_size)
shared.opts.save()
- openclip.update_interrogate_params()
+ openclip.update_caption_params()
def update_clip_model_params(clip_model, blip_model, clip_mode):
"""Save CLiP model settings to shared.opts when UI controls change."""
- shared.opts.interrogate_clip_model = str(clip_model)
- shared.opts.interrogate_blip_model = str(blip_model)
- shared.opts.interrogate_clip_mode = str(clip_mode)
+ shared.opts.caption_openclip_model = str(clip_model)
+ shared.opts.caption_openclip_blip_model = str(blip_model)
+ shared.opts.caption_openclip_mode = str(clip_mode)
shared.opts.save()
def update_vlm_model_params(vlm_model, vlm_system):
"""Save VLM model settings to shared.opts when UI controls change."""
- shared.opts.interrogate_vlm_model = str(vlm_model)
- shared.opts.interrogate_vlm_system = str(vlm_system)
+ shared.opts.caption_vlm_model = str(vlm_model)
+ shared.opts.caption_vlm_system = str(vlm_system)
shared.opts.save()
def update_default_caption_type(caption_type):
"""Save the default caption type to shared.opts."""
- shared.opts.interrogate_default_type = str(caption_type)
+ shared.opts.caption_default_type = str(caption_type)
shared.opts.save()
def create_ui():
shared.log.debug('UI initialize: tab=caption')
with gr.Row(equal_height=False, variant='compact', elem_classes="caption", elem_id="caption_tab"):
- with gr.Column(variant='compact', elem_id='interrogate_input'):
+ with gr.Column(variant='compact', elem_id='caption_input'):
with gr.Row():
- image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='interrogate_image')
+ image = gr.Image(type='pil', label="Image", height=512, visible=True, image_mode='RGB', elem_id='caption_image')
with gr.Tabs(elem_id="mode_caption"):
with gr.Tab("VLM Caption", elem_id="tab_vlm_caption"):
- from modules.interrogate import vqa
- current_vlm_model = shared.opts.interrogate_vlm_model or vqa.vlm_default
+ from modules.caption import vqa
+ current_vlm_model = shared.opts.caption_vlm_model or vqa.vlm_default
initial_prompts = vqa.get_prompts_for_model(current_vlm_model)
with gr.Row():
vlm_system = gr.Textbox(label="System Prompt", value=vqa.vlm_system, lines=1, elem_id='vlm_system')
@@ -162,25 +162,25 @@ def create_ui():
vlm_question = gr.Dropdown(label="Task", allow_custom_value=False, choices=initial_prompts, value=default_task, elem_id='vlm_question')
with gr.Row():
vlm_prompt = gr.Textbox(label="Prompt", placeholder=vqa.get_prompt_placeholder(initial_prompts[0]), lines=2, elem_id='vlm_prompt')
- with gr.Row(elem_id='interrogate_buttons_query'):
+ with gr.Row(elem_id='caption_buttons_query'):
vlm_model = gr.Dropdown(list(vqa.vlm_models), value=current_vlm_model, label='VLM Model', elem_id='vlm_model')
with gr.Row():
vlm_load_btn = gr.Button(value='Load', elem_id='vlm_load', variant='secondary')
vlm_unload_btn = gr.Button(value='Unload', elem_id='vlm_unload', variant='secondary')
with gr.Accordion(label='Caption: Advanced Options', open=False, visible=True):
with gr.Row():
- vlm_max_tokens = gr.Slider(label='VLM Max Tokens', value=shared.opts.interrogate_vlm_max_length, minimum=16, maximum=4096, step=1, elem_id='vlm_max_tokens')
- vlm_num_beams = gr.Slider(label='VLM Num Beams', value=shared.opts.interrogate_vlm_num_beams, minimum=1, maximum=16, step=1, elem_id='vlm_num_beams')
- vlm_temperature = gr.Slider(label='VLM Temperature', value=shared.opts.interrogate_vlm_temperature, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_temperature')
+ vlm_max_tokens = gr.Slider(label='VLM Max Tokens', value=shared.opts.caption_vlm_max_length, minimum=16, maximum=4096, step=1, elem_id='vlm_max_tokens')
+ vlm_num_beams = gr.Slider(label='VLM Num Beams', value=shared.opts.caption_vlm_num_beams, minimum=1, maximum=16, step=1, elem_id='vlm_num_beams')
+ vlm_temperature = gr.Slider(label='VLM Temperature', value=shared.opts.caption_vlm_temperature, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_temperature')
with gr.Row():
- vlm_top_k = gr.Slider(label='Top-K', value=shared.opts.interrogate_vlm_top_k, minimum=0, maximum=99, step=1, elem_id='vlm_top_k')
- vlm_top_p = gr.Slider(label='Top-P', value=shared.opts.interrogate_vlm_top_p, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_top_p')
+ vlm_top_k = gr.Slider(label='Top-K', value=shared.opts.caption_vlm_top_k, minimum=0, maximum=99, step=1, elem_id='vlm_top_k')
+ vlm_top_p = gr.Slider(label='Top-P', value=shared.opts.caption_vlm_top_p, minimum=0.0, maximum=1.0, step=0.01, elem_id='vlm_top_p')
with gr.Row():
- vlm_do_sample = gr.Checkbox(label='Use Samplers', value=shared.opts.interrogate_vlm_do_sample, elem_id='vlm_do_sample')
- vlm_thinking_mode = gr.Checkbox(label='Thinking Mode', value=shared.opts.interrogate_vlm_thinking_mode, elem_id='vlm_thinking_mode')
+ vlm_do_sample = gr.Checkbox(label='Use Samplers', value=shared.opts.caption_vlm_do_sample, elem_id='vlm_do_sample')
+ vlm_thinking_mode = gr.Checkbox(label='Thinking Mode', value=shared.opts.caption_vlm_thinking_mode, elem_id='vlm_thinking_mode')
with gr.Row():
- vlm_keep_thinking = gr.Checkbox(label='Keep Thinking Trace', value=shared.opts.interrogate_vlm_keep_thinking, elem_id='vlm_keep_thinking')
- vlm_keep_prefill = gr.Checkbox(label='Keep Prefill', value=shared.opts.interrogate_vlm_keep_prefill, elem_id='vlm_keep_prefill')
+ vlm_keep_thinking = gr.Checkbox(label='Keep Thinking Trace', value=shared.opts.caption_vlm_keep_thinking, elem_id='vlm_keep_thinking')
+ vlm_keep_prefill = gr.Checkbox(label='Keep Prefill', value=shared.opts.caption_vlm_keep_prefill, elem_id='vlm_keep_prefill')
with gr.Row():
vlm_prefill = gr.Textbox(label='Prefill Text', value='', lines=1, elem_id='vlm_prefill', placeholder='Optional prefill text for model to continue from')
vlm_max_tokens.change(fn=update_vlm_params, inputs=[vlm_max_tokens, vlm_num_beams, vlm_temperature, vlm_do_sample, vlm_top_k, vlm_top_p, vlm_keep_prefill, vlm_keep_thinking, vlm_thinking_mode], outputs=[])
@@ -203,27 +203,27 @@ def create_ui():
vlm_save_output = gr.Checkbox(label='Save Caption Files', value=True, elem_id="vlm_save_output")
vlm_save_append = gr.Checkbox(label='Append Caption Files', value=False, elem_id="vlm_save_append")
vlm_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="vlm_folder_recursive")
- with gr.Row(elem_id='interrogate_buttons_batch'):
+ with gr.Row(elem_id='caption_buttons_batch'):
btn_vlm_caption_batch = gr.Button("Batch Caption", variant='primary', elem_id="btn_vlm_caption_batch")
with gr.Row():
btn_vlm_caption = gr.Button("Caption", variant='primary', elem_id="btn_vlm_caption")
- with gr.Tab("OpenCLiP", elem_id='tab_clip_interrogate'):
+ with gr.Tab("OpenCLiP", elem_id='tab_openclip'):
with gr.Row():
- clip_model = gr.Dropdown([], value=shared.opts.interrogate_clip_model, label='CLiP Model', elem_id='clip_clip_model')
+ clip_model = gr.Dropdown([], value=shared.opts.caption_openclip_model, label='CLiP Model', elem_id='clip_clip_model')
ui_common.create_refresh_button(clip_model, openclip.refresh_clip_models, lambda: {"choices": openclip.refresh_clip_models()}, 'clip_models_refresh')
- blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.interrogate_blip_model, label='Caption Model', elem_id='btN_clip_blip_model')
+ blip_model = gr.Dropdown(list(openclip.caption_models), value=shared.opts.caption_openclip_blip_model, label='Caption Model', elem_id='btN_clip_blip_model')
clip_mode = gr.Dropdown(openclip.caption_types, label='Mode', value='fast', elem_id='clip_clip_mode')
with gr.Accordion(label='Caption: Advanced Options', open=False, visible=True):
with gr.Row():
- clip_min_length = gr.Slider(label='clip: min length', value=shared.opts.interrogate_clip_min_length, minimum=8, maximum=75, step=1, elem_id='clip_caption_min_length')
- clip_max_length = gr.Slider(label='clip: max length', value=shared.opts.interrogate_clip_max_length, minimum=16, maximum=1024, step=1, elem_id='clip_caption_max_length')
- clip_chunk_size = gr.Slider(label='clip: chunk size', value=shared.opts.interrogate_clip_chunk_size, minimum=256, maximum=4096, step=8, elem_id='clip_chunk_size')
+ clip_min_length = gr.Slider(label='clip: min length', value=shared.opts.caption_openclip_min_length, minimum=8, maximum=75, step=1, elem_id='clip_caption_min_length')
+ clip_max_length = gr.Slider(label='clip: max length', value=shared.opts.caption_openclip_max_length, minimum=16, maximum=1024, step=1, elem_id='clip_caption_max_length')
+ clip_chunk_size = gr.Slider(label='clip: chunk size', value=shared.opts.caption_openclip_chunk_size, minimum=256, maximum=4096, step=8, elem_id='clip_chunk_size')
with gr.Row():
- clip_min_flavors = gr.Slider(label='clip: min flavors', value=shared.opts.interrogate_clip_min_flavors, minimum=1, maximum=16, step=1, elem_id='clip_min_flavors')
- clip_max_flavors = gr.Slider(label='clip: max flavors', value=shared.opts.interrogate_clip_max_flavors, minimum=1, maximum=64, step=1, elem_id='clip_max_flavors')
- clip_flavor_count = gr.Slider(label='clip: intermediates', value=shared.opts.interrogate_clip_flavor_count, minimum=256, maximum=4096, step=8, elem_id='clip_flavor_intermediate_count')
+ clip_min_flavors = gr.Slider(label='clip: min flavors', value=shared.opts.caption_openclip_min_flavors, minimum=1, maximum=16, step=1, elem_id='clip_min_flavors')
+ clip_max_flavors = gr.Slider(label='clip: max flavors', value=shared.opts.caption_openclip_max_flavors, minimum=1, maximum=64, step=1, elem_id='clip_max_flavors')
+ clip_flavor_count = gr.Slider(label='clip: intermediates', value=shared.opts.caption_openclip_flavor_count, minimum=256, maximum=4096, step=8, elem_id='clip_flavor_intermediate_count')
with gr.Row():
- clip_num_beams = gr.Slider(label='clip: num beams', value=shared.opts.interrogate_clip_num_beams, minimum=1, maximum=16, step=1, elem_id='clip_num_beams')
+ clip_num_beams = gr.Slider(label='clip: num beams', value=shared.opts.caption_openclip_num_beams, minimum=1, maximum=16, step=1, elem_id='clip_num_beams')
clip_min_length.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[])
clip_max_length.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[])
clip_chunk_size.change(fn=update_clip_params, inputs=[clip_min_length, clip_max_length, clip_chunk_size, clip_min_flavors, clip_max_flavors, clip_flavor_count, clip_num_beams], outputs=[])
@@ -243,12 +243,12 @@ def create_ui():
clip_save_append = gr.Checkbox(label='Append Caption Files', value=False, elem_id="clip_save_append")
clip_folder_recursive = gr.Checkbox(label='Recursive', value=False, elem_id="clip_folder_recursive")
with gr.Row():
- btn_clip_interrogate_batch = gr.Button("Batch Interrogate", variant='primary', elem_id="btn_clip_interrogate_batch")
+ btn_clip_caption_batch = gr.Button("Batch Caption", variant='primary', elem_id="btn_clip_caption_batch")
with gr.Row():
- btn_clip_interrogate_img = gr.Button("Interrogate", variant='primary', elem_id="btn_clip_interrogate_img")
+ btn_clip_caption_img = gr.Button("Caption", variant='primary', elem_id="btn_clip_caption_img")
btn_clip_analyze_img = gr.Button("Analyze", variant='primary', elem_id="btn_clip_analyze_img")
with gr.Tab("Tagger", elem_id='tab_tagger'):
- from modules.interrogate import tagger
+ from modules.caption import tagger
with gr.Row():
wd_model = gr.Dropdown(tagger.get_models(), value=shared.opts.waifudiffusion_model, label='Tagger Model', elem_id='wd_model')
ui_common.create_refresh_button(wd_model, tagger.refresh_models, lambda: {"choices": tagger.get_models()}, 'wd_models_refresh')
@@ -286,32 +286,32 @@ def create_ui():
btn_wd_tag_batch = gr.Button("Batch Tag", variant='primary', elem_id="btn_wd_tag_batch")
with gr.Row():
btn_wd_tag = gr.Button("Tag", variant='primary', elem_id="btn_wd_tag")
- with gr.Tab("Interrogate", elem_id='tab_interrogate'):
+ with gr.Tab("Default", elem_id='tab_caption_default'):
with gr.Row():
default_caption_type = gr.Radio(
choices=["VLM", "OpenCLiP", "Tagger"],
- value=shared.opts.interrogate_default_type,
+ value=shared.opts.caption_default_type,
label="Default Caption Type",
elem_id="default_caption_type"
)
- with gr.Column(variant='compact', elem_id='interrogate_output'):
- with gr.Row(elem_id='interrogate_output_prompt'):
+ with gr.Column(variant='compact', elem_id='caption_output'):
+ with gr.Row(elem_id='caption_output_prompt'):
prompt = gr.Textbox(label="Answer", lines=12, placeholder="ai generated image description")
- with gr.Row(elem_id='interrogate_output_image'):
- output_image = gr.Image(type='pil', label="Annotated Image", interactive=False, visible=False, elem_id='interrogate_output_image_display')
- with gr.Row(elem_id='interrogate_output_classes'):
- medium = gr.Label(elem_id="interrogate_label_medium", label="Medium", num_top_classes=5, visible=False)
- artist = gr.Label(elem_id="interrogate_label_artist", label="Artist", num_top_classes=5, visible=False)
- movement = gr.Label(elem_id="interrogate_label_movement", label="Movement", num_top_classes=5, visible=False)
- trending = gr.Label(elem_id="interrogate_label_trending", label="Trending", num_top_classes=5, visible=False)
- flavor = gr.Label(elem_id="interrogate_label_flavor", label="Flavor", num_top_classes=5, visible=False)
- clip_labels_text = gr.Textbox(elem_id="interrogate_clip_labels_text", label="CLIP Analysis", lines=15, interactive=False, visible=False, show_label=False)
- with gr.Row(elem_id='copy_buttons_interrogate'):
- copy_interrogate_buttons = generation_parameters_copypaste.create_buttons(["txt2img", "img2img", "control", "extras"])
+ with gr.Row(elem_id='caption_output_image'):
+ output_image = gr.Image(type='pil', label="Annotated Image", interactive=False, visible=False, elem_id='caption_output_image_display')
+ with gr.Row(elem_id='caption_output_classes'):
+ medium = gr.Label(elem_id="caption_label_medium", label="Medium", num_top_classes=5, visible=False)
+ artist = gr.Label(elem_id="caption_label_artist", label="Artist", num_top_classes=5, visible=False)
+ movement = gr.Label(elem_id="caption_label_movement", label="Movement", num_top_classes=5, visible=False)
+ trending = gr.Label(elem_id="caption_label_trending", label="Trending", num_top_classes=5, visible=False)
+ flavor = gr.Label(elem_id="caption_label_flavor", label="Flavor", num_top_classes=5, visible=False)
+ clip_labels_text = gr.Textbox(elem_id="caption_clip_labels_text", label="CLIP Analysis", lines=15, interactive=False, visible=False, show_label=False)
+ with gr.Row(elem_id='copy_buttons_caption'):
+ copy_caption_buttons = generation_parameters_copypaste.create_buttons(["txt2img", "img2img", "control", "extras"])
- btn_clip_interrogate_img.click(openclip.interrogate_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
+ btn_clip_caption_img.click(openclip.caption_image, inputs=[image, clip_model, blip_model, clip_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
btn_clip_analyze_img.click(openclip.analyze_image, inputs=[image, clip_model, blip_model], outputs=[medium, artist, movement, trending, flavor, clip_labels_text]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
- btn_clip_interrogate_batch.click(fn=openclip.interrogate_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
+ btn_clip_caption_batch.click(fn=openclip.caption_batch, inputs=[clip_batch_files, clip_batch_folder, clip_batch_str, clip_model, blip_model, clip_mode, clip_save_output, clip_save_append, clip_folder_recursive], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
btn_vlm_caption.click(fn=vlm_caption_wrapper, inputs=[vlm_question, vlm_system, vlm_prompt, image, vlm_model, vlm_prefill, vlm_thinking_mode], outputs=[prompt, output_image])
btn_vlm_caption_batch.click(fn=vqa.batch, inputs=[vlm_model, vlm_system, vlm_batch_files, vlm_batch_folder, vlm_batch_str, vlm_question, vlm_prompt, vlm_save_output, vlm_save_append, vlm_folder_recursive, vlm_prefill, vlm_thinking_mode], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
btn_wd_tag.click(fn=tagger_tag_wrapper, inputs=[image, wd_model, wd_general_threshold, wd_character_threshold, wd_include_rating, wd_exclude_tags, wd_max_tags, wd_sort_alpha, wd_use_spaces, wd_escape], outputs=[prompt]).then(fn=lambda: gr.update(visible=False), inputs=[], outputs=[output_image])
@@ -325,10 +325,10 @@ def create_ui():
vlm_load_btn.click(fn=vqa.load_model, inputs=[vlm_model], outputs=[])
vlm_unload_btn.click(fn=vqa.unload_model, inputs=[], outputs=[])
def tagger_load_wrapper(model_name):
- from modules.interrogate import tagger
+ from modules.caption import tagger
return tagger.load_model(model_name)
def tagger_unload_wrapper():
- from modules.interrogate import tagger
+ from modules.caption import tagger
return tagger.unload_model()
wd_load_btn.click(fn=tagger_load_wrapper, inputs=[wd_model], outputs=[])
wd_unload_btn.click(fn=tagger_unload_wrapper, inputs=[], outputs=[])
@@ -363,6 +363,6 @@ def create_ui():
# Save default caption type to shared.opts when UI control changes
default_caption_type.change(fn=update_default_caption_type, inputs=[default_caption_type], outputs=[], show_progress=False)
- for tabname, button in copy_interrogate_buttons.items():
+ for tabname, button in copy_caption_buttons.items():
generation_parameters_copypaste.register_paste_params_button(generation_parameters_copypaste.ParamBinding(paste_button=button, tabname=tabname, source_text_component=prompt, source_image_component=image,))
generation_parameters_copypaste.add_paste_fields("caption", image, None)
diff --git a/modules/ui_common.py b/modules/ui_common.py
index 5a6299afe..89dd2038a 100644
--- a/modules/ui_common.py
+++ b/modules/ui_common.py
@@ -273,7 +273,7 @@ def create_output_panel(tabname, preview=True, prompt=None, height=None, transfe
elem_classes=["gallery_main"],
)
if prompt is not None:
- ui_sections.create_interrogate_button(tab=tabname, inputs=result_gallery, outputs=prompt, what='output')
+ ui_sections.create_caption_button(tab=tabname, inputs=result_gallery, outputs=prompt, what='output')
button_image_fit = gr.Button(ui_symbols.resize, elem_id=f"{tabname}_image_fit", elem_classes=['image-fit'])
button_image_fit.click(fn=None, _js="cycleImageFit", inputs=[], outputs=[])
diff --git a/modules/ui_control.py b/modules/ui_control.py
index dc31db6c2..1049b5f54 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -226,7 +226,7 @@ def create_ui(_blocks: gr.Blocks=None):
else:
input_image = gr.HTML(value='Kanvas not initialized
', elem_id='kanvas-container')
input_changed = gr.Button('Kanvas change', elem_id='kanvas-change-button', visible=False)
- btn_interrogate = ui_sections.create_interrogate_button('control', what='input')
+ btn_caption = ui_sections.create_caption_button('control', what='input')
with gr.Tab('Video', id='in-video') as tab_video:
input_video = gr.Video(label="Input", show_label=False, interactive=True, height=gr_height, elem_classes=['control-image'])
with gr.Tab('Batch', id='in-batch') as tab_batch:
@@ -303,8 +303,8 @@ def create_ui(_blocks: gr.Blocks=None):
)
input_changed.click(**select_dict)
- btn_interrogate.click(**select_dict) # need to fetch input first
- btn_interrogate.click(fn=helpers.interrogate, inputs=[], outputs=[prompt])
+ btn_caption.click(**select_dict) # need to fetch input first
+ btn_caption.click(fn=helpers.caption, inputs=[], outputs=[prompt])
prompt.submit(**select_dict)
negative.submit(**select_dict)
diff --git a/modules/ui_control_helpers.py b/modules/ui_control_helpers.py
index f1545dd07..cbe3c4df8 100644
--- a/modules/ui_control_helpers.py
+++ b/modules/ui_control_helpers.py
@@ -48,16 +48,16 @@ def initialize():
scripts_manager.scripts_control.initialize_scripts(is_img2img=False, is_control=True)
-def interrogate():
+def caption():
prompt = None
if input_source is None or len(input_source) == 0:
- shared.log.warning('Interrogate: no input source')
+ shared.log.warning('Caption: no input source')
return prompt
try:
- from modules.interrogate.interrogate import interrogate as interrogate_fn
- prompt = interrogate_fn(input_source[0])
+ from modules.caption.caption import caption as caption_fn
+ prompt = caption_fn(input_source[0])
except Exception as e:
- shared.log.error(f'Interrogate: {e}')
+ shared.log.error(f'Caption: {e}')
return prompt
diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py
index d6e1591a2..5e651de91 100644
--- a/modules/ui_img2img.py
+++ b/modules/ui_img2img.py
@@ -3,21 +3,21 @@ from modules import timer, shared, call_queue, generation_parameters_copypaste,
from modules import ui_common, ui_sections, ui_guidance
-def process_interrogate(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_singles):
+def process_caption(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_singles):
import os
from PIL import Image
- from modules.interrogate.interrogate import interrogate
+ from modules.caption.caption import caption
mode = int(mode)
if mode in {0, 1, 3, 4}:
- return [interrogate(ii_singles[mode]), None]
+ return [caption(ii_singles[mode]), None]
if mode == 2:
- return [interrogate(ii_singles[mode]["image"]), None]
+ return [caption(ii_singles[mode]["image"]), None]
if mode == 5:
if len(ii_input_files) > 0:
images = [f.name for f in ii_input_files]
else:
if not os.path.isdir(ii_input_dir):
- shared.log.error(f"Interrogate: Input directory not found: {ii_input_dir}")
+ shared.log.error(f"Caption: Input directory not found: {ii_input_dir}")
return [gr.update(), None]
images = os.listdir(ii_input_dir)
if ii_output_dir != "":
@@ -28,7 +28,7 @@ def process_interrogate(mode, ii_input_files, ii_input_dir, ii_output_dir, *ii_s
img = Image.open(image)
filename = os.path.basename(image)
left, _ = os.path.splitext(filename)
- print(interrogate(img), file=open(os.path.join(ii_output_dir, f"{left}.txt"), 'a', encoding='utf-8')) # pylint: disable=consider-using-with
+ print(caption(img), file=open(os.path.join(ii_output_dir, f"{left}.txt"), 'a', encoding='utf-8')) # pylint: disable=consider-using-with
return [gr.update(), None]
@@ -70,7 +70,7 @@ def create_ui():
state = gr.Textbox(value='', visible=False)
with gr.TabItem('Image', id='img2img_image', elem_id="img2img_image_tab") as tab_img2img:
img_init = gr.Image(label="", elem_id="img2img_image", show_label=False, interactive=True, type="pil", tool="editor", image_mode="RGBA", height=512)
- interrogate_btn = ui_sections.create_interrogate_button(tab='img2img', what='input')
+ caption_btn = ui_sections.create_caption_button(tab='img2img', what='input')
add_copy_image_controls('img2img', img_init)
with gr.TabItem('Inpaint', id='img2img_inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
@@ -215,7 +215,7 @@ def create_ui():
img2img_reprocess[2].click(**img2img_dict) # hires-refine
img2img_reprocess[3].click(**img2img_dict) # face-restore
- interrogate_args = dict(
+ caption_args = dict(
_js="get_img2img_tab_index",
inputs=[
dummy_component,
@@ -227,7 +227,7 @@ def create_ui():
],
outputs=[img2img_prompt, dummy_component],
)
- interrogate_btn.click(fn=lambda *args: process_interrogate(*args), **interrogate_args)
+ caption_btn.click(fn=lambda *args: process_caption(*args), **caption_args)
img2img_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_prompt], outputs=[img2img_token_counter], show_progress = 'hidden')
img2img_negative_token_button.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[img2img_negative_prompt], outputs=[img2img_negative_token_counter], show_progress = 'hidden')
diff --git a/modules/ui_sections.py b/modules/ui_sections.py
index 2c4e11b78..4930446f2 100644
--- a/modules/ui_sections.py
+++ b/modules/ui_sections.py
@@ -1,7 +1,7 @@
import gradio as gr
from modules import shared, modelloader, ui_symbols, ui_common, sd_samplers
from modules.ui_components import ToolButton
-from modules.interrogate import interrogate
+from modules.caption import caption
def create_toprow(is_img2img: bool = False, id_part: str = None, generate_visible: bool = True, negative_visible: bool = True, reprocess_visible: bool = True):
@@ -91,11 +91,11 @@ def create_resolution_inputs(tab, default_width=1024, default_height=1024):
return width, height
-def create_interrogate_button(tab: str, inputs: list = None, outputs: str = None, what: str = ''):
- button_interrogate = gr.Button(ui_symbols.interrogate, elem_id=f"{tab}_interrogate_{what}", elem_classes=['interrogate'])
+def create_caption_button(tab: str, inputs: list = None, outputs: str = None, what: str = ''):
+ button_caption = gr.Button(ui_symbols.caption, elem_id=f"{tab}_caption_{what}", elem_classes=['caption'])
if inputs is not None and outputs is not None:
- button_interrogate.click(fn=interrogate.interrogate, inputs=inputs, outputs=[outputs])
- return button_interrogate
+ button_caption.click(fn=caption.caption, inputs=inputs, outputs=[outputs])
+ return button_caption
def create_batch_inputs(tab, accordion=True):
diff --git a/modules/ui_symbols.py b/modules/ui_symbols.py
index c880a4a8f..e379be27a 100644
--- a/modules/ui_symbols.py
+++ b/modules/ui_symbols.py
@@ -33,7 +33,7 @@ search = '🔍'
preview = '🖼️'
image = '🖌️'
resize = '⁜'
-interrogate = '\uf46b' # Telescope icon in Noto Sans. Previously '♻'
+caption = '\uf46b' # Telescope icon in Noto Sans. Previously '♻'
bullet = '⃝'
vision = '\uf06e' # Font Awesome eye icon (more minimalistic)
reasoning = '\uf0eb' # Font Awesome lightbulb icon (represents thinking/reasoning)
diff --git a/modules/ui_video_vlm.py b/modules/ui_video_vlm.py
index 96cf8933b..b39ba4fe3 100644
--- a/modules/ui_video_vlm.py
+++ b/modules/ui_video_vlm.py
@@ -22,7 +22,7 @@ system_prompts = {
def enhance_prompt(enable:bool, model:str=None, image=None, prompt:str='', system_prompt:str='', nsfw:bool=True):
- from modules.interrogate import vqa
+ from modules.caption import vqa
if not enable:
return prompt
if model is None or len(model) < 4:
@@ -46,7 +46,7 @@ def enhance_prompt(enable:bool, model:str=None, image=None, prompt:str='', syste
system_prompt += system_prompts['nsfw_ok'] if nsfw else system_prompts['nsfw_no']
system_prompt += f" {system_prompts['suffix']} {system_prompts['example']}"
shared.log.debug(f'Video prompt enhance: model="{model}" image={image} nsfw={nsfw} prompt="{prompt}"')
- answer = vqa.interrogate(question='', prompt=prompt, system_prompt=system_prompt, image=image, model_name=model, quiet=False)
+ answer = vqa.caption(question='', prompt=prompt, system_prompt=system_prompt, image=image, model_name=model, quiet=False)
shared.log.debug(f'Video prompt enhance: answer="{answer}"')
return answer