From ec7da546003864d012d90f972f0ee99c2e148adb Mon Sep 17 00:00:00 2001 From: CodeExplode <79982924+CodeExplode@users.noreply.github.com> Date: Mon, 7 Nov 2022 18:12:36 +1100 Subject: [PATCH] Added weight markers for user-provided embeddings --- javascript/embedding_editor.js | 76 ++++++++++++++++++++++ scripts/embedding_editor.py | 112 ++++++++++++++++++++++++--------- style.css | 7 +++ 3 files changed, 166 insertions(+), 29 deletions(-) create mode 100644 javascript/embedding_editor.js diff --git a/javascript/embedding_editor.js b/javascript/embedding_editor.js new file mode 100644 index 0000000..e754d65 --- /dev/null +++ b/javascript/embedding_editor.js @@ -0,0 +1,76 @@ +function embedding_editor_update_guidance(col_weights) { + let weightsColsVals = {}; + for (let i=0; i<768; i++) + weightsColsVals[i] = []; + + for (let color in col_weights) { + let weights = Object.values(col_weights[color]); + + for (let i=0; i<768; i++) + embedding_editor_sorted_insert(weightsColsVals[i], color, weights[i]); + } + + for (let i=0; i<768; i++) { + let guidanceElem = embedding_editor_get_guidance_bar(i), + variants = weightsColsVals[i]; + + // e.g. linear-gradient(to right, transparent 0%, transparent 31%, red 31%, red 34%, transparent 34%, transparent 100%) + let transparent = '#00000000', + bg = 'linear-gradient(to right', + previous = 0; + + for (let j=0, jLen=variants.length; j weight) + break; + + set.splice(insertAt, 0, { color : color, weight: weight }); +} + +function embedding_editor_get_guidance_bar(weightNum) { + let guidanceClassType = "embedding_editor_guidance_bar", + gradioSlider = document.querySelector("gradio-app").shadowRoot.getElementById("embedding_editor_weight_slider_" + weightNum), + childElems = Array.from(gradioSlider.childNodes), + lastChild = childElems[childElems.length-1]; + + if (!lastChild.classList.contains(guidanceClassType)) { + // currently pointing to the range input. Move it slightly lower to line up with the new div, then insert the new div + lastChild.style.verticalAlign = 'text-bottom'; // could do this with CSS, have the selector for it, and then won't change on first time pressing button + + let newElem = document.createElement("div"); + newElem.style.height='6px'; + newElem.classList.add(guidanceClassType); + gradioSlider.appendChild(newElem); + lastChild = newElem; + } + + return lastChild; // could just cache these +} + +onUiUpdate(function(){ + +}) diff --git a/scripts/embedding_editor.py b/scripts/embedding_editor.py index 33d96b8..af20681 100644 --- a/scripts/embedding_editor.py +++ b/scripts/embedding_editor.py @@ -14,7 +14,7 @@ import gradio.utils import torch # ISSUES -# distribution shouldn't be fetched until the first embedding is opened +# distribution shouldn't be fetched until the first embedding is opened, and can probably be converted into a numpy array # most functions need to verify that an embedding is selected # vector numbers aren't verified (might be better as a slider) # weight slider values are lost when changing vector number @@ -27,20 +27,17 @@ import torch # add the ability to shift all weights towards another embedding with a master slider # add a strength slider (multiply all weights) # print out the closest word(s) in the original embeddings list to the current embedding, with torch.abs(embedding1.vec - embedding2.vec).mean() or maybe sum -# also maybe print a mouseover or have an expandable info box per weight slider for the closest embedding(s) for that weight value -# maybe add per-weight notes, and possibly a way to save them per embedding (and vector), or save them under a class name (e.g. 'animal', 'clothing', 'face') -# add option to vary individual weights one at a time and generate outputs, potentially also varied combinations of weights. Potentially use scoring system to determine size of change (maybe latents or clip interrogator) +# also maybe print a mouseover or have an expandable per weight slider for the closest embedding(s) for that weight value +# maybe allowing per-weight notes, and possibly a way to save them per embedding vector +# add option to vary individual weights one at a time and geneerate outputs, potentially also combinations of weights. Potentially use scoring system to determine size of change (maybe latents or clip interrogator) # add option to 'move' around current embedding position and generate outputs (a 768-dimensional vector spiral)? -# potentially represent all weights 2 or 3 magnitudes larger, so that it's more obvious when a weight is larger than most or very small +embedding_editor_weight_visual_scalar = 1 def determine_embedding_distribution(): cond_model = shared.sd_model.cond_stage_model embedding_layer = cond_model.wrapped.transformer.text_model.embeddings - distribution_floor = torch.zeros(768) - distribution_ceiling = torch.zeros(768) - for i in range(49405): # guessing that's the range of CLIP tokens given that 49406 and 49407 are special tokens presumably appended to the end embedding = embedding_layer.token_embedding.wrapped(torch.LongTensor([i]).to(devices.device)).squeeze(0) if i == 0: @@ -50,17 +47,21 @@ def determine_embedding_distribution(): distribution_floor = torch.minimum(distribution_floor, embedding) distribution_ceiling = torch.maximum(distribution_ceiling, embedding) - return distribution_floor, distribution_ceiling + # a hack but don't know how else to get these values into gradio event functions, short of maybe caching them in an invisible gradio html element + global embedding_editor_distribution_floor, embedding_editor_distribution_ceiling + embedding_editor_distribution_floor = distribution_floor + embedding_editor_distribution_ceiling = distribution_ceiling -def build_slider(index, default, distribution_floor, distribution_ceiling, weight_sliders): - floor = distribution_floor[index].item() - ceil = distribution_ceiling[index].item() - slider = gr.Slider(minimum=floor, maximum=ceil, step=0.00001, label=f"w{index}", value=default, interactive=True) +def build_slider(index, default, weight_sliders): + floor = embedding_editor_distribution_floor[index].item() * embedding_editor_weight_visual_scalar + ceil = embedding_editor_distribution_ceiling[index].item() * embedding_editor_weight_visual_scalar + + slider = gr.Slider(minimum=floor, maximum=ceil, step="any", label=f"w{index}", value=default, interactive=True, elem_id=f'embedding_editor_weight_slider_{index}') weight_sliders.append(slider) def on_ui_tabs(): - distribution_floor, distribution_ceiling = determine_embedding_distribution() + determine_embedding_distribution() weight_sliders = [] with gr.Blocks(analytics_enabled=False) as embedding_editor_interface: @@ -72,16 +73,25 @@ def on_ui_tabs(): vector_num = gr.Number(label='Vector', value=0, step=1, interactive=True) refresh_embeddings_button = gr.Button(value="Refresh Embeddings", variant='secondary') save_embedding_button = gr.Button(value="Save Embedding", variant='primary') + + instructions = gr.HTML(f""" +

Enter words and color hexes to mark weights on the sliders for guidance. Hint: Use the txt2img prompt token counter or webui-tokenizer to see which words are constructed using multiple sub-words, e.g. 'computer' doesn't exist in stable diffusion's CLIP dictionary and instead 'compu' and 'ter' are used (1 word but 2 embedding vectors) +

+ """) + with gr.Row(): + guidance_embeddings = gr.Textbox(value="apple:#FF0000, banana:#FECE26, strawberry:#FF00FF", placeholder="symbol:color-hex, symbol:color-hex, ...", show_label=False, interactive=True) + guidance_update_button = gr.Button(value='\U0001f504', elem_id='embedding_editor_refresh_guidance') + guidance_hidden_cache = gr.HTML(value="", visible=False) with gr.Column(elem_id='embedding_editor_weight_sliders_container'): for i in range(0, 128): with gr.Row(): - build_slider(i*6+0, 0, distribution_floor, distribution_ceiling, weight_sliders) - build_slider(i*6+1, 0, distribution_floor, distribution_ceiling, weight_sliders) - build_slider(i*6+2, 0, distribution_floor, distribution_ceiling, weight_sliders) - build_slider(i*6+3, 0, distribution_floor, distribution_ceiling, weight_sliders) - build_slider(i*6+4, 0, distribution_floor, distribution_ceiling, weight_sliders) - build_slider(i*6+5, 0, distribution_floor, distribution_ceiling, weight_sliders) + build_slider(i*6+0, 0, weight_sliders) + build_slider(i*6+1, 0, weight_sliders) + build_slider(i*6+2, 0, weight_sliders) + build_slider(i*6+3, 0, weight_sliders) + build_slider(i*6+4, 0, weight_sliders) + build_slider(i*6+5, 0, weight_sliders) with gr.Column(scale=1): gallery = gr.Gallery(label='Output', show_label=False, elem_id="embedding_editor_gallery").style(grid=4) @@ -99,7 +109,7 @@ def on_ui_tabs(): preview_args = dict( fn=wrap_gradio_gpu_call(generate_embedding_preview), - _js="submit", + #_js="submit", inputs=[ embedding_name, vector_num, @@ -156,7 +166,22 @@ def on_ui_tabs(): ] + weight_sliders, outputs=[], ) - + + guidance_embeddings.change( + fn=update_guidance_embeddings, + inputs=[guidance_embeddings], + outputs=[guidance_hidden_cache] + ) + + guidance_update_button.click( + fn=None, + _js="embedding_editor_update_guidance", + inputs=[guidance_hidden_cache], + outputs=[] + ) + + guidance_hidden_cache.value = update_guidance_embeddings(guidance_embeddings.value) + return [(embedding_editor_interface, "Embedding Editor", "embedding_editor_interface")] def select_embedding(embedding_name, vector_num): @@ -165,23 +190,23 @@ def select_embedding(embedding_name, vector_num): weights = [] for i in range(0, 768): - weights.append( vec[i].item() ) + weights.append( vec[i].item() * embedding_editor_weight_visual_scalar ) return weights -def update_embedding_weights(embedding_name, vector_num, weights): +def apply_slider_weights(embedding_name, vector_num, weights): embedding = sd_hijack.model_hijack.embedding_db.word_embeddings[embedding_name] vec = embedding.vec[int(vector_num)] old_weights = [] for i in range(0, 768): old_weights.append(vec[i].item()) - vec[i] = weights[i] + vec[i] = weights[i] / embedding_editor_weight_visual_scalar return old_weights def generate_embedding_preview(embedding_name, vector_num, prompt: str, steps: int, cfg_scale: float, seed: int, batch_count: int, *weights): - old_weights = update_embedding_weights(embedding_name, vector_num, weights) + old_weights = apply_slider_weights(embedding_name, vector_num, weights) p = StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, @@ -207,18 +232,47 @@ def generate_embedding_preview(embedding_name, vector_num, prompt: str, steps: i if opts.samples_log_stdout: print(generation_info_js) - update_embedding_weights(embedding_name, vector_num, old_weights) # restore + apply_slider_weights(embedding_name, vector_num, old_weights) # restore return processed.images, generation_info_js, plaintext_to_html(processed.info) def save_embedding_weights(embedding_name, vector_num, *weights): - update_embedding_weights(embedding_name, vector_num, weights) + apply_slider_weights(embedding_name, vector_num, weights) embedding = sd_hijack.model_hijack.embedding_db.word_embeddings[embedding_name] checkpoint = sd_models.select_checkpoint() filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') save_embedding(embedding, checkpoint, embedding_name, filename, remove_cached_checksum=True) +def update_guidance_embeddings(text): + try: + cond_model = shared.sd_model.cond_stage_model + embedding_layer = cond_model.wrapped.transformer.text_model.embeddings + + pairs = [x.strip() for x in text.split(',')] + + col_weights = {} + + for pair in pairs: + word, col = pair.split(":") + + ids = cond_model.tokenizer(word, max_length=77, return_tensors="pt", add_special_tokens=False)["input_ids"] + embedding = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)[0] + weights = [] + + for i in range(0, 768): + weight = embedding[i].item() + floor = embedding_editor_distribution_floor[i].item() + ceiling = embedding_editor_distribution_ceiling[i].item() + + weight = (weight - floor) / (ceiling - floor) # adjust to range for using as a guidance marker along the slider + weights.append(weight) + + col_weights[col] = weights + + return col_weights + except: + return [] -script_callbacks.on_ui_tabs(on_ui_tabs) +script_callbacks.on_ui_tabs(on_ui_tabs) \ No newline at end of file diff --git a/style.css b/style.css index 7506440..dcc01c0 100644 --- a/style.css +++ b/style.css @@ -5,3 +5,10 @@ padding-top: 7px; } +#embedding_editor_refresh_guidance{ + max-width: 2em; +} + +[id^="embedding_editor_weight_slider_"] input[type=range] { + +} \ No newline at end of file