import html from ldm.modules.encoders.modules import FrozenCLIPEmbedder from modules import script_callbacks, shared import gradio as gr css = """ @media (prefers-color-scheme: dark) { .tokenizer-token{ cursor: pointer; } .tokenizer-token-0 {background: rgba(255, 0, 0, 0.2);} .tokenizer-token-0:hover {background: rgba(255, 0, 0, 0.4);} .tokenizer-token-1 {background: rgba(0, 255, 0, 0.2);} .tokenizer-token-1:hover {background: rgba(0, 255, 0, 0.4);} .tokenizer-token-2 {background: rgba(0, 0, 255, 0.2);} .tokenizer-token-2:hover {background: rgba(0, 0, 255, 0.4);} .tokenizer-token-3 {background: rgba(255, 156, 0, 0.2);} .tokenizer-token-3:hover {background: rgba(255, 156, 0, 0.4);} } @media (prefers-color-scheme: light) { .tokenizer-token{ cursor: pointer; } .tokenizer-token-0 {background: rgba(255, 0, 0, 0.1);} .tokenizer-token-0:hover {background: rgba(255, 0, 0, 0.2);} .tokenizer-token-1 {background: rgba(0, 255, 0, 0.1);} .tokenizer-token-1:hover {background: rgba(0, 255, 0, 0.2);} .tokenizer-token-2 {background: rgba(0, 0, 255, 0.1);} .tokenizer-token-2:hover {background: rgba(0, 0, 255, 0.2);} .tokenizer-token-3 {background: rgba(255, 156, 0, 0.1);} .tokenizer-token-3:hover {background: rgba(255, 156, 0, 0.2);} } """ def tokenize(text, current_step=1, total_step=1, AND_block=0, simple_input=False, input_is_ids=False): clip: FrozenCLIPEmbedder = shared.sd_model.cond_stage_model.wrapped token_count = None if input_is_ids: tokens = [int(x.strip()) for x in text.split(",")] elif simple_input: tokens = clip.tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"] else: from modules import sd_hijack, prompt_parser from functools import reduce _, prompt_flat_list, _ = prompt_parser.get_multicond_prompt_list([text]) prompt_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(prompt_flat_list, int(total_step)) flat_prompts = reduce(lambda list1, list2: list1+list2, prompt_schedules) prompts = [prompt_text for step, prompt_text in flat_prompts] def find_current_prompt_idx(c_step, a_block): _idx = 0 for i, prompts_block in enumerate(prompt_schedules): for step_prompt_chunk in prompts_block: if i == a_block: if c_step <= step_prompt_chunk[0]: return _idx _idx += 1 idx = find_current_prompt_idx(current_step, AND_block) tokens, token_count, max_length = [sd_hijack.model_hijack.tokenize(prompt) for prompt in prompts][idx] vocab = {v: k for k, v in clip.tokenizer.get_vocab().items()} code = '' ids = [] current_ids = [] class_index = 0 def dump(last=False): nonlocal code, ids, current_ids words = [vocab.get(x, "") for x in current_ids] def wordscode(ids, word): nonlocal class_index if ids != [clip.tokenizer.eos_token_id]: res = f"""{html.escape(word)}""" else: res = f"""{html.escape(word)}""" class_index += 1 return res try: word = bytearray([clip.tokenizer.byte_decoder[x] for x in ''.join(words)]).decode("utf-8") except UnicodeDecodeError: if last: word = "❌" * len(current_ids) elif len(current_ids) > 4: id = current_ids[0] ids += [id] local_ids = current_ids[1:] code += wordscode([id], "❌") current_ids = [] for id in local_ids: current_ids.append(id) dump() return else: return word = word.replace("", " ") code += wordscode(current_ids, word) ids += current_ids current_ids = [] for token in tokens: token = int(token) current_ids.append(token) dump() dump(last=True) if token_count is None: token_count = len(ids) ids_html = f"""
Token count: {token_count}/{len(ids)}
{", ".join([str(x) for x in ids])}
Before your text is sent to the neural network, it gets turned into numbers in a process called tokenization. These tokens are how the neural network reads and interprets text. Thanks to our great friends at Shousetsu愛 for inspiration for this feature.
Depending on your setting, text will be first parsed by webui to calculate prompt attention like (text) and [text], and scheduler like [a:b:0.5], and the capital AND like a AND b before tokenization. This extension processes your text like this as well.
To disable this feature, check on "Don't parse webui special grammar".