From d9eda89facccdb7d3690d697faf8e4ad798e9d26 Mon Sep 17 00:00:00 2001 From: Alexander Sokol Date: Thu, 25 May 2023 20:04:53 +0300 Subject: [PATCH] add hash cache and cache calculation --- .gitignore | 3 +- scripts/mo/dl/download_manager.py | 4 +- scripts/mo/environment.py | 8 --- scripts/mo/ui_debug.py | 103 +++++++++++++++++++++++++++++- scripts/mo/ui_main.py | 5 +- scripts/mo/utils.py | 44 +++++++++++++ 6 files changed, 154 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index af85489..7189c80 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ database.sqlite dev/ tmp/ service-account-file.json -export/ \ No newline at end of file +export/ +hash_cache.json \ No newline at end of file diff --git a/scripts/mo/dl/download_manager.py b/scripts/mo/dl/download_manager.py index c1b48cc..b606071 100644 --- a/scripts/mo/dl/download_manager.py +++ b/scripts/mo/dl/download_manager.py @@ -7,9 +7,9 @@ from urllib.parse import urlparse from scripts.mo.dl.downloader import Downloader from scripts.mo.dl.gdrive_downloader import GDriveDownloader from scripts.mo.dl.http_downloader import HttpDownloader -from scripts.mo.environment import env, logger, calculate_md5, calculate_sha256 +from scripts.mo.environment import env, logger, calculate_md5 from scripts.mo.models import Record -from scripts.mo.utils import resize_preview_image, get_model_filename_without_extension +from scripts.mo.utils import resize_preview_image, get_model_filename_without_extension, calculate_sha256 GENERAL_STATUS_IN_PROGRESS = 'In Progress' GENERAL_STATUS_CANCELLED = 'Cancelled' diff --git a/scripts/mo/environment.py b/scripts/mo/environment.py index 7c9ae16..146fadb 100644 --- a/scripts/mo/environment.py +++ b/scripts/mo/environment.py @@ -134,14 +134,6 @@ def calculate_md5(file_path): return md5.hexdigest() -def calculate_sha256(file_path): - with open(file_path, 'rb') as file: - sha256_hash = hashlib.sha256() - while chunk := file.read(4096): - sha256_hash.update(chunk) - return sha256_hash.hexdigest() - - def find_preview_file(record: Record): preview_file_path = None diff --git a/scripts/mo/ui_debug.py b/scripts/mo/ui_debug.py index 71babb8..56267e1 100644 --- a/scripts/mo/ui_debug.py +++ b/scripts/mo/ui_debug.py @@ -5,7 +5,8 @@ import gradio as gr from scripts.mo.environment import env from scripts.mo.models import ModelType -from scripts.mo.utils import get_model_files_in_dir, find_preview_file, link_preview +from scripts.mo.utils import get_model_files_in_dir, find_preview_file, link_preview, read_hash_cache, \ + calculate_file_temp_hash, write_hash_cache, calculate_sha256 def _ui_state_report(): @@ -59,6 +60,103 @@ def _ui_local_files(): outputs=local_files_json) +def _on_read_hash_click(): + cache = read_hash_cache() + return [ + gr.JSON.update(value=json.dumps(cache)), + gr.Button.update(visible=False) + ] + + +def _on_calculate_hash_click(): + result = [] + + def calc_in_dir(model_type) -> list: + dir_path = env.get_model_path(model_type) + local = [] + files = get_model_files_in_dir(dir_path) + for file in files: + rec = { + 'path': file, + 'temp_hash': calculate_file_temp_hash(file), + 'sha256': calculate_sha256(file) + } + local.append(rec) + return local + + result.extend(calc_in_dir(ModelType.CHECKPOINT)) + result.extend(calc_in_dir(ModelType.VAE)) + result.extend(calc_in_dir(ModelType.LORA)) + result.extend(calc_in_dir(ModelType.HYPER_NETWORK)) + result.extend(calc_in_dir(ModelType.EMBEDDING)) + result.extend(calc_in_dir(ModelType.LYCORIS)) + + return [ + gr.JSON.update(value=json.dumps(result)), + gr.Button.update(visible=True) + ] + + +def _on_compare_hash_click(): + result = [] + + cache = read_hash_cache() + + def find_in_cache(file_path, temp_hash): + for entry in cache: + if entry.get('path') == file_path and entry.get('temp_hash') == temp_hash and \ + entry.get('sha256') is not None: + return entry['sha256'] + + def search_in_dir(model_type) -> list: + dir_path = env.get_model_path(model_type) + local = [] + files = get_model_files_in_dir(dir_path) + for file in files: + temp_hash = calculate_file_temp_hash(file) + + rec = { + 'path': file, + 'temp_hash': temp_hash, + 'sha256': find_in_cache(file, temp_hash) + } + + local.append(rec) + return local + + result.extend(search_in_dir(ModelType.CHECKPOINT)) + result.extend(search_in_dir(ModelType.VAE)) + result.extend(search_in_dir(ModelType.LORA)) + result.extend(search_in_dir(ModelType.HYPER_NETWORK)) + result.extend(search_in_dir(ModelType.EMBEDDING)) + result.extend(search_in_dir(ModelType.LYCORIS)) + + return [ + gr.JSON.update(value=json.dumps(result)), + gr.Button.update(visible=False) + ] + + +def _on_hash_cache_save_click(json_data): + write_hash_cache(json_data) + + +def _ui_hash_cache(): + with gr.Column(): + read_button = gr.Button('Read hash cache') + compare_hash_button = gr.Button('Compare hash with cache') + calculate_button = gr.Button('Calculate hashes') + save_hash_button = gr.Button('Save hash', visible=False) + + hash_cache_json = gr.JSON(label='Local files') + + read_button.click(fn=_on_read_hash_click, outputs=[hash_cache_json, save_hash_button]) + calculate_button.click(fn=_on_calculate_hash_click, outputs=[hash_cache_json, save_hash_button]) + compare_hash_button.click(fn=_on_compare_hash_click, outputs=[hash_cache_json, save_hash_button]) + + save_hash_button.click(fn=_on_hash_cache_save_click, inputs=hash_cache_json) + + def debug_ui_block(): with gr.Column(): with gr.Row(): @@ -74,4 +172,7 @@ def debug_ui_block(): with gr.Tab('Local files'): _ui_local_files() + with gr.Tab('Hash cache'): + _ui_hash_cache() + back_button.click(fn=None, _js='navigateBack') diff --git a/scripts/mo/ui_main.py b/scripts/mo/ui_main.py index 3d691e8..d3bf3c1 100644 --- a/scripts/mo/ui_main.py +++ b/scripts/mo/ui_main.py @@ -98,7 +98,10 @@ def main_ui_block(): import_export_ui_block() with gr.Column(visible=False) as debug_block: - debug_ui_block() + if env.is_debug_mode_enabled(): + debug_ui_block() + else: + gr.Row() _json_nav_box.change(on_json_box_change, inputs=[_json_nav_box, home_refresh_box], diff --git a/scripts/mo/utils.py b/scripts/mo/utils.py index aa23bc7..fbb3761 100644 --- a/scripts/mo/utils.py +++ b/scripts/mo/utils.py @@ -1,3 +1,6 @@ +import datetime +import hashlib +import json import os import re import urllib.parse @@ -6,6 +9,8 @@ from PIL import Image from scripts.mo.environment import env +_HASH_CACHE_FILENAME = 'hash_cache.json' + model_extensions = ['.bin', '.ckpt', '.safetensors', '.pt'] preview_extensions = [".png", ".jpg", ".webp"] @@ -93,3 +98,42 @@ def resize_preview_image(input_file, output_file): canvas.paste(resized_image, (x_position, y_position)) canvas.save(output_file, "JPEG") + + +def calculate_file_temp_hash(file_path): + creation_timestamp = os.path.getctime(file_path) + creation_datetime = datetime.datetime.fromtimestamp(creation_timestamp) + + modification_timestamp = os.path.getmtime(file_path) + modification_datetime = datetime.datetime.fromtimestamp(modification_timestamp) + + input_string = f'{creation_datetime} {modification_datetime}' + + md5_hash = hashlib.md5() + md5_hash.update(input_string.encode('utf-8')) + return md5_hash.hexdigest() + + +def calculate_sha256(file_path): + with open(file_path, 'rb') as file: + sha256_hash = hashlib.sha256() + while chunk := file.read(4096): + sha256_hash.update(chunk) + return sha256_hash.hexdigest() + + +def get_hash_cache_file(): + return os.path.join(env.script_dir, _HASH_CACHE_FILENAME) + + +def read_hash_cache() -> list: + file_path = get_hash_cache_file() + if os.path.isfile(file_path): + with open(file_path) as file: + return json.load(file) + return [] + + +def write_hash_cache(hash_cache: list): + with open(get_hash_cache_file(), 'w') as file: + json.dump(hash_cache, file, indent=4)