add hash cache and cache calculation

feature/existing-files-listing
Alexander Sokol 2023-05-25 20:04:53 +03:00
parent 77cd631f02
commit d9eda89fac
6 changed files with 154 additions and 13 deletions

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ dev/
tmp/
service-account-file.json
export/
hash_cache.json

View File

@ -7,9 +7,9 @@ from urllib.parse import urlparse
from scripts.mo.dl.downloader import Downloader
from scripts.mo.dl.gdrive_downloader import GDriveDownloader
from scripts.mo.dl.http_downloader import HttpDownloader
from scripts.mo.environment import env, logger, calculate_md5, calculate_sha256
from scripts.mo.environment import env, logger, calculate_md5
from scripts.mo.models import Record
from scripts.mo.utils import resize_preview_image, get_model_filename_without_extension
from scripts.mo.utils import resize_preview_image, get_model_filename_without_extension, calculate_sha256
GENERAL_STATUS_IN_PROGRESS = 'In Progress'
GENERAL_STATUS_CANCELLED = 'Cancelled'

View File

@ -134,14 +134,6 @@ def calculate_md5(file_path):
return md5.hexdigest()
def calculate_sha256(file_path):
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The file is opened in binary mode and fed to the hasher in 4096-byte
    pieces, so the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for block in iter(lambda: stream.read(4096), b''):
            digest.update(block)
    return digest.hexdigest()
def find_preview_file(record: Record):
preview_file_path = None

View File

@ -5,7 +5,8 @@ import gradio as gr
from scripts.mo.environment import env
from scripts.mo.models import ModelType
from scripts.mo.utils import get_model_files_in_dir, find_preview_file, link_preview
from scripts.mo.utils import get_model_files_in_dir, find_preview_file, link_preview, read_hash_cache, \
calculate_file_temp_hash, write_hash_cache, calculate_sha256
def _ui_state_report():
@ -59,6 +60,103 @@ def _ui_local_files():
outputs=local_files_json)
def _on_read_hash_click():
    """Handle a click on the 'Read hash cache' button.

    Loads the persisted hash cache and returns two component updates: the
    cache serialized as JSON text for the JSON view, and a button update that
    hides its target.
    """
    cached_entries = read_hash_cache()
    json_update = gr.JSON.update(value=json.dumps(cached_entries))
    # Nothing new was computed here, so the update keeps the button hidden.
    button_update = gr.Button.update(visible=False)
    return [json_update, button_update]
def _on_calculate_hash_click():
    """Handle a click on the 'Calculate hashes' button.

    Walks the directory of every supported model type, and for each model file
    records its path, its timestamp-based temp hash and its full SHA-256.

    Returns two component updates: the records serialized as JSON text for
    the JSON view, and a button update that makes its target visible.
    """
    # Directories are scanned in this fixed order; the output keeps it.
    scanned_types = (
        ModelType.CHECKPOINT,
        ModelType.VAE,
        ModelType.LORA,
        ModelType.HYPER_NETWORK,
        ModelType.EMBEDDING,
        ModelType.LYCORIS,
    )

    def describe(model_file) -> dict:
        # One cache record per file; the SHA-256 reads the whole file.
        return {
            'path': model_file,
            'temp_hash': calculate_file_temp_hash(model_file),
            'sha256': calculate_sha256(model_file),
        }

    records = []
    for model_type in scanned_types:
        model_dir = env.get_model_path(model_type)
        for model_file in get_model_files_in_dir(model_dir):
            records.append(describe(model_file))

    return [
        gr.JSON.update(value=json.dumps(records)),
        gr.Button.update(visible=True),
    ]
def _on_compare_hash_click():
    """Handle a click on the 'Compare hash with cache' button.

    For every model file in the supported model directories, computes the
    cheap timestamp-based temp hash and looks up a cached SHA-256 recorded for
    the same path and temp hash. Files without a matching cache entry get
    ``'sha256': None``.

    Returns two component updates: the records serialized as JSON text for
    the JSON view, and a button update that hides its target.
    """
    # Index the cache once by (path, temp_hash) so each file is a single dict
    # lookup instead of a scan over the whole cache list.
    known_hashes = {}
    for entry in read_hash_cache():
        sha256 = entry.get('sha256')
        if sha256 is None:
            continue
        try:
            # setdefault keeps the first usable entry, matching a linear
            # first-match search over the cache.
            known_hashes.setdefault((entry.get('path'), entry.get('temp_hash')), sha256)
        except TypeError:
            # Unhashable path/temp_hash values (lists, dicts) can never equal
            # a file path or a hex digest, so such entries are skipped.
            continue

    def search_in_dir(model_type) -> list:
        # Build one record per model file found in the type's directory.
        dir_path = env.get_model_path(model_type)
        local = []
        for file in get_model_files_in_dir(dir_path):
            temp_hash = calculate_file_temp_hash(file)
            local.append({
                'path': file,
                'temp_hash': temp_hash,
                'sha256': known_hashes.get((file, temp_hash)),
            })
        return local

    result = []
    for model_type in (
        ModelType.CHECKPOINT,
        ModelType.VAE,
        ModelType.LORA,
        ModelType.HYPER_NETWORK,
        ModelType.EMBEDDING,
        ModelType.LYCORIS,
    ):
        result.extend(search_in_dir(model_type))

    return [
        gr.JSON.update(value=json.dumps(result)),
        gr.Button.update(visible=False),
    ]
def _on_hash_cache_save_click(json_data):
    """Handle a click on the 'Save hash' button.

    Persists ``json_data`` (the value supplied by the JSON component wired as
    this handler's input) as the new hash cache.
    """
    write_hash_cache(hash_cache=json_data)
def _ui_hash_cache():
    """Build the 'Hash cache' debug panel and wire up its buttons.

    The panel has three action buttons (read, compare, calculate), an
    initially hidden save button, and a JSON view. Each action handler
    returns one update for the JSON view and one for the save button.
    """
    with gr.Column():
        read_btn = gr.Button('Read hash cache')
        compare_btn = gr.Button('Compare hash with cache')
        calculate_btn = gr.Button('Calculate hashes')
        save_btn = gr.Button('Save hash', visible=False)

        json_view = gr.JSON(label='Local files')

    # The three action buttons share the same pair of output components.
    actions = (
        (read_btn, _on_read_hash_click),
        (calculate_btn, _on_calculate_hash_click),
        (compare_btn, _on_compare_hash_click),
    )
    for button, handler in actions:
        button.click(fn=handler, outputs=[json_view, save_btn])

    # Saving feeds the current content of the JSON view back to the handler.
    save_btn.click(fn=_on_hash_cache_save_click, inputs=json_view)
def debug_ui_block():
with gr.Column():
with gr.Row():
@ -74,4 +172,7 @@ def debug_ui_block():
with gr.Tab('Local files'):
_ui_local_files()
with gr.Tab('Hash cache'):
_ui_hash_cache()
back_button.click(fn=None, _js='navigateBack')

View File

@ -98,7 +98,10 @@ def main_ui_block():
import_export_ui_block()
with gr.Column(visible=False) as debug_block:
if env.is_debug_mode_enabled():
debug_ui_block()
else:
gr.Row()
_json_nav_box.change(on_json_box_change,
inputs=[_json_nav_box, home_refresh_box],

View File

@ -1,3 +1,6 @@
import datetime
import hashlib
import json
import os
import re
import urllib.parse
@ -6,6 +9,8 @@ from PIL import Image
from scripts.mo.environment import env
# File name of the JSON hash cache; joined with env.script_dir to build its path.
_HASH_CACHE_FILENAME = 'hash_cache.json'
# File extensions listed for model files — presumably used to filter directory scans; verify against callers.
model_extensions = ['.bin', '.ckpt', '.safetensors', '.pt']
# File extensions listed for preview images — presumably used when locating previews; verify against callers.
preview_extensions = [".png", ".jpg", ".webp"]
@ -93,3 +98,42 @@ def resize_preview_image(input_file, output_file):
canvas.paste(resized_image, (x_position, y_position))
canvas.save(output_file, "JPEG")
def calculate_file_temp_hash(file_path):
    """Return a cheap fingerprint of a file derived from its timestamps.

    The file's ctime and mtime are converted to local datetimes, rendered as
    text separated by a single space, and hashed with MD5. The file content
    is never read.

    NOTE: per the ``os.path.getctime`` documentation, ctime is the creation
    time on Windows but the last metadata change time on Unix.
    """
    stamps = [
        datetime.datetime.fromtimestamp(read_time(file_path))
        for read_time in (os.path.getctime, os.path.getmtime)
    ]
    fingerprint = ' '.join(str(stamp) for stamp in stamps)
    return hashlib.md5(fingerprint.encode('utf-8')).hexdigest()
def calculate_sha256(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The file is streamed through the hasher so that arbitrarily large files
    can be hashed without loading them into memory.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration. The digest does not
            depend on it; the 1 MiB default only reduces the number of read
            calls for large files.

    Returns:
        The digest as a lowercase hex string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) would end the loop immediately and silently yield the
        # digest of empty input; a negative size would read the whole file.
        raise ValueError('chunk_size must be a positive number of bytes')

    sha256_hash = hashlib.sha256()
    with open(file_path, 'rb') as file:
        while chunk := file.read(chunk_size):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()
def get_hash_cache_file():
    """Return the path of the hash cache file inside ``env.script_dir``."""
    cache_dir = env.script_dir
    return os.path.join(cache_dir, _HASH_CACHE_FILENAME)
def read_hash_cache() -> list:
    """Load the persisted hash cache.

    Returns:
        The list stored in the cache file, or an empty list when the file is
        missing, empty, not valid JSON, or does not contain a JSON array.
    """
    file_path = get_hash_cache_file()
    if not os.path.isfile(file_path):
        return []

    try:
        with open(file_path, encoding='utf-8') as file:
            cache = json.load(file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # The cache is disposable: an empty or damaged file is treated as a
        # cache miss rather than an error, since every entry can be rebuilt.
        return []

    # Callers iterate the result and call .get() on each entry, so anything
    # other than a list is unusable as a cache.
    return cache if isinstance(cache, list) else []
def write_hash_cache(hash_cache: list):
    """Persist ``hash_cache`` as indented JSON in the hash cache file.

    The data is first written to a sibling temporary file and then moved over
    the real cache file, so a failure during serialization or writing leaves
    the previous cache untouched instead of truncated.

    Args:
        hash_cache: JSON-serializable list of cache entries.

    Raises:
        TypeError: If ``hash_cache`` contains values ``json`` cannot serialize.
        OSError: If the cache file cannot be written or replaced.
    """
    target_path = get_hash_cache_file()
    temp_path = f'{target_path}.tmp'
    try:
        with open(temp_path, 'w', encoding='utf-8') as file:
            json.dump(hash_cache, file, indent=4)
        # Swaps the complete new file into place in one step.
        os.replace(temp_path, target_path)
    finally:
        # Only still present when the dump or the replace failed.
        if os.path.exists(temp_path):
            os.remove(temp_path)