PR #214 Remove slugify

pull/292/head
Physton 2023-09-10 23:33:13 +08:00
parent 7b40c85fe8
commit 3696babae4
16 changed files with 86 additions and 704 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -21,7 +21,6 @@ from scripts.physton_prompt.get_lang import get_lang
from scripts.physton_prompt.get_version import get_git_commit_version, get_git_remote_versions, get_latest_version
from scripts.physton_prompt.mbart50 import initialize as mbart50_initialize, translate as mbart50_translate
from scripts.physton_prompt.get_group_tags import get_group_tags
from scripts.physton_prompt.slugify import slugify
try:
from modules.shared import cmd_opts
@ -374,13 +373,6 @@ def on_app_started(_: gr.Blocks, app: FastAPI):
async def _get_group_tags(lang: str):
return {"tags": get_group_tags(lang)}
@app.post("/physton_prompt/slugify")
async def _slugify(request: Request):
data = await request.json()
if 'keywords' not in data:
return {"success": False, "message": get_lang('is_required', {'0': 'keywords'})}
return {"success": True, 'result': slugify(data['keywords'])}
try:
translate_api = st.get('translateApi')
if translate_api == 'mbart50':

View File

@@ -1,123 +0,0 @@
import os
import sys
import threading
import time
# Directory containing this module; used to resolve sibling resource paths.
current_dir = os.path.dirname(os.path.abspath(__file__))
# Flat-file slug cache, one "text====slug" entry per line.
cache_file = os.path.normpath(os.path.join(current_dir, '../../storage/slugify.cache'))
def read_caches():
    """Load the slug cache from ``cache_file``.

    The cache file holds one ``text====slug`` entry per line.  Returns a
    dict mapping source text to its cached slug; returns an empty dict when
    the file is missing, unreadable or malformed.
    """
    try:
        # Create an empty cache file on first use so later appends succeed.
        if not os.path.exists(cache_file):
            with open(cache_file, 'w', encoding='utf8'):
                pass
        result = {}
        with open(cache_file, 'r', encoding='utf8') as f:
            for line in f:
                if not line.strip():
                    continue
                # maxsplit=1: a slug that itself contains '====' must not
                # make the unpack raise (the old 2-way unpack did).
                parts = line.split('====', 1)
                if len(parts) != 2:
                    continue
                key = parts[0].strip()
                value = parts[1].strip()
                if key:
                    result[key] = value
        return result
    except Exception:
        # Best-effort cache: fall back to an empty mapping.  The original
        # returned a list here, which broke callers doing ``cache[keyword]``.
        return {}
def save_cache(results):
    """Append slug results to the cache file.

    ``results`` is a list of dicts with ``text`` and ``result`` keys (the
    shape produced by the JS ``slugifyMulti`` call).  Failures are silently
    ignored: the cache is a best-effort optimisation, never a requirement.
    """
    try:
        lines = [f'{item["text"]}===={item["result"]}\n' for item in results]
        # Mode 'a' creates the file if missing, so no existence pre-check is
        # needed.  utf8 matches read_caches(); the original append used the
        # platform default encoding and could corrupt non-ASCII entries.
        with open(cache_file, 'a', encoding='utf8') as file:
            file.write(''.join(lines))
    except Exception:
        pass
# Serialises access to __slugify.  The previous implementation coordinated
# via a check-then-set boolean flag plus a sleep loop, which was racy: two
# threads could both observe the flag as False and enter concurrently.
_slugify_lock = threading.Lock()


def slugify(keywords):
    """Thread-safe entry point: slugify a list of keyword strings.

    Delegates to ``__slugify`` while holding a module-level lock so only one
    batch is processed at a time.  Returns a dict mapping each keyword to
    its slug (see ``__slugify``).
    """
    with _slugify_lock:
        return __slugify(keywords)
def __slugify(keywords):
    """Resolve slugs for a list of keyword strings.

    Cached entries are served from the flat-file cache (read_caches); the
    remainder are computed by calling the bundled JS ``slugifyMulti`` via
    execjs, in 10k-keyword chunks, one thread per chunk.  Returns a dict
    mapping keyword -> slug; new results are appended to the cache.
    """
    global current_dir
    if not isinstance(keywords, list):
        # NOTE(review): returns a list here but a dict everywhere else;
        # callers only iterate/index, so [] behaves as "no results".
        return []
    if len(keywords) <= 0:
        return []
    # Remove duplicate keywords (result order is irrelevant).
    keywords = list(set(keywords))
    results = {}
    cache = read_caches()
    keywords2 = []
    # Serve what we can from the cache; collect the rest for computation.
    for keyword in keywords:
        if keyword in cache:
            results[keyword] = cache[keyword]
            continue
        else:
            keywords2.append(keyword)
    keywords = keywords2
    try:
        # Imported lazily so the module loads even when execjs is absent.
        import execjs
        import threading
        code_file = os.path.normpath(os.path.join(current_dir, '../../slugify/dist/slugify.js'))
        if not os.path.exists(code_file):
            # No JS bundle: return whatever the cache provided.
            return results
        with open(code_file, 'r', encoding='utf8') as f:
            code = f.read()
        ctx = execjs.compile(code)
        threads_results = []
        def process_keywords(keywords):
            # Runs in a worker thread; collects slugs for one chunk.
            try:
                result = ctx.call('slugifyMulti', keywords)
                threads_results.extend(result)
            except Exception as e:
                print(f'[sd-webui-prompt-all-in-one] slugify error: {e}')
        def process_thread(keywords):
            # Start a worker for one chunk and return the Thread handle.
            thread = threading.Thread(target=process_keywords, args=(keywords,))
            thread.start()
            return thread
        threads = []
        # One thread per 10k-keyword chunk.
        for i in range(0, len(keywords), 10000):
            chunk = keywords[i:i+10000]
            thread = process_thread(chunk)
            threads.append(thread)
        for thread in threads:
            thread.join()
        # Merge computed slugs over the cached ones and persist them.
        for item in threads_results:
            results[item['text']] = item['result']
        save_cache(threads_results)
        return results
    except Exception as e:
        print(f'[sd-webui-prompt-all-in-one] slugify error: {e}')
        return {}

3
slugify/.gitignore vendored
View File

@ -1,3 +0,0 @@
node_modules
package-lock.json
.DS_Store

File diff suppressed because one or more lines are too long

View File

@ -1,144 +0,0 @@
/*!
* Check to see if the MemoizeMap has recorded a result of the two operands
*
* @param {Mixed} leftHandOperand
* @param {Mixed} rightHandOperand
* @param {MemoizeMap} memoizeMap
* @returns {Boolean|null} result
*/
/*!
* Compare two Regular Expressions for equality.
*
* @param {RegExp} leftHandOperand
* @param {RegExp} rightHandOperand
* @return {Boolean} result
*/
/*!
* Compare two Sets/Maps for equality. Faster than other equality functions.
*
* @param {Set} leftHandOperand
* @param {Set} rightHandOperand
* @param {Object} [options] (Optional)
* @return {Boolean} result
*/
/*!
* Determine if the given object has an @@iterator function.
*
* @param {Object} target
* @return {Boolean} `true` if the object has an @@iterator function.
*/
/*!
* Determines if two objects have matching values, given a set of keys. Defers to deepEqual for the equality check of
* each key. If any value of the given key is not equal, the function will return false (early).
*
* @param {Mixed} leftHandOperand
* @param {Mixed} rightHandOperand
* @param {Array} keys An array of keys to compare the values of leftHandOperand and rightHandOperand against
* @param {Object} [options] (Optional)
* @return {Boolean} result
*/
/*!
* Gets all entries from a Generator. This will consume the generator - which could have side effects.
*
* @param {Generator} target
* @returns {Array} an array of entries from the Generator.
*/
/*!
* Gets all iterator entries from the given Object. If the Object has no @@iterator function, returns an empty array.
* This will consume the iterator - which could have side effects depending on the @@iterator implementation.
*
* @param {Object} target
* @returns {Array} an array of entries from the @@iterator function
*/
/*!
* Gets all own and inherited enumerable keys from a target.
*
* @param {Object} target
* @returns {Array} an array of own and inherited enumerable keys from the target.
*/
/*!
* Primary Export
*/
/*!
* Recursively check the equality of two Objects. Once basic sameness has been established it will defer to `deepEqual`
* for each enumerable key in the object.
*
* @param {Mixed} leftHandOperand
* @param {Mixed} rightHandOperand
* @param {Object} [options] (Optional)
* @return {Boolean} result
*/
/*!
* Returns true if the argument is a primitive.
*
* This intentionally returns true for all objects that can be compared by reference,
* including functions and symbols.
*
* @param {Mixed} value
* @return {Boolean} result
*/
/*!
* Set the result of the equality into the MemoizeMap
*
* @param {Mixed} leftHandOperand
* @param {Mixed} rightHandOperand
* @param {MemoizeMap} memoizeMap
* @param {Boolean} result
*/
/*!
* Simple equality for flat iterable objects such as Arrays, TypedArrays or Node.js buffers.
*
* @param {Iterable} leftHandOperand
* @param {Iterable} rightHandOperand
* @param {Object} [options] (Optional)
* @return {Boolean} result
*/
/*!
* Simple equality for generator objects such as those returned by generator functions.
*
* @param {Iterable} leftHandOperand
* @param {Iterable} rightHandOperand
* @param {Object} [options] (Optional)
* @return {Boolean} result
*/
/*!
* The main logic of the `deepEqual` function.
*
* @param {Mixed} leftHandOperand
* @param {Mixed} rightHandOperand
* @param {Object} [options] (optional) Additional options
* @param {Array} [options.comparator] (optional) Override default algorithm, determining custom equality.
* @param {Array} [options.memoize] (optional) Provide a custom memoization object which will cache the results of
complex objects for a speed boost. By passing `false` you can disable memoization, but this will cause circular
references to blow the stack.
* @return {Boolean} equal match
*/
/*!
* deep-eql
* Copyright(c) 2013 Jake Luer <jake@alogicalparadox.com>
* MIT Licensed
*/
/**
* @license
* Lodash <https://lodash.com/>
* Copyright OpenJS Foundation and other contributors <https://openjsf.org/>
* Released under MIT license <https://lodash.com/license>
* Based on Underscore.js 1.8.3 <http://underscorejs.org/LICENSE>
* Copyright Jeremy Ashkenas, DocumentCloud and Investigative Reporters & Editors
*/

View File

@ -1,15 +0,0 @@
import {slugify} from '@lazy-cjk/zh-slugify'
/**
 * Slugify a batch of texts.
 * @param {string[]} texts
 * @returns {{text: string, index: number, result: string}[]} one entry per
 *   input text, preserving input order and index.
 */
function slugifyMulti(texts) {
    return texts.map((text, index) => ({
        text,
        index,
        result: slugify(text, true),
    }))
}

export default slugifyMulti

View File

@ -1,18 +0,0 @@
{
"name": "slugify",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"build": "webpack --mode=production"
},
"author": "",
"license": "MIT",
"dependencies": {
"@lazy-cjk/zh-slugify": "^1.0.86"
},
"devDependencies": {
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4"
}
}

View File

@ -1,65 +0,0 @@
import execjs
import execjs.runtime_names
import os
import sys
import csv
import yaml
import time
import threading
# Load the webpack bundle that exposes slugifyMulti().
code_file = os.path.abspath('../dist/slugify.js')
with open(code_file, 'r', encoding='utf8') as f:
    code = f.read()

# Gather benchmark inputs: CSV tag translations plus grouped tags.
keywords = []
csv_file = os.path.abspath('../../tags/danbooru-10w-zh_cn.csv')
if os.path.exists(csv_file):
    # Explicit utf8 (and newline='' per the csv module docs): the data is
    # Chinese text and the platform default encoding may not decode it.
    with open(csv_file, 'r', encoding='utf8', newline='') as file:
        reader = csv.reader(file)
        for row in reader:
            if len(row) >= 2:
                keywords.append(row[1])

group_tags_file = os.path.abspath('../../group_tags/zh_CN.yaml')
if os.path.exists(group_tags_file):
    with open(group_tags_file, 'r', encoding='utf8') as file:
        data = yaml.safe_load(file)
        for item in data:
            for group in item['groups']:
                tags = group.get('tags', {})
                for key in tags:
                    if not tags[key]:
                        continue
                    keywords.append(tags[key])

results = []
ctx = execjs.compile(code)

def process_keywords(keywords):
    # Slugify one chunk in the JS context and collect the results.
    result = ctx.call('slugifyMulti', keywords)
    results.extend(result)

def process_thread(keywords):
    # Start a worker thread for one chunk and return its handle.
    thread = threading.Thread(target=process_keywords, args=(keywords,))
    thread.start()
    return thread

# Benchmark: slugify everything in 10k-keyword chunks, one thread per chunk.
start_time = time.time()
print(f'Total count: {len(keywords)}')
print(f'Start time: {start_time}')
threads = []
for i in range(0, len(keywords), 10000):
    chunk = keywords[i:i + 10000]
    threads.append(process_thread(chunk))
print(f'Threads num: {len(threads)}')
for thread in threads:
    thread.join()
end_time = time.time()
print(f'End time: {end_time}')
time_diff = end_time - start_time
print(f'{time_diff} s')

View File

@ -1,14 +0,0 @@
const path = require('path');

// Webpack build for the slugify bundle (dist/slugify.js).
module.exports = {
    entry: './index.js',
    output: {
        filename: 'slugify.js',
        path: path.resolve(__dirname, 'dist'),
        library: {
            // Expose the entry's default export as a plain global variable
            // named `slugifyMulti`, so it is callable without a module loader.
            name: 'slugifyMulti',
            type: 'var',
            export: 'default',
        },
    }
};

View File

@ -9,7 +9,6 @@
"preview": "vite preview"
},
"dependencies": {
"@lazy-cjk/zh-slugify": "^1.0.86",
"autosize-input": "^1.0.2",
"axios": "^1.4.0",
"js-yaml": "^4.1.0",

View File

@ -158,7 +158,6 @@ import globals from "../globals";
import jsYaml from "js-yaml";
import {ref} from "vue";
import Hotkey from "@/components/hotkey.vue";
import { slugify } from '@lazy-cjk/zh-slugify';
import ExtraNetworksPopup from "@/components/extraNetworksPopup.vue";
export default {
@ -800,26 +799,6 @@ export default {
},
_handleGroupTags() {
let data = {toEn: new Map(), toLocal: new Map()}
let slugifyQueue = []
let handleSlugifyQueue = () => {
if (!['zh_CN', 'zh_HK', 'zh_TW'].includes(this.languageCode)) return
if (slugifyQueue.length > 0) {
let keywords = []
slugifyQueue.forEach((item) => {
keywords.push(item.local)
})
this.gradioAPI.slugify(keywords).then(res => {
if (!res.result) return
slugifyQueue.forEach((item) => {
if (res.result[item.local]) {
!data.toEn.has(res.result[item.local]) && data.toEn.set(res.result[item.local], item.en)
}
})
this.groupTagsTranslateCache = data
console.log('Slugify complete: _handleGroupTags')
})
}
}
let setData = (en, local) => {
const texts = [
en,
@ -836,9 +815,6 @@ export default {
data.toLocal.set(t, [local])
}
})
// const key = slugify(local, true)
// !data.toEn.has(key) && data.toEn.set(key, en)
slugifyQueue.push({en, local})
data.toEn.set(local, en)
// console.log('setData:groupTags', local, key, en)
}
@ -862,8 +838,6 @@ export default {
})
})
handleSlugifyQueue()
this.groupTagsTranslateCache = data
},
updateTippyState() {

View File

@ -1,7 +1,6 @@
import common from "@/utils/common"
import Papa from 'papaparse'
import globals from "../../globals";
import { slugify } from '@lazy-cjk/zh-slugify';
export default {
props: {
@ -94,27 +93,6 @@ export default {
window.tagCompleteFileLoading[tagCompleteFile] = true
let data = {toEn: new Map(), toLocal: new Map()}
let slugifyQueue = []
let handleSlugifyQueue = () => {
if (!['zh_CN', 'zh_HK', 'zh_TW'].includes(this.languageCode)) return
if (slugifyQueue.length > 0) {
let keywords = []
slugifyQueue.forEach((item) => {
keywords.push(item.local)
})
this.gradioAPI.slugify(keywords).then(res => {
if (!res.result) return
slugifyQueue.forEach((item) => {
if (res.result[item.local]) {
!data.toEn.has(res.result[item.local]) && data.toEn.set(res.result[item.local], item.en)
}
})
window.tagCompleteFileLoading[tagCompleteFile] = false
window.tagCompleteFileCache[tagCompleteFile] = data
console.log('Slugify complete: getCSV')
})
}
}
let setData = (en, local) => {
const texts = [
en,
@ -122,9 +100,6 @@ export default {
en.replace(/\-/g, ' '),
]
texts.forEach(t => data.toLocal.set(t, local))
slugifyQueue.push({en, local})
// const key = slugify(local, true)
// !data.toEn.has(key) && data.toEn.set(key, en)
data.toEn.set(local, en)
// console.log('setData:csv', local, key, en)
}
@ -134,7 +109,6 @@ export default {
translations.forEach((local, en) => {
setData(en, local)
})
handleSlugifyQueue()
window.tagCompleteFileLoading[tagCompleteFile] = false
window.tagCompleteFileCache[tagCompleteFile] = data
resolve(data)
@ -174,7 +148,6 @@ export default {
if (en === '' || local === '') return
setData(en, local)
})*/
handleSlugifyQueue()
window.tagCompleteFileLoading[tagCompleteFile] = false
window.tagCompleteFileCache[tagCompleteFile] = data
resolve(data)
@ -229,8 +202,6 @@ export default {
text = text.trim().toLowerCase()
if (toEn.has(text)) {
return toEn.get(text)
} else if ((text = slugify(text, true)) && toEn.has(text)) {
return toEn.get(text)
}
return ''
},

View File

@ -183,8 +183,4 @@ export default class GradioAPI {
async getGroupTags(lang) {
return (await this.api.get("/get_group_tags", {params: {lang}})).data.tags
}
async slugify(keywords) {
return (await this.api.post("/slugify", {keywords})).data
}
}

View File

@ -1,39 +0,0 @@
import os
import sys
import csv
import yaml
import time
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from scripts.physton_prompt.slugify import slugify
# Gather keywords from the tag CSV and the grouped-tags YAML.
keywords = []
csv_file = os.path.abspath('../tags/danbooru-10w-zh_cn.csv')
if os.path.exists(csv_file):
    # utf8 + newline='' (csv module convention): the file holds Chinese
    # text; don't rely on the platform default encoding.
    with open(csv_file, 'r', encoding='utf8', newline='') as file:
        reader = csv.reader(file)
        for row in reader:
            if len(row) >= 2:
                keywords.append(row[1])

group_tags_file = os.path.abspath('../group_tags/zh_CN.yaml')
if os.path.exists(group_tags_file):
    with open(group_tags_file, 'r', encoding='utf8') as file:
        data = yaml.safe_load(file)
        for item in data:
            for group in item['groups']:
                tags = group.get('tags', {})
                for key in tags:
                    if not tags[key]:
                        continue
                    keywords.append(tags[key])

# Time a full slugify() pass over every collected keyword.
start_time = time.time()
print(f'Total count: {len(keywords)}')
print(f'Start time: {start_time}')
result = slugify(keywords)
end_time = time.time()
print(f'End time: {end_time}')
time_diff = end_time - start_time
print(f'{time_diff} s')