mirror of https://github.com/vladmandic/automatic
61 lines
1.8 KiB
Python
61 lines
1.8 KiB
Python
import sys
|
|
from installer import install, log
|
|
|
|
|
|
# Lazily populated module caches: each stays None until the matching
# load_* function in this file imports its library successfully.
bnb = None  # set to the imported `bitsandbytes` module by load_bnb()
quanto = None  # set to the imported `optimum.quanto` module by load_quanto()
|
|
|
|
|
|
def load_bnb(msg='', silent=False):
    """Import `bitsandbytes` on first use and cache the module in the global `bnb`.

    Before importing, `install('bitsandbytes', quiet=True)` is called; that
    call sits outside the guarded import, so an exception from it is not
    affected by `silent`.

    Args:
        msg: Prefix for the error log entry written when the import fails.
            Nothing is logged when it is empty or None.
        silent: When true, an import failure is swallowed and None is
            returned; otherwise the original exception is re-raised.

    Returns:
        The imported `bitsandbytes` module, or None when the import failed
        and `silent` is true.

    Raises:
        Exception: whatever the import raised, when `silent` is false.
    """
    global bnb # pylint: disable=global-statement
    if bnb is not None:
        return bnb

    # The caller names only feed the debug log line. sys._getframe raises
    # ValueError when the stack is shallower than the requested depth (for
    # example when called straight from module level); that must not abort
    # the load. A plain loop is used so the frame depth is not shifted.
    callers = []
    for depth in (2, 1):
        try:
            callers.append(sys._getframe(depth).f_code.co_name) # pylint: disable=protected-access
        except ValueError:
            callers.append('unknown')
    fn = ':'.join(callers)
    log.debug(f'Quantization: type=bitsandbytes fn={fn}')

    install('bitsandbytes', quiet=True)
    try:
        import bitsandbytes
    except Exception as e:
        # Truthiness check instead of len(): a None msg must not raise a
        # TypeError here and hide the real import error.
        if msg:
            log.error(f"{msg} failed to import bitsandbytes: {e}")
        bnb = None
        if not silent:
            raise
        return None
    bnb = bitsandbytes
    return bnb
|
|
|
|
|
|
def load_quanto(msg='', silent=False):
    """Import `optimum.quanto` on first use and cache the module in the global `quanto`.

    Before importing, `install('optimum-quanto', quiet=True)` is called; that
    call sits outside the guarded import, so an exception from it is not
    affected by `silent`.

    Args:
        msg: Prefix for the error log entry written when the import fails.
            Nothing is logged when it is empty or None.
        silent: When true, an import failure is swallowed and None is
            returned; otherwise the original exception is re-raised.

    Returns:
        The imported `optimum.quanto` module, or None when the import failed
        and `silent` is true.

    Raises:
        Exception: whatever the import raised, when `silent` is false.
    """
    global quanto # pylint: disable=global-statement
    if quanto is not None:
        return quanto

    # The caller names only feed the debug log line. sys._getframe raises
    # ValueError when the stack is shallower than the requested depth (for
    # example when called straight from module level); that must not abort
    # the load. A plain loop is used so the frame depth is not shifted.
    callers = []
    for depth in (2, 1):
        try:
            callers.append(sys._getframe(depth).f_code.co_name) # pylint: disable=protected-access
        except ValueError:
            callers.append('unknown')
    fn = ':'.join(callers)
    log.debug(f'Quantization: type=quanto fn={fn}')

    install('optimum-quanto', quiet=True)
    try:
        from optimum import quanto as optimum_quanto # pylint: disable=no-name-in-module
    except Exception as e:
        # Truthiness check instead of len(): a None msg must not raise a
        # TypeError here and hide the real import error.
        if msg:
            log.error(f"{msg} failed to import optimum.quanto: {e}")
        quanto = None
        if not silent:
            raise
        return None
    quanto = optimum_quanto
    return quanto
|
|
|
|
|
|
def get_quant(name):
    """Return the quantization tag found in `name`, or 'none'.

    Matching is case-insensitive. The substring markers are tried in a
    fixed order and the first hit wins; a '.gguf' suffix is only
    considered when no substring marker matched.

    Args:
        name: String to inspect (presumably a model or file name; confirm
            against callers).

    Returns:
        One of 'qint8', 'qint4', 'fp8', 'fp4', 'nf4', 'gguf' or 'none'.
    """
    lowered = name.lower()
    # Order matters: it fixes which tag wins when several markers appear.
    for tag in ('qint8', 'qint4', 'fp8', 'fp4', 'nf4'):
        if tag in lowered:
            return tag
    # Compare the suffix on the lowered name so '.GGUF' is recognised just
    # like the case-insensitive substring markers above.
    if lowered.endswith('.gguf'):
        return 'gguf'
    return 'none'
|