37 lines
1.2 KiB
Python
37 lines
1.2 KiB
Python
import torch
|
|
|
|
|
|
def optim_to(optim:torch.optim.Optimizer, device="cpu"):
|
|
def inplace_move(obj: torch.Tensor, target):
|
|
if hasattr(obj, 'data'):
|
|
obj.data = obj.data.to(target)
|
|
if hasattr(obj, '_grad') and obj._grad is not None:
|
|
obj._grad.data = obj._grad.data.to(target)
|
|
if isinstance(optim, torch.optim.Optimizer):
|
|
for param in optim.state.values():
|
|
if isinstance(param, torch.Tensor):
|
|
inplace_move(param, device)
|
|
elif isinstance(param, dict):
|
|
for subparams in param.values():
|
|
inplace_move(subparams, device)
|
|
torch.cuda.empty_cache()
|
|
|
|
|
|
def parse_dropout_structure(layer_structure, use_dropout, last_layer_dropout):
|
|
if layer_structure is None:
|
|
layer_structure = [1, 2, 1]
|
|
if not use_dropout:
|
|
return [0] * len(layer_structure)
|
|
dropout_values = [0]
|
|
dropout_values.extend([0.3] * (len(layer_structure) - 3))
|
|
if last_layer_dropout:
|
|
dropout_values.append(0.3)
|
|
else:
|
|
dropout_values.append(0)
|
|
dropout_values.append(0)
|
|
return dropout_values
|
|
|
|
|
|
def get_closest(val):
|
|
i, j = divmod(val,64)
|
|
return i*64 + (j!=0) * 64 |