91 lines
4.3 KiB
Python
91 lines
4.3 KiB
Python
# Copyright (C) 2023 Deforum LLC
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, version 3 of the License.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
# Contact the authors: https://deforum.github.io/
|
|
|
|
import os
|
|
import cv2
|
|
import torch
|
|
import numpy as np
|
|
from .general_utils import download_file_with_checksum
|
|
from midas.dpt_depth import DPTDepthModel
|
|
from midas.transforms import Resize, NormalizeImage, PrepareForNet
|
|
import torchvision.transforms as T
|
|
|
|
class MidasDepth:
    """Wrapper around a MiDaS ``DPTDepthModel`` for per-image depth prediction.

    Constructing an instance selects the checkpoint for the requested model
    type, fetches it through ``download_file_with_checksum``, loads it onto
    ``device`` and optionally converts it to half precision. ``predict`` then
    maps an image to a depth tensor with the image's height and width.
    """

    def __init__(self, models_path, device, half_precision=True, midas_model_type='Midas-3-Hybrid'):
        """Download (if needed) and load the selected MiDaS checkpoint.

        Args:
            models_path: Folder the checkpoint is downloaded to and loaded from.
            device: Target device; a ``torch.device`` or anything
                ``torch.device(...)`` accepts (e.g. ``"cuda:0"``).
            half_precision: When true, the loaded model is converted with
                ``.half()``.
            midas_model_type: ``'midas-3.1-beitlarge'`` (case-insensitive)
                selects the ``dpt_beit_large_512`` checkpoint; any other value
                selects the ``dpt_large`` checkpoint.
        """
        if midas_model_type.lower() == 'midas-3.1-beitlarge':
            self.midas_model_filename = 'dpt_beit_large_512.pt'
            self.midas_model_checksum = '66cbb00ea7bccd6e43d3fd277bd21002d8d8c2c5c487e5fcd1e1d70c691688a19122418b3ddfa94e62ab9f086957aa67bbec39afe2b41c742aaaf0699ee50b33'
            self.midas_model_url = 'https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt'
            self.resize_px = 512
            self.backbone = 'beitl16_512'
        else:
            self.midas_model_filename = 'dpt_large-midas-2f21e586.pt'
            self.midas_model_checksum = 'fcc4829e65d00eeed0a38e9001770676535d2e95c8a16965223aba094936e1316d569563552a852d471f310f83f597e8a238987a26a950d667815e08adaebc06'
            self.midas_model_url = 'https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt'
            self.resize_px = 384
            self.backbone = 'vitl16_384'

        # Stored as a torch.device so that ``self.device.type`` (used by
        # ``predict``) also works when the caller passed a plain string.
        self.device = self._normalize_device(device)

        self.normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        self.midas_transform = T.Compose([
            Resize(
                self.resize_px,
                self.resize_px,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="minimal",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            self.normalization,
            PrepareForNet(),
        ])

        download_file_with_checksum(
            url=self.midas_model_url,
            expected_checksum=self.midas_model_checksum,
            dest_folder=models_path,
            dest_filename=self.midas_model_filename,
        )

        self.load_midas_model(models_path, self.midas_model_filename)
        if half_precision:
            self.midas_model = self.midas_model.half()

    @staticmethod
    def _normalize_device(device):
        """Return ``device`` as a ``torch.device`` (strings/ordinals are converted)."""
        if isinstance(device, torch.device):
            return device
        return torch.device(device)

    def _move_model(self, device):
        """Move ``self.midas_model`` to ``device``; use channels_last on any CUDA device."""
        device = self._normalize_device(device)
        # Compare the device *type*: ``device == torch.device("cuda")`` is False
        # for indexed devices such as "cuda:0", which skipped channels_last.
        extra = {"memory_format": torch.channels_last} if device.type == "cuda" else {}
        self.midas_model = self.midas_model.to(device, **extra)

    def load_midas_model(self, models_path, midas_model_filename):
        """Build the ``DPTDepthModel`` from a checkpoint file and place it on ``self.device``.

        Args:
            models_path: Folder containing the checkpoint.
            midas_model_filename: Checkpoint file name inside ``models_path``.

        The model is stored in ``self.midas_model`` and switched to eval mode.
        """
        model_file = os.path.join(models_path, midas_model_filename)
        print(f"Loading MiDaS model from {midas_model_filename}...")
        self.midas_model = DPTDepthModel(
            path=model_file,
            backbone=self.backbone,
            non_negative=True,
        )
        self.midas_model.eval()
        self._move_model(self.device)

    def predict(self, prev_img_cv2, half_precision):
        """Predict depth for one image.

        Args:
            prev_img_cv2: Image array; it is scaled by 1/255 and its first two
                dimensions are used as the output size (assumes an H x W x C
                8-bit image - TODO confirm against callers).
            half_precision: When true and the device is cuda/mps, the network
                input is converted with ``.half()``.

        Returns:
            A tensor on ``self.device`` with shape ``prev_img_cv2.shape[:2]``.
        """
        img_midas = prev_img_cv2.astype(np.float32) / 255.0
        img_midas_input = self.midas_transform({"image": img_midas})["image"]
        sample = torch.from_numpy(img_midas_input).float().to(self.device).unsqueeze(0)

        if self.device.type in ("cuda", "mps"):
            sample = sample.to(memory_format=torch.channels_last)
            # NOTE(review): the half conversion is kept inside the cuda/mps
            # branch, as reconstructed from the original layout - confirm.
            if half_precision:
                sample = sample.half()

        with torch.no_grad():
            midas_depth = self.midas_model.forward(sample)
            # Resize the prediction back to the input image resolution.
            midas_depth = torch.nn.functional.interpolate(
                midas_depth.unsqueeze(1),
                size=img_midas.shape[:2],
                mode="bicubic",
                align_corners=False,
            )

        # Drop only the batch and channel axes. A bare ``squeeze()`` would also
        # collapse a real height/width of 1. The result stays on the device, so
        # no CPU/numpy round trip is needed.
        depth_tensor = midas_depth[0, 0]

        torch.cuda.empty_cache()

        return depth_tensor

    def to(self, device):
        """Move the model to ``device`` and remember it for later ``predict`` calls."""
        self.device = self._normalize_device(device)
        self._move_model(self.device)