less ghosting + stable colors

pull/26/head
Alexey Borsky 2023-04-21 23:00:43 +03:00
parent f46b73b6f4
commit 3c69efe7a8
3 changed files with 41 additions and 18 deletions

View File

@ -78,8 +78,8 @@ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_sty
prev_flow = prev_flow / np.array([fl_h,fl_w])
# remove low value noise (@alexfredo suggestion)
next_flow[np.abs(next_flow) < 0.01] = 0
prev_flow[np.abs(prev_flow) < 0.01] = 0
next_flow[np.abs(next_flow) < 0.05] = 0
prev_flow[np.abs(prev_flow) < 0.05] = 0
# resize flow
next_flow = cv2.resize(next_flow, (w, h))
@ -87,17 +87,24 @@ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_sty
prev_flow = cv2.resize(prev_flow, (w, h))
prev_flow = (prev_flow * np.array([h,w])).astype(np.float32)
# This is not correct. The flow map should be applied to the next frame to get the previous frame
# flow_map = -next_flow.copy()
# Generate sampling grids
grid_y, grid_x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
flow_grid = torch.stack((grid_x, grid_y), dim=0).float()
flow_grid += torch.from_numpy(prev_flow).permute(2, 0, 1)
flow_grid = flow_grid.unsqueeze(0)
flow_grid[:, 0, :, :] = 2 * flow_grid[:, 0, :, :] / (w - 1) - 1
flow_grid[:, 1, :, :] = 2 * flow_grid[:, 1, :, :] / (h - 1) - 1
flow_grid = flow_grid.permute(0, 2, 3, 1)
# Here is the correct version
flow_map = prev_flow.copy()
prev_frame_torch = torch.from_numpy(prev_frame).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
prev_frame_styled_torch = torch.from_numpy(prev_frame_styled).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
flow_map[:,:,0] += np.arange(w)
flow_map[:,:,1] += np.arange(h)[:,np.newaxis]
warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
#warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
#warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
# compute occlusion mask
fb_flow = next_flow + prev_flow

View File

@ -101,6 +101,8 @@ python3 txt2vid.py
## Last version changes: v0.6
* Added a separate flag '-rb' for the background removal process at the flow computation stage in the compute_flow.py script.
* Added flow normalization before rescaling it, so the magnitude of the flow is computed correctly at different resolutions.
* Less ghosting and color change in vid2vid mode
-->
<!--
## Potential improvements

View File

@ -8,13 +8,16 @@ import os
import h5py
from flow_utils import compute_diff_map
import skimage
import datetime
INPUT_VIDEO = "input.mp4"
FLOW_MAPS = "flow.h5"
OUTPUT_VIDEO = "result.mp4"
OUTPUT_VIDEO = f'result_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}.mp4'
PROMPT = "marble statue"
N_PROMPT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
w,h = 1152, 640 # Width and height of the processed image. Note that actual image processed would be a W x H resolution.
PROMPT = "RAW photo, Jessica Chastain, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
N_PROMPT = "person, skin, (deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
w,h = 1024, 576 # Width and height of the processed image. Note that the actual image processed would be at W x H resolution.
START_FROM_IND = 0 # index of the frame to start processing from. Might be helpful with long animations where you need to restart the script multiple times
SAVE_FRAMES = True # saves individual frames into 'out' folder if set True. Again might be helpful with long animations
@ -140,6 +143,7 @@ output_video = cv2.VideoWriter(OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fp
prev_frame = None
prev_frame_styled = None
#init_image = None
# reading flow maps in a stream manner
with h5py.File(FLOW_MAPS, 'r') as f:
@ -172,6 +176,8 @@ with h5py.File(FLOW_MAPS, 'r') as f:
alpha_img = out_image.copy()
out_image_ = out_image.copy()
warped_styled = out_image.copy()
#init_image = out_image.copy()
else:
# Resize the frame to proper resolution
frame = cv2.resize(cur_frame, (w, h))
@ -188,13 +194,18 @@ with h5py.File(FLOW_MAPS, 'r') as f:
alpha_mask = np.clip(alpha_mask + 0.05, 0.05, 0.95)
alpha_img = np.clip(alpha_mask * 255, 0, 255).astype(np.uint8)
# normalizing the colors
out_image = skimage.exposure.match_histograms(out_image, frame, multichannel=False, channel_axis=-1)
out_image = out_image.astype(float) * alpha_mask + warped_styled.astype(float) * (1 - alpha_mask)
out_image_ = (out_image * 0.65 + warped_styled * 0.35)
#out_image = skimage.exposure.match_histograms(out_image, prev_frame, multichannel=True, channel_axis=-1)
#out_image_ = (out_image * 0.65 + warped_styled * 0.35)
# Bluring issue fix via additional processing
out_image_fixed = controlnetRequest(to_b64(out_image_), to_b64(frame), BLUR_FIX_STRENGTH, w, h, mask = None, seed=8888).sendRequest()
out_image_fixed = controlnetRequest(to_b64(out_image), to_b64(frame), BLUR_FIX_STRENGTH, w, h, mask = None, seed=8888).sendRequest()
# Write the frame to the output video
frame_out = np.clip(out_image_fixed, 0, 255).astype(np.uint8)
@ -202,8 +213,11 @@ with h5py.File(FLOW_MAPS, 'r') as f:
if VISUALIZE:
# show the last written frame - useful to catch any issue with the process
img_show = cv2.hconcat([frame_out, alpha_img])
cv2.imshow('Out img', img_show)
warped_styled = np.clip(warped_styled, 0, 255).astype(np.uint8)
img_show_top = cv2.hconcat([frame, warped_styled])
img_show_bot = cv2.hconcat([frame_out, alpha_img])
cv2.imshow('Out img', cv2.vconcat([img_show_top, img_show_bot]))
cv2.setWindowTitle("Out img", str(ind+1))
if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing