less ghosting + stable colors
parent
f46b73b6f4
commit
3c69efe7a8
|
|
@ -78,8 +78,8 @@ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_sty
|
|||
prev_flow = prev_flow / np.array([fl_h,fl_w])
|
||||
|
||||
# remove low value noise (@alexfredo suggestion)
|
||||
next_flow[np.abs(next_flow) < 0.01] = 0
|
||||
prev_flow[np.abs(prev_flow) < 0.01] = 0
|
||||
next_flow[np.abs(next_flow) < 0.05] = 0
|
||||
prev_flow[np.abs(prev_flow) < 0.05] = 0
|
||||
|
||||
# resize flow
|
||||
next_flow = cv2.resize(next_flow, (w, h))
|
||||
|
|
@ -87,17 +87,24 @@ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_sty
|
|||
prev_flow = cv2.resize(prev_flow, (w, h))
|
||||
prev_flow = (prev_flow * np.array([h,w])).astype(np.float32)
|
||||
|
||||
# This is not correct. The flow map should be applied to the next frame to get previous frame
|
||||
# flow_map = -next_flow.copy()
|
||||
# Generate sampling grids
|
||||
grid_y, grid_x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
|
||||
flow_grid = torch.stack((grid_x, grid_y), dim=0).float()
|
||||
flow_grid += torch.from_numpy(prev_flow).permute(2, 0, 1)
|
||||
flow_grid = flow_grid.unsqueeze(0)
|
||||
flow_grid[:, 0, :, :] = 2 * flow_grid[:, 0, :, :] / (w - 1) - 1
|
||||
flow_grid[:, 1, :, :] = 2 * flow_grid[:, 1, :, :] / (h - 1) - 1
|
||||
flow_grid = flow_grid.permute(0, 2, 3, 1)
|
||||
|
||||
# Here is the correct version
|
||||
flow_map = prev_flow.copy()
|
||||
|
||||
prev_frame_torch = torch.from_numpy(prev_frame).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
|
||||
prev_frame_styled_torch = torch.from_numpy(prev_frame_styled).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
|
||||
|
||||
flow_map[:,:,0] += np.arange(w)
|
||||
flow_map[:,:,1] += np.arange(h)[:,np.newaxis]
|
||||
warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
|
||||
warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
|
||||
|
||||
warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
|
||||
warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
|
||||
#warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
|
||||
#warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
|
||||
|
||||
# compute occlusion mask
|
||||
fb_flow = next_flow + prev_flow
|
||||
|
|
|
|||
|
|
@ -101,6 +101,8 @@ python3 txt2vid.py
|
|||
## Last version changes: v0.6
|
||||
* Added separate flag '-rb' for background removal process at the flow computation stage in the compute_flow.py script.
|
||||
* Added flow normalization before rescaling it, so the magnitude of the flow computed correctly at the different resolution.
|
||||
* Less ghosting and color change in vid2vid mode
|
||||
-->
|
||||
|
||||
<!--
|
||||
## Potential improvements
|
||||
|
|
|
|||
30
vid2vid.py
30
vid2vid.py
|
|
@ -8,13 +8,16 @@ import os
|
|||
import h5py
|
||||
from flow_utils import compute_diff_map
|
||||
|
||||
import skimage
|
||||
import datetime
|
||||
|
||||
INPUT_VIDEO = "input.mp4"
|
||||
FLOW_MAPS = "flow.h5"
|
||||
OUTPUT_VIDEO = "result.mp4"
|
||||
OUTPUT_VIDEO = f'result_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}.mp4'
|
||||
|
||||
PROMPT = "marble statue"
|
||||
N_PROMPT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
|
||||
w,h = 1152, 640 # Width and height of the processed image. Note that actual image processed would be a W x H resolution.
|
||||
PROMPT = "RAW photo, Jessica Chastain, (high detailed skin:1.2), 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
|
||||
N_PROMPT = "person, skin, (deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
|
||||
w,h = 1024, 576 # Width and height of the processed image. Note that actual image processed would be a W x H resolution.
|
||||
|
||||
START_FROM_IND = 0 # index of a frame to start a processing from. Might be helpful with long animations where you need to restart the script multiple times
|
||||
SAVE_FRAMES = True # saves individual frames into 'out' folder if set True. Again might be helpful with long animations
|
||||
|
|
@ -140,6 +143,7 @@ output_video = cv2.VideoWriter(OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fp
|
|||
|
||||
prev_frame = None
|
||||
prev_frame_styled = None
|
||||
#init_image = None
|
||||
|
||||
# reading flow maps in a stream manner
|
||||
with h5py.File(FLOW_MAPS, 'r') as f:
|
||||
|
|
@ -172,6 +176,8 @@ with h5py.File(FLOW_MAPS, 'r') as f:
|
|||
|
||||
alpha_img = out_image.copy()
|
||||
out_image_ = out_image.copy()
|
||||
warped_styled = out_image.copy()
|
||||
#init_image = out_image.copy()
|
||||
else:
|
||||
# Resize the frame to proper resolution
|
||||
frame = cv2.resize(cur_frame, (w, h))
|
||||
|
|
@ -188,13 +194,18 @@ with h5py.File(FLOW_MAPS, 'r') as f:
|
|||
alpha_mask = np.clip(alpha_mask + 0.05, 0.05, 0.95)
|
||||
alpha_img = np.clip(alpha_mask * 255, 0, 255).astype(np.uint8)
|
||||
|
||||
# normalizing the colors
|
||||
out_image = skimage.exposure.match_histograms(out_image, frame, multichannel=False, channel_axis=-1)
|
||||
|
||||
out_image = out_image.astype(float) * alpha_mask + warped_styled.astype(float) * (1 - alpha_mask)
|
||||
|
||||
out_image_ = (out_image * 0.65 + warped_styled * 0.35)
|
||||
#out_image = skimage.exposure.match_histograms(out_image, prev_frame, multichannel=True, channel_axis=-1)
|
||||
#out_image_ = (out_image * 0.65 + warped_styled * 0.35)
|
||||
|
||||
|
||||
# Bluring issue fix via additional processing
|
||||
out_image_fixed = controlnetRequest(to_b64(out_image_), to_b64(frame), BLUR_FIX_STRENGTH, w, h, mask = None, seed=8888).sendRequest()
|
||||
out_image_fixed = controlnetRequest(to_b64(out_image), to_b64(frame), BLUR_FIX_STRENGTH, w, h, mask = None, seed=8888).sendRequest()
|
||||
|
||||
|
||||
# Write the frame to the output video
|
||||
frame_out = np.clip(out_image_fixed, 0, 255).astype(np.uint8)
|
||||
|
|
@ -202,8 +213,11 @@ with h5py.File(FLOW_MAPS, 'r') as f:
|
|||
|
||||
if VISUALIZE:
|
||||
# show the last written frame - useful to catch any issue with the process
|
||||
img_show = cv2.hconcat([frame_out, alpha_img])
|
||||
cv2.imshow('Out img', img_show)
|
||||
warped_styled = np.clip(warped_styled, 0, 255).astype(np.uint8)
|
||||
|
||||
img_show_top = cv2.hconcat([frame, warped_styled])
|
||||
img_show_bot = cv2.hconcat([frame_out, alpha_img])
|
||||
cv2.imshow('Out img', cv2.vconcat([img_show_top, img_show_bot]))
|
||||
cv2.setWindowTitle("Out img", str(ind+1))
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue