Improved version

2023-04-05 08:17:42 +03:00 · 2023-04-05 08:17:42 +03:00 · c75ea0c5e7
parent 164ced85be
commit c75ea0c5e7
1 changed files with 114 additions and 67 deletions
--- a/script.py
+++ b/script.py
@ -7,13 +7,11 @@ import os

 INPUT_VIDEO = "video_input.mp4"
 OUTPUT_VIDEO = "result.mp4"
-REF_IMAGE = "init.png"
-
-PROMPT = "pixarstyle 3D cartoon version of Pulp Fiction apartment Scene, natural skin texture, 4k textures, hdr, intricate, highly detailed, sharp focus, cinematic look, hyperdetailed. White man, black man, strong man, 90's room, gun, burger and cola."
-N_PROMPT = "cropped head, black and white, slanted eyes, deformed, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, disgusting, poorly drawn hands, missing limb, floating limbs, disconnected limbs, malformed hands, blurry, ((((mutated hands and fingers)))), watermark, watermarked, oversaturated, censored, distorted hands, amputation, missing hands, obese, doubled face, double hands"
-SEED = 2901260158
-w,h = 1216, 512 # Width and height of the processed image. Note that actual image processed would be a W x 2H resolution. You should have enough VRAM to process it.

+PROMPT = "The Matrix as stop motion animation with plastic figures. Clay plasticine texture, cinematic light. 4k textures, hd, hyperdetailed. Claymation, plasticine animation, clay stop motion animation."
+N_PROMPT = "green face, green skin, black and white, slanted eyes, red eyes, blood eyes, deformed, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, disgusting, poorly drawn hands, missing limb, floating limbs, disconnected limbs, malformed hands, blurry, ((((mutated hands and fingers)))), watermark, watermarked, oversaturated, censored, distorted hands, amputation, missing hands, obese, doubled face, double hands"
+SEED = -1
+w,h = 896, 512 # Width and height of the processed image. Note that the image is now processed at W x H resolution (no longer W x 2H). You should have enough VRAM to process it.

 START_FROM_IND = 0 # index of a frame to start a processing from. Might be helpful with long animations where you need to restart the script multiple times
 SAVE_FRAMES = True # saves individual frames into 'out' folder if set True. Again might be helpful with long animations
@ -23,22 +21,16 @@ def to_b64(img):
    b64img = base64.b64encode(buffer).decode("utf-8")
    return b64img

-mask_img = np.zeros((h * 2,w,3), dtype=np.uint8)
-mask_img[:h] = 255
-b64_mask_img = to_b64(mask_img)
-
-# load context image to make generations more stable
-cont_img = cv2.imread(REF_IMAGE)
-cont_img = cv2.resize(cont_img, (w,h))
-
 class controlnetRequest():
-    def __init__(self, b64_full_img):
-        self.url = "http://localhost:7860/controlnet/img2img"
+    def __init__(self, b64_cur_img, b64_hed_img, ds = 0.35, w=w, h=h, mask = None):
+
+        self.url = "http://localhost:7860/sdapi/v1/img2img"
        self.body = {
-            "init_images": [b64_full_img],
-            "mask": b64_mask_img,
+            "init_images": [b64_cur_img],
+            "mask": mask,
            "mask_blur": 0,
            "inpainting_fill": 1,
+            "inpainting_mask_invert": 0,
            "prompt": PROMPT,
            "negative_prompt": N_PROMPT,
            "seed": SEED,
@ -46,45 +38,59 @@ class controlnetRequest():
            "subseed_strength": 0,
            "batch_size": 1,
            "n_iter": 1,
-            "steps": 20,
-            "cfg_scale": 4.5,
-            "denoising_strength":0.6,
+            "steps": 15,
+            "cfg_scale": 7,
+            "denoising_strength": ds,
            "width": w,
-            "height": h * 2,
+            "height": h,
            "restore_faces": False,
            "eta": 0,
            "sampler_index": "DPM++ 2S a",
-            "controlnet_units": [
-                {
-                    "module": "hed",
-                    "model": "control_hed-fp16 [13fee50b]",
-                    "weight": 0.6,
-                    "resize_mode": "Just Resize",
-                    "lowvram": False,
-                    "processor_res": 64,
-                    "threshold_a": 64,
-                    "threshold_b": 64,
-                    "guidance": 0.6,
-                    "guessmode": False
-                },
-                {
-                    "module": "none",
-                    "model": "t2iadapter_color_sd14v1 [8522029d]",
-                    "weight": 0.6,
-                    "resize_mode": "Just Resize",
-                    "lowvram": False,
-                    "processor_res": 64,
-                    "threshold_a": 64,
-                    "threshold_b": 64,
-                    "guidance": 0.6,
-                    "guessmode": False
+            "control_net_enabled": True,
+            "alwayson_scripts": {
+                "ControlNet":{
+                    "args": [
+                        {
+                            "input_image": b64_hed_img,
+                            "module": "hed",
+                            "model": "control_hed-fp16 [13fee50b]",
+                            "weight": 1,
+                            "resize_mode": "Just Resize",
+                            "lowvram": False,
+                            "processor_res": 512,
+                            "guidance": 1,
+                            "guessmode": False
+                        }
+                    ]
                }
-            ]
+            },
        }

    def sendRequest(self):
        r = requests.post(self.url, json=self.body)
        return r.json()
+    
+
+def estimate_flow_diff(frame1, frame2, frame1_styled):
+  prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
+  next = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
+
+  flow_data = cv2.calcOpticalFlowFarneback(prvs, next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
+  h, w = flow_data.shape[:2]
+  flow_data = -flow_data
+  flow_data[:,:,0] += np.arange(w)
+  flow_data[:,:,1] += np.arange(h)[:,np.newaxis]
+  #map_x, map_y = cv2.convertMaps(flow_data, 0, -1, True)
+  warped_frame = cv2.remap(frame1, flow_data, None, cv2.INTER_LINEAR)
+  warped_frame_styled = cv2.remap(frame1_styled, flow_data, None, cv2.INTER_LINEAR)
+
+  diff = np.abs(warped_frame.astype(np.float32) - frame2.astype(np.float32))
+  diff = diff.max(axis = -1, keepdims=True).repeat(3, axis = -1) * 30
+  diff = cv2.GaussianBlur(diff,(15,15),10,cv2.BORDER_DEFAULT)
+  diff_frame = np.clip(diff, 0, 255).astype(np.uint8)
+
+  return warped_frame, diff_frame, warped_frame_styled
+

 # Open the input video file
 input_video = cv2.VideoCapture(INPUT_VIDEO)
@ -99,43 +105,84 @@ output_video = cv2.VideoWriter(OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*'MP4V'), fp
 for ind in tqdm(range(total_frames)):
    # Read the next frame from the input video
    if not input_video.isOpened(): break
-    ret, frame = input_video.read()
+    ret, init_frame = input_video.read()
    if not ret: break

    if ind+1 < START_FROM_IND: continue

-    # Resize the frame to proper resolution 
-    frame = cv2.resize(frame, (w,h))

-    full_img = cv2.vconcat([frame, cont_img])
-    full_img = cv2.resize(full_img, (w,h * 2))
-    b64_full_img = to_b64(full_img)
+    # Generate a coarse version of the current frame, using the previous stylized frame as a reference image
+    if ind == 0:
+        # Resize the frame to proper resolution 
+        frame = cv2.resize(init_frame, (w, h))

-    # Sending request to the web-ui
-    data_js = controlnetRequest(b64_full_img).sendRequest()
-    
-    # Convert the byte array to a NumPy array
-    image_bytes = base64.b64decode(data_js["images"][0])
-    np_array = np.frombuffer(image_bytes, dtype=np.uint8)
+        # Sending request to the web-ui
+        data_js = controlnetRequest(to_b64(frame), to_b64(frame), 0.85, w, h, mask = None).sendRequest()
+        
+        # Convert the byte array to a NumPy array
+        image_bytes = base64.b64decode(data_js["images"][0])
+        np_array = np.frombuffer(image_bytes, dtype=np.uint8)

-    # Convert the NumPy array to a cv2 image
-    cv2_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
+        # Convert the NumPy array to a cv2 image
+        out_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
+        diff_mask = out_image.copy()
+    else:
+        # Resize the frame to proper resolution 
+        frame = cv2.resize(init_frame, (w, h))
+        prev_frame = cv2.resize(prev_frame, (w, h))
+
+        # Sending request to the web-ui
+        data_js = controlnetRequest(to_b64(frame), to_b64(frame), 0.55, w, h, mask = None).sendRequest()
+        
+        # Convert the byte array to a NumPy array
+        image_bytes = base64.b64decode(data_js["images"][0])
+        np_array = np.frombuffer(image_bytes, dtype=np.uint8)
+
+        # Convert the NumPy array to a cv2 image
+        out_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
+
+
+        _, diff_mask, warped_styled = estimate_flow_diff(prev_frame, frame, prev_frame_styled)
+
+        alpha = diff_mask.astype(np.float32) / 255.0
+        pr_image = out_image * alpha * 0.5 + warped_styled * (1 - alpha * 0.5)
+
+        diff = cv2.GaussianBlur(alpha * 255 * 3,(11,11),4,cv2.BORDER_DEFAULT)
+        diff_mask = np.clip(diff, 0, 255).astype(np.uint8)
+
+        # Sending request to the web-ui
+        data_js = controlnetRequest(to_b64(pr_image), to_b64(frame), 0.35, w, h, mask = to_b64(diff_mask)).sendRequest()
+        
+        # Convert the byte array to a NumPy array
+        image_bytes = base64.b64decode(data_js["images"][0])
+        np_array = np.frombuffer(image_bytes, dtype=np.uint8)
+
+        # Convert the NumPy array to a cv2 image
+        out_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)

    # Write the frame to the output video
-    cv2_image = cv2_image[:h]
-    output_video.write(cv2_image)
+    frame_out = out_image[:h]
+    output_video.write(frame_out)

    # show the last written frame - useful to catch any issue with the process
-    cv2.imshow('Out img', cv2_image)
+    img_show = cv2.vconcat([out_image, diff_mask])
+    cv2.imshow('Out img', img_show)
+    if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
+
+
+    # Write the frame to the output video
+    output_video.write(frame_out)
+    prev_frame = init_frame.copy()
+    prev_frame_styled = frame_out.copy()
+
    
    if SAVE_FRAMES: 
        if not os.path.isdir('out'): os.makedirs('out')
-        cv2.imwrite(f'out/{ind+1:05d}.png', cv2_image)
-    if cv2.waitKey(1) & 0xFF == ord('q'): break # press Q to close the script while processing
+        cv2.imwrite(f'out/{ind+1:05d}.png', frame_out)

 # Release the input and output video files
 input_video.release()
 output_video.release()

 # Close all windows
-cv2.destroyAllWindows()
+cv2.destroyAllWindows()