multiple improvements
Browse files
- app.py +201 -148
- sample_images/sample11.jpg +2 -2
- sbatch/sbatch_demo.sh +5 -3
- sbatch/sbatch_demo2.sh +1 -1
app.py
CHANGED
@@ -19,6 +19,7 @@ import random
 from copy import deepcopy
 from huggingface_hub import hf_hub_download
 from gradio_toggle import Toggle
+import argparse
 try:
     import spaces
 except:
@@ -27,11 +28,17 @@ except:
 MAX_N = 6
 FIX_MAX_N = 6
 LENGTH = 480
-
 placeholder = cv2.cvtColor(cv2.imread("placeholder.png"), cv2.COLOR_BGR2RGB)
-
-
-
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--not_hf", action="store_true", default=False)
+parser.add_argument("--old_model", action="store_true", default=False)
+parser.add_argument("--model_epoch", type=int, default=6)
+args = parser.parse_args()
+NEW_MODEL = not args.old_model
+MODEL_EPOCH = args.model_epoch
+HF = not args.not_hf
+
 pre_device = "cpu" if HF else "cuda"
 spaces_60_fn = spaces.GPU(duration=60) if HF else (lambda f: f)
 spaces_120_fn = spaces.GPU(duration=60) if HF else (lambda f: f)
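These flags are what let the same file serve both deployments: with `HF` true, weights stay on CPU until ZeroGPU grants a device and the `spaces.GPU` decorators attach one per call; locally the decorators collapse to identity functions. (Note that `spaces_120_fn` is still created with `duration=60` in this commit.) A minimal standalone sketch of the decorator fallback, with an illustrative decorated function:

    # Sketch: conditional ZeroGPU decorator, a no-op outside HF Spaces.
    try:
        import spaces          # present on Hugging Face Spaces
        HF = True
    except ImportError:
        HF = False

    spaces_60_fn = spaces.GPU(duration=60) if HF else (lambda f: f)

    @spaces_60_fn
    def generate(batch):
        # hypothetical GPU-bound work; gets a GPU for up to 60 s on Spaces
        return batch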
@@ -214,6 +221,7 @@ if NEW_MODEL:
     # ckpt_state_dict = torch.load(model_path)['model_state_dict']
     ckpt_state_dict = torch.load(model_path, map_location='cpu')['ema_state_dict']
     missing_keys, extra_keys = model.load_state_dict(ckpt_state_dict, strict=False)
+    print(f"Loaded {model_path}")
     model = model.to(device)
     model.eval()
     print(missing_keys, extra_keys)
@@ -233,6 +241,29 @@ if NEW_MODEL:
     print(f"encoder after eval() max: {max([p.max() for p in autoencoder.encoder.parameters()])}")
     print(f"autoencoder encoder after eval() dtype: {next(autoencoder.encoder.parameters()).dtype}")
     assert len(missing_keys) == 0
+else:
+    opts = HandDiffOpts()
+    model_path = './finetune_epoch=5-step=130000.ckpt'
+    sd_path = './sd-v1-4.ckpt'
+    print('Load diffusion model...')
+    diffusion = create_diffusion(str(opts.test_sampling_steps))
+    model = vit.DiT_XL_2(
+        input_size=opts.latent_size[0],
+        latent_dim=opts.latent_dim,
+        in_channels=opts.latent_dim+opts.n_keypoints+opts.n_mask,
+        learn_sigma=True,
+    ).cuda()
+    ckpt_state_dict = torch.load(model_path)['state_dict']
+    print(f"Loaded {model_path}")
+    dit_state_dict = {remove_prefix(k, 'diffusion_backbone.'): v for k, v in ckpt_state_dict.items() if k.startswith('diffusion_backbone')}
+    vae_state_dict = {remove_prefix(k, 'autoencoder.'): v for k, v in ckpt_state_dict.items() if k.startswith('autoencoder')}
+    missing_keys, extra_keys = model.load_state_dict(dit_state_dict, strict=False)
+    model.eval()
+    assert len(missing_keys) == 0 and len(extra_keys) == 0
+    autoencoder = vqvae.create_model(3, 3, opts.latent_dim).eval().requires_grad_(False).cuda()
+    missing_keys, extra_keys = autoencoder.load_state_dict(vae_state_dict, strict=False)
+    autoencoder.eval()
+    assert len(missing_keys) == 0 and len(extra_keys) == 0
 
 sam_path = "sam_vit_h_4b8939.pth"
 if not os.path.exists(sam_path):
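In the restored `else:` branch, one combined checkpoint holds both the DiT backbone and the VQ-VAE, and the two state dicts are separated by key prefix. `remove_prefix` is defined elsewhere in app.py; a minimal sketch of the same splitting, with an illustrative checkpoint path:

    # Sketch: split a combined Lightning-style checkpoint by submodule prefix.
    import torch

    def remove_prefix(key, prefix):
        # strip a leading prefix from a state-dict key; other keys pass through
        return key[len(prefix):] if key.startswith(prefix) else key

    ckpt = torch.load("finetune.ckpt", map_location="cpu")["state_dict"]  # path illustrative
    dit_sd = {remove_prefix(k, "diffusion_backbone."): v
              for k, v in ckpt.items() if k.startswith("diffusion_backbone")}
    vae_sd = {remove_prefix(k, "autoencoder."): v
              for k, v in ckpt.items() if k.startswith("autoencoder")}
    # load_state_dict(..., strict=False) then lets the asserts above verify
    # that nothing was actually missing or left over.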
@@ -249,6 +280,7 @@ hands = mp_hands.Hands(
 no_hands_open = cv2.resize(np.array(Image.open("no_hands_open.jpeg"))[..., :3], (LENGTH, LENGTH))
 
 def prepare_anno(ref, ref_is_user):
+    print("inside prepare_anno")
     if not ref_is_user: # no_hand_open.jpeg
         return gr.update(value=None), gr.update(value=None)
     if ref is None or ref["background"] is None or ref["background"].sum()==0: # clear_all
@@ -284,6 +316,7 @@ def prepare_anno(ref, ref_is_user):
 
 @spaces_60_fn
 def get_ref_anno(img, keypts, use_mask, use_pose):
+    print("inside get_ref_anno")
     no_mask, no_pose = not use_mask, not use_pose
     if img.sum() == 0: # clear_all
         return None, gr.update(), None, gr.update(), True
@@ -407,6 +440,7 @@ def get_ref_anno(img, keypts, use_mask, use_pose):
     return img, ref_pose, ref_cond, gr.update(), True
 
 def get_target_anno(img, keypts):
+    print("inside get_target_anno")
     if img.sum() == 0: # clear_all
         return None, gr.update(), None, gr.update(), True
     if keypts is None: # hands not detected
@@ -447,6 +481,7 @@ def get_target_anno(img, keypts):
     return img, target_pose, target_cond, keypts, gr.update(), True
 
 def visualize_ref(ref, ex_mask):
+    print("inside visualize_ref")
     if ref is None:
         return None
 
@@ -598,6 +633,12 @@ def process_crop(img, crop_coord, evt:gr.SelectData):
         cropped_vis = image.copy()
         cropped_vis[:,:,-1] = 255
     else: # will add second click
+        x_length = new_coord[0] - crop_coord[0][0]
+        y_length = new_coord[1] - crop_coord[0][1]
+        if x_length > y_length:
+            new_coord[0] = crop_coord[0][0] + y_length
+        else:
+            new_coord[1] = crop_coord[0][1] + x_length
         crop_coord.append(new_coord)
         x1, y1 = crop_coord[0]
         x2, y2 = crop_coord[1]
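The six added lines make the two-click crop selection square by clamping the second click: whichever side of the drag is longer gets shrunk to the shorter one. A self-contained sketch of that clamp (function name hypothetical; like the code above, it assumes a top-left to bottom-right drag):

    # Sketch: clamp the second crop corner so the selection is square.
    def clamp_square(first, second):
        x_length = second[0] - first[0]
        y_length = second[1] - first[1]
        if x_length > y_length:
            second = (first[0] + y_length, second[1])  # shrink width to height
        else:
            second = (second[0], first[1] + x_length)  # shrink height to width
        return second

    print(clamp_square((10, 10), (110, 60)))  # (60, 60): a 100x50 drag becomes 50x50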
@@ -889,6 +930,7 @@ def flip_hand(
     cond, auto_cond, manual_cond,
     keypts=None, auto_keypts=None, manual_keypts=None
 ):
+    print("inside flip_hand")
     if cond is None: # clear clicked
         return
     img["composite"] = img["composite"][:, ::-1, :]
@@ -923,7 +965,7 @@
         manual_keypts[:21, 0] = opts.image_size[1] - manual_keypts[:21, 0]
         if manual_keypts[21:, :].sum() != 0:
             manual_keypts[21:, 0] = opts.image_size[1] - manual_keypts[21:, 0]
-    return img, img_raw, pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond, keypts, auto_keypts, manual_keypts
+    return img, img_raw, pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond, False, keypts, auto_keypts, manual_keypts
 
 def resize_to_full(img):
     img["background"] = cv2.resize(img["background"], (LENGTH, LENGTH))
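`flip_hand` mirrors the composite image with `img[:, ::-1, :]`, so every stored keypoint must have its x-coordinate reflected about the image width, which is what the `opts.image_size[1] - x` updates do for the right-hand (first 21) and left-hand (last 21) points; the return tuple also gains a `False` that resets the `is_user` state wired into the flip listeners further down. The reflection in isolation:

    # Sketch: mirror (x, y) keypoints to match a horizontal image flip.
    import numpy as np

    def flip_keypoints(kpts, width):
        out = kpts.copy()
        out[:, 0] = width - out[:, 0]  # x -> width - x, y unchanged
        return out

    kpts = np.array([[10.0, 200.0], [470.0, 30.0]])
    print(flip_keypoints(kpts, 480))  # [[470. 200.] [ 10.  30.]]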
@@ -1117,75 +1159,80 @@ def unvisible_component(decider, component):
 
 example_ref_imgs = [
     [
-        "sample_images/
+        "sample_images/sample2.jpg", "sample_images/sample10.jpg"
     ],
     [
-        "sample_images/
+        "sample_images/sample10.jpg", "sample_images/sample9.jpg"
     ],
     [
-        "sample_images/sample3.jpg",
+        "sample_images/sample3.jpg", "sample_images/sample5.jpg"
     ],
     [
-        "sample_images/
+        "sample_images/sample11.jpg", "sample_images/sample10.jpg"
     ],
     [
-        "sample_images/
+        "sample_images/sample4.jpg", "pose_images/pose4.jpg"
     ],
 ]
 example_target_imgs = [
     [
-        "sample_images/
+        "sample_images/sample10.jpg",
     ],
     [
         "sample_images/sample9.jpg",
     ],
+    [
+        "sample_images/sample5.jpg",
+    ],
     [
         "sample_images/sample10.jpg",
     ],
     [
-        "
+        "pose_images/pose4.jpg"
     ],
-    ["pose_images/pose1.jpg"],
 ]
-fix_example_imgs = [
-    ["bad_hands/1.jpg"],
-    ["bad_hands/3.jpg"],
-    ["bad_hands/4.jpg"],
-    ["bad_hands/5.jpg"],
-    ["bad_hands/6.jpg"],
-    ["bad_hands/7.jpg"],
-]
-fix_example_brush = [
-    ["bad_hands/1_composite.png"],
-    ["bad_hands/3_composite.png"],
-    ["bad_hands/4_composite.png"],
-    ["bad_hands/5_composite.png"],
-    ["bad_hands/6_composite.png"],
-    ["bad_hands/7_composite.png"],
-]
-fix_example_kpts = [
-    ["bad_hands/1_kpts.png", 3.0, 1224],
-    ["bad_hands/3_kpts.png", 1.0, 42],
-    ["bad_hands/4_kpts.png", 2.0, 42],
-    ["bad_hands/5_kpts.png", 3.0, 42],
-    ["bad_hands/6_kpts.png", 3.0, 1348],
-    ["bad_hands/7_kpts.png", 3.0, 42],
-]
+# fix_example_imgs = [
+#     ["bad_hands/1.jpg"],
+#     ["bad_hands/3.jpg"],
+#     # ["bad_hands/4.jpg"],
+#     ["bad_hands/5.jpg"],
+#     ["bad_hands/6.jpg"],
+#     ["bad_hands/7.jpg"],
+# ]
+# fix_example_brush = [
+#     ["bad_hands/1_composite.png"],
+#     ["bad_hands/3_composite.png"],
+#     # ["bad_hands/4_composite.png"],
+#     ["bad_hands/5_composite.png"],
+#     ["bad_hands/6_composite.png"],
+#     ["bad_hands/7_composite.png"],
+# ]
+# fix_example_kpts = [
+#     ["bad_hands/1_kpts.png", 3.0, 1224],
+#     ["bad_hands/3_kpts.png", 1.0, 42],
+#     # ["bad_hands/4_kpts.png", 2.0, 42],
+#     ["bad_hands/5_kpts.png", 3.0, 42],
+#     ["bad_hands/6_kpts.png", 3.0, 1348],
+#     ["bad_hands/7_kpts.png", 3.0, 42],
+# ]
 fix_example_all = [
     ["bad_hands/1.jpg", "bad_hands/1_composite.png", "bad_hands/1_mask.jpg", "bad_hands/1_kpts.png", 3.0, 1224],
     ["bad_hands/3.jpg", "bad_hands/3_composite.png", "bad_hands/3_mask.jpg", "bad_hands/3_kpts.png", 1.0, 42],
-    ["bad_hands/4.jpg", "bad_hands/4_composite.png", "bad_hands/4_mask.jpg", "bad_hands/4_kpts.png", 2.0, 42],
+    # ["bad_hands/4.jpg", "bad_hands/4_composite.png", "bad_hands/4_mask.jpg", "bad_hands/4_kpts.png", 2.0, 42],
    ["bad_hands/5.jpg", "bad_hands/5_composite.png", "bad_hands/5_mask.jpg", "bad_hands/5_kpts.png", 3.0, 42],
     ["bad_hands/6.jpg", "bad_hands/6_composite.png", "bad_hands/6_mask.jpg", "bad_hands/6_kpts.png", 3.0, 1348],
     ["bad_hands/7.jpg", "bad_hands/7_composite.png", "bad_hands/7_mask.jpg", "bad_hands/7_kpts.png", 3.0, 42],
 ]
-for i in range(len(fix_example_kpts)):
-    npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
-    fix_example_kpts[i].append(npy_path)
+# for i in range(len(fix_example_kpts)):
+#     npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
+#     fix_example_kpts[i].append(npy_path)
 for i in range(len(fix_example_all)):
     npy_path = fix_example_all[i][3].replace("_kpts.png", ".npy")
     fix_example_all[i].append(npy_path)
 
+# #fix_examples_all table tr:first-child {
+#     display: none !important;
+# }
 custom_css = """
 .gradio-container .examples img {
     width: 240px !important;
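Each `fix_example_all` row now ends with a hidden seventh column: the path of the precomputed keypoint array, derived from the `_kpts.png` preview by the loop above (the CSS rule below hides that column in the Examples table). Reading one of those files back might look like this; the array shape is an assumption based on the 21-keypoints-per-hand convention used elsewhere in the demo:

    # Sketch: recover the keypoint array that rides along with an example row.
    import numpy as np

    kpts_png = "bad_hands/1_kpts.png"               # preview image in the table
    npy_path = kpts_png.replace("_kpts.png", ".npy")
    keypts = np.load(npy_path)                      # assumed (42, 2): right hand, then left
    print(npy_path, keypts.shape)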
@@ -1237,9 +1284,6 @@ custom_css = """
 #fix_examples_all table tr td:nth-child(7) {
     display: none !important;
 }
-#fix_examples_all table tr:first-child {
-    display: none !important;
-}
 #repose_tutorial video {
     width: 50% !important;
     display: block;
@@ -1280,6 +1324,13 @@ custom_css = """
 #gradio-app {
     flex-direction: row; !important;
 }
+#example_ref_target {
+    display: block !important;
+    width: 66.6667% !important;
+    margin-left: 0 !important;
+    margin-right: auto !important;
+    align-self: flex-start !important;
+}
 """
 ##no_wrap_row {
 # display: flex !important;
@@ -1411,6 +1462,37 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
     # config
     use_pose = gr.State(value=True)
 
+    # more options
+    with gr.Accordion(label="More options", open=False):
+        with gr.Row():
+            n_generation = gr.Slider(
+                label="Number of generations",
+                value=1,
+                minimum=1,
+                maximum=MAX_N,
+                step=1,
+                randomize=False,
+                interactive=True,
+            )
+            seed = gr.Slider(
+                label="Seed",
+                value=42,
+                minimum=0,
+                maximum=10000,
+                step=1,
+                randomize=False,
+                interactive=True,
+            )
+            cfg = gr.Slider(
+                label="Classifier free guidance scale",
+                value=2.5,
+                minimum=0.0,
+                maximum=10.0,
+                step=0.1,
+                randomize=False,
+                interactive=True,
+            )
+
     # main tabs
     with gr.Row():
         # ref column
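The three sliders move out of the main layout into a collapsed "More options" accordion (the old copies are deleted further down). `cfg` is the classifier-free guidance scale passed to sampling; in the standard CFG formulation, which the demo's sampler presumably follows, it mixes conditional and unconditional predictions:

    # Sketch: standard classifier-free guidance mixing, controlled by the cfg slider.
    import torch

    def cfg_mix(eps_uncond, eps_cond, cfg):
        # cfg = 0 ignores the condition; larger values push harder toward it
        return eps_uncond + cfg * (eps_cond - eps_uncond)

    eps_u, eps_c = torch.zeros(4), torch.ones(4)
    print(cfg_mix(eps_u, eps_c, 2.5))  # tensor([2.5000, 2.5000, 2.5000, 2.5000]); 2.5 is the slider default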
@@ -1431,7 +1513,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 layers=False,
                 crop_size="1:1",
             )
-            gr.Examples(example_ref_imgs, [ref], examples_per_page=20)
             use_mask = Toggle(label="Use mask", value=False, interactive=True)
             with gr.Accordion(label="See hand pose & mask", open=False):
                 with gr.Tab("Automatic hand keypoints"):
@@ -1550,7 +1631,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 layers=False,
                 crop_size="1:1",
             )
-            gr.Examples(example_target_imgs, [target], examples_per_page=20)
+            # gr.Examples(example_target_imgs, [target], examples_per_page=20)
             with gr.Accordion(label="See hand pose", open=False):
                 with gr.Tab("Automatic hand keypoints"):
                     target_pose = gr.Image(
@@ -1685,36 +1766,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             # )
             clear = gr.ClearButton(elem_id="clear_button")
 
-
-
-    with gr.Row():
-        n_generation = gr.Slider(
-            label="Number of generations",
-            value=1,
-            minimum=1,
-            maximum=MAX_N,
-            step=1,
-            randomize=False,
-            interactive=True,
-        )
-        seed = gr.Slider(
-            label="Seed",
-            value=42,
-            minimum=0,
-            maximum=10000,
-            step=1,
-            randomize=False,
-            interactive=True,
-        )
-        cfg = gr.Slider(
-            label="Classifier free guidance scale",
-            value=2.5,
-            minimum=0.0,
-            maximum=10.0,
-            step=0.1,
-            randomize=False,
-            interactive=True,
-        )
+    with gr.Row():
+        gr.Examples(example_ref_imgs, [ref, target], examples_per_page=20, elem_id="example_ref_target")
 
     # tutorial video
     with gr.Accordion("Tutorial Video of Demo 1", elem_id="accordion_bold_large_center"):
@@ -1791,7 +1844,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
     ref_flip.select(
         flip_hand,
         [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond],
-        [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond]
+        [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond, ref_is_user]
     )
 
     # target listeners
@@ -1858,7 +1911,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
     target_flip.select(
         flip_hand,
         [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
-        [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
+        [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_is_user, target_keypts, target_auto_keypts, target_manual_keypts],
     )
 
     # run listerners
@@ -2020,7 +2073,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             )
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 18px; font-weight: bold;">2.
+                    """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Select area to fix <br>(⚠️and surrounding area)</p>"""
                 )
                 # gr.Markdown(
                 #     """<p style="text-align: center;">Don't brush the entire hand!</p>"""
@@ -2055,17 +2108,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 # gr.Markdown(
                 #     """<p style="text-align: center;">Either get hand pose from Examples, or manually give hand pose (located at the bottom)</p>"""
                 # )
-                fix_kp_all = gr.Image(
-                    type="numpy",
-                    label="Target Hand Pose",
-                    show_label=False,
-                    height=LENGTH,
-                    width=LENGTH,
-                    interactive=False,
-                    visible=True,
-                    sources=(),
-                    image_mode="RGBA"
-                )
                 # with gr.Accordion(open=True):
                 #     fix_ex_kpts = gr.Examples(
                 #         fix_example_kpts,
@@ -2074,68 +2116,79 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 #     postprocess=False,
                 #     elem_id="kpts_examples"
                 # )
-                with gr.Accordion("[Your own image] Manually give hand pose", open=False, elem_id="accordion_bold"):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    )
-                    fix_reset_right = gr.Button(
-                        value="Reset", interactive=False, visible=False
-                    )
-                    fix_kp_l_info = gr.Markdown(
-                        """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>left hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
-                        visible=False
+                # with gr.Accordion("[Your own image] Manually give hand pose", open=False, elem_id="accordion_bold"):
+                gr.Markdown(
+                    """<p style="text-align: center;">① Tell us if this is right, left, or both hands, if it wasn't from Example</p>"""
+                )
+                fix_checkbox = gr.CheckboxGroup(
+                    ["Right hand", "Left hand"],
+                    show_label=False,
+                    interactive=False,
+                )
+                fix_kp_r_info = gr.Markdown(
+                    """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
+                    visible=False
+                )
+                fix_kp_right = gr.Image(
+                    type="numpy",
+                    label="Keypoint Selection (right hand)",
+                    show_label=True,
+                    height=LENGTH,
+                    width=LENGTH,
+                    interactive=False,
+                    visible=False,
+                    sources=[],
+                )
+                with gr.Row():
+                    fix_undo_right = gr.Button(
+                        value="Undo", interactive=False, visible=False
                     )
-
-
-                        label="Keypoint Selection (left hand)",
-                        show_label=True,
-                        height=LENGTH,
-                        width=LENGTH,
-                        interactive=False,
-                        visible=False,
-                        sources=[],
+                    fix_reset_right = gr.Button(
+                        value="Reset", interactive=False, visible=False
                     )
-
-
-
-
-
-
-
-
-
+                fix_kp_l_info = gr.Markdown(
+                    """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>left hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
+                    visible=False
+                )
+                fix_kp_left = gr.Image(
+                    type="numpy",
+                    label="Keypoint Selection (left hand)",
+                    show_label=True,
+                    height=LENGTH,
+                    width=LENGTH,
+                    interactive=False,
+                    visible=False,
+                    sources=[],
+                )
+                with gr.Row():
+                    fix_undo_left = gr.Button(
+                        value="Undo", interactive=False, visible=False
                     )
-
-                        value="
-                        type="numpy",
-                        show_label=False,
-                        height=LENGTH // 2,
-                        width=LENGTH // 2,
-                        interactive=False,
+                    fix_reset_left = gr.Button(
+                        value="Reset", interactive=False, visible=False
                     )
+                fix_kp_all = gr.Image(
+                    type="numpy",
+                    label="Keypoint Selection (from Example)",
+                    show_label=True,
+                    height=LENGTH,
+                    width=LENGTH,
+                    interactive=False,
+                    visible=True,
+                    sources=(),
+                    image_mode="RGBA"
+                )
+                gr.Markdown(
+                    """<p style="text-align: left; font-weight: bold; ">OpenPose keypoints convention</p>"""
+                )
+                fix_openpose = gr.Image(
+                    value="openpose.png",
+                    type="numpy",
+                    show_label=False,
+                    height=LENGTH // 2,
+                    width=LENGTH // 2,
+                    interactive=False,
+                )
 
         # result column
         with gr.Column():
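The rebuilt widgets collect 21 clicks per hand following the OpenPose hand convention referenced in the instructions (wrist first, then four joints per finger), and `fix_openpose` displays that convention as an image. For reference, the standard 21-keypoint skeleton, with illustrative drawing code:

    # Sketch: OpenPose 21-keypoint hand skeleton (0 = wrist; thumb 1-4,
    # index 5-8, middle 9-12, ring 13-16, pinky 17-20).
    import cv2

    HAND_EDGES = [
        (0, 1), (1, 2), (2, 3), (3, 4),         # thumb
        (0, 5), (5, 6), (6, 7), (7, 8),         # index
        (0, 9), (9, 10), (10, 11), (11, 12),    # middle
        (0, 13), (13, 14), (14, 15), (15, 16),  # ring
        (0, 17), (17, 18), (18, 19), (19, 20),  # pinky
    ]

    def draw_skeleton(canvas, kpts):
        # canvas: HxWx3 uint8 image; kpts: (21, 2) clicked points
        for a, b in HAND_EDGES:
            cv2.line(canvas, tuple(map(int, kpts[a])), tuple(map(int, kpts[b])), (0, 255, 0), 2)
        return canvas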
sample_images/sample11.jpg
CHANGED
(binary change: image re-uploaded; stored with Git LFS)
sbatch/sbatch_demo.sh
CHANGED
@@ -4,8 +4,10 @@
 #SBATCH -J demo_foundhand
 
 # partition
-#SBATCH
-
+#SBATCH -p 3090-gcondo --gres=gpu:1
+
+##SBATCH --partition=ssrinath-gcondo --gres=gpu:1 --gres-flags=enforce-binding
+##SBATCH --account=ssrinath-gcondo
 
 # ensures all allocated cores are on the same node
 #SBATCH -N 1
@@ -35,4 +37,4 @@ conda activate handdiff
 cd $HOME/hdd/FoundHand_demo
 echo Directory is `pwd`
 
-python -u app.py
+python -u app.py --not_hf --model_epoch 4
sbatch/sbatch_demo2.sh
CHANGED
@@ -37,4 +37,4 @@ conda activate handdiff
 cd $HOME/hdd/FoundHand_demo
 echo Directory is `pwd`
 
-python -u app.py
+python -u app.py --not_hf --model_epoch 6
|