multiple improvements
Browse files
- app.py +201 -148
- sample_images/sample11.jpg +2 -2
- sbatch/sbatch_demo.sh +5 -3
- sbatch/sbatch_demo2.sh +1 -1
app.py
CHANGED
@@ -19,6 +19,7 @@ import random
 from copy import deepcopy
 from huggingface_hub import hf_hub_download
 from gradio_toggle import Toggle
+import argparse
 try:
     import spaces
 except:
@@ -27,11 +28,17 @@ except:
 MAX_N = 6
 FIX_MAX_N = 6
 LENGTH = 480
-
 placeholder = cv2.cvtColor(cv2.imread("placeholder.png"), cv2.COLOR_BGR2RGB)
-
-
-
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--not_hf", action="store_true", default=False)
+parser.add_argument("--old_model", action="store_true", default=False)
+parser.add_argument("--model_epoch", type=int, default=6)
+args = parser.parse_args()
+NEW_MODEL = not args.old_model
+MODEL_EPOCH = args.model_epoch
+HF = not args.not_hf
+
 pre_device = "cpu" if HF else "cuda"
 spaces_60_fn = spaces.GPU(duration=60) if HF else (lambda f: f)
 spaces_120_fn = spaces.GPU(duration=60) if HF else (lambda f: f)
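These flags are what let the same file serve both deployments: with `HF` true, weights stay on CPU until ZeroGPU grants a device and the `spaces.GPU` decorators attach one per call; locally the decorators collapse to identity functions. (Note that `spaces_120_fn` is still created with `duration=60` in this commit.) A minimal standalone sketch of the decorator fallback, with an illustrative decorated function:

    # Sketch: conditional ZeroGPU decorator, a no-op outside HF Spaces.
    try:
        import spaces          # present on Hugging Face Spaces
        HF = True
    except ImportError:
        HF = False

    spaces_60_fn = spaces.GPU(duration=60) if HF else (lambda f: f)

    @spaces_60_fn
    def generate(batch):
        # hypothetical GPU-bound work; gets a GPU for up to 60 s on Spaces
        return batch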
@@ -214,6 +221,7 @@ if NEW_MODEL:
     # ckpt_state_dict = torch.load(model_path)['model_state_dict']
     ckpt_state_dict = torch.load(model_path, map_location='cpu')['ema_state_dict']
     missing_keys, extra_keys = model.load_state_dict(ckpt_state_dict, strict=False)
+    print(f"Loaded {model_path}")
     model = model.to(device)
     model.eval()
     print(missing_keys, extra_keys)
@@ -233,6 +241,29 @@ if NEW_MODEL:
     print(f"encoder after eval() max: {max([p.max() for p in autoencoder.encoder.parameters()])}")
     print(f"autoencoder encoder after eval() dtype: {next(autoencoder.encoder.parameters()).dtype}")
     assert len(missing_keys) == 0
+else:
+    opts = HandDiffOpts()
+    model_path = './finetune_epoch=5-step=130000.ckpt'
+    sd_path = './sd-v1-4.ckpt'
+    print('Load diffusion model...')
+    diffusion = create_diffusion(str(opts.test_sampling_steps))
+    model = vit.DiT_XL_2(
+        input_size=opts.latent_size[0],
+        latent_dim=opts.latent_dim,
+        in_channels=opts.latent_dim+opts.n_keypoints+opts.n_mask,
+        learn_sigma=True,
+    ).cuda()
+    ckpt_state_dict = torch.load(model_path)['state_dict']
+    print(f"Loaded {model_path}")
+    dit_state_dict = {remove_prefix(k, 'diffusion_backbone.'): v for k, v in ckpt_state_dict.items() if k.startswith('diffusion_backbone')}
+    vae_state_dict = {remove_prefix(k, 'autoencoder.'): v for k, v in ckpt_state_dict.items() if k.startswith('autoencoder')}
+    missing_keys, extra_keys = model.load_state_dict(dit_state_dict, strict=False)
+    model.eval()
+    assert len(missing_keys) == 0 and len(extra_keys) == 0
+    autoencoder = vqvae.create_model(3, 3, opts.latent_dim).eval().requires_grad_(False).cuda()
+    missing_keys, extra_keys = autoencoder.load_state_dict(vae_state_dict, strict=False)
+    autoencoder.eval()
+    assert len(missing_keys) == 0 and len(extra_keys) == 0
 
 sam_path = "sam_vit_h_4b8939.pth"
 if not os.path.exists(sam_path):
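In the restored `else:` branch, one combined checkpoint holds both the DiT backbone and the VQ-VAE, and the two state dicts are separated by key prefix. `remove_prefix` is defined elsewhere in app.py; a minimal sketch of the same splitting, with an illustrative checkpoint path:

    # Sketch: split a combined Lightning-style checkpoint by submodule prefix.
    import torch

    def remove_prefix(key, prefix):
        # strip a leading prefix from a state-dict key; other keys pass through
        return key[len(prefix):] if key.startswith(prefix) else key

    ckpt = torch.load("finetune.ckpt", map_location="cpu")["state_dict"]  # path illustrative
    dit_sd = {remove_prefix(k, "diffusion_backbone."): v
              for k, v in ckpt.items() if k.startswith("diffusion_backbone")}
    vae_sd = {remove_prefix(k, "autoencoder."): v
              for k, v in ckpt.items() if k.startswith("autoencoder")}
    # load_state_dict(..., strict=False) then lets the asserts above verify
    # that nothing was actually missing or left over.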
@@ -249,6 +280,7 @@ hands = mp_hands.Hands(
 no_hands_open = cv2.resize(np.array(Image.open("no_hands_open.jpeg"))[..., :3], (LENGTH, LENGTH))
 
 def prepare_anno(ref, ref_is_user):
+    print("inside prepare_anno")
     if not ref_is_user: # no_hand_open.jpeg
         return gr.update(value=None), gr.update(value=None)
     if ref is None or ref["background"] is None or ref["background"].sum()==0: # clear_all
@@ -284,6 +316,7 @@ def prepare_anno(ref, ref_is_user):
 
 @spaces_60_fn
 def get_ref_anno(img, keypts, use_mask, use_pose):
+    print("inside get_ref_anno")
     no_mask, no_pose = not use_mask, not use_pose
     if img.sum() == 0: # clear_all
         return None, gr.update(), None, gr.update(), True
@@ -407,6 +440,7 @@ def get_ref_anno(img, keypts, use_mask, use_pose):
     return img, ref_pose, ref_cond, gr.update(), True
 
 def get_target_anno(img, keypts):
+    print("inside get_target_anno")
     if img.sum() == 0: # clear_all
         return None, gr.update(), None, gr.update(), True
     if keypts is None: # hands not detected
@@ -447,6 +481,7 @@ def get_target_anno(img, keypts):
     return img, target_pose, target_cond, keypts, gr.update(), True
 
 def visualize_ref(ref, ex_mask):
+    print("inside visualize_ref")
     if ref is None:
         return None
 
@@ -598,6 +633,12 @@ def process_crop(img, crop_coord, evt:gr.SelectData):
         cropped_vis = image.copy()
         cropped_vis[:,:,-1] = 255
     else: # will add second click
+        x_length = new_coord[0] - crop_coord[0][0]
+        y_length = new_coord[1] - crop_coord[0][1]
+        if x_length > y_length:
+            new_coord[0] = crop_coord[0][0] + y_length
+        else:
+            new_coord[1] = crop_coord[0][1] + x_length
         crop_coord.append(new_coord)
         x1, y1 = crop_coord[0]
         x2, y2 = crop_coord[1]
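The six added lines make the two-click crop selection square by clamping the second click: whichever side of the drag is longer gets shrunk to the shorter one. A self-contained sketch of that clamp (function name hypothetical; like the code above, it assumes a top-left to bottom-right drag):

    # Sketch: clamp the second crop corner so the selection is square.
    def clamp_square(first, second):
        x_length = second[0] - first[0]
        y_length = second[1] - first[1]
        if x_length > y_length:
            second = (first[0] + y_length, second[1])  # shrink width to height
        else:
            second = (second[0], first[1] + x_length)  # shrink height to width
        return second

    print(clamp_square((10, 10), (110, 60)))  # (60, 60): a 100x50 drag becomes 50x50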
@@ -889,6 +930,7 @@ def flip_hand(
     cond, auto_cond, manual_cond,
     keypts=None, auto_keypts=None, manual_keypts=None
 ):
+    print("inside flip_hand")
     if cond is None: # clear clicked
         return
     img["composite"] = img["composite"][:, ::-1, :]
@@ -923,7 +965,7 @@
         manual_keypts[:21, 0] = opts.image_size[1] - manual_keypts[:21, 0]
         if manual_keypts[21:, :].sum() != 0:
             manual_keypts[21:, 0] = opts.image_size[1] - manual_keypts[21:, 0]
-    return img, img_raw, pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond, keypts, auto_keypts, manual_keypts
+    return img, img_raw, pose_img, pose_manual_img, manual_kp_right, manual_kp_left, cond, auto_cond, manual_cond, False, keypts, auto_keypts, manual_keypts
 
 def resize_to_full(img):
     img["background"] = cv2.resize(img["background"], (LENGTH, LENGTH))
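`flip_hand` mirrors the composite image with `img[:, ::-1, :]`, so every stored keypoint must have its x-coordinate reflected about the image width, which is what the `opts.image_size[1] - x` updates do for the right-hand (first 21) and left-hand (last 21) points; the return tuple also gains a `False` that resets the `is_user` state wired into the flip listeners further down. The reflection in isolation:

    # Sketch: mirror (x, y) keypoints to match a horizontal image flip.
    import numpy as np

    def flip_keypoints(kpts, width):
        out = kpts.copy()
        out[:, 0] = width - out[:, 0]  # x -> width - x, y unchanged
        return out

    kpts = np.array([[10.0, 200.0], [470.0, 30.0]])
    print(flip_keypoints(kpts, 480))  # [[470. 200.] [ 10.  30.]]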
@@ -1117,75 +1159,80 @@ def unvisible_component(decider, component):
 
 example_ref_imgs = [
     [
-        "sample_images/
+        "sample_images/sample2.jpg", "sample_images/sample10.jpg"
     ],
     [
-        "sample_images/
+        "sample_images/sample10.jpg", "sample_images/sample9.jpg"
     ],
     [
-        "sample_images/sample3.jpg",
+        "sample_images/sample3.jpg", "sample_images/sample5.jpg"
     ],
     [
-        "sample_images/
+        "sample_images/sample11.jpg", "sample_images/sample10.jpg"
     ],
     [
-        "sample_images/
+        "sample_images/sample4.jpg", "pose_images/pose4.jpg"
     ],
 ]
 example_target_imgs = [
     [
-        "sample_images/
+        "sample_images/sample10.jpg",
     ],
     [
         "sample_images/sample9.jpg",
     ],
+    [
+        "sample_images/sample5.jpg",
+    ],
     [
         "sample_images/sample10.jpg",
     ],
     [
-        "
+        "pose_images/pose4.jpg"
     ],
-    ["pose_images/pose1.jpg"],
 ]
-fix_example_imgs = [
-    ["bad_hands/1.jpg"],
-    ["bad_hands/3.jpg"],
-    ["bad_hands/4.jpg"],
-    ["bad_hands/5.jpg"],
-    ["bad_hands/6.jpg"],
-    ["bad_hands/7.jpg"],
-]
-fix_example_brush = [
-    ["bad_hands/1_composite.png"],
-    ["bad_hands/3_composite.png"],
-    ["bad_hands/4_composite.png"],
-    ["bad_hands/5_composite.png"],
-    ["bad_hands/6_composite.png"],
-    ["bad_hands/7_composite.png"],
-]
-fix_example_kpts = [
-    ["bad_hands/1_kpts.png", 3.0, 1224],
-    ["bad_hands/3_kpts.png", 1.0, 42],
-    ["bad_hands/4_kpts.png", 2.0, 42],
-    ["bad_hands/5_kpts.png", 3.0, 42],
-    ["bad_hands/6_kpts.png", 3.0, 1348],
-    ["bad_hands/7_kpts.png", 3.0, 42],
-]
+# fix_example_imgs = [
+#     ["bad_hands/1.jpg"],
+#     ["bad_hands/3.jpg"],
+#     # ["bad_hands/4.jpg"],
+#     ["bad_hands/5.jpg"],
+#     ["bad_hands/6.jpg"],
+#     ["bad_hands/7.jpg"],
+# ]
+# fix_example_brush = [
+#     ["bad_hands/1_composite.png"],
+#     ["bad_hands/3_composite.png"],
+#     # ["bad_hands/4_composite.png"],
+#     ["bad_hands/5_composite.png"],
+#     ["bad_hands/6_composite.png"],
+#     ["bad_hands/7_composite.png"],
+# ]
+# fix_example_kpts = [
+#     ["bad_hands/1_kpts.png", 3.0, 1224],
+#     ["bad_hands/3_kpts.png", 1.0, 42],
+#     # ["bad_hands/4_kpts.png", 2.0, 42],
+#     ["bad_hands/5_kpts.png", 3.0, 42],
+#     ["bad_hands/6_kpts.png", 3.0, 1348],
+#     ["bad_hands/7_kpts.png", 3.0, 42],
+# ]
 fix_example_all = [
     ["bad_hands/1.jpg", "bad_hands/1_composite.png", "bad_hands/1_mask.jpg", "bad_hands/1_kpts.png", 3.0, 1224],
     ["bad_hands/3.jpg", "bad_hands/3_composite.png", "bad_hands/3_mask.jpg", "bad_hands/3_kpts.png", 1.0, 42],
-    ["bad_hands/4.jpg", "bad_hands/4_composite.png", "bad_hands/4_mask.jpg", "bad_hands/4_kpts.png", 2.0, 42],
+    # ["bad_hands/4.jpg", "bad_hands/4_composite.png", "bad_hands/4_mask.jpg", "bad_hands/4_kpts.png", 2.0, 42],
    ["bad_hands/5.jpg", "bad_hands/5_composite.png", "bad_hands/5_mask.jpg", "bad_hands/5_kpts.png", 3.0, 42],
     ["bad_hands/6.jpg", "bad_hands/6_composite.png", "bad_hands/6_mask.jpg", "bad_hands/6_kpts.png", 3.0, 1348],
     ["bad_hands/7.jpg", "bad_hands/7_composite.png", "bad_hands/7_mask.jpg", "bad_hands/7_kpts.png", 3.0, 42],
 ]
-for i in range(len(fix_example_kpts)):
-    npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
-    fix_example_kpts[i].append(npy_path)
+# for i in range(len(fix_example_kpts)):
+#     npy_path = fix_example_kpts[i][0].replace("_kpts.png", ".npy")
+#     fix_example_kpts[i].append(npy_path)
 for i in range(len(fix_example_all)):
     npy_path = fix_example_all[i][3].replace("_kpts.png", ".npy")
     fix_example_all[i].append(npy_path)
 
+# #fix_examples_all table tr:first-child {
+#     display: none !important;
+# }
 custom_css = """
 .gradio-container .examples img {
     width: 240px !important;
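Each `fix_example_all` row now ends with a hidden seventh column: the path of the precomputed keypoint array, derived from the `_kpts.png` preview by the loop above (the CSS rule below hides that column in the Examples table). Reading one of those files back might look like this; the array shape is an assumption based on the 21-keypoints-per-hand convention used elsewhere in the demo:

    # Sketch: recover the keypoint array that rides along with an example row.
    import numpy as np

    kpts_png = "bad_hands/1_kpts.png"               # preview image in the table
    npy_path = kpts_png.replace("_kpts.png", ".npy")
    keypts = np.load(npy_path)                      # assumed (42, 2): right hand, then left
    print(npy_path, keypts.shape)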
@@ -1237,9 +1284,6 @@ custom_css = """
 #fix_examples_all table tr td:nth-child(7) {
     display: none !important;
 }
-#fix_examples_all table tr:first-child {
-    display: none !important;
-}
 #repose_tutorial video {
     width: 50% !important;
     display: block;
@@ -1280,6 +1324,13 @@ custom_css = """
 #gradio-app {
     flex-direction: row; !important;
 }
+#example_ref_target {
+    display: block !important;
+    width: 66.6667% !important;
+    margin-left: 0 !important;
+    margin-right: auto !important;
+    align-self: flex-start !important;
+}
 """
 ##no_wrap_row {
 # display: flex !important;
@@ -1411,6 +1462,37 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
     # config
     use_pose = gr.State(value=True)
 
+    # more options
+    with gr.Accordion(label="More options", open=False):
+        with gr.Row():
+            n_generation = gr.Slider(
+                label="Number of generations",
+                value=1,
+                minimum=1,
+                maximum=MAX_N,
+                step=1,
+                randomize=False,
+                interactive=True,
+            )
+            seed = gr.Slider(
+                label="Seed",
+                value=42,
+                minimum=0,
+                maximum=10000,
+                step=1,
+                randomize=False,
+                interactive=True,
+            )
+            cfg = gr.Slider(
+                label="Classifier free guidance scale",
+                value=2.5,
+                minimum=0.0,
+                maximum=10.0,
+                step=0.1,
+                randomize=False,
+                interactive=True,
+            )
+
     # main tabs
     with gr.Row():
         # ref column
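The three sliders move out of the main layout into a collapsed "More options" accordion (the old copies are deleted further down). `cfg` is the classifier-free guidance scale passed to sampling; in the standard CFG formulation, which the demo's sampler presumably follows, it mixes conditional and unconditional predictions:

    # Sketch: standard classifier-free guidance mixing, controlled by the cfg slider.
    import torch

    def cfg_mix(eps_uncond, eps_cond, cfg):
        # cfg = 0 ignores the condition; larger values push harder toward it
        return eps_uncond + cfg * (eps_cond - eps_uncond)

    eps_u, eps_c = torch.zeros(4), torch.ones(4)
    print(cfg_mix(eps_u, eps_c, 2.5))  # tensor([2.5000, 2.5000, 2.5000, 2.5000]); 2.5 is the slider default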
@@ -1431,7 +1513,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 layers=False,
                 crop_size="1:1",
             )
-            gr.Examples(example_ref_imgs, [ref], examples_per_page=20)
             use_mask = Toggle(label="Use mask", value=False, interactive=True)
             with gr.Accordion(label="See hand pose & mask", open=False):
                 with gr.Tab("Automatic hand keypoints"):
@@ -1550,7 +1631,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 layers=False,
                 crop_size="1:1",
             )
-            gr.Examples(example_target_imgs, [target], examples_per_page=20)
+            # gr.Examples(example_target_imgs, [target], examples_per_page=20)
             with gr.Accordion(label="See hand pose", open=False):
                 with gr.Tab("Automatic hand keypoints"):
                     target_pose = gr.Image(
@@ -1685,36 +1766,8 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             # )
             clear = gr.ClearButton(elem_id="clear_button")
 
-
-
-    with gr.Row():
-        n_generation = gr.Slider(
-            label="Number of generations",
-            value=1,
-            minimum=1,
-            maximum=MAX_N,
-            step=1,
-            randomize=False,
-            interactive=True,
-        )
-        seed = gr.Slider(
-            label="Seed",
-            value=42,
-            minimum=0,
-            maximum=10000,
-            step=1,
-            randomize=False,
-            interactive=True,
-        )
-        cfg = gr.Slider(
-            label="Classifier free guidance scale",
-            value=2.5,
-            minimum=0.0,
-            maximum=10.0,
-            step=0.1,
-            randomize=False,
-            interactive=True,
-        )
+    with gr.Row():
+        gr.Examples(example_ref_imgs, [ref, target], examples_per_page=20, elem_id="example_ref_target")
 
     # tutorial video
     with gr.Accordion("Tutorial Video of Demo 1", elem_id="accordion_bold_large_center"):
@@ -1791,7 +1844,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
     ref_flip.select(
         flip_hand,
         [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond],
-        [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond]
+        [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond, ref_is_user]
     )
 
     # target listeners
@@ -1858,7 +1911,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
     target_flip.select(
         flip_hand,
         [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
-        [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
+        [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_is_user, target_keypts, target_auto_keypts, target_manual_keypts],
     )
 
     # run listerners
@@ -2020,7 +2073,7 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
             )
             with gr.Column():
                 gr.Markdown(
-                    """<p style="text-align: center; font-size: 18px; font-weight: bold;">2.
+                    """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Select area to fix <br>(⚠️and surrounding area)</p>"""
                 )
                 # gr.Markdown(
                 #     """<p style="text-align: center;">Don't brush the entire hand!</p>"""
@@ -2055,17 +2108,6 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 # gr.Markdown(
                 #     """<p style="text-align: center;">Either get hand pose from Examples, or manually give hand pose (located at the bottom)</p>"""
                 # )
-                fix_kp_all = gr.Image(
-                    type="numpy",
-                    label="Target Hand Pose",
-                    show_label=False,
-                    height=LENGTH,
-                    width=LENGTH,
-                    interactive=False,
-                    visible=True,
-                    sources=(),
-                    image_mode="RGBA"
-                )
                 # with gr.Accordion(open=True):
                 #     fix_ex_kpts = gr.Examples(
                 #         fix_example_kpts,
@@ -2074,68 +2116,79 @@ with gr.Blocks(css=custom_css, theme="soft") as demo:
                 #     postprocess=False,
                 #     elem_id="kpts_examples"
                 # )
-                with gr.Accordion("[Your own image] Manually give hand pose", open=False, elem_id="accordion_bold"):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    )
-                    fix_reset_right = gr.Button(
-                        value="Reset", interactive=False, visible=False
-                    )
-                    fix_kp_l_info = gr.Markdown(
-                        """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>left hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
-                        visible=False
+                # with gr.Accordion("[Your own image] Manually give hand pose", open=False, elem_id="accordion_bold"):
+                gr.Markdown(
+                    """<p style="text-align: center;">① Tell us if this is right, left, or both hands, if it wasn't from Example</p>"""
+                )
+                fix_checkbox = gr.CheckboxGroup(
+                    ["Right hand", "Left hand"],
+                    show_label=False,
+                    interactive=False,
+                )
+                fix_kp_r_info = gr.Markdown(
+                    """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
+                    visible=False
+                )
+                fix_kp_right = gr.Image(
+                    type="numpy",
+                    label="Keypoint Selection (right hand)",
+                    show_label=True,
+                    height=LENGTH,
+                    width=LENGTH,
+                    interactive=False,
+                    visible=False,
+                    sources=[],
+                )
+                with gr.Row():
+                    fix_undo_right = gr.Button(
+                        value="Undo", interactive=False, visible=False
                     )
-
-
-                        label="Keypoint Selection (left hand)",
-                        show_label=True,
-                        height=LENGTH,
-                        width=LENGTH,
-                        interactive=False,
-                        visible=False,
-                        sources=[],
+                    fix_reset_right = gr.Button(
+                        value="Reset", interactive=False, visible=False
                     )
-
-
-
-
-
-
-
-
-
+                fix_kp_l_info = gr.Markdown(
+                    """<p style="text-align: center;">② Click 21 keypoints on the image to provide the target hand pose of <b>left hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
+                    visible=False
+                )
+                fix_kp_left = gr.Image(
+                    type="numpy",
+                    label="Keypoint Selection (left hand)",
+                    show_label=True,
+                    height=LENGTH,
+                    width=LENGTH,
+                    interactive=False,
+                    visible=False,
+                    sources=[],
+                )
+                with gr.Row():
+                    fix_undo_left = gr.Button(
+                        value="Undo", interactive=False, visible=False
                     )
-
-                        value="
-                        type="numpy",
-                        show_label=False,
-                        height=LENGTH // 2,
-                        width=LENGTH // 2,
-                        interactive=False,
+                    fix_reset_left = gr.Button(
+                        value="Reset", interactive=False, visible=False
                     )
+                fix_kp_all = gr.Image(
+                    type="numpy",
+                    label="Keypoint Selection (from Example)",
+                    show_label=True,
+                    height=LENGTH,
+                    width=LENGTH,
+                    interactive=False,
+                    visible=True,
+                    sources=(),
+                    image_mode="RGBA"
+                )
+                gr.Markdown(
+                    """<p style="text-align: left; font-weight: bold; ">OpenPose keypoints convention</p>"""
+                )
+                fix_openpose = gr.Image(
+                    value="openpose.png",
+                    type="numpy",
+                    show_label=False,
+                    height=LENGTH // 2,
+                    width=LENGTH // 2,
+                    interactive=False,
+                )
 
         # result column
         with gr.Column():
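The rebuilt widgets collect 21 clicks per hand following the OpenPose hand convention referenced in the instructions (wrist first, then four joints per finger), and `fix_openpose` displays that convention as an image. For reference, the standard 21-keypoint skeleton, with illustrative drawing code:

    # Sketch: OpenPose 21-keypoint hand skeleton (0 = wrist; thumb 1-4,
    # index 5-8, middle 9-12, ring 13-16, pinky 17-20).
    import cv2

    HAND_EDGES = [
        (0, 1), (1, 2), (2, 3), (3, 4),         # thumb
        (0, 5), (5, 6), (6, 7), (7, 8),         # index
        (0, 9), (9, 10), (10, 11), (11, 12),    # middle
        (0, 13), (13, 14), (14, 15), (15, 16),  # ring
        (0, 17), (17, 18), (18, 19), (19, 20),  # pinky
    ]

    def draw_skeleton(canvas, kpts):
        # canvas: HxWx3 uint8 image; kpts: (21, 2) clicked points
        for a, b in HAND_EDGES:
            cv2.line(canvas, tuple(map(int, kpts[a])), tuple(map(int, kpts[b])), (0, 255, 0), 2)
        return canvas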
sample_images/sample11.jpg
CHANGED
(binary change: image re-uploaded; stored with Git LFS)
sbatch/sbatch_demo.sh
CHANGED
@@ -4,8 +4,10 @@
 #SBATCH -J demo_foundhand
 
 # partition
-#SBATCH
-
+#SBATCH -p 3090-gcondo --gres=gpu:1
+
+##SBATCH --partition=ssrinath-gcondo --gres=gpu:1 --gres-flags=enforce-binding
+##SBATCH --account=ssrinath-gcondo
 
 # ensures all allocated cores are on the same node
 #SBATCH -N 1
@@ -35,4 +37,4 @@ conda activate handdiff
 cd $HOME/hdd/FoundHand_demo
 echo Directory is `pwd`
 
-python -u app.py
+python -u app.py --not_hf --model_epoch 4
sbatch/sbatch_demo2.sh
CHANGED
@@ -37,4 +37,4 @@ conda activate handdiff
 cd $HOME/hdd/FoundHand_demo
 echo Directory is `pwd`
 
-python -u app.py
+python -u app.py --not_hf --model_epoch 6
|