chaerinmin committed
Commit 8f16411 · 1 Parent(s): 33b5165
Files changed (2)
  1. app.py +934 -861
  2. diffusion/gaussian_diffusion.py +15 -10
app.py CHANGED
@@ -30,7 +30,7 @@ LENGTH = 480
 placeholder = cv2.cvtColor(cv2.imread("placeholder.png"), cv2.COLOR_BGR2RGB)
 NEW_MODEL = True
 MODEL_EPOCH = 6
-REF_POSE_MASK = True
+NO_MASK = False
 HF = False
 pre_device = "cpu" if HF else "cuda"
 spaces_60_fn = spaces.GPU(duration=60) if HF else (lambda f: f)
@@ -305,7 +305,10 @@ def get_ref_anno(img, keypts):
         gr.Info("Number of left hand keypoints should be either 0 or 21.")
         return None, None, None, gr.update(), gr.update()
     keypts = np.concatenate(keypts, axis=0)
-    if REF_POSE_MASK:
+    if NO_MASK:
+        hand_mask = np.zeros_like(img[:,:, 0])
+        ref_pose = visualize_hand(keypts, img)
+    else:
         sam_predictor.set_image(img)
         if keypts[0].sum() != 0 and keypts[21].sum() != 0:
             # input_point = np.array([keypts[0], keypts[21]])
@@ -335,9 +338,7 @@ def get_ref_anno(img, keypts):
         hand_mask = masks[0]
         masked_img = img * hand_mask[..., None] + 255 * (1 - hand_mask[..., None])
         ref_pose = visualize_hand(keypts, masked_img)
-    else:
-        hand_mask = np.zeros_like(img[:,:, 0])
-        ref_pose = np.zeros_like(img)
+
     def make_ref_cond(
         img,
         keypts,
@@ -390,8 +391,7 @@ def get_ref_anno(img, keypts):
     print(f"autoencoder encoder before operating dtype: {next(autoencoder.encoder.parameters()).dtype}")
     latent = opts.latent_scaling_factor * autoencoder.encode(image).sample()
     print(f"latent.max(): {latent.max()}, latent.min(): {latent.min()}")
-    if not REF_POSE_MASK:
-        heatmaps = torch.zeros_like(heatmaps)
+    if NO_MASK:
         mask = torch.zeros_like(mask)
     print(f"heatmaps.max(): {heatmaps.max()}, heatmaps.min(): {heatmaps.min()}")
     print(f"mask.max(): {mask.max()}, mask.min(): {mask.min()}")
@@ -615,7 +615,7 @@ def sample_diff(ref_cond, target_cond, target_keypts, num_gen, seed, cfg):
         z,
         clip_denoised=False,
         model_kwargs=model_kwargs,
-        progress=True,
+        progress=gr.Progress(),
         device=device,
     ).chunk(2)
     sampled_images = autoencoder.decode(samples / opts.latent_scaling_factor)
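The .chunk(2) on the sampler output reflects the usual classifier-free-guidance batching: conditional and unconditional copies are sampled in one batch and split afterwards. A small, generic illustration of that convention (not the repository's model code):

```python
import torch

def cfg_combine(eps_cond, eps_uncond, cfg_scale):
    """Classifier-free guidance: push the conditional prediction away from the
    unconditional one by cfg_scale."""
    return eps_uncond + cfg_scale * (eps_cond - eps_uncond)

# Both halves run in a single batch, then the result is split in two:
z = torch.randn(2, 4, 32, 32)        # [conditional | unconditional] along dim 0
eps = torch.randn_like(z)            # stand-in for a model forward pass
eps_cond, eps_uncond = eps.chunk(2)  # mirrors the .chunk(2) in the hunk above
guided = cfg_combine(eps_cond, eps_uncond, cfg_scale=3.0)
```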
@@ -818,7 +818,7 @@ def sample_inpaint(
         z,
         clip_denoised=False,
         model_kwargs=model_kwargs,
-        progress=True,
+        progress=gr.Progress(),
         device=z.device,
         jump_length=jump_length,
         jump_n_sample=jump_n_sample,
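Both sampling calls now receive progress=gr.Progress() instead of progress=True; the matching edit to diffusion/gaussian_diffusion.py (+15 -10) is not shown in this view. A hedged sketch of how a sampling loop can accept either form (gr.Progress exposes a tqdm-style wrapper) might look like the following; the function name and arguments are placeholders, not the repository's API.

```python
import torch

def p_sample_loop_sketch(model, shape, num_timesteps, progress=False, device="cpu"):
    """Illustrative denoising loop that reports progress either via tqdm
    (progress=True) or via a Gradio progress bar (progress=gr.Progress())."""
    timesteps = range(num_timesteps - 1, -1, -1)
    if progress is True:
        from tqdm.auto import tqdm
        timesteps = tqdm(timesteps)
    elif progress:  # assume a gr.Progress-like object with a .tqdm() wrapper
        timesteps = progress.tqdm(timesteps, desc="sampling")

    x = torch.randn(shape, device=device)
    for t in timesteps:
        with torch.no_grad():
            x = model(x, torch.full((shape[0],), t, device=device))
    return x
```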
@@ -1160,7 +1160,7 @@ custom_css = """
 #repose-tab-button {
     font-size: 18px !important;
     font-weight: bold !important;
-    background-color: #90EE90 !important;
+    background-color: #FFB6C1 !important;
 }
 #kpts_examples table tr th:nth-child(2),
 #kpts_examples table tr td:nth-child(2) {
@@ -1174,6 +1174,14 @@ custom_css = """
 #kpts_examples table tr td:nth-child(4) {
     display: none !important;
 }
+#fix_examples_all table tr th:nth-child(2),
+#fix_examples_all table tr td:nth-child(2) {
+    display: none !important;
+}
+#fix_examples_all table tr th:nth-child(3),
+#fix_examples_all table tr td:nth-child(3) {
+    display: none !important;
+}
 #fix_examples_all table tr th:nth-child(4),
 #fix_examples_all table tr td:nth-child(4) {
     display: none !important;
@@ -1186,12 +1194,75 @@ custom_css = """
 #fix_examples_all table tr td:nth-child(6) {
     display: none !important;
 }
+#fix_examples_all table tr:first-child {
+    display: none !important;
+}
 #repose_tutorial video {
-    width: 70% !important;
+    width: 50% !important;
     display: block;
     margin: 0 auto;
     padding: 0;
 }
+#accordion_bold button span {
+    font-weight: bold !important;
+}
+#accordion_bold_large button span {
+    font-weight: bold !important;
+    font-size: 20px !important;
+}
+#accordion_bold_large_center button span {
+    font-weight: bold !important;
+    font-size: 20px !important;
+}
+#accordion_bold_large_center button {
+    text-align: center !important;
+    margin: 0 auto !important;
+    display: block !important;
+}
+#fix_examples_all table tbody {
+    display: flex !important;
+    flex-direction: row;
+    flex-wrap: nowrap;
+}
+
+#fix_examples_all table tr {
+    display: flex !important;
+    align-items: center;
+}
+
+#fix_examples_all table tr th,
+#fix_examples_all table tr td {
+    display: table-cell;
+}
+#gradio-app {
+    flex-direction: row; !important;
+}
+"""
+##no_wrap_row {
+#     display: flex !important;
+#     flex-direction: row !important;
+#     flex-wrap: nowrap !important;
+# }
+##no_wrap_row > div {
+#     flex: 1 1 auto !important;
+#     min-width: 0;
+# }
+
+button_css = """
+#clear_button {
+    background-color: #f44336 !important;
+}
+#clear_button:hover {
+    background-color: #d32f2f !important;
+}
+#run_button {
+    background-color: #4CAF50 !important;
+    cursor: pointer;
+    transition: background-color 0.3s ease;
+}
+#run_button:hover {
+    background-color: #388E3C !important;
+}
 """
 
 tut1_custom = f"""
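The new rules target components by their elem_id (e.g. #run_button, #clear_button, #accordion_bold_large_center), and button_css is injected separately via gr.HTML later in the diff. A rough, self-contained sketch of that pattern (component names here are illustrative, not the app's full layout):

```python
import gradio as gr

demo_css = """
#run_button { background-color: #4CAF50 !important; }
#clear_button { background-color: #f44336 !important; }
"""

with gr.Blocks(css=demo_css) as demo:
    # Extra CSS can also be injected at runtime, as app.py now does with button_css.
    gr.HTML("<style>#run_button:hover { background-color: #388E3C !important; }</style>")
    run = gr.Button("Run", elem_id="run_button")      # styled by #run_button
    clear = gr.ClearButton(elem_id="clear_button")    # styled by #clear_button

# demo.launch()  # uncomment to try it locally
```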
@@ -1245,9 +1316,9 @@ _HEADER_ = '''
 <a href='https://ivl.cs.brown.edu/research/foundhand.html' target='_blank' class="link-spacing">Project Page</a>
 <a href='' target='_blank' class="link-spacing">Code (Coming in June)</a>
 </h3>
-<p>Below are two important abilities of our model. First, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix. Second, we can <b>repose hand</b> given two hand images - one is the image to edit, and the other one provides target hand pose.</p>
 </div>
 '''
+# <p>Below are two important abilities of our model. First, we can automatically <b>fix malformed hand images</b>, following the user-provided target hand pose and area to fix. Second, we can <b>repose hand</b> given two hand images - one is the image to edit, and the other one provides target hand pose.</p>
 
 _CITE_ = r"""
 <pre style="white-space: pre-wrap; margin: 0;">
@@ -1266,1004 +1337,1006 @@ Part of this work was done during Kefan (Arthur) Chen’s internship at Meta Rea
1266
  """
1267
 
1268
  with gr.Blocks(css=custom_css, theme="soft") as demo:
 
1269
  gr.Markdown(_HEADER_)
1270
- with gr.Tab("Demo 1. Malformed Hand Correction", elem_id="fix-tab"):
1271
- fix_inpaint_mask = gr.State(value=None)
1272
- fix_original = gr.State(value=None)
1273
- fix_crop_coord = gr.State(value=None)
1274
- fix_img = gr.State(value=None)
1275
- fix_kpts = gr.State(value=None)
1276
- fix_kpts_path = gr.Textbox(visible=False)
1277
- fix_kpts_np = gr.State(value=None)
1278
- fix_ref_cond = gr.State(value=None)
1279
- fix_target_cond = gr.State(value=None)
1280
- fix_latent = gr.State(value=None)
1281
- fix_inpaint_latent = gr.State(value=None)
1282
 
1283
- # tutorial video
1284
- with gr.Accordion():
1285
- gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 1</p>""")
1286
- with gr.Row(variant="panel"):
1287
- with gr.Column():
1288
- # gr.Video(
1289
- # "how_to_videos/subtitled_fix_hands_custom.mp4",
1290
- # label="Using your own image",
1291
- # autoplay=True,
1292
- # loop=True,
1293
- # show_label=True,
1294
- # )
1295
- gr.HTML(tut1_custom)
1296
- with gr.Column():
1297
- # gr.Video(
1298
- # "how_to_videos/subtitled_fix_hands_example.mp4",
1299
- # label="Using our example image",
1300
- # autoplay=True,
1301
- # loop=True,
1302
- # show_label=True,
1303
- # )
1304
- gr.HTML(tut1_example)
1305
-
1306
- # more options
1307
- with gr.Accordion(label="More options", open=False):
1308
- gr.Markdown(
1309
- "⚠️ Currently, Number of generation > 1 could lead to out-of-memory"
1310
- )
1311
- with gr.Row():
1312
- fix_n_generation = gr.Slider(
1313
- label="Number of generations",
1314
- value=1,
1315
- minimum=1,
1316
- maximum=FIX_MAX_N,
1317
- step=1,
1318
- randomize=False,
1319
- interactive=True,
1320
- )
1321
- fix_seed = gr.Slider(
1322
- label="Seed",
1323
- value=42,
1324
- minimum=0,
1325
- maximum=10000,
1326
- step=1,
1327
- randomize=False,
1328
- interactive=True,
1329
- )
1330
- fix_cfg = gr.Slider(
1331
- label="Classifier free guidance scale",
1332
- value=3.0,
1333
- minimum=0.0,
1334
- maximum=10.0,
1335
- step=0.1,
1336
- randomize=False,
1337
- interactive=True,
1338
- )
1339
- fix_quality = gr.Slider(
1340
- label="Quality",
1341
- value=10,
1342
- minimum=1,
1343
- maximum=10,
1344
- step=1,
1345
- randomize=False,
1346
- interactive=True,
1347
- )
1348
-
1349
  # main tabs
1350
  with gr.Row():
1351
- # crop & brush
1352
  with gr.Column():
1353
  gr.Markdown(
1354
- """<p style="text-align: center; font-size: 18px; font-weight: bold;">1. Upload a malformed hand image 📥</p>"""
1355
- )
1356
- gr.Markdown(
1357
- """<p style="text-align: center;">Optionally crop the image.<br>(Click <b>top left</b> and <b>bottom right</b> of your desired bounding box around the hand)</p>"""
1358
- )
1359
- fix_crop = gr.Image(
1360
- type="numpy",
1361
- sources=["upload", "webcam", "clipboard"],
1362
- label="Input Image",
1363
- show_label=True,
1364
- height=LENGTH,
1365
- width=LENGTH,
1366
- interactive=True,
1367
- visible=True,
1368
- image_mode="RGBA"
1369
- )
1370
- gr.Markdown(
1371
- """<p style="text-align: center;">💡 If you crop, the model can focus on more details of the cropped area. Square crops might work better than rectangle crops.</p>"""
1372
  )
1373
- # fix_example = gr.Examples(
1374
- # fix_example_imgs,
1375
- # inputs=[fix_crop],
1376
- # examples_per_page=20,
1377
  # )
1378
- with gr.Column():
1379
- gr.Markdown(
1380
- """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Brush wrong finger and its surrounding area</p>"""
1381
- )
1382
- gr.Markdown(
1383
- """<p style="text-align: center;">Don't brush the entire hand!</p>"""
1384
- )
1385
- fix_ref = gr.ImageEditor(
1386
  type="numpy",
1387
- label="Image Brushing",
1388
- sources=(),
1389
  show_label=True,
1390
  height=LENGTH,
1391
  width=LENGTH,
 
1392
  layers=False,
1393
- transforms=("brush"),
1394
- brush=gr.Brush(
1395
- colors=["rgb(255, 255, 255)"], default_size=20
1396
- ), # 204, 50, 50
1397
- image_mode="RGBA",
1398
- container=False,
1399
- interactive=True,
1400
- )
1401
- # fix_ex_brush = gr.Examples(
1402
- # fix_example_brush,
1403
- # inputs=[fix_ref],
1404
- # examples_per_page=20,
1405
- # )
1406
-
1407
- # keypoint selection
1408
- with gr.Column():
1409
- gr.Markdown(
1410
- """<p style="text-align: center; font-size: 18px; font-weight: bold;">3. Target hand pose</p>"""
1411
- )
1412
- gr.Markdown(
1413
- """<p style="text-align: center;">Either get hand pose from Examples, or manually give hand pose (located at the bottom)</p>"""
1414
- )
1415
- fix_kp_all = gr.Image(
1416
- type="numpy",
1417
- label="Target Hand Pose",
1418
- show_label=True,
1419
- height=LENGTH,
1420
- width=LENGTH,
1421
- interactive=False,
1422
- visible=True,
1423
- sources=(),
1424
- image_mode="RGBA"
1425
  )
1426
- # with gr.Accordion(open=True):
1427
- # fix_ex_kpts = gr.Examples(
1428
- # fix_example_kpts,
1429
- # inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
1430
- # examples_per_page=20,
1431
- # postprocess=False,
1432
- # elem_id="kpts_examples"
1433
- # )
1434
- with gr.Accordion("[Custom data] Manually give hand pose", open=False):
1435
- gr.Markdown(
1436
- """<p style="text-align: center;">&#9312; Tell us if this is right, left, or both hands</p>"""
1437
- )
1438
- fix_checkbox = gr.CheckboxGroup(
1439
- ["Right hand", "Left hand"],
1440
- show_label=False,
1441
- interactive=False,
1442
- )
1443
- fix_kp_r_info = gr.Markdown(
1444
- """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
1445
- visible=False
1446
- )
1447
- fix_kp_right = gr.Image(
1448
- type="numpy",
1449
- label="Keypoint Selection (right hand)",
1450
- show_label=True,
1451
- height=LENGTH,
1452
- width=LENGTH,
1453
- interactive=False,
1454
- visible=False,
1455
- sources=[],
1456
- )
1457
- with gr.Row():
1458
- fix_undo_right = gr.Button(
1459
- value="Undo", interactive=False, visible=False
1460
  )
1461
- fix_reset_right = gr.Button(
1462
- value="Reset", interactive=False, visible=False
 
 
 
1463
  )
1464
- fix_kp_l_info = gr.Markdown(
1465
- """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>left hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
1466
- visible=False
1467
- )
1468
- fix_kp_left = gr.Image(
1469
- type="numpy",
1470
- label="Keypoint Selection (left hand)",
1471
- show_label=True,
1472
- height=LENGTH,
1473
- width=LENGTH,
1474
- interactive=False,
1475
- visible=False,
1476
- sources=[],
1477
- )
1478
- with gr.Row():
1479
- fix_undo_left = gr.Button(
1480
- value="Undo", interactive=False, visible=False
1481
  )
1482
- fix_reset_left = gr.Button(
1483
- value="Reset", interactive=False, visible=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1484
  )
1485
  gr.Markdown(
1486
- """<p style="text-align: left; font-weight: bold; ">OpenPose keypoints convention</p>"""
1487
  )
1488
- fix_openpose = gr.Image(
1489
- value="openpose.png",
1490
- type="numpy",
1491
- show_label=False,
1492
- height=LENGTH // 2,
1493
- width=LENGTH // 2,
1494
- interactive=False,
1495
  )
1496
 
1497
- # result column
1498
  with gr.Column():
1499
  gr.Markdown(
1500
- """<p style="text-align: center; font-size: 18px; font-weight: bold;">4. Press &quot;Run&quot; to get the corrected hand image 🎯</p>"""
1501
  )
1502
- fix_vis_mask32 = gr.Image(
1503
  type="numpy",
1504
- label=f"Visualized {opts.latent_size} Inpaint Mask",
1505
  show_label=True,
1506
- height=opts.latent_size,
1507
- width=opts.latent_size,
1508
- interactive=False,
1509
- visible=False,
 
1510
  )
1511
- fix_run = gr.Button(value="Run", interactive=False)
1512
- with gr.Accordion(label="Visualized (256, 256) resized, brushed image", open=False):
1513
- fix_vis_mask256 = gr.Image(
1514
- type="numpy",
1515
- show_label=False,
1516
- height=opts.image_size,
1517
- width=opts.image_size,
1518
- interactive=False,
1519
- visible=True,
1520
  )
1521
  gr.Markdown(
1522
- """<p style="text-align: center;">⚠️ >3min and ~24GB per generation</p>"""
1523
  )
1524
- fix_result_original = gr.Gallery(
1525
  type="numpy",
1526
- label="Results on original input",
1527
  show_label=True,
1528
  height=LENGTH,
1529
  min_width=LENGTH,
1530
- columns=FIX_MAX_N,
1531
  interactive=False,
1532
  preview=True,
1533
  )
1534
- with gr.Accordion(label="Results of cropped area / Results with pose", open=False):
1535
- fix_result = gr.Gallery(
1536
- type="numpy",
1537
- label="Results",
1538
- show_label=True,
1539
- height=LENGTH,
1540
- min_width=LENGTH,
1541
- columns=FIX_MAX_N,
1542
- interactive=False,
1543
- preview=True,
1544
- )
1545
- fix_result_pose = gr.Gallery(
1546
  type="numpy",
1547
  label="Results Pose",
1548
  show_label=True,
1549
  height=LENGTH,
1550
  min_width=LENGTH,
1551
- columns=FIX_MAX_N,
1552
  interactive=False,
1553
  preview=True,
1554
  )
1555
- gr.Markdown(
1556
- """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
1557
- )
1558
- fix_clear = gr.ClearButton()
1559
-
1560
- gr.Examples(
1561
- fix_example_all,
1562
- inputs=[fix_crop, fix_ref, fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
1563
- examples_per_page=20,
1564
- postprocess=False,
1565
- elem_id="fix_examples_all",
1566
- )
1567
 
1568
- # listeners
1569
- fix_crop.change(stash_original, fix_crop, fix_original) # fix_original: (real_H, real_W, 3)
1570
- fix_crop.change(stay_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
1571
- fix_crop.select(process_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref, fix_crop])
1572
- fix_ref.change(visualize_ref, [fix_ref], [fix_img, fix_inpaint_mask])
1573
- fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
1574
- fix_img.change(lambda x: x, [fix_img], [fix_kp_left])
1575
- fix_ref.change(
1576
- enable_component, [fix_ref, fix_ref], fix_checkbox
1577
- )
1578
- fix_inpaint_mask.change(
1579
- enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_kp_right
1580
- )
1581
- fix_inpaint_mask.change(
1582
- enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_undo_right
1583
  )
1584
- fix_inpaint_mask.change(
1585
- enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_reset_right
1586
  )
1587
- fix_inpaint_mask.change(
1588
- enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_kp_left
1589
  )
1590
- fix_inpaint_mask.change(
1591
- enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_undo_left
1592
  )
1593
- fix_inpaint_mask.change(
1594
- enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_reset_left
1595
  )
1596
- fix_inpaint_mask.change(
1597
- enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_run
1598
  )
1599
- fix_checkbox.select(
1600
  set_visible,
1601
- [fix_checkbox, fix_kpts, fix_img, fix_kp_right, fix_kp_left],
1602
  [
1603
- fix_kpts,
1604
- fix_kp_right,
1605
- fix_kp_left,
1606
- fix_kp_right,
1607
- fix_undo_right,
1608
- fix_reset_right,
1609
- fix_kp_left,
1610
- fix_undo_left,
1611
- fix_reset_left,
1612
- fix_kp_r_info,
1613
- fix_kp_l_info,
1614
- ],
1615
  )
1616
- fix_kp_right.select(
1617
- get_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts] # fix_img: (real_cropped_H, real_cropped_W, 3)
1618
  )
1619
- fix_undo_right.click(
1620
- undo_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]
1621
  )
1622
- fix_reset_right.click(
1623
- reset_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]
1624
  )
1625
- fix_kp_left.select(
1626
- get_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
1627
  )
1628
- fix_undo_left.click(
1629
- undo_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
1630
  )
1631
- fix_reset_left.click(
1632
- reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
1633
  )
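The keypoint widgets above are wired to get_kps / undo_kps / reset_kps, whose bodies are outside this diff. A simplified sketch of what such a click handler typically does (append the clicked pixel and redraw the overlay) is shown below; the drawing is a crude stand-in, not the app's visualize_hand.

```python
import numpy as np
import gradio as gr

def get_kps_sketch(img, keypts, hand, evt: gr.SelectData):
    """Append the clicked (x, y) to the running keypoint list for `hand`
    and return an image with the collected points drawn on top."""
    keypts = [] if keypts is None else list(keypts)
    x, y = evt.index                      # pixel coordinates of the click
    keypts.append((hand, x, y))

    vis = np.array(img, copy=True)
    for _, px, py in keypts:
        vis[max(py - 2, 0):py + 3, max(px - 2, 0):px + 3] = (255, 0, 0)  # red dot
    return vis, keypts
```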
1634
- fix_kpts_path.change(read_kpts, fix_kpts_path, fix_kpts_np)
1635
- fix_inpaint_mask.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
1636
- fix_kpts_np.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
1637
- fix_run.click(
1638
- ready_sample,
1639
- [fix_ref, fix_inpaint_mask, fix_kpts, fix_kpts_np],
1640
- [
1641
- fix_ref_cond,
1642
- fix_target_cond,
1643
- fix_latent,
1644
- fix_inpaint_latent,
1645
- fix_kpts_np,
1646
- fix_vis_mask32,
1647
- fix_vis_mask256,
1648
- ],
1649
  )
1650
- fix_inpaint_latent.change(
1651
- sample_inpaint,
1652
- [
1653
- fix_ref_cond,
1654
- fix_target_cond,
1655
- fix_latent,
1656
- fix_inpaint_latent,
1657
- fix_kpts_np,
1658
- fix_original,
1659
- fix_crop_coord,
1660
- fix_n_generation,
1661
- fix_seed,
1662
- fix_cfg,
1663
- fix_quality,
1664
- ],
1665
- [fix_result, fix_result_pose, fix_result_original],
1666
  )
1667
- fix_clear.click(
1668
- fix_clear_all,
1669
  [],
1670
  [
1671
- fix_crop,
1672
- fix_crop_coord,
1673
- fix_ref,
1674
- fix_checkbox,
1675
- fix_kp_all,
1676
- fix_kp_right,
1677
- fix_kp_left,
1678
- fix_result,
1679
- fix_result_pose,
1680
- fix_result_original,
1681
- fix_inpaint_mask,
1682
- fix_original,
1683
- fix_img,
1684
- fix_vis_mask32,
1685
- fix_vis_mask256,
1686
- fix_kpts,
1687
- fix_kpts_np,
1688
- fix_ref_cond,
1689
- fix_target_cond,
1690
- fix_latent,
1691
- fix_inpaint_latent,
1692
- fix_kpts_path,
1693
- fix_n_generation,
1694
- fix_seed,
1695
- fix_cfg,
1696
- fix_quality,
1697
  ],
1698
  )
1699
- fix_clear.click(
1700
- fix_set_unvisible,
1701
  [],
1702
  [
1703
- fix_kp_right,
1704
- fix_kp_left,
1705
- fix_kp_r_info,
1706
- fix_kp_l_info,
1707
- fix_undo_left,
1708
- fix_undo_right,
1709
- fix_reset_left,
1710
- fix_reset_right
1711
  ]
1712
  )
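Many of the listeners above funnel into enable_component, which gates a control on two pieces of state. Its implementation is not part of this diff; a plausible minimal version (an assumption, not the repository's code) is:

```python
import gradio as gr

def enable_component_sketch(a, b):
    """Enable the target component only when both gating inputs are populated."""
    return gr.update(interactive=(a is not None and b is not None))
```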
1713
-
1714
- with gr.Tab("Demo 2. Repose Hands", elem_id="repose-tab"):
1715
- # ref states
1716
- dump = gr.State(value=None)
1717
- ref_img = gr.State(value=None)
1718
- ref_im_raw = gr.State(value=None)
1719
- ref_kp_raw = gr.State(value=0)
1720
- ref_is_user = gr.State(value=True)
1721
- ref_kp_got = gr.State(value=None)
1722
- ref_manual_cond = gr.State(value=None)
1723
- ref_auto_cond = gr.State(value=None)
1724
- ref_cond = gr.State(value=None)
1725
-
1726
- # target states
1727
- target_img = gr.State(value=None)
1728
- target_im_raw = gr.State(value=None)
1729
- target_kp_raw = gr.State(value=0)
1730
- target_is_user = gr.State(value=True)
1731
- target_kp_got = gr.State(value=None)
1732
- target_manual_keypts = gr.State(value=None)
1733
- target_auto_keypts = gr.State(value=None)
1734
- target_keypts = gr.State(value=None)
1735
- target_manual_cond = gr.State(value=None)
1736
- target_auto_cond = gr.State(value=None)
1737
- target_cond = gr.State(value=None)
1738
-
1739
- # tutorial video
1740
- with gr.Accordion(""):
1741
- gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;">Tutorial Videos of Demo 2</p>""")
1742
- with gr.Row(variant="panel", elem_id="repose_tutorial"):
1743
- with gr.Column():
1744
- # gr.Video(
1745
- # "how_to_videos/subtitled_repose_hands.mp4",
1746
- # label="Tutorial",
1747
- # autoplay=True,
1748
- # loop=True,
1749
- # show_label=True,
1750
- # )
1751
- gr.HTML(tut2_example)
1752
-
1753
  # main tabs
1754
  with gr.Row():
1755
- # ref column
1756
  with gr.Column():
1757
  gr.Markdown(
1758
- """<p style="text-align: center; font-size: 18px; font-weight: bold;">1. Upload a hand image to repose 📥</p>"""
1759
- )
1760
- gr.Markdown(
1761
- """<p style="text-align: center;">Optionally crop the image</p>"""
1762
  )
1763
- ref = gr.ImageEditor(
1764
  type="numpy",
1765
- label="Reference",
 
1766
  show_label=True,
1767
  height=LENGTH,
1768
  width=LENGTH,
1769
- brush=False,
1770
- layers=False,
1771
- crop_size="1:1",
1772
  )
1773
- gr.Examples(example_ref_imgs, [ref], examples_per_page=20)
1774
- with gr.Accordion(label="See hand pose and more options", open=False):
1775
- with gr.Tab("Automatic hand keypoints"):
1776
- ref_pose = gr.Image(
1777
- type="numpy",
1778
- label="Reference Pose",
1779
- show_label=True,
1780
- height=LENGTH,
1781
- width=LENGTH,
1782
- interactive=False,
1783
- )
1784
- ref_use_auto = gr.Button(value="Click here to use automatic, not manual", interactive=False, visible=True)
1785
- with gr.Tab("Manual hand keypoints"):
1786
- ref_manual_checkbox_info = gr.Markdown(
1787
- """<p style="text-align: center;"><b>Step 1.</b> Tell us if this is right, left, or both hands.</p>""",
1788
- visible=True,
1789
- )
1790
- ref_manual_checkbox = gr.CheckboxGroup(
1791
- ["Right hand", "Left hand"],
1792
- show_label=False,
1793
- visible=True,
1794
- interactive=True,
1795
- )
1796
- ref_manual_kp_r_info = gr.Markdown(
1797
- """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>right</b> hand. See \"OpenPose Keypoint Convention\" for guidance.</p>""",
1798
- visible=False,
1799
- )
1800
- ref_manual_kp_right = gr.Image(
1801
- type="numpy",
1802
- label="Keypoint Selection (right hand)",
1803
- show_label=True,
1804
- height=LENGTH,
1805
- width=LENGTH,
1806
- interactive=False,
1807
- visible=False,
1808
- sources=[],
1809
- )
1810
- with gr.Row():
1811
- ref_manual_undo_right = gr.Button(
1812
- value="Undo", interactive=True, visible=False
1813
- )
1814
- ref_manual_reset_right = gr.Button(
1815
- value="Reset", interactive=True, visible=False
1816
- )
1817
- ref_manual_kp_l_info = gr.Markdown(
1818
- """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>left</b> hand. See \"OpenPose keypoint convention\" for guidance.</p>""",
1819
- visible=False
1820
- )
1821
- ref_manual_kp_left = gr.Image(
1822
- type="numpy",
1823
- label="Keypoint Selection (left hand)",
1824
- show_label=True,
1825
- height=LENGTH,
1826
- width=LENGTH,
1827
- interactive=False,
1828
- visible=False,
1829
- sources=[],
1830
- )
1831
- with gr.Row():
1832
- ref_manual_undo_left = gr.Button(
1833
- value="Undo", interactive=True, visible=False
1834
- )
1835
- ref_manual_reset_left = gr.Button(
1836
- value="Reset", interactive=True, visible=False
1837
- )
1838
- ref_manual_done_info = gr.Markdown(
1839
- """<p style="text-align: center;"><b>Step 3.</b> Hit \"Done\" button to confirm.</p>""",
1840
- visible=False,
1841
- )
1842
- ref_manual_done = gr.Button(value="Done", interactive=True, visible=False)
1843
- ref_manual_pose = gr.Image(
1844
- type="numpy",
1845
- label="Reference Pose",
1846
- show_label=True,
1847
- height=LENGTH,
1848
- width=LENGTH,
1849
- interactive=False,
1850
- visible=False
1851
- )
1852
- ref_use_manual = gr.Button(value="Click here to use manual, not automatic", interactive=True, visible=False)
1853
- ref_manual_instruct = gr.Markdown(
1854
- value="""<p style="text-align: left; font-weight: bold; ">OpenPose Keypoints Convention</p>""",
1855
- visible=True
1856
- )
1857
- ref_manual_openpose = gr.Image(
1858
- value="openpose.png",
1859
- type="numpy",
1860
- show_label=False,
1861
- height=LENGTH // 2,
1862
- width=LENGTH // 2,
1863
- interactive=False,
1864
- visible=True
1865
- )
1866
- gr.Markdown(
1867
- """<p style="text-align: center;">Optionally flip the hand</p>"""
1868
- )
1869
- ref_flip = gr.Checkbox(
1870
- value=False, label="Flip Handedness (Reference)", interactive=False
1871
- )
1872
-
1873
- # target column
1874
- with gr.Column():
1875
  gr.Markdown(
1876
- """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Upload a hand image for target hand pose 📥</p>"""
1877
  )
 
1878
  gr.Markdown(
1879
- """<p style="text-align: center;">Optionally crop the image</p>"""
1880
- )
1881
- target = gr.ImageEditor(
1882
- type="numpy",
1883
- label="Target",
1884
- show_label=True,
1885
- height=LENGTH,
1886
- width=LENGTH,
1887
- brush=False,
1888
- layers=False,
1889
- crop_size="1:1",
1890
- )
1891
- gr.Examples(example_target_imgs, [target], examples_per_page=20)
1892
- with gr.Accordion(label="See hand pose and more options", open=False):
1893
- with gr.Tab("Automatic hand keypoints"):
1894
- target_pose = gr.Image(
1895
- type="numpy",
1896
- label="Target Pose",
1897
- show_label=True,
1898
- height=LENGTH,
1899
- width=LENGTH,
1900
- interactive=False,
1901
- )
1902
- target_use_auto = gr.Button(value="Click here to use automatic, not manual", interactive=False, visible=True)
1903
- with gr.Tab("Manual hand keypoints"):
1904
- target_manual_checkbox_info = gr.Markdown(
1905
- """<p style="text-align: center;"><b>Step 1.</b> Tell us if this is right, left, or both hands.</p>""",
1906
- visible=True,
1907
- )
1908
- target_manual_checkbox = gr.CheckboxGroup(
1909
- ["Right hand", "Left hand"],
1910
- show_label=False,
1911
- visible=True,
1912
- interactive=True,
1913
- )
1914
- target_manual_kp_r_info = gr.Markdown(
1915
- """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>right</b> hand. See \"OpenPose Keypoint Convention\" for guidance.</p>""",
1916
- visible=False,
1917
- )
1918
- target_manual_kp_right = gr.Image(
1919
- type="numpy",
1920
- label="Keypoint Selection (right hand)",
1921
- show_label=True,
1922
- height=LENGTH,
1923
- width=LENGTH,
1924
- interactive=False,
1925
- visible=False,
1926
- sources=[],
1927
- )
1928
- with gr.Row():
1929
- target_manual_undo_right = gr.Button(
1930
- value="Undo", interactive=True, visible=False
1931
- )
1932
- target_manual_reset_right = gr.Button(
1933
- value="Reset", interactive=True, visible=False
1934
- )
1935
- target_manual_kp_l_info = gr.Markdown(
1936
- """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>left</b> hand. See \"OpenPose keypoint convention\" for guidance.</p>""",
1937
- visible=False
1938
- )
1939
- target_manual_kp_left = gr.Image(
1940
- type="numpy",
1941
- label="Keypoint Selection (left hand)",
1942
- show_label=True,
1943
- height=LENGTH,
1944
- width=LENGTH,
1945
- interactive=False,
1946
- visible=False,
1947
- sources=[],
1948
- )
1949
- with gr.Row():
1950
- target_manual_undo_left = gr.Button(
1951
- value="Undo", interactive=True, visible=False
1952
- )
1953
- target_manual_reset_left = gr.Button(
1954
- value="Reset", interactive=True, visible=False
1955
- )
1956
- target_manual_done_info = gr.Markdown(
1957
- """<p style="text-align: center;"><b>Step 3.</b> Hit \"Done\" button to confirm.</p>""",
1958
- visible=False,
1959
  )
1960
- target_manual_done = gr.Button(value="Done", interactive=True, visible=False)
1961
- target_manual_pose = gr.Image(
1962
- type="numpy",
1963
- label="Target Pose",
1964
- show_label=True,
1965
- height=LENGTH,
1966
- width=LENGTH,
1967
- interactive=False,
1968
- visible=False
1969
  )
1970
- target_use_manual = gr.Button(value="Click here to use manual, not automatic", interactive=True, visible=False)
1971
- target_manual_instruct = gr.Markdown(
1972
- value="""<p style="text-align: left; font-weight: bold; ">OpenPose Keypoints Convention</p>""",
1973
- visible=True
1974
  )
1975
- target_manual_openpose = gr.Image(
1976
- value="openpose.png",
1977
- type="numpy",
1978
- show_label=False,
1979
- height=LENGTH // 2,
1980
- width=LENGTH // 2,
1981
- interactive=False,
1982
- visible=True
1983
  )
1984
  gr.Markdown(
1985
- """<p style="text-align: center;">Optionally flip the hand</p>"""
1986
  )
1987
- target_flip = gr.Checkbox(
1988
- value=False, label="Flip Handedness (Target)", interactive=False
1989
  )
1990
-
1991
  # result column
1992
  with gr.Column():
1993
  gr.Markdown(
1994
- """<p style="text-align: center; font-size: 18px; font-weight: bold;">3. Press &quot;Run&quot; to get the reposed results 🎯</p>"""
1995
  )
1996
- run = gr.Button(value="Run", interactive=False)
1997
- gr.Markdown(
1998
- """<p style="text-align: center;">⚠️ ~20s per generation with RTX3090. ~50s with A100. <br>(For example, if you set Number of generations as 2, it would take around 40s)</p>"""
 
 
 
 
 
1999
  )
2000
- results = gr.Gallery(
2001
  type="numpy",
2002
  label="Results",
2003
  show_label=True,
2004
  height=LENGTH,
2005
  min_width=LENGTH,
2006
- columns=MAX_N,
2007
  interactive=False,
2008
  preview=True,
2009
  )
2010
- with gr.Accordion(label="Results with pose", open=False):
2011
- results_pose = gr.Gallery(
2012
  type="numpy",
2013
  label="Results Pose",
2014
  show_label=True,
2015
  height=LENGTH,
2016
  min_width=LENGTH,
2017
- columns=MAX_N,
2018
  interactive=False,
2019
  preview=True,
2020
  )
2021
- gr.Markdown(
2022
- """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
2023
- )
2024
- clear = gr.ClearButton()
2025
 
2026
- # more options
2027
- with gr.Accordion(label="More options", open=False):
2028
- with gr.Row():
2029
- n_generation = gr.Slider(
2030
- label="Number of generations",
2031
- value=1,
2032
- minimum=1,
2033
- maximum=MAX_N,
2034
- step=1,
2035
- randomize=False,
2036
- interactive=True,
2037
- )
2038
- seed = gr.Slider(
2039
- label="Seed",
2040
- value=42,
2041
- minimum=0,
2042
- maximum=10000,
2043
- step=1,
2044
- randomize=False,
2045
- interactive=True,
2046
- )
2047
- cfg = gr.Slider(
2048
- label="Classifier free guidance scale",
2049
- value=2.5,
2050
- minimum=0.0,
2051
- maximum=10.0,
2052
- step=0.1,
2053
- randomize=False,
2054
- interactive=True,
2055
- )
2056
 
2057
- # reference listeners
2058
- ref.change(prepare_anno, [ref, ref_is_user], [ref_im_raw, ref_kp_raw])
2059
- ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_right)
2060
- ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_left)
2061
- ref_kp_raw.change(get_ref_anno, [ref_im_raw, ref_kp_raw], [ref_img, ref_pose, ref_auto_cond, ref, ref_is_user])
2062
- ref_pose.change(enable_component, [ref_kp_raw, ref_pose], ref_use_auto)
2063
- ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
2064
- ref_auto_cond.change(lambda x: x, ref_auto_cond, ref_cond)
2065
- ref_use_auto.click(lambda x: x, ref_auto_cond, ref_cond)
2066
- ref_use_auto.click(lambda x: gr.Info("Automatic hand keypoints will be used for 'Reference'", duration=3))
2067
-
2068
- ref_manual_checkbox.select(
2069
- set_visible,
2070
- [ref_manual_checkbox, ref_kp_got, ref_im_raw, ref_manual_kp_right, ref_manual_kp_left, ref_manual_done],
2071
- [
2072
- ref_kp_got,
2073
- ref_manual_kp_right,
2074
- ref_manual_kp_left,
2075
- ref_manual_kp_right,
2076
- ref_manual_undo_right,
2077
- ref_manual_reset_right,
2078
- ref_manual_kp_left,
2079
- ref_manual_undo_left,
2080
- ref_manual_reset_left,
2081
- ref_manual_kp_r_info,
2082
- ref_manual_kp_l_info,
2083
- ref_manual_done,
2084
- ref_manual_done_info
2085
- ]
2086
  )
2087
- ref_manual_kp_right.select(
2088
- get_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
2089
  )
2090
- ref_manual_undo_right.click(
2091
- undo_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
2092
  )
2093
- ref_manual_reset_right.click(
2094
- reset_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
2095
  )
2096
- ref_manual_kp_left.select(
2097
- get_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
2098
  )
2099
- ref_manual_undo_left.click(
2100
- undo_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
2101
  )
2102
- ref_manual_reset_left.click(
2103
- reset_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
2104
  )
2105
- ref_manual_done.click(visible_component, [gr.State(0), ref_manual_pose], ref_manual_pose)
2106
- ref_manual_done.click(visible_component, [gr.State(0), ref_use_manual], ref_use_manual)
2107
- ref_manual_done.click(get_ref_anno, [ref_im_raw, ref_kp_got], [ref_img, ref_manual_pose, ref_manual_cond])
2108
- ref_manual_pose.change(enable_component, [ref_manual_pose, ref_manual_pose], ref_manual_done)
2109
- ref_manual_pose.change(enable_component, [ref_img, ref_manual_pose], ref_flip)
2110
- ref_manual_cond.change(lambda x: x, ref_manual_cond, ref_cond)
2111
- ref_use_manual.click(lambda x: x, ref_manual_cond, ref_cond)
2112
- ref_use_manual.click(lambda x: gr.Info("Manual hand keypoints will be used for 'Reference'", duration=3))
2113
-
2114
- ref_flip.select(
2115
- flip_hand,
2116
- [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond],
2117
- [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond]
2118
  )
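The reference conditioning can come from the automatic detector (ref_auto_cond) or from manual clicks (ref_manual_cond); the .change(lambda x: x, ...) listeners and the two "use automatic / use manual" buttons simply copy whichever source was chosen into the single ref_cond state that the sampler reads. A stripped-down sketch of that selection pattern, with shortened names and the logic approximated:

```python
import gradio as gr

with gr.Blocks() as demo:
    auto_cond = gr.State(value=None)    # filled by the automatic keypoint pipeline
    manual_cond = gr.State(value=None)  # filled after manual clicks + "Done"
    cond = gr.State(value=None)         # the value the sampler actually consumes

    use_auto = gr.Button("Use automatic keypoints")
    use_manual = gr.Button("Use manual keypoints")

    # whichever source updates last, or whichever button is pressed, wins
    auto_cond.change(lambda x: x, auto_cond, cond)
    manual_cond.change(lambda x: x, manual_cond, cond)
    use_auto.click(lambda x: x, auto_cond, cond)
    use_manual.click(lambda x: x, manual_cond, cond)
```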
2119
-
2120
- # target listeners
2121
- target.change(prepare_anno, [target, target_is_user], [target_im_raw, target_kp_raw])
2122
- target_kp_raw.change(lambda x:x, target_im_raw, target_manual_kp_right)
2123
- target_kp_raw.change(lambda x:x, target_im_raw, target_manual_kp_left)
2124
- target_kp_raw.change(get_target_anno, [target_im_raw, target_kp_raw], [target_img, target_pose, target_auto_cond, target_auto_keypts, target, target_is_user])
2125
- target_pose.change(enable_component, [target_kp_raw, target_pose], target_use_auto)
2126
- target_pose.change(enable_component, [target_img, target_pose], target_flip)
2127
- target_auto_cond.change(lambda x: x, target_auto_cond, target_cond)
2128
- target_auto_keypts.change(lambda x: x, target_auto_keypts, target_keypts)
2129
- target_use_auto.click(lambda x: x, target_auto_cond, target_cond)
2130
- target_use_auto.click(lambda x: x, target_auto_keypts, target_keypts)
2131
- target_use_auto.click(lambda x: gr.Info("Automatic hand keypoints will be used for 'Target'", duration=3))
2132
-
2133
- target_manual_checkbox.select(
2134
  set_visible,
2135
- [target_manual_checkbox, target_kp_got, target_im_raw, target_manual_kp_right, target_manual_kp_left, target_manual_done],
2136
  [
2137
- target_kp_got,
2138
- target_manual_kp_right,
2139
- target_manual_kp_left,
2140
- target_manual_kp_right,
2141
- target_manual_undo_right,
2142
- target_manual_reset_right,
2143
- target_manual_kp_left,
2144
- target_manual_undo_left,
2145
- target_manual_reset_left,
2146
- target_manual_kp_r_info,
2147
- target_manual_kp_l_info,
2148
- target_manual_done,
2149
- target_manual_done_info
2150
- ]
2151
  )
2152
- target_manual_kp_right.select(
2153
- get_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
2154
  )
2155
- target_manual_undo_right.click(
2156
- undo_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
2157
  )
2158
- target_manual_reset_right.click(
2159
- reset_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
2160
  )
2161
- target_manual_kp_left.select(
2162
- get_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
2163
  )
2164
- target_manual_undo_left.click(
2165
- undo_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
2166
  )
2167
- target_manual_reset_left.click(
2168
- reset_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
2169
  )
2170
- target_manual_done.click(visible_component, [gr.State(0), target_manual_pose], target_manual_pose)
2171
- target_manual_done.click(visible_component, [gr.State(0), target_use_manual], target_use_manual)
2172
- target_manual_done.click(get_target_anno, [target_im_raw, target_kp_got], [target_img, target_manual_pose, target_manual_cond, target_manual_keypts])
2173
- target_manual_pose.change(enable_component, [target_manual_pose, target_manual_pose], target_manual_done)
2174
- target_manual_pose.change(enable_component, [target_img, target_manual_pose], target_flip)
2175
- target_manual_cond.change(lambda x: x, target_manual_cond, target_cond)
2176
- target_manual_keypts.change(lambda x: x, target_manual_keypts, target_keypts)
2177
- target_use_manual.click(lambda x: x, target_manual_cond, target_cond)
2178
- target_use_manual.click(lambda x: x, target_manual_keypts, target_keypts)
2179
- target_use_manual.click(lambda x: gr.Info("Manual hand keypoints will be used for 'Reference'", duration=3))
2180
-
2181
- target_flip.select(
2182
- flip_hand,
2183
- [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
2184
- [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
2185
  )
2186
-
2187
- # run listerners
2188
- ref_cond.change(enable_component, [ref_cond, target_cond], run)
2189
- target_cond.change(enable_component, [ref_cond, target_cond], run)
2190
- run.click(
2191
- sample_diff,
2192
- [ref_cond, target_cond, target_keypts, n_generation, seed, cfg],
2193
- [results, results_pose],
2194
  )
2195
- clear.click(
2196
- clear_all,
2197
  [],
2198
  [
2199
- ref,
2200
- ref_manual_checkbox,
2201
- ref_manual_kp_right,
2202
- ref_manual_kp_left,
2203
- ref_img,
2204
- ref_pose,
2205
- ref_manual_pose,
2206
- ref_cond,
2207
- ref_flip,
2208
- target,
2209
- target_keypts,
2210
- target_manual_checkbox,
2211
- target_manual_kp_right,
2212
- target_manual_kp_left,
2213
- target_img,
2214
- target_pose,
2215
- target_manual_pose,
2216
- target_cond,
2217
- target_flip,
2218
- results,
2219
- results_pose,
2220
- n_generation,
2221
- seed,
2222
- cfg,
2223
- ref_kp_raw,
 
2224
  ],
2225
  )
2226
- clear.click(
2227
- set_unvisible,
2228
  [],
2229
  [
2230
- ref_manual_kp_l_info,
2231
- ref_manual_kp_r_info,
2232
- ref_manual_kp_left,
2233
- ref_manual_kp_right,
2234
- ref_manual_undo_left,
2235
- ref_manual_undo_right,
2236
- ref_manual_reset_left,
2237
- ref_manual_reset_right,
2238
- ref_manual_done,
2239
- ref_manual_done_info,
2240
- ref_manual_pose,
2241
- ref_use_manual,
2242
- target_manual_kp_l_info,
2243
- target_manual_kp_r_info,
2244
- target_manual_kp_left,
2245
- target_manual_kp_right,
2246
- target_manual_undo_left,
2247
- target_manual_undo_right,
2248
- target_manual_reset_left,
2249
- target_manual_reset_right,
2250
- target_manual_done,
2251
- target_manual_done_info,
2252
- target_manual_pose,
2253
- target_use_manual,
2254
  ]
2255
  )
2256
 
2257
- gr.Markdown("<h1>Acknowledgement</h1>")
2258
  gr.Markdown(_ACK_)
2259
- gr.Markdown("<h1>Trouble Shooting</h1>")
2260
- gr.Markdown("If error persists, please try the following steps:<br>1. Refresh the page and try again.<br>2. The issue might be due to compatibility with HuggingFace or GPU memory limitations. We recommend cloning this repository and trying it with your own GPU if possible.<br>3. Kindly leave a message on our HuggingFace Spaces Community tab (located at the top right), on our GitHub repository's Issues page, or send us an email. We are happy to help you as soon as possible.")
2261
- gr.Markdown("If the result is not satisfactory:<br>1. Try changing either <b>Classifier Free Guidance Scale</b> or <b>Seed</b>, that can be found at \"More Options\".")
2262
  gr.Markdown("<h1>Citation</h1>")
2263
  gr.Markdown(
2264
  """<p style="text-align: left;">If this was useful, please cite us! ❤️</p>"""
2265
  )
2266
  gr.Markdown(_CITE_)
2267
 
2268
  # print("Ready to launch..")
2269
  # _, _, shared_url = demo.queue().launch(
 
1337
  """
1338
 
1339
  with gr.Blocks(css=custom_css, theme="soft") as demo:
1340
+ gr.HTML(f"<style>{button_css}</style>")
1341
  gr.Markdown(_HEADER_)
 
 
 
 
 
 
 
 
 
 
 
 
1342
 
1343
+ with gr.Tab("Demo 1. Repose Hands", elem_id="repose-tab"):
1344
+ # ref states
1345
+ dump = gr.State(value=None)
1346
+ ref_img = gr.State(value=None)
1347
+ ref_im_raw = gr.State(value=None)
1348
+ ref_kp_raw = gr.State(value=0)
1349
+ ref_is_user = gr.State(value=True)
1350
+ ref_kp_got = gr.State(value=None)
1351
+ ref_manual_cond = gr.State(value=None)
1352
+ ref_auto_cond = gr.State(value=None)
1353
+ ref_cond = gr.State(value=None)
1354
+
1355
+ # target states
1356
+ target_img = gr.State(value=None)
1357
+ target_im_raw = gr.State(value=None)
1358
+ target_kp_raw = gr.State(value=0)
1359
+ target_is_user = gr.State(value=True)
1360
+ target_kp_got = gr.State(value=None)
1361
+ target_manual_keypts = gr.State(value=None)
1362
+ target_auto_keypts = gr.State(value=None)
1363
+ target_keypts = gr.State(value=None)
1364
+ target_manual_cond = gr.State(value=None)
1365
+ target_auto_cond = gr.State(value=None)
1366
+ target_cond = gr.State(value=None)
1367
+
1368
  # main tabs
1369
  with gr.Row():
1370
+ # ref column
1371
  with gr.Column():
1372
  gr.Markdown(
1373
+ """<p style="text-align: center; font-size: 18px; font-weight: bold;">1. Upload a hand image to repose 📥</p>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1374
  )
1375
+ # gr.Markdown(
1376
+ # """<p style="text-align: center;">Optionally crop the image</p>"""
 
 
1377
  # )
1378
+ ref = gr.ImageEditor(
1379
  type="numpy",
1380
+ label="Reference",
 
1381
  show_label=True,
1382
  height=LENGTH,
1383
  width=LENGTH,
1384
+ brush=False,
1385
  layers=False,
1386
+ crop_size="1:1",
1387
  )
1388
+ gr.Examples(example_ref_imgs, [ref], examples_per_page=20)
1389
+ with gr.Accordion(label="See hand pose", open=False):
1390
+ with gr.Tab("Automatic hand keypoints"):
1391
+ ref_pose = gr.Image(
1392
+ type="numpy",
1393
+ label="Reference Pose",
1394
+ show_label=True,
1395
+ height=LENGTH,
1396
+ width=LENGTH,
1397
+ interactive=False,
1398
  )
1399
+ ref_use_auto = gr.Button(value="Click here to use automatic, not manual", interactive=False, visible=True)
1400
+ with gr.Tab("Manual hand keypoints"):
1401
+ ref_manual_checkbox_info = gr.Markdown(
1402
+ """<p style="text-align: center;"><b>Step 1.</b> Tell us if this is right, left, or both hands.</p>""",
1403
+ visible=True,
1404
  )
1405
+ ref_manual_checkbox = gr.CheckboxGroup(
1406
+ ["Right hand", "Left hand"],
1407
+ show_label=False,
1408
+ visible=True,
1409
+ interactive=True,
1410
  )
1411
+ ref_manual_kp_r_info = gr.Markdown(
1412
+ """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>right</b> hand. See \"OpenPose Keypoint Convention\" for guidance.</p>""",
1413
+ visible=False,
1414
+ )
1415
+ ref_manual_kp_right = gr.Image(
1416
+ type="numpy",
1417
+ label="Keypoint Selection (right hand)",
1418
+ show_label=True,
1419
+ height=LENGTH,
1420
+ width=LENGTH,
1421
+ interactive=False,
1422
+ visible=False,
1423
+ sources=[],
1424
+ )
1425
+ with gr.Row():
1426
+ ref_manual_undo_right = gr.Button(
1427
+ value="Undo", interactive=True, visible=False
1428
+ )
1429
+ ref_manual_reset_right = gr.Button(
1430
+ value="Reset", interactive=True, visible=False
1431
+ )
1432
+ ref_manual_kp_l_info = gr.Markdown(
1433
+ """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>left</b> hand. See \"OpenPose keypoint convention\" for guidance.</p>""",
1434
+ visible=False
1435
+ )
1436
+ ref_manual_kp_left = gr.Image(
1437
+ type="numpy",
1438
+ label="Keypoint Selection (left hand)",
1439
+ show_label=True,
1440
+ height=LENGTH,
1441
+ width=LENGTH,
1442
+ interactive=False,
1443
+ visible=False,
1444
+ sources=[],
1445
+ )
1446
+ with gr.Row():
1447
+ ref_manual_undo_left = gr.Button(
1448
+ value="Undo", interactive=True, visible=False
1449
+ )
1450
+ ref_manual_reset_left = gr.Button(
1451
+ value="Reset", interactive=True, visible=False
1452
+ )
1453
+ ref_manual_done_info = gr.Markdown(
1454
+ """<p style="text-align: center;"><b>Step 3.</b> Hit \"Done\" button to confirm.</p>""",
1455
+ visible=False,
1456
+ )
1457
+ ref_manual_done = gr.Button(value="Done", interactive=True, visible=False)
1458
+ ref_manual_pose = gr.Image(
1459
+ type="numpy",
1460
+ label="Reference Pose",
1461
+ show_label=True,
1462
+ height=LENGTH,
1463
+ width=LENGTH,
1464
+ interactive=False,
1465
+ visible=False
1466
+ )
1467
+ ref_use_manual = gr.Button(value="Click here to use manual, not automatic", interactive=True, visible=False)
1468
+ ref_manual_instruct = gr.Markdown(
1469
+ value="""<p style="text-align: left; font-weight: bold; ">OpenPose Keypoints Convention</p>""",
1470
+ visible=True
1471
+ )
1472
+ ref_manual_openpose = gr.Image(
1473
+ value="openpose.png",
1474
+ type="numpy",
1475
+ show_label=False,
1476
+ height=LENGTH // 2,
1477
+ width=LENGTH // 2,
1478
+ interactive=False,
1479
+ visible=True
1480
  )
1481
  gr.Markdown(
1482
+ """<p style="text-align: center;">Optionally flip the hand</p>"""
1483
  )
1484
+ ref_flip = gr.Checkbox(
1485
+ value=False, label="Flip Handedness (Reference)", interactive=False
1486
  )
1487
 
1488
+ # target column
1489
  with gr.Column():
1490
  gr.Markdown(
1491
+ """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Upload a hand image for target hand pose 📥</p>"""
1492
  )
1493
+ # gr.Markdown(
1494
+ # """<p style="text-align: center;">Optionally crop the image</p>"""
1495
+ # )
1496
+ target = gr.ImageEditor(
1497
  type="numpy",
1498
+ label="Target",
1499
  show_label=True,
1500
+ height=LENGTH,
1501
+ width=LENGTH,
1502
+ brush=False,
1503
+ layers=False,
1504
+ crop_size="1:1",
1505
  )
1506
+ gr.Examples(example_target_imgs, [target], examples_per_page=20)
1507
+ with gr.Accordion(label="See hand pose", open=False):
1508
+ with gr.Tab("Automatic hand keypoints"):
1509
+ target_pose = gr.Image(
1510
+ type="numpy",
1511
+ label="Target Pose",
1512
+ show_label=True,
1513
+ height=LENGTH,
1514
+ width=LENGTH,
1515
+ interactive=False,
1516
+ )
1517
+ target_use_auto = gr.Button(value="Click here to use automatic, not manual", interactive=False, visible=True)
1518
+ with gr.Tab("Manual hand keypoints"):
1519
+ target_manual_checkbox_info = gr.Markdown(
1520
+ """<p style="text-align: center;"><b>Step 1.</b> Tell us if this is right, left, or both hands.</p>""",
1521
+ visible=True,
1522
+ )
1523
+ target_manual_checkbox = gr.CheckboxGroup(
1524
+ ["Right hand", "Left hand"],
1525
+ show_label=False,
1526
+ visible=True,
1527
+ interactive=True,
1528
+ )
1529
+ target_manual_kp_r_info = gr.Markdown(
1530
+ """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>right</b> hand. See \"OpenPose Keypoint Convention\" for guidance.</p>""",
1531
+ visible=False,
1532
+ )
1533
+ target_manual_kp_right = gr.Image(
1534
+ type="numpy",
1535
+ label="Keypoint Selection (right hand)",
1536
+ show_label=True,
1537
+ height=LENGTH,
1538
+ width=LENGTH,
1539
+ interactive=False,
1540
+ visible=False,
1541
+ sources=[],
1542
+ )
1543
+ with gr.Row():
1544
+ target_manual_undo_right = gr.Button(
1545
+ value="Undo", interactive=True, visible=False
1546
+ )
1547
+ target_manual_reset_right = gr.Button(
1548
+ value="Reset", interactive=True, visible=False
1549
+ )
1550
+ target_manual_kp_l_info = gr.Markdown(
1551
+ """<p style="text-align: center;"><b>Step 2.</b> Click on image to provide hand keypoints for <b>left</b> hand. See \"OpenPose keypoint convention\" for guidance.</p>""",
1552
+ visible=False
1553
+ )
1554
+ target_manual_kp_left = gr.Image(
1555
+ type="numpy",
1556
+ label="Keypoint Selection (left hand)",
1557
+ show_label=True,
1558
+ height=LENGTH,
1559
+ width=LENGTH,
1560
+ interactive=False,
1561
+ visible=False,
1562
+ sources=[],
1563
+ )
1564
+ with gr.Row():
1565
+ target_manual_undo_left = gr.Button(
1566
+ value="Undo", interactive=True, visible=False
1567
+ )
1568
+ target_manual_reset_left = gr.Button(
1569
+ value="Reset", interactive=True, visible=False
1570
+ )
1571
+ target_manual_done_info = gr.Markdown(
1572
+ """<p style="text-align: center;"><b>Step 3.</b> Hit \"Done\" button to confirm.</p>""",
1573
+ visible=False,
1574
+ )
1575
+ target_manual_done = gr.Button(value="Done", interactive=True, visible=False)
1576
+ target_manual_pose = gr.Image(
1577
+ type="numpy",
1578
+ label="Target Pose",
1579
+ show_label=True,
1580
+ height=LENGTH,
1581
+ width=LENGTH,
1582
+ interactive=False,
1583
+ visible=False
1584
+ )
1585
+ target_use_manual = gr.Button(value="Click here to use manual, not automatic", interactive=True, visible=False)
1586
+ target_manual_instruct = gr.Markdown(
1587
+ value="""<p style="text-align: left; font-weight: bold; ">OpenPose Keypoints Convention</p>""",
1588
+ visible=True
1589
+ )
1590
+ target_manual_openpose = gr.Image(
1591
+ value="openpose.png",
1592
+ type="numpy",
1593
+ show_label=False,
1594
+ height=LENGTH // 2,
1595
+ width=LENGTH // 2,
1596
+ interactive=False,
1597
+ visible=True
1598
+ )
1599
+ gr.Markdown(
1600
+ """<p style="text-align: center;">Optionally flip the hand</p>"""
1601
+ )
1602
+ target_flip = gr.Checkbox(
1603
+ value=False, label="Flip Handedness (Target)", interactive=False
1604
  )
1605
+
1606
+ # result column
1607
+ with gr.Column():
1608
  gr.Markdown(
1609
+ """<p style="text-align: center; font-size: 18px; font-weight: bold;">3. Press &quot;Run&quot; 🎯</p>"""
1610
  )
1611
+ run = gr.Button(value="Run", interactive=False, elem_id="run_button")
1612
+ # gr.Markdown(
1613
+ # """<p style="text-align: center;">⚠️ ~50s per generation</p>""" # with RTX3090. ~50s with A100. <br>(For example, if you set Number of generations as 2, it would take around 40s)
1614
+ # )
1615
+ results = gr.Gallery(
1616
  type="numpy",
1617
+ label="Results",
1618
  show_label=True,
1619
  height=LENGTH,
1620
  min_width=LENGTH,
1621
+ columns=MAX_N,
1622
  interactive=False,
1623
  preview=True,
1624
  )
1625
+ with gr.Accordion(label="Results with pose", open=False):
1626
+ results_pose = gr.Gallery(
1627
  type="numpy",
1628
  label="Results Pose",
1629
  show_label=True,
1630
  height=LENGTH,
1631
  min_width=LENGTH,
1632
+ columns=MAX_N,
1633
  interactive=False,
1634
  preview=True,
1635
  )
1636
+ # gr.Markdown(
1637
+ # """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
1638
+ # )
1639
+ clear = gr.ClearButton(elem_id="clear_button")
1640
 
1641
+ # more options
1642
+ with gr.Accordion(label="More options", open=False):
1643
+ with gr.Row():
1644
+ n_generation = gr.Slider(
1645
+ label="Number of generations",
1646
+ value=1,
1647
+ minimum=1,
1648
+ maximum=MAX_N,
1649
+ step=1,
1650
+ randomize=False,
1651
+ interactive=True,
1652
+ )
1653
+ seed = gr.Slider(
1654
+ label="Seed",
1655
+ value=42,
1656
+ minimum=0,
1657
+ maximum=10000,
1658
+ step=1,
1659
+ randomize=False,
1660
+ interactive=True,
1661
+ )
1662
+ cfg = gr.Slider(
1663
+ label="Classifier free guidance scale",
1664
+ value=2.5,
1665
+ minimum=0.0,
1666
+ maximum=10.0,
1667
+ step=0.1,
1668
+ randomize=False,
1669
+ interactive=True,
1670
+ )
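The "Classifier free guidance scale" slider is later forwarded to `sample_diff` together with the seed and generation count. How this repository applies the scale internally is not shown in this diff; as a hedged sketch, the textbook classifier-free guidance combination that such a `cfg` value usually controls is:

```python
import torch

def cfg_combine(eps_cond: torch.Tensor, eps_uncond: torch.Tensor, cfg_scale: float) -> torch.Tensor:
    """Standard classifier-free guidance: push the conditional prediction
    away from the unconditional one by a factor of cfg_scale."""
    return eps_uncond + cfg_scale * (eps_cond - eps_uncond)
```

With `cfg_scale = 1.0` this reduces to the plain conditional prediction; larger values trade diversity for stronger adherence to the conditioning.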
1671
+
1672
+ # tutorial video
1673
+ with gr.Accordion("Tutorial Video of Demo 1", elem_id="accordion_bold_large_center"):
1674
+ # gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;"></p>""")
1675
+ with gr.Row(variant="panel", elem_id="repose_tutorial"):
1676
+ with gr.Column():
1677
+ # gr.Video(
1678
+ # "how_to_videos/subtitled_repose_hands.mp4",
1679
+ # label="Tutorial",
1680
+ # autoplay=True,
1681
+ # loop=True,
1682
+ # show_label=True,
1683
+ # )
1684
+ gr.HTML(tut2_example)
1685
+
1686
+ # reference listeners
1687
+ ref.change(prepare_anno, [ref, ref_is_user], [ref_im_raw, ref_kp_raw])
1688
+ ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_right)
1689
+ ref_kp_raw.change(lambda x: x, ref_im_raw, ref_manual_kp_left)
1690
+ ref_kp_raw.change(get_ref_anno, [ref_im_raw, ref_kp_raw], [ref_img, ref_pose, ref_auto_cond, ref, ref_is_user])
1691
+ ref_pose.change(enable_component, [ref_kp_raw, ref_pose], ref_use_auto)
1692
+ ref_pose.change(enable_component, [ref_img, ref_pose], ref_flip)
1693
+ ref_auto_cond.change(lambda x: x, ref_auto_cond, ref_cond)
1694
+ ref_use_auto.click(lambda x: x, ref_auto_cond, ref_cond)
1695
+ ref_use_auto.click(lambda x: gr.Info("Automatic hand keypoints will be used for 'Reference'", duration=3))
1696
+
1697
+ ref_manual_checkbox.select(
1698
+ set_visible,
1699
+ [ref_manual_checkbox, ref_kp_got, ref_im_raw, ref_manual_kp_right, ref_manual_kp_left, ref_manual_done],
1700
+ [
1701
+ ref_kp_got,
1702
+ ref_manual_kp_right,
1703
+ ref_manual_kp_left,
1704
+ ref_manual_kp_right,
1705
+ ref_manual_undo_right,
1706
+ ref_manual_reset_right,
1707
+ ref_manual_kp_left,
1708
+ ref_manual_undo_left,
1709
+ ref_manual_reset_left,
1710
+ ref_manual_kp_r_info,
1711
+ ref_manual_kp_l_info,
1712
+ ref_manual_done,
1713
+ ref_manual_done_info
1714
+ ]
1715
  )
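`set_visible` (defined earlier in app.py) fans one checkbox selection out to many components at once. A minimal sketch of that pattern with hypothetical component roles — not the actual function, which drives thirteen outputs here — returns one `gr.update(visible=...)` per wired output:

```python
import gradio as gr

def toggle_keypoint_ui(selected, *_):
    # `selected` is the list of checked labels from a gr.CheckboxGroup.
    show_right = "Right hand" in selected
    show_left = "Left hand" in selected
    # One gr.update per output component listed in the .select() call.
    return (
        gr.update(visible=show_right),  # right-hand keypoint image
        gr.update(visible=show_right),  # right-hand Undo button
        gr.update(visible=show_left),   # left-hand keypoint image
        gr.update(visible=show_left),   # left-hand Undo button
    )
```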
1716
+ ref_manual_kp_right.select(
1717
+ get_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
1718
  )
1719
+ ref_manual_undo_right.click(
1720
+ undo_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
1721
  )
1722
+ ref_manual_reset_right.click(
1723
+ reset_kps, [ref_im_raw, ref_kp_got, gr.State("right")], [ref_manual_kp_right, ref_kp_got]
1724
  )
1725
+ ref_manual_kp_left.select(
1726
+ get_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
1727
  )
1728
+ ref_manual_undo_left.click(
1729
+ undo_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
1730
  )
1731
+ ref_manual_reset_left.click(
1732
+ reset_kps, [ref_im_raw, ref_kp_got, gr.State("left")], [ref_manual_kp_left, ref_kp_got]
1733
+ )
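The keypoint-clicking handlers (`get_kps`, `undo_kps`, `reset_kps`) maintain a running list of clicked points and redraw them on the image. A self-contained sketch of the click handler — the dict layout of `kpts` and the drawing style are assumptions, not the app's actual storage format — could look like:

```python
import cv2
import gradio as gr
import numpy as np

def add_clicked_keypoint(img: np.ndarray, kpts, side: str, evt: gr.SelectData):
    """Append the clicked pixel to the per-hand keypoint list and redraw.
    `kpts` is assumed to be a dict like {"right": [...], "left": [...]}."""
    kpts = kpts or {"right": [], "left": []}
    x, y = evt.index                       # pixel coordinates of the click
    if len(kpts[side]) < 21:               # OpenPose uses exactly 21 points per hand
        kpts[side].append([x, y])
    vis = img.copy()
    for px, py in kpts[side]:              # draw every point collected so far
        cv2.circle(vis, (int(px), int(py)), 4, (255, 0, 0), -1)
    return vis, kpts
```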
1734
+ ref_manual_done.click(visible_component, [gr.State(0), ref_manual_pose], ref_manual_pose)
1735
+ ref_manual_done.click(visible_component, [gr.State(0), ref_use_manual], ref_use_manual)
1736
+ ref_manual_done.click(get_ref_anno, [ref_im_raw, ref_kp_got], [ref_img, ref_manual_pose, ref_manual_cond])
1737
+ ref_manual_pose.change(enable_component, [ref_manual_pose, ref_manual_pose], ref_manual_done)
1738
+ ref_manual_pose.change(enable_component, [ref_img, ref_manual_pose], ref_flip)
1739
+ ref_manual_cond.change(lambda x: x, ref_manual_cond, ref_cond)
1740
+ ref_use_manual.click(lambda x: x, ref_manual_cond, ref_cond)
1741
+ ref_use_manual.click(lambda x: gr.Info("Manual hand keypoints will be used for 'Reference'", duration=3))
1742
+
1743
+ ref_flip.select(
1744
+ flip_hand,
1745
+ [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond],
1746
+ [ref, ref_im_raw, ref_pose, ref_manual_pose, ref_manual_kp_right, ref_manual_kp_left, ref_cond, ref_auto_cond, ref_manual_cond]
1747
+ )
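`flip_hand` mirrors the image and its annotations when the handedness checkbox is toggled (it also receives the cached conditioning states, which it presumably updates as well). A minimal sketch of just the geometric part, assuming keypoints are stored as a (42, 2) array with the right hand in rows 0-20 and the left hand in rows 21-41:

```python
import numpy as np

def mirror_image_and_keypoints(img: np.ndarray, keypts: np.ndarray):
    """Flip an HxWx3 image horizontally and mirror keypoint x-coordinates."""
    h, w = img.shape[:2]
    flipped_img = img[:, ::-1].copy()
    flipped_kpts = keypts.copy()
    valid = flipped_kpts.sum(axis=1) != 0            # leave empty (0, 0) slots untouched
    flipped_kpts[valid, 0] = w - 1 - flipped_kpts[valid, 0]
    # Mirroring turns a right hand into a left hand, so swap the two blocks.
    flipped_kpts = np.concatenate([flipped_kpts[21:], flipped_kpts[:21]], axis=0)
    return flipped_img, flipped_kpts
```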
1748
+
1749
+ # target listeners
1750
+ target.change(prepare_anno, [target, target_is_user], [target_im_raw, target_kp_raw])
1751
+ target_kp_raw.change(lambda x: x, target_im_raw, target_manual_kp_right)
1752
+ target_kp_raw.change(lambda x: x, target_im_raw, target_manual_kp_left)
1753
+ target_kp_raw.change(get_target_anno, [target_im_raw, target_kp_raw], [target_img, target_pose, target_auto_cond, target_auto_keypts, target, target_is_user])
1754
+ target_pose.change(enable_component, [target_kp_raw, target_pose], target_use_auto)
1755
+ target_pose.change(enable_component, [target_img, target_pose], target_flip)
1756
+ target_auto_cond.change(lambda x: x, target_auto_cond, target_cond)
1757
+ target_auto_keypts.change(lambda x: x, target_auto_keypts, target_keypts)
1758
+ target_use_auto.click(lambda x: x, target_auto_cond, target_cond)
1759
+ target_use_auto.click(lambda x: x, target_auto_keypts, target_keypts)
1760
+ target_use_auto.click(lambda x: gr.Info("Automatic hand keypoints will be used for 'Target'", duration=3))
1761
+
1762
+ target_manual_checkbox.select(
1763
  set_visible,
1764
+ [target_manual_checkbox, target_kp_got, target_im_raw, target_manual_kp_right, target_manual_kp_left, target_manual_done],
1765
  [
1766
+ target_kp_got,
1767
+ target_manual_kp_right,
1768
+ target_manual_kp_left,
1769
+ target_manual_kp_right,
1770
+ target_manual_undo_right,
1771
+ target_manual_reset_right,
1772
+ target_manual_kp_left,
1773
+ target_manual_undo_left,
1774
+ target_manual_reset_left,
1775
+ target_manual_kp_r_info,
1776
+ target_manual_kp_l_info,
1777
+ target_manual_done,
1778
+ target_manual_done_info
1779
+ ]
1780
  )
1781
+ target_manual_kp_right.select(
1782
+ get_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
1783
  )
1784
+ target_manual_undo_right.click(
1785
+ undo_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
1786
  )
1787
+ target_manual_reset_right.click(
1788
+ reset_kps, [target_im_raw, target_kp_got, gr.State("right")], [target_manual_kp_right, target_kp_got]
1789
  )
1790
+ target_manual_kp_left.select(
1791
+ get_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
1792
  )
1793
+ target_manual_undo_left.click(
1794
+ undo_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
1795
  )
1796
+ target_manual_reset_left.click(
1797
+ reset_kps, [target_im_raw, target_kp_got, gr.State("left")], [target_manual_kp_left, target_kp_got]
1798
  )
1799
+ target_manual_done.click(visible_component, [gr.State(0), target_manual_pose], target_manual_pose)
1800
+ target_manual_done.click(visible_component, [gr.State(0), target_use_manual], target_use_manual)
1801
+ target_manual_done.click(get_target_anno, [target_im_raw, target_kp_got], [target_img, target_manual_pose, target_manual_cond, target_manual_keypts])
1802
+ target_manual_pose.change(enable_component, [target_manual_pose, target_manual_pose], target_manual_done)
1803
+ target_manual_pose.change(enable_component, [target_img, target_manual_pose], target_flip)
1804
+ target_manual_cond.change(lambda x: x, target_manual_cond, target_cond)
1805
+ target_manual_keypts.change(lambda x: x, target_manual_keypts, target_keypts)
1806
+ target_use_manual.click(lambda x: x, target_manual_cond, target_cond)
1807
+ target_use_manual.click(lambda x: x, target_manual_keypts, target_keypts)
1808
+ target_use_manual.click(lambda x: gr.Info("Manual hand keypoints will be used for 'Target'", duration=3))
1809
+
1810
+ target_flip.select(
1811
+ flip_hand,
1812
+ [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
1813
+ [target, target_im_raw, target_pose, target_manual_pose, target_manual_kp_right, target_manual_kp_left, target_cond, target_auto_cond, target_manual_cond, target_keypts, target_auto_keypts, target_manual_keypts],
1814
  )
1815
+
1816
+ # run listeners
1817
+ ref_cond.change(enable_component, [ref_cond, target_cond], run)
1818
+ target_cond.change(enable_component, [ref_cond, target_cond], run)
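`enable_component` is used here as a gate: the Run button only becomes clickable once both the reference and target conditions are populated. A plausible sketch of such a helper (the real signature in app.py may differ) is:

```python
import gradio as gr

def enable_when_ready(first, second):
    """Return an interactive update only when both tracked values are set."""
    ready = first is not None and second is not None
    return gr.update(interactive=ready)
```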
1819
+ run.click(
1820
+ sample_diff,
1821
+ [ref_cond, target_cond, target_keypts, n_generation, seed, cfg],
1822
+ [results, results_pose],
1823
  )
1824
+ clear.click(
1825
+ clear_all,
1826
  [],
1827
  [
1828
+ ref,
1829
+ ref_manual_checkbox,
1830
+ ref_manual_kp_right,
1831
+ ref_manual_kp_left,
1832
+ ref_img,
1833
+ ref_pose,
1834
+ ref_manual_pose,
1835
+ ref_cond,
1836
+ ref_flip,
1837
+ target,
1838
+ target_keypts,
1839
+ target_manual_checkbox,
1840
+ target_manual_kp_right,
1841
+ target_manual_kp_left,
1842
+ target_img,
1843
+ target_pose,
1844
+ target_manual_pose,
1845
+ target_cond,
1846
+ target_flip,
1847
+ results,
1848
+ results_pose,
1849
+ n_generation,
1850
+ seed,
1851
+ cfg,
1852
+ ref_kp_raw,
 
1853
  ],
1854
  )
1855
+ clear.click(
1856
+ set_unvisible,
1857
  [],
1858
  [
1859
+ ref_manual_kp_l_info,
1860
+ ref_manual_kp_r_info,
1861
+ ref_manual_kp_left,
1862
+ ref_manual_kp_right,
1863
+ ref_manual_undo_left,
1864
+ ref_manual_undo_right,
1865
+ ref_manual_reset_left,
1866
+ ref_manual_reset_right,
1867
+ ref_manual_done,
1868
+ ref_manual_done_info,
1869
+ ref_manual_pose,
1870
+ ref_use_manual,
1871
+ target_manual_kp_l_info,
1872
+ target_manual_kp_r_info,
1873
+ target_manual_kp_left,
1874
+ target_manual_kp_right,
1875
+ target_manual_undo_left,
1876
+ target_manual_undo_right,
1877
+ target_manual_reset_left,
1878
+ target_manual_reset_right,
1879
+ target_manual_done,
1880
+ target_manual_done_info,
1881
+ target_manual_pose,
1882
+ target_use_manual,
1883
  ]
1884
  )
1885
+
1886
+ with gr.Tab("Demo 2. Malformed Hand Correction", elem_id="fix-tab"):
1887
+ fix_inpaint_mask = gr.State(value=None)
1888
+ fix_original = gr.State(value=None)
1889
+ fix_crop_coord = gr.State(value=None)
1890
+ fix_img = gr.State(value=None)
1891
+ fix_kpts = gr.State(value=None)
1892
+ fix_kpts_path = gr.Textbox(visible=False)
1893
+ fix_kpts_np = gr.State(value=None)
1894
+ fix_ref_cond = gr.State(value=None)
1895
+ fix_target_cond = gr.State(value=None)
1896
+ fix_latent = gr.State(value=None)
1897
+ fix_inpaint_latent = gr.State(value=None)
1898
+
1899
+ # more options
1900
+ with gr.Accordion(label="More options", open=False):
1901
+ gr.Markdown(
1902
+ "⚠️ Currently, Number of generation > 1 could lead to out-of-memory"
1903
+ )
1904
+ with gr.Row():
1905
+ fix_n_generation = gr.Slider(
1906
+ label="Number of generations",
1907
+ value=1,
1908
+ minimum=1,
1909
+ maximum=FIX_MAX_N,
1910
+ step=1,
1911
+ randomize=False,
1912
+ interactive=True,
1913
+ )
1914
+ fix_seed = gr.Slider(
1915
+ label="Seed",
1916
+ value=42,
1917
+ minimum=0,
1918
+ maximum=10000,
1919
+ step=1,
1920
+ randomize=False,
1921
+ interactive=True,
1922
+ )
1923
+ fix_cfg = gr.Slider(
1924
+ label="Classifier free guidance scale",
1925
+ value=3.0,
1926
+ minimum=0.0,
1927
+ maximum=10.0,
1928
+ step=0.1,
1929
+ randomize=False,
1930
+ interactive=True,
1931
+ )
1932
+ fix_quality = gr.Slider(
1933
+ label="Quality",
1934
+ value=10,
1935
+ minimum=1,
1936
+ maximum=10,
1937
+ step=1,
1938
+ randomize=False,
1939
+ interactive=True,
1940
+ )
1941
+
1942
  # main tabs
1943
  with gr.Row():
1944
+ # crop & brush
1945
  with gr.Column():
1946
  gr.Markdown(
1947
+ """<p style="text-align: center; font-size: 18px; font-weight: bold;">1. Upload a malformed hand image 📥</p>"""
1948
  )
1949
+ fix_crop = gr.Image(
1950
  type="numpy",
1951
+ sources=["upload", "webcam", "clipboard"],
1952
+ label="Input Image",
1953
  show_label=True,
1954
  height=LENGTH,
1955
  width=LENGTH,
1956
+ interactive=True,
1957
+ visible=True,
1958
+ image_mode="RGBA"
1959
  )
1960
+ # gr.Markdown(
1961
+ # """<p style="text-align: center;">💡 If you crop, the model can focus on more details of the cropped area. Square crops might work better than rectangle crops.</p>"""
1962
+ # )
1963
+ # fix_example = gr.Examples(
1964
+ # fix_example_imgs,
1965
+ # inputs=[fix_crop],
1966
+ # examples_per_page=20,
1967
+ # )
1968
  gr.Markdown(
1969
+ """<p style="text-align: center;">To crop, click <b>top left</b> and <b>bottom right</b></p>""" # of your desired bounding box around the hand)
1970
  )
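The crop step is driven by two clicks on `fix_crop`; `process_crop` presumably records the first click as the top-left corner and the second as the bottom-right before cropping. A sketch of that two-click flow under those assumptions (names here are illustrative, not the app's):

```python
import gradio as gr
import numpy as np

def two_click_crop(img: np.ndarray, coords, evt: gr.SelectData):
    """First click stores the top-left corner; the second completes the box."""
    coords = coords or []
    coords.append(list(evt.index))           # (x, y) of this click
    if len(coords) < 2:
        return coords, img                   # wait for the second corner
    (x1, y1), (x2, y2) = coords
    x1, x2 = sorted((x1, x2))
    y1, y2 = sorted((y1, y2))
    return [], img[y1:y2, x1:x2]             # reset the state and return the crop
```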
1971
+ with gr.Column():
1972
  gr.Markdown(
1973
+ """<p style="text-align: center; font-size: 18px; font-weight: bold;">2. Brush wrong finger and surrounding area</p>"""
1974
+ )
1975
+ # gr.Markdown(
1976
+ # """<p style="text-align: center;">Don't brush the entire hand!</p>"""
1977
+ # )
1978
+ fix_ref = gr.ImageEditor(
1979
+ type="numpy",
1980
+ label="Image Brushing",
1981
+ sources=(),
1982
+ show_label=True,
1983
+ height=LENGTH,
1984
+ width=LENGTH,
1985
+ layers=False,
1986
+ transforms=("brush"),
1987
+ brush=gr.Brush(
1988
+ colors=["rgb(255, 255, 255)"], default_size=20
1989
+ ), # 204, 50, 50
1990
+ image_mode="RGBA",
1991
+ container=False,
1992
+ interactive=True,
1993
+ )
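`visualize_ref` turns the brushed `gr.ImageEditor` value into the working image plus a binary inpaint mask. In recent Gradio versions the editor returns a dict with `background`, `layers`, and `composite` entries; a minimal sketch under that assumption (not the repository's actual implementation):

```python
import numpy as np

def editor_to_mask(editor_value: dict):
    """Extract the base image and a 0/1 brush mask from a gr.ImageEditor value."""
    background = editor_value["background"][..., :3]        # drop the alpha channel
    layers = editor_value.get("layers") or []
    mask = np.zeros(background.shape[:2], dtype=np.uint8)
    for layer in layers:                                     # brushed strokes live in the layers
        mask |= (layer[..., 3] > 0).astype(np.uint8)         # any painted pixel counts
    return background, mask
```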
1994
+ # fix_ex_brush = gr.Examples(
1995
+ # fix_example_brush,
1996
+ # inputs=[fix_ref],
1997
+ # examples_per_page=20,
1998
+ # )
1999
+
2000
+ # keypoint selection
2001
+ with gr.Column():
2002
+ gr.Markdown(
2003
+ """<p style="text-align: center; font-size: 18px; font-weight: bold;">3. Target hand pose</p>"""
2004
+ )
2005
+ # gr.Markdown(
2006
+ # """<p style="text-align: center;">Either get hand pose from Examples, or manually give hand pose (located at the bottom)</p>"""
2007
+ # )
2008
+ fix_kp_all = gr.Image(
2009
+ type="numpy",
2010
+ label="Target Hand Pose",
2011
+ show_label=False,
2012
+ height=LENGTH,
2013
+ width=LENGTH,
2014
+ interactive=False,
2015
+ visible=True,
2016
+ sources=(),
2017
+ image_mode="RGBA"
2018
+ )
2019
+ # with gr.Accordion(open=True):
2020
+ # fix_ex_kpts = gr.Examples(
2021
+ # fix_example_kpts,
2022
+ # inputs=[fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
2023
+ # examples_per_page=20,
2024
+ # postprocess=False,
2025
+ # elem_id="kpts_examples"
2026
+ # )
2027
+ with gr.Accordion("[Your own image] Manually give hand pose", open=False, elem_id="accordion_bold"):
2028
+ gr.Markdown(
2029
+ """<p style="text-align: center;">&#9312; Tell us if this is right, left, or both hands</p>"""
2030
+ )
2031
+ fix_checkbox = gr.CheckboxGroup(
2032
+ ["Right hand", "Left hand"],
2033
+ show_label=False,
2034
+ interactive=False,
2035
+ )
2036
+ fix_kp_r_info = gr.Markdown(
2037
+ """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>right hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
2038
+ visible=False
2039
+ )
2040
+ fix_kp_right = gr.Image(
2041
+ type="numpy",
2042
+ label="Keypoint Selection (right hand)",
2043
+ show_label=True,
2044
+ height=LENGTH,
2045
+ width=LENGTH,
2046
+ interactive=False,
2047
+ visible=False,
2048
+ sources=[],
2049
+ )
2050
+ with gr.Row():
2051
+ fix_undo_right = gr.Button(
2052
+ value="Undo", interactive=False, visible=False
2053
  )
2054
+ fix_reset_right = gr.Button(
2055
+ value="Reset", interactive=False, visible=False
2056
  )
2057
+ fix_kp_l_info = gr.Markdown(
2058
+ """<p style="text-align: center;">&#9313; Click 21 keypoints on the image to provide the target hand pose of <b>left hand</b>. See the \"OpenPose keypoints convention\" for guidance.</p>""",
2059
+ visible=False
2060
+ )
2061
+ fix_kp_left = gr.Image(
2062
+ type="numpy",
2063
+ label="Keypoint Selection (left hand)",
2064
+ show_label=True,
2065
+ height=LENGTH,
2066
+ width=LENGTH,
2067
+ interactive=False,
2068
+ visible=False,
2069
+ sources=[],
2070
+ )
2071
+ with gr.Row():
2072
+ fix_undo_left = gr.Button(
2073
+ value="Undo", interactive=False, visible=False
2074
  )
2075
+ fix_reset_left = gr.Button(
2076
+ value="Reset", interactive=False, visible=False
2077
  )
2078
  gr.Markdown(
2079
+ """<p style="text-align: left; font-weight: bold; ">OpenPose keypoints convention</p>"""
2080
  )
2081
+ fix_openpose = gr.Image(
2082
+ value="openpose.png",
2083
+ type="numpy",
2084
+ show_label=False,
2085
+ height=LENGTH // 2,
2086
+ width=LENGTH // 2,
2087
+ interactive=False,
2088
  )
2089
+
2090
  # result column
2091
  with gr.Column():
2092
  gr.Markdown(
2093
+ """<p style="text-align: center; font-size: 18px; font-weight: bold;">4. Press &quot;Run&quot; 🎯</p>"""
2094
  )
2095
+ fix_vis_mask32 = gr.Image(
2096
+ type="numpy",
2097
+ label=f"Visualized {opts.latent_size} Inpaint Mask",
2098
+ show_label=True,
2099
+ height=opts.latent_size,
2100
+ width=opts.latent_size,
2101
+ interactive=False,
2102
+ visible=False,
2103
  )
2104
+ fix_run = gr.Button(value="Run", interactive=False, elem_id="run_button")
2105
+ fix_vis_mask256 = gr.Image(
2106
+ type="numpy",
2107
+ show_label=False,
2108
+ height=opts.image_size,
2109
+ width=opts.image_size,
2110
+ interactive=False,
2111
+ visible=False,
2112
+ )
2113
+ # gr.Markdown(
2114
+ # """<p style="text-align: center;">⚠️ >3min per generation</p>"""
2115
+ # )
2116
+ fix_result_original = gr.Gallery(
2117
  type="numpy",
2118
  label="Results",
2119
  show_label=True,
2120
  height=LENGTH,
2121
  min_width=LENGTH,
2122
+ columns=FIX_MAX_N,
2123
  interactive=False,
2124
  preview=True,
2125
  )
2126
+ with gr.Accordion(label="Results of cropped area / Results with pose", open=False):
2127
+ fix_result = gr.Gallery(
2128
+ type="numpy",
2129
+ label="Results of cropped area",
2130
+ show_label=True,
2131
+ height=LENGTH,
2132
+ min_width=LENGTH,
2133
+ columns=FIX_MAX_N,
2134
+ interactive=False,
2135
+ preview=True,
2136
+ )
2137
+ fix_result_pose = gr.Gallery(
2138
  type="numpy",
2139
  label="Results Pose",
2140
  show_label=True,
2141
  height=LENGTH,
2142
  min_width=LENGTH,
2143
+ columns=FIX_MAX_N,
2144
  interactive=False,
2145
  preview=True,
2146
  )
2147
+ # gr.Markdown(
2148
+ # """<p style="text-align: center;">✨ Hit &quot;Clear&quot; to restart from the beginning</p>"""
2149
+ # )
2150
+ fix_clear = gr.ClearButton(elem_id="clear_button")
2151
 
2152
+ gr.Examples(
2153
+ fix_example_all,
2154
+ inputs=[fix_crop, fix_ref, fix_kp_all, fix_cfg, fix_seed, fix_kpts_path],
2155
+ examples_per_page=20,
2156
+ postprocess=False,
2157
+ elem_id="fix_examples_all",
2158
+ )
2159
 
2160
+ # tutorial video
2161
+ with gr.Accordion("Tutorial Videos of Demo 2", elem_id="accordion_bold_large_center"):
2162
+ # gr.Markdown("""<p style="text-align: center; font-size: 20px; font-weight: bold;"></p>""")
2163
+ with gr.Row(variant="panel"):
2164
+ with gr.Column():
2165
+ # gr.Video(
2166
+ # "how_to_videos/subtitled_fix_hands_custom.mp4",
2167
+ # label="Using your own image",
2168
+ # autoplay=True,
2169
+ # loop=True,
2170
+ # show_label=True,
2171
+ # )
2172
+ gr.HTML(tut1_custom)
2173
+ with gr.Column():
2174
+ # gr.Video(
2175
+ # "how_to_videos/subtitled_fix_hands_example.mp4",
2176
+ # label="Using our example image",
2177
+ # autoplay=True,
2178
+ # loop=True,
2179
+ # show_label=True,
2180
+ # )
2181
+ gr.HTML(tut1_example)
2182
+
2183
+ # listeners
2184
+ fix_crop.change(stash_original, fix_crop, fix_original) # fix_original: (real_H, real_W, 3)
2185
+ fix_crop.change(stay_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref])
2186
+ fix_crop.select(process_crop, [fix_crop, fix_crop_coord], [fix_crop_coord, fix_ref, fix_crop])
2187
+ fix_ref.change(visualize_ref, [fix_ref], [fix_img, fix_inpaint_mask])
2188
+ fix_img.change(lambda x: x, [fix_img], [fix_kp_right])
2189
+ fix_img.change(lambda x: x, [fix_img], [fix_kp_left])
2190
+ fix_ref.change(
2191
+ enable_component, [fix_ref, fix_ref], fix_checkbox
2192
  )
2193
+ fix_inpaint_mask.change(
2194
+ enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_kp_right
2195
  )
2196
+ fix_inpaint_mask.change(
2197
+ enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_undo_right
2198
  )
2199
+ fix_inpaint_mask.change(
2200
+ enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_reset_right
2201
  )
2202
+ fix_inpaint_mask.change(
2203
+ enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_kp_left
2204
  )
2205
+ fix_inpaint_mask.change(
2206
+ enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_undo_left
2207
  )
2208
+ fix_inpaint_mask.change(
2209
+ enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_reset_left
2210
  )
2211
+ fix_inpaint_mask.change(
2212
+ enable_component, [fix_inpaint_mask, fix_inpaint_mask], fix_run
2213
  )
2214
+ fix_checkbox.select(
2215
  set_visible,
2216
+ [fix_checkbox, fix_kpts, fix_img, fix_kp_right, fix_kp_left],
2217
  [
2218
+ fix_kpts,
2219
+ fix_kp_right,
2220
+ fix_kp_left,
2221
+ fix_kp_right,
2222
+ fix_undo_right,
2223
+ fix_reset_right,
2224
+ fix_kp_left,
2225
+ fix_undo_left,
2226
+ fix_reset_left,
2227
+ fix_kp_r_info,
2228
+ fix_kp_l_info,
2229
+ ],
2230
  )
2231
+ fix_kp_right.select(
2232
+ get_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts] # fix_img: (real_cropped_H, real_cropped_W, 3)
2233
  )
2234
+ fix_undo_right.click(
2235
+ undo_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]
2236
  )
2237
+ fix_reset_right.click(
2238
+ reset_kps, [fix_img, fix_kpts, gr.State("right")], [fix_kp_right, fix_kpts]
2239
  )
2240
+ fix_kp_left.select(
2241
+ get_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
2242
  )
2243
+ fix_undo_left.click(
2244
+ undo_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
2245
  )
2246
+ fix_reset_left.click(
2247
+ reset_kps, [fix_img, fix_kpts, gr.State("left")], [fix_kp_left, fix_kpts]
2248
  )
2249
+ fix_kpts_path.change(read_kpts, fix_kpts_path, fix_kpts_np)
2250
+ fix_inpaint_mask.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
2251
+ fix_kpts_np.change(enable_component, [fix_inpaint_mask, fix_kpts_np], fix_run)
2252
+ fix_run.click(
2253
+ ready_sample,
2254
+ [fix_ref, fix_inpaint_mask, fix_kpts, fix_kpts_np],
2255
+ [
2256
+ fix_ref_cond,
2257
+ fix_target_cond,
2258
+ fix_latent,
2259
+ fix_inpaint_latent,
2260
+ fix_kpts_np,
2261
+ fix_vis_mask32,
2262
+ fix_vis_mask256,
2263
+ ],
2264
  )
2265
+ fix_inpaint_latent.change(
2266
+ sample_inpaint,
2267
+ [
2268
+ fix_ref_cond,
2269
+ fix_target_cond,
2270
+ fix_latent,
2271
+ fix_inpaint_latent,
2272
+ fix_kpts_np,
2273
+ fix_original,
2274
+ fix_crop_coord,
2275
+ fix_n_generation,
2276
+ fix_seed,
2277
+ fix_cfg,
2278
+ fix_quality,
2279
+ ],
2280
+ [fix_result, fix_result_pose, fix_result_original],
2281
  )
2282
+ fix_clear.click(
2283
+ fix_clear_all,
2284
  [],
2285
  [
2286
+ fix_crop,
2287
+ fix_crop_coord,
2288
+ fix_ref,
2289
+ fix_checkbox,
2290
+ fix_kp_all,
2291
+ fix_kp_right,
2292
+ fix_kp_left,
2293
+ fix_result,
2294
+ fix_result_pose,
2295
+ fix_result_original,
2296
+ fix_inpaint_mask,
2297
+ fix_original,
2298
+ fix_img,
2299
+ fix_vis_mask32,
2300
+ fix_vis_mask256,
2301
+ fix_kpts,
2302
+ fix_kpts_np,
2303
+ fix_ref_cond,
2304
+ fix_target_cond,
2305
+ fix_latent,
2306
+ fix_inpaint_latent,
2307
+ fix_kpts_path,
2308
+ fix_n_generation,
2309
+ fix_seed,
2310
+ fix_cfg,
2311
+ fix_quality,
2312
  ],
2313
  )
2314
+ fix_clear.click(
2315
+ fix_set_unvisible,
2316
  [],
2317
  [
2318
+ fix_kp_right,
2319
+ fix_kp_left,
2320
+ fix_kp_r_info,
2321
+ fix_kp_l_info,
2322
+ fix_undo_left,
2323
+ fix_undo_right,
2324
+ fix_reset_left,
2325
+ fix_reset_right
2326
  ]
2327
  )
2328
 
2329
+ # gr.Markdown("<h1>Trouble Shooting</h1>")
2330
  gr.Markdown(_ACK_)
2331
  gr.Markdown("<h1>Citation</h1>")
2332
  gr.Markdown(
2333
  """<p style="text-align: left;">If this was useful, please cite us! ❤️</p>"""
2334
  )
2335
  gr.Markdown(_CITE_)
2336
+ with gr.Accordion("Trouble Shooting", open=False, elem_id="accordion_bold_large"):
2337
+ gr.Markdown("If error persists, please try the following steps:<br>1. Refresh the page and try again.<br>2. The issue might be due to compatibility with HuggingFace or GPU memory limitations. We recommend cloning this repository and trying it with your own GPU if possible.<br>3. Kindly leave a message on our HuggingFace Spaces Community tab (located at the top right), on our GitHub repository's Issues page, or send us an email. We are happy to help you as soon as possible.")
2338
+ gr.Markdown("If the result is not satisfactory:<br>1. Try changing either <b>Classifier Free Guidance Scale</b> or <b>Seed</b>, that can be found at \"More Options\".")
2339
+ # gr.Markdown("<h1>Acknowledgement</h1>")
2340
 
2341
  # print("Ready to launch..")
2342
  # _, _, shared_url = demo.queue().launch(
diffusion/gaussian_diffusion.py CHANGED
@@ -427,7 +427,7 @@ class GaussianDiffusion:
427
  cond_fn=None,
428
  model_kwargs=None,
429
  device=None,
430
- progress=False,
431
  ):
432
  """
433
  Generate samples from the model.
@@ -474,7 +474,7 @@ class GaussianDiffusion:
474
  cond_fn=None,
475
  model_kwargs=None,
476
  device=None,
477
- progress=False,
478
  jump_length=10,
479
  jump_n_sample=10,
480
  ):
@@ -527,7 +527,7 @@ class GaussianDiffusion:
527
  cond_fn=None,
528
  model_kwargs=None,
529
  device=None,
530
- progress=False,
531
  jump_length=10,
532
  jump_n_sample=10,
533
  ):
@@ -592,9 +592,12 @@ class GaussianDiffusion:
592
  times = get_schedule_jump(t_T=250, n_sample=1, jump_length=jump_length, jump_n_sample=jump_n_sample)
593
  time_pairs = list(zip(times[:-1], times[1:]))
594
 
595
- if progress:
596
- from tqdm.auto import tqdm
597
- time_pairs = tqdm(time_pairs)
598
 
599
  for t_last, t_cur in time_pairs:
600
  idx_wall += 1
@@ -716,7 +719,7 @@ class GaussianDiffusion:
716
  cond_fn=None,
717
  model_kwargs=None,
718
  device=None,
719
- progress=False,
720
  ):
721
  """
722
  Generate samples from the model and yield intermediate samples from
@@ -734,11 +737,13 @@ class GaussianDiffusion:
734
  img = th.randn(*shape, device=device)
735
  indices = list(range(self.num_timesteps))[::-1]
736
 
737
- if progress:
738
  # Lazy import so that we don't depend on tqdm.
739
- from tqdm.auto import tqdm
740
 
741
- indices = tqdm(indices)
742
 
743
  for i in indices:
744
  t = th.tensor([i] * shape[0], device=device)
 
427
  cond_fn=None,
428
  model_kwargs=None,
429
  device=None,
430
+ progress=None,
431
  ):
432
  """
433
  Generate samples from the model.
 
474
  cond_fn=None,
475
  model_kwargs=None,
476
  device=None,
477
+ progress=None,
478
  jump_length=10,
479
  jump_n_sample=10,
480
  ):
 
527
  cond_fn=None,
528
  model_kwargs=None,
529
  device=None,
530
+ progress=None,
531
  jump_length=10,
532
  jump_n_sample=10,
533
  ):
 
592
  times = get_schedule_jump(t_T=250, n_sample=1, jump_length=jump_length, jump_n_sample=jump_n_sample)
593
  time_pairs = list(zip(times[:-1], times[1:]))
594
 
595
+ # if progress:
596
+ # from tqdm.auto import tqdm
597
+ # time_pairs = tqdm(time_pairs)
598
+ if progress is not None:
599
+ progress(len(time_pairs))
600
+ time_pairs = progress.tqdm(time_pairs)
601
 
602
  for t_last, t_cur in time_pairs:
603
  idx_wall += 1
 
719
  cond_fn=None,
720
  model_kwargs=None,
721
  device=None,
722
+ progress=None,
723
  ):
724
  """
725
  Generate samples from the model and yield intermediate samples from
 
737
  img = th.randn(*shape, device=device)
738
  indices = list(range(self.num_timesteps))[::-1]
739
 
740
+ if progress is not None:
741
  # Lazy import so that we don't depend on tqdm.
742
+ # from tqdm.auto import tqdm
743
 
744
+ # indices = tqdm(indices)
745
+ progress(len(indices))
746
+ indices = progress.tqdm(indices)
747
 
748
  for i in indices:
749
  t = th.tensor([i] * shape[0], device=device)
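The gaussian_diffusion.py change replaces the boolean `progress` flag with an object that is callable and exposes `.tqdm()`, so a Gradio progress tracker can be threaded into the sampling loops. A self-contained sketch of how a caller might pass one in (names and wiring here are illustrative, not copied from app.py):

```python
import gradio as gr

def run_sampler(steps: float, progress=gr.Progress()):
    """Gradio injects `progress` when the parameter defaults to gr.Progress().
    The object exposes .tqdm(), which is what the updated p_sample_loop now
    expects instead of a boolean flag."""
    results = []
    for i in progress.tqdm(range(int(steps))):   # mirrors `indices = progress.tqdm(indices)`
        results.append(i * i)                    # stand-in for one denoising step
    return f"finished {len(results)} steps"

with gr.Blocks() as demo_sketch:
    n = gr.Slider(1, 250, value=50, step=1, label="Steps")
    out = gr.Textbox()
    gr.Button("Sample").click(run_sampler, n, out)
```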