multimodalart HF Staff committed on
Commit
190cbef
·
verified ·
1 Parent(s): 7d1232d
Files changed (2) hide show
  1. app.py +1 -1
  2. inference.py +11 -15
app.py CHANGED
@@ -346,7 +346,7 @@ with gr.Blocks(css=css) as demo:
346
  with gr.Column():
347
  with gr.Tab("image-to-video") as image_tab:
348
  video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
349
- image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam"])
350
  i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
351
  i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
352
  with gr.Tab("text-to-video") as text_tab:
 
346
  with gr.Column():
347
  with gr.Tab("image-to-video") as image_tab:
348
  video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
349
+ image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
350
  i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
351
  i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
352
  with gr.Tab("text-to-video") as text_tab:
inference.py CHANGED
@@ -11,6 +11,7 @@ import imageio
11
  import json
12
  import numpy as np
13
  import torch
 
14
  from safetensors import safe_open
15
  from PIL import Image
16
  from transformers import (
@@ -35,6 +36,7 @@ from ltx_video.pipelines.pipeline_ltx_video import (
35
  from ltx_video.schedulers.rf import RectifiedFlowScheduler
36
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
37
  from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
 
38
 
39
  MAX_HEIGHT = 720
40
  MAX_WIDTH = 1280
@@ -96,7 +98,12 @@ def load_image_to_tensor_with_resize_and_crop(
96
  image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
97
  if not just_crop:
98
  image = image.resize((target_width, target_height))
99
- frame_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).float()
 
 
 
 
 
100
  frame_tensor = (frame_tensor / 127.5) - 1.0
101
  # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
102
  return frame_tensor.unsqueeze(0).unsqueeze(2)
@@ -266,13 +273,6 @@ def main():
266
  help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
267
  )
268
 
269
- parser.add_argument(
270
- "--strength",
271
- type=float,
272
- default=1.0,
273
- help="Editing strength (noising level) for video-to-video pipeline.",
274
- )
275
-
276
  # Conditioning arguments
277
  parser.add_argument(
278
  "--conditioning_media_paths",
@@ -407,7 +407,6 @@ def infer(
407
  negative_prompt: str,
408
  offload_to_cpu: bool,
409
  input_media_path: Optional[str] = None,
410
- strength: Optional[float] = 1.0,
411
  conditioning_media_paths: Optional[List[str]] = None,
412
  conditioning_strengths: Optional[List[float]] = None,
413
  conditioning_start_frames: Optional[List[int]] = None,
@@ -422,12 +421,10 @@ def infer(
422
 
423
  models_dir = "MODEL_DIR"
424
 
425
- #ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
426
- ltxv_model_name_or_path = "ltxv-13b-0.9.7-distilled-rc3.safetensors"
427
  if not os.path.isfile(ltxv_model_name_or_path):
428
  ltxv_model_path = hf_hub_download(
429
- repo_id="LTX-Colab/LTX-Video-Preview",
430
- #repo_id="Lightricks/LTX-Video",
431
  filename=ltxv_model_name_or_path,
432
  local_dir=models_dir,
433
  repo_type="model",
@@ -616,7 +613,6 @@ def infer(
616
  frame_rate=frame_rate,
617
  **sample,
618
  media_items=media_item,
619
- strength=strength,
620
  conditioning_items=conditioning_items,
621
  is_video=True,
622
  vae_per_channel_normalize=True,
@@ -775,4 +771,4 @@ def load_media_file(
775
 
776
 
777
  if __name__ == "__main__":
778
- main()
 
11
  import json
12
  import numpy as np
13
  import torch
14
+ import cv2
15
  from safetensors import safe_open
16
  from PIL import Image
17
  from transformers import (
 
36
  from ltx_video.schedulers.rf import RectifiedFlowScheduler
37
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
38
  from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
39
+ import ltx_video.pipelines.crf_compressor as crf_compressor
40
 
41
  MAX_HEIGHT = 720
42
  MAX_WIDTH = 1280
 
98
  image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
99
  if not just_crop:
100
  image = image.resize((target_width, target_height))
101
+
102
+ image = np.array(image)
103
+ image = cv2.GaussianBlur(image, (3, 3), 0)
104
+ frame_tensor = torch.from_numpy(image).float()
105
+ frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
106
+ frame_tensor = frame_tensor.permute(2, 0, 1)
107
  frame_tensor = (frame_tensor / 127.5) - 1.0
108
  # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
109
  return frame_tensor.unsqueeze(0).unsqueeze(2)
 
273
  help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
274
  )
275
 
 
 
 
 
 
 
 
276
  # Conditioning arguments
277
  parser.add_argument(
278
  "--conditioning_media_paths",
 
407
  negative_prompt: str,
408
  offload_to_cpu: bool,
409
  input_media_path: Optional[str] = None,
 
410
  conditioning_media_paths: Optional[List[str]] = None,
411
  conditioning_strengths: Optional[List[float]] = None,
412
  conditioning_start_frames: Optional[List[int]] = None,
 
421
 
422
  models_dir = "MODEL_DIR"
423
 
424
+ ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
 
425
  if not os.path.isfile(ltxv_model_name_or_path):
426
  ltxv_model_path = hf_hub_download(
427
+ repo_id="Lightricks/LTX-Video",
 
428
  filename=ltxv_model_name_or_path,
429
  local_dir=models_dir,
430
  repo_type="model",
 
613
  frame_rate=frame_rate,
614
  **sample,
615
  media_items=media_item,
 
616
  conditioning_items=conditioning_items,
617
  is_video=True,
618
  vae_per_channel_normalize=True,
 
771
 
772
 
773
  if __name__ == "__main__":
774
+ main()