Spaces:
Running
on
Zero
Running
on
Zero
update-inference
#4
by
multimodalart
HF Staff
- opened
- app.py +1 -1
- inference.py +11 -15
app.py
CHANGED
@@ -346,7 +346,7 @@ with gr.Blocks(css=css) as demo:
|
|
346 |
with gr.Column():
|
347 |
with gr.Tab("image-to-video") as image_tab:
|
348 |
video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
|
349 |
-
image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam"])
|
350 |
i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
|
351 |
i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
|
352 |
with gr.Tab("text-to-video") as text_tab:
|
|
|
346 |
with gr.Column():
|
347 |
with gr.Tab("image-to-video") as image_tab:
|
348 |
video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
|
349 |
+
image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
|
350 |
i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
|
351 |
i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
|
352 |
with gr.Tab("text-to-video") as text_tab:
|
inference.py
CHANGED
@@ -11,6 +11,7 @@ import imageio
|
|
11 |
import json
|
12 |
import numpy as np
|
13 |
import torch
|
|
|
14 |
from safetensors import safe_open
|
15 |
from PIL import Image
|
16 |
from transformers import (
|
@@ -35,6 +36,7 @@ from ltx_video.pipelines.pipeline_ltx_video import (
|
|
35 |
from ltx_video.schedulers.rf import RectifiedFlowScheduler
|
36 |
from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
|
37 |
from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
|
|
|
38 |
|
39 |
MAX_HEIGHT = 720
|
40 |
MAX_WIDTH = 1280
|
@@ -96,7 +98,12 @@ def load_image_to_tensor_with_resize_and_crop(
|
|
96 |
image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
|
97 |
if not just_crop:
|
98 |
image = image.resize((target_width, target_height))
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
100 |
frame_tensor = (frame_tensor / 127.5) - 1.0
|
101 |
# Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
|
102 |
return frame_tensor.unsqueeze(0).unsqueeze(2)
|
@@ -266,13 +273,6 @@ def main():
|
|
266 |
help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
|
267 |
)
|
268 |
|
269 |
-
parser.add_argument(
|
270 |
-
"--strength",
|
271 |
-
type=float,
|
272 |
-
default=1.0,
|
273 |
-
help="Editing strength (noising level) for video-to-video pipeline.",
|
274 |
-
)
|
275 |
-
|
276 |
# Conditioning arguments
|
277 |
parser.add_argument(
|
278 |
"--conditioning_media_paths",
|
@@ -407,7 +407,6 @@ def infer(
|
|
407 |
negative_prompt: str,
|
408 |
offload_to_cpu: bool,
|
409 |
input_media_path: Optional[str] = None,
|
410 |
-
strength: Optional[float] = 1.0,
|
411 |
conditioning_media_paths: Optional[List[str]] = None,
|
412 |
conditioning_strengths: Optional[List[float]] = None,
|
413 |
conditioning_start_frames: Optional[List[int]] = None,
|
@@ -422,12 +421,10 @@ def infer(
|
|
422 |
|
423 |
models_dir = "MODEL_DIR"
|
424 |
|
425 |
-
|
426 |
-
ltxv_model_name_or_path = "ltxv-13b-0.9.7-distilled-rc3.safetensors"
|
427 |
if not os.path.isfile(ltxv_model_name_or_path):
|
428 |
ltxv_model_path = hf_hub_download(
|
429 |
-
repo_id="
|
430 |
-
#repo_id="Lightricks/LTX-Video",
|
431 |
filename=ltxv_model_name_or_path,
|
432 |
local_dir=models_dir,
|
433 |
repo_type="model",
|
@@ -616,7 +613,6 @@ def infer(
|
|
616 |
frame_rate=frame_rate,
|
617 |
**sample,
|
618 |
media_items=media_item,
|
619 |
-
strength=strength,
|
620 |
conditioning_items=conditioning_items,
|
621 |
is_video=True,
|
622 |
vae_per_channel_normalize=True,
|
@@ -775,4 +771,4 @@ def load_media_file(
|
|
775 |
|
776 |
|
777 |
if __name__ == "__main__":
|
778 |
-
main()
|
|
|
11 |
import json
|
12 |
import numpy as np
|
13 |
import torch
|
14 |
+
import cv2
|
15 |
from safetensors import safe_open
|
16 |
from PIL import Image
|
17 |
from transformers import (
|
|
|
36 |
from ltx_video.schedulers.rf import RectifiedFlowScheduler
|
37 |
from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
|
38 |
from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
|
39 |
+
import ltx_video.pipelines.crf_compressor as crf_compressor
|
40 |
|
41 |
MAX_HEIGHT = 720
|
42 |
MAX_WIDTH = 1280
|
|
|
98 |
image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
|
99 |
if not just_crop:
|
100 |
image = image.resize((target_width, target_height))
|
101 |
+
|
102 |
+
image = np.array(image)
|
103 |
+
image = cv2.GaussianBlur(image, (3, 3), 0)
|
104 |
+
frame_tensor = torch.from_numpy(image).float()
|
105 |
+
frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
|
106 |
+
frame_tensor = frame_tensor.permute(2, 0, 1)
|
107 |
frame_tensor = (frame_tensor / 127.5) - 1.0
|
108 |
# Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
|
109 |
return frame_tensor.unsqueeze(0).unsqueeze(2)
|
|
|
273 |
help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
|
274 |
)
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
# Conditioning arguments
|
277 |
parser.add_argument(
|
278 |
"--conditioning_media_paths",
|
|
|
407 |
negative_prompt: str,
|
408 |
offload_to_cpu: bool,
|
409 |
input_media_path: Optional[str] = None,
|
|
|
410 |
conditioning_media_paths: Optional[List[str]] = None,
|
411 |
conditioning_strengths: Optional[List[float]] = None,
|
412 |
conditioning_start_frames: Optional[List[int]] = None,
|
|
|
421 |
|
422 |
models_dir = "MODEL_DIR"
|
423 |
|
424 |
+
ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
|
|
|
425 |
if not os.path.isfile(ltxv_model_name_or_path):
|
426 |
ltxv_model_path = hf_hub_download(
|
427 |
+
repo_id="Lightricks/LTX-Video",
|
|
|
428 |
filename=ltxv_model_name_or_path,
|
429 |
local_dir=models_dir,
|
430 |
repo_type="model",
|
|
|
613 |
frame_rate=frame_rate,
|
614 |
**sample,
|
615 |
media_items=media_item,
|
|
|
616 |
conditioning_items=conditioning_items,
|
617 |
is_video=True,
|
618 |
vae_per_channel_normalize=True,
|
|
|
771 |
|
772 |
|
773 |
if __name__ == "__main__":
|
774 |
+
main()
|