# Acceleration options: text-encoder loading/placement, mixed-precision mode
# and quantization backend. Children must be indented under `acceleration`;
# at column 0 they parse as unrelated top-level keys.
acceleration:
  load_text_encoder_in_8bit: true
  mixed_precision_mode: bf16
  quantization: int8-quanto
  text_encoder_device: cuda
# Checkpoint saving options. `interval` is scoped to this section; a separate
# `interval` exists for validation, so the nesting is required to keep the two
# from colliding as duplicate keys.
checkpoints:
  interval: 250  # presumably counted in training steps — confirm
  keep_last_n: -1  # NOTE(review): -1 looks like a "keep all" sentinel — confirm
  precision: bfloat16
# Dataset loading options.
data:
  num_dataloader_workers: 2
  # NOTE(review): DELETED looks like a redaction placeholder — supply the real
  # preprocessed dataset location before running.
  preprocessed_data_root: DELETED
# Flow-matching timestep sampling configuration.
flow_matching:
  timestep_sampling_mode: shifted_logit_normal
  timestep_sampling_params: {}  # empty mapping: no sampler parameters set here
# Model hub upload options. Pushing is switched off, and no model id is set.
hub:
  hub_model_id: null
  push_to_hub: false
# LoRA adapter hyperparameters.
lora:
  alpha: 128  # same value as `rank` below
  dropout: 0.0
  rank: 128
  # Names of the modules the adapter is attached to (presumably attention
  # projections — confirm against the model definition).
  target_modules:
    - to_k
    - to_q
    - to_v
    - to_out.0
# Model sources and training mode.
# NOTE(review): the three DELETED values look like redaction placeholders —
# replace them with real paths (or whatever the consumer accepts for "unset")
# before running.
model:
  load_checkpoint: DELETED
  model_path: DELETED
  text_encoder_path: DELETED
  training_mode: lora
# Optimizer, learning-rate schedule and training length.
optimization:
  batch_size: 1
  enable_gradient_checkpointing: true
  gradient_accumulation_steps: 1
  learning_rate: 0.0001
  max_grad_norm: 1.0
  optimizer_type: adamw8bit
  scheduler_params: {}  # empty mapping: no scheduler parameters set here
  scheduler_type: linear
  steps: 10000
# Output location for the run.
# NOTE(review): DELETED looks like a redaction placeholder — set a real path.
output_dir: DELETED
# Seed for the training run; a second `seed` entry appears later among the
# validation settings.
seed: 42
# Training strategy selection and its options.
training_strategy:
  audio_latents_dir: audio_latents
  # Value in [0, 1]; presumably the probability of conditioning on the first
  # frame for a given sample — confirm against trainer docs.
  first_frame_conditioning_p: 0.5
  name: text_to_video
  with_audio: true
# Periodic validation sampling. `interval` and `seed` here are scoped to
# validation; they must stay nested so they do not collide with the checkpoint
# `interval` and the top-level `seed`.
validation:
  frame_rate: 25.0
  generate_audio: true
  guidance_scale: 4.0
  images: null
  include_reference_in_output: false
  inference_steps: 50
  interval: 100
  negative_prompt: worst quality, inconsistent motion, blurry, jittery, distorted
  # NOTE(review): both prompts are redaction placeholders — fill in real text.
  prompts:
    - DELETED
    - DELETED
  reference_downscale_factor: 1
  reference_videos: null
  seed: 42
  skip_initial_validation: true
  stg_blocks:
    - 29
  stg_mode: stg_av
  stg_scale: 0.0  # NOTE(review): a scale of 0.0 presumably disables STG — confirm
  # Three integers; presumably width, height, frame count — confirm ordering.
  video_dims:
    - 960
    - 544
    - 89
  videos_per_prompt: 1
# Weights & Biases logging options. Logging is switched off (`enabled: false`);
# the remaining keys only matter once it is turned on.
wandb:
  enabled: false
  entity: null
  log_validation_videos: true
  project: ltx-2-trainer
  tags:
    - ltx2
    - lora