Spaces:
Configuration error
Configuration error
File size: 4,062 Bytes
66e2a44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
#!/usr/bin/env python3
"""
Full pipeline: YouTube video -> cloned-voice audio -> lip sync.
Usage: python full_pipeline.py --youtube-url "..." --text "Your text" --output ./output
(--output is the output directory; intermediate files and results are written inside it.)
"""
import argparse
import os
import subprocess
import tempfile
# Workaround for PyTorch 2.6+: route every torch.load call through a wrapper
# that pins weights_only=False.
# NOTE(review): weights_only=False allows full pickle deserialization, so only
# checkpoints from trusted sources should be loaded while this patch is active.
import torch

original_load = torch.load


def patched_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    Any ``weights_only`` value supplied by the caller is overridden; all other
    positional and keyword arguments are forwarded untouched.
    """
    forced_kwargs = dict(kwargs, weights_only=False)
    return original_load(*args, **forced_kwargs)


torch.load = patched_load
def download_youtube_video(url: str, output_path: str, start: int = 0, duration: int = 15):
    """Download a clip of a YouTube video with yt-dlp.

    Args:
        url: URL of the video to download.
        output_path: File path the downloaded clip is written to.
        start: Offset, in seconds, at which the clip begins.
        duration: Length of the clip, in seconds.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``start`` is negative or ``duration`` is not positive.
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.
    """
    if start < 0:
        raise ValueError(f"start must be >= 0, got {start}")
    if duration <= 0:
        raise ValueError(f"duration must be > 0, got {duration}")

    print("[1/4] Baixando video do YouTube...")
    cmd = [
        'yt-dlp',
        '-f', 'best[height<=720]',
        # Let yt-dlp cut the time range itself. Passing "-ss/-t" through
        # --postprocessor-args only has an effect when an ffmpeg
        # post-processing step happens to run, which is not guaranteed for a
        # single pre-merged format, so the clip bounds could be ignored.
        '--download-sections', f'*{start}-{start + duration}',
        '-o', output_path,
        # End of options: a URL starting with "-" must not be parsed as a flag.
        '--',
        url,
    ]
    subprocess.run(cmd, check=True)
    return output_path
def extract_voice_reference(video_path: str, output_path: str):
    """Extract the reference audio from a video by invoking ffmpeg.

    The ffmpeg command overwrites any existing output (``-y``) and requests a
    22050 Hz, single-channel result limited to 15 seconds. ffmpeg's stdout and
    stderr are captured instead of being shown on the console.

    Args:
        video_path: Path of the input video.
        output_path: Path the extracted audio is written to.

    Returns:
        ``output_path``, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    print("[2/4] Extraindo audio de referencia...")
    source_args = ['-i', video_path]
    audio_args = ['-ar', '22050', '-ac', '1']
    length_args = ['-t', '15']
    subprocess.run(
        ['ffmpeg', '-y', *source_args, *audio_args, *length_args, output_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )
    return output_path
def generate_cloned_audio(text: str, voice_ref: str, output_path: str):
    """Synthesize ``text`` with StyleTTS2, using ``voice_ref`` as the target voice.

    Args:
        text: Text to be spoken.
        voice_ref: Path of the reference audio passed as ``target_voice_path``.
        output_path: Path of the WAV file to write.

    Returns:
        ``output_path``, unchanged.
    """
    print("[3/4] Gerando audio com StyleTTS2...")
    # These imports run inside the function, after the progress message.
    from styletts2 import tts as styletts2_tts
    from scipy.io import wavfile as wav_io

    synthesizer = styletts2_tts.StyleTTS2()
    waveform = synthesizer.inference(text, target_voice_path=voice_ref, diffusion_steps=10)
    # The file is written with a 24000 Hz sample rate.
    wav_io.write(output_path, 24000, waveform)
    return output_path
def run_lipsync(video_path: str, audio_path: str, output_dir: str):
    """Run the lip-sync inference script and return the generated video path.

    A temporary YAML config describing one task (video, audio, bbox shift) is
    written, then ``python3 -m scripts.inference`` is run from the directory
    named by the ``MUSETALK_DIR`` environment variable (default
    ``/root/musetalk-space``).

    Args:
        video_path: Path of the source video.
        audio_path: Path of the audio to sync the video to.
        output_dir: Directory passed to the inference script as ``--result_dir``.

    Returns:
        Absolute path of the most recently modified ``.mp4`` inside
        ``<output_dir>/v15``, or ``None`` if that directory does not exist or
        contains no ``.mp4`` file.

    Raises:
        subprocess.CalledProcessError: If the inference script exits with a
            non-zero status.
    """
    print("[4/4] Executando lip sync...")
    import yaml

    # The subprocess runs with cwd=musetalk_dir, so every path handed to it
    # must be absolute. A relative output_dir would be resolved inside the
    # MuseTalk directory while the result search below looked in our own cwd.
    result_dir = os.path.abspath(output_dir)

    # Build the inference config.
    config = {
        'task_0': {
            'video_path': os.path.abspath(video_path),
            'audio_path': os.path.abspath(audio_path),
            'bbox_shift': 5,
        }
    }
    # delete=False: the file must outlive this handle so the subprocess can
    # read it; it is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as config_file:
        yaml.dump(config, config_file)
    config_path = config_file.name

    musetalk_dir = os.environ.get('MUSETALK_DIR', '/root/musetalk-space')
    cmd = [
        'python3', '-m', 'scripts.inference',
        '--inference_config', config_path,
        '--result_dir', result_dir,
    ]
    try:
        subprocess.run(cmd, cwd=musetalk_dir, check=True)
    finally:
        # Remove the temporary config even when inference fails.
        os.unlink(config_path)

    # Locate the output video. Results are looked up in the 'v15' subdirectory
    # (presumably the inference script's version-named folder; confirm).
    version_dir = os.path.join(result_dir, 'v15')
    if not os.path.isdir(version_dir):
        return None
    candidates = [
        os.path.join(version_dir, name)
        for name in os.listdir(version_dir)
        if name.endswith('.mp4')
    ]
    if not candidates:
        return None
    # Pick the newest file so a leftover video from an earlier run is not
    # returned in place of the one just generated.
    return max(candidates, key=os.path.getmtime)
def main():
    """Parse the command line and run the four pipeline stages in order.

    Stages: download the YouTube clip, extract the voice reference, generate
    the cloned-voice audio, and run lip sync. All intermediate files are
    written inside the ``--output`` directory.

    Raises:
        SystemExit: On invalid command-line arguments (raised by argparse), or
            when the lip-sync stage reports no output video.
    """
    parser = argparse.ArgumentParser(description='Pipeline completo de video com lip sync')
    parser.add_argument('--youtube-url', '-y', required=True, help='URL do YouTube')
    parser.add_argument('--text', '-t', required=True, help='Texto para falar')
    parser.add_argument('--output', '-o', default='./output', help='Diretorio de saida')
    parser.add_argument('--start', '-s', type=int, default=0, help='Segundo inicial do video')
    parser.add_argument('--duration', '-d', type=int, default=15, help='Duracao em segundos')
    args = parser.parse_args()

    # Create the output directory.
    os.makedirs(args.output, exist_ok=True)

    # Intermediate files.
    video_path = os.path.join(args.output, 'source_video.mp4')
    voice_ref_path = os.path.join(args.output, 'voice_ref.wav')
    audio_path = os.path.join(args.output, 'generated_audio.wav')

    # Run the pipeline.
    download_youtube_video(args.youtube_url, video_path, args.start, args.duration)
    extract_voice_reference(video_path, voice_ref_path)
    generate_cloned_audio(args.text, voice_ref_path, audio_path)
    final_video = run_lipsync(video_path, audio_path, args.output)

    # Do not report success when the lip-sync stage produced no video.
    if final_video is None:
        raise SystemExit('Erro: o lip sync nao gerou nenhum video de saida.')

    banner = '=' * 50
    print(f"\n{banner}")
    print("Pipeline concluido!")
    print(f"Video final: {final_video}")
    print(banner)


if __name__ == '__main__':
    main()
|