Spaces:
Configuration error
Configuration error
| #!/usr/bin/env python3 | |
| """ | |
| Full pipeline: YouTube -> audio with cloned voice -> lip sync | |
| Usage: python full_pipeline.py --youtube-url "..." --text "Your text" --output ./output | |
| """ | |
| import argparse | |
| import os | |
| import subprocess | |
| import tempfile | |
# Compatibility shim for PyTorch 2.6+: every torch.load call is routed through
# a wrapper that pins weights_only=False.
# NOTE(review): weights_only=False presumably re-enables full pickle loading,
# so only trusted checkpoints should be loaded -- confirm against the
# torch.load documentation.
import torch

# Keep a handle on the real loader before it is replaced below.
original_load = torch.load


def patched_load(*args, **kwargs):
    """Call the saved ``torch.load`` with ``weights_only`` forced to False.

    Positional and keyword arguments are forwarded unchanged, except that any
    ``weights_only`` value supplied by the caller is overridden.
    """
    forced_kwargs = dict(kwargs, weights_only=False)
    return original_load(*args, **forced_kwargs)


torch.load = patched_load
def download_youtube_video(url: str, output_path: str, start: int = 0, duration: int = 15):
    """Download a clip of a YouTube video with yt-dlp.

    Args:
        url: Address of the video to download.
        output_path: Value passed to yt-dlp's ``-o`` option.
        start: Offset of the clip from the beginning of the video, in seconds.
        duration: Length of the clip, in seconds.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``start`` is negative or ``duration`` is not positive.
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.
    """
    if start < 0:
        raise ValueError(f"start must be >= 0, got {start}")
    if duration <= 0:
        raise ValueError(f"duration must be > 0, got {duration}")

    print("[1/4] Baixando video do YouTube...")
    # The clip is requested with --download-sections ("*START-END" selects a
    # time range). The previous approach passed -ss/-t through
    # --postprocessor-args, which only has an effect when an ffmpeg
    # post-processor actually runs, so the full video could be downloaded.
    section = f'*{start}-{start + duration}'
    cmd = [
        'yt-dlp',
        '-f', 'best[height<=720]',
        '--download-sections', section,
        '-o', output_path,
        url,
    ]
    subprocess.run(cmd, check=True)
    return output_path
def extract_voice_reference(video_path: str, output_path: str,
                            duration: int = 15, sample_rate: int = 22050):
    """Extract a mono reference audio clip from a video using ffmpeg.

    Args:
        video_path: Input file handed to ffmpeg with ``-i``.
        output_path: Destination audio file; overwritten if present (``-y``).
        duration: Value for ffmpeg's ``-t`` option, in seconds. Defaults to
            the previously hard-coded 15.
        sample_rate: Value for ffmpeg's ``-ar`` option, in Hz. Defaults to
            the previously hard-coded 22050.

    Returns:
        ``output_path``, unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status. Its captured stderr is printed before re-raising.
    """
    import sys

    print("[2/4] Extraindo audio de referencia...")
    cmd = [
        'ffmpeg', '-y',
        '-i', video_path,
        '-ar', str(sample_rate),
        '-ac', '1',
        '-t', str(duration),
        output_path,
    ]
    try:
        subprocess.run(cmd, capture_output=True, check=True)
    except subprocess.CalledProcessError as exc:
        # Output is captured to keep the console quiet on success; on failure
        # the diagnostics would otherwise be lost, so surface them here.
        if exc.stderr:
            print(exc.stderr.decode(errors='replace'), file=sys.stderr)
        raise
    return output_path
def generate_cloned_audio(text: str, voice_ref: str, output_path: str):
    """Synthesize ``text`` with StyleTTS2 and write the result as a WAV file.

    Args:
        text: Text handed to the StyleTTS2 inference call.
        voice_ref: Path passed as ``target_voice_path`` to the inference call.
        output_path: Destination of the WAV file.

    Returns:
        ``output_path``, unchanged.
    """
    print(f"[3/4] Gerando audio com StyleTTS2...")
    # Imported at call time rather than at module import.
    from styletts2 import tts
    import scipy.io.wavfile as wavfile

    synthesizer = tts.StyleTTS2()
    inference_options = {
        'target_voice_path': voice_ref,
        'diffusion_steps': 10,
    }
    waveform = synthesizer.inference(text, **inference_options)
    # The file is written with a fixed rate of 24000.
    wavfile.write(output_path, 24000, waveform)
    return output_path
def run_lipsync(video_path: str, audio_path: str, output_dir: str):
    """Run the lip-sync inference script and locate the resulting video.

    A temporary YAML config with a single ``task_0`` entry is written, then
    ``python3 -m scripts.inference`` is run inside the directory named by the
    ``MUSETALK_DIR`` environment variable (default ``/root/musetalk-space``).

    Args:
        video_path: Source video; passed to the script as an absolute path.
        audio_path: Driving audio; passed to the script as an absolute path.
        output_dir: Directory given to the script as ``--result_dir``.

    Returns:
        Path of the most recently modified ``.mp4`` inside
        ``<output_dir>/v15``, or ``None`` if that directory holds no ``.mp4``.

    Raises:
        subprocess.CalledProcessError: If the inference script fails.
        FileNotFoundError: If ``<output_dir>/v15`` does not exist afterwards.
    """
    print("[4/4] Executando lip sync...")
    import yaml

    config = {
        'task_0': {
            'video_path': os.path.abspath(video_path),
            'audio_path': os.path.abspath(audio_path),
            'bbox_shift': 5,
        }
    }
    musetalk_dir = os.environ.get('MUSETALK_DIR', '/root/musetalk-space')

    config_file = tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False)
    try:
        with config_file:
            yaml.dump(config, config_file)
        cmd = [
            'python3', '-m', 'scripts.inference',
            '--inference_config', config_file.name,
            # The child runs with cwd=musetalk_dir, so a relative directory
            # would resolve somewhere other than where results are searched
            # below; hand it an absolute path.
            '--result_dir', os.path.abspath(output_dir),
        ]
        subprocess.run(cmd, cwd=musetalk_dir, check=True)
    finally:
        # Remove the temporary config even when dumping or inference fails.
        os.unlink(config_file.name)

    # NOTE(review): results are expected under a 'v15' subdirectory --
    # presumably the model version used by the script; confirm.
    results_dir = os.path.join(output_dir, 'v15')
    candidates = [
        os.path.join(results_dir, name)
        for name in sorted(os.listdir(results_dir))
        if name.endswith('.mp4')
    ]
    if not candidates:
        return None
    # The directory may hold videos from earlier runs; take the newest one.
    return max(candidates, key=os.path.getmtime)
def main():
    """Parse command-line arguments and run the four pipeline stages.

    Stages, in order: download the source video, extract the reference audio,
    generate the cloned-voice audio, and run lip sync. All intermediate files
    are placed inside the ``--output`` directory, which is created if missing.

    Raises:
        SystemExit: If the lip-sync stage reports no output video.
    """
    parser = argparse.ArgumentParser(description='Pipeline completo de video com lip sync')
    parser.add_argument('--youtube-url', '-y', required=True, help='URL do YouTube')
    parser.add_argument('--text', '-t', required=True, help='Texto para falar')
    parser.add_argument('--output', '-o', default='./output', help='Diretorio de saida')
    parser.add_argument('--start', '-s', type=int, default=0, help='Segundo inicial do video')
    parser.add_argument('--duration', '-d', type=int, default=15, help='Duracao em segundos')
    args = parser.parse_args()

    # Make sure the output directory exists before any stage writes to it.
    os.makedirs(args.output, exist_ok=True)

    # Intermediate artifacts live next to the final result.
    video_path = os.path.join(args.output, 'source_video.mp4')
    voice_ref_path = os.path.join(args.output, 'voice_ref.wav')
    audio_path = os.path.join(args.output, 'generated_audio.wav')

    # Run the pipeline.
    download_youtube_video(args.youtube_url, video_path, args.start, args.duration)
    extract_voice_reference(video_path, voice_ref_path)
    generate_cloned_audio(args.text, voice_ref_path, audio_path)
    final_video = run_lipsync(video_path, audio_path, args.output)

    if final_video is None:
        # Do not report success (and exit status 0) when nothing was produced.
        raise SystemExit("Erro: o lip sync nao gerou nenhum video de saida.")

    print(f"\n{'='*50}")
    print("Pipeline concluido!")
    print(f"Video final: {final_video}")
    print(f"{'='*50}")


if __name__ == '__main__':
    main()