MuseTalk / scripts /run_lipsync.py
marcos
Add StyleTTS2 integration scripts for voice cloning and lip sync pipeline
66e2a44
#!/usr/bin/env python3
"""
Script para executar lip sync com MuseTalk V1.5.
Uso: python run_lipsync.py --video input.mp4 --audio audio.wav --output output.mp4
"""
import argparse
import os
import subprocess
import tempfile
import yaml
def create_config(video_path: str, audio_path: str, bbox_shift: int = 5) -> str:
    """Write a temporary MuseTalk inference config and return its path.

    The config is a YAML file holding a single task, ``task_0``, with the
    absolute paths of the inputs and the bounding-box shift.

    Args:
        video_path: Path to the input video; stored as an absolute path.
        audio_path: Path to the driving audio; stored as an absolute path.
        bbox_shift: Value written to the ``bbox_shift`` key.

    Returns:
        Path of the created ``.yaml`` file. The file is NOT deleted
        automatically; the caller is responsible for removing it.
    """
    config = {
        'task_0': {
            'video_path': os.path.abspath(video_path),
            'audio_path': os.path.abspath(audio_path),
            'bbox_shift': bbox_shift,
        }
    }

    # delete=False: the file has to outlive this function so that another
    # process can read it by name after we return.
    config_file = tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.yaml',
        delete=False,
    )
    try:
        # The context manager closes (and flushes) the handle even when
        # serialisation fails.
        with config_file:
            yaml.dump(config, config_file)
    except BaseException:
        # Do not leave a half-written config behind in the temp directory.
        os.unlink(config_file.name)
        raise
    return config_file.name
def run_lipsync(video_path: str, audio_path: str, output_dir: str, bbox_shift: int = 5):
    """Run MuseTalk lip sync on a video/audio pair and return the result path.

    MuseTalk is invoked as ``python3 -m scripts.inference`` in a subprocess
    whose working directory is taken from the ``MUSETALK_DIR`` environment
    variable (default ``/root/musetalk-space``). The subprocess output is not
    captured, so it goes straight to the console.

    Args:
        video_path: Input video file.
        audio_path: Audio file to synchronise the lips to.
        output_dir: Directory passed to MuseTalk as ``--result_dir``. It is
            created if missing and converted to an absolute path.
        bbox_shift: Value written to the ``bbox_shift`` key of the config.

    Returns:
        The absolute path ``<output_dir>/v15/<video>_<audio>.mp4`` when that
        file exists. Otherwise the most recently modified ``.mp4`` found in
        ``<output_dir>/v15``, or ``None`` when there is none.

    Raises:
        FileNotFoundError: If ``video_path`` or ``audio_path`` does not exist.
        RuntimeError: If the MuseTalk process exits with a non-zero status.
    """
    # Validate the inputs before creating anything on disk.
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video nao encontrado: {video_path}")
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio nao encontrado: {audio_path}")

    # The subprocess runs with cwd=musetalk_dir, so a relative output_dir
    # would be resolved there by MuseTalk but against our own cwd by the
    # checks below. Making it absolute keeps both sides on the same directory.
    output_dir = os.path.abspath(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    musetalk_dir = os.environ.get('MUSETALK_DIR', '/root/musetalk-space')

    config_path = create_config(video_path, audio_path, bbox_shift)
    print(f"Config criado: {config_path}")
    try:
        cmd = [
            'python3', '-m', 'scripts.inference',
            '--inference_config', config_path,
            '--result_dir', output_dir,
        ]
        print(f"Executando: {' '.join(cmd)}")
        print(f"Diretorio: {musetalk_dir}")
        result = subprocess.run(
            cmd,
            cwd=musetalk_dir,
            capture_output=False,
        )
    finally:
        # Remove the temporary config even if the subprocess could not be
        # started (e.g. musetalk_dir does not exist).
        os.unlink(config_path)

    if result.returncode != 0:
        raise RuntimeError(f"MuseTalk falhou com codigo {result.returncode}")

    # This script expects the results under the 'v15' subdirectory, named
    # after the input video and audio stems.
    results_dir = os.path.join(output_dir, 'v15')
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    audio_name = os.path.splitext(os.path.basename(audio_path))[0]
    expected_output = os.path.join(results_dir, f'{video_name}_{audio_name}.mp4')
    if os.path.exists(expected_output):
        print(f"Video gerado: {expected_output}")
        return expected_output

    # Fallback: the expected name was not produced. A missing results
    # directory simply means nothing was generated.
    if not os.path.isdir(results_dir):
        return None
    candidates = [
        os.path.join(results_dir, name)
        for name in os.listdir(results_dir)
        if name.endswith('.mp4')
    ]
    # Prefer the newest file: it is the most likely product of this run,
    # and the choice does not depend on directory listing order.
    return max(candidates, key=os.path.getmtime, default=None)
def main():
    """Parse the command-line arguments and run the lip sync pipeline.

    Options: ``--video/-v`` and ``--audio/-a`` (both required),
    ``--output/-o`` (output directory, default ``./lipsync_output``) and
    ``--bbox-shift/-b`` (integer, default 5).

    Raises:
        SystemExit: With status 1 when no output video was produced, so that
            shells and pipelines calling this script can detect the failure.
            argparse itself also exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(description='Executar lip sync com MuseTalk')
    parser.add_argument('--video', '-v', required=True, help='Video de entrada')
    parser.add_argument('--audio', '-a', required=True, help='Audio para sincronizar')
    parser.add_argument('--output', '-o', default='./lipsync_output', help='Diretorio de saida')
    parser.add_argument('--bbox-shift', '-b', type=int, default=5, help='Ajuste do bounding box')
    args = parser.parse_args()

    output = run_lipsync(
        video_path=args.video,
        audio_path=args.audio,
        output_dir=args.output,
        bbox_shift=args.bbox_shift,
    )

    if not output:
        print("\nErro: Nenhum video foi gerado")
        # Report the failure through the exit status, not only in the text.
        raise SystemExit(1)
    print(f"\nSucesso! Video salvo em: {output}")
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()