import sys

import torch
import gradio as gr
import opencc

# Make the bundled Matcha-TTS package importable before the cosyvoice imports.
sys.path.append('third_party/Matcha-TTS')

from cosyvoice.cli.cosyvoice import CosyVoice2
from cosyvoice.utils.file_utils import load_wav
from huggingface_hub import hf_hub_download  # noqa: F401 -- kept; may be relied on elsewhere

# Simplified-to-Traditional Chinese converter.
# NOTE(review): assumes the TTS models expect Traditional-script input — confirm.
converter = opencc.OpenCC('s2t.json')

# Load both TTS models once at startup (heavy; shared across all requests).
cosyvoice_base = CosyVoice2(
    'ASLP-lab/WSYue-TTS-Cosyvoice2',
    load_jit=False, load_trt=False, load_vllm=False, fp16=False
)
cosyvoice_zjg = CosyVoice2(
    'ASLP-lab/WSYue-TTS-Cosyvoice2-zjg',
    load_jit=False, load_trt=False, load_vllm=False, fp16=False
)


def tts_inference(model_choice, text, prompt_audio):
    """Synthesize Cantonese speech for ``text`` with the selected model.

    Parameters
    ----------
    model_choice : str
        One of the model names offered by the dropdown.
    text : str
        Input text; Simplified characters are converted to Traditional
        before synthesis.
    prompt_audio : str | None
        Path to a reference audio file. Required only for
        "CosyVoice2-base"; the preset voices override it.

    Returns
    -------
    tuple
        ``((sample_rate, audio_ndarray), status_message)`` on success,
        or ``(None, error_message)`` on failure.
    """
    # Pick the model; preset voices force their own reference audio.
    if model_choice == "CosyVoice2-张悦楷粤语评书":
        model = cosyvoice_zjg
        prompt_audio = "asset/sg_017_090.wav"
    elif model_choice == "CosyVoice2-精品女音":
        # NOTE(review): this option is not in the current dropdown list,
        # but the branch is kept so re-enabling it is a one-line UI change.
        model = cosyvoice_base
        prompt_audio = "asset/F01_中立_20054.wav"
    elif model_choice == "CosyVoice2-base":
        model = cosyvoice_base
        if prompt_audio is None:
            return None, "请上传参考音频"
    else:
        return None, "未知模型"

    # Fix: reject empty/whitespace-only input early instead of feeding it
    # to the synthesis pipeline.
    if not text or not text.strip():
        return None, "请输入文本"

    # Convert Simplified -> Traditional before synthesis.
    text = converter.convert(text)
    prompt_speech_16k = load_wav(prompt_audio, 16000)

    # Fix: iterate the generator directly (the enumerate index was unused).
    all_speech = [
        chunk['tts_speech']
        for chunk in model.inference_instruct2(
            text, "用粤语说这句话", prompt_speech_16k, stream=False
        )
    ]
    # Fix: guard against an empty generation so torch.cat cannot raise.
    if not all_speech:
        return None, "生成失败:模型未返回音频"

    # Chunks are (1, samples); concatenate along the time axis, then drop
    # the channel dim for Gradio's numpy audio format.
    concatenated_speech = torch.cat(all_speech, dim=1)
    audio_numpy = concatenated_speech.squeeze(0).cpu().numpy()
    return (model.sample_rate, audio_numpy), f"生成成功:{text}"


# ---- Gradio Interface ----
demo = gr.Interface(
    fn=tts_inference,
    inputs=[
        gr.Dropdown(
            ["CosyVoice2-base", "CosyVoice2-张悦楷粤语评书"],
            label="选择模型",
            value="CosyVoice2-base"
        ),
        gr.Textbox(lines=2, label="输入文本"),
        gr.Audio(
            sources=["upload"], type="filepath",
            label="上传参考音频(仅 CosyVoice2-base 必需)"
        ),
    ],
    outputs=[
        gr.Audio(type="numpy", label="生成的语音"),
        gr.Textbox(label="状态信息"),
    ],
)

# Fix: guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()