WSChuan-ASR / infer_paraformer.py
ASLP-lab's picture
Upload 6 files
0741e0c verified
raw
history blame
2.28 kB
import argparse
import json
import os
from funasr import AutoModel
def read_wav_scp(wav_scp_file: str):
    """Read a ``wav.scp`` file and return a list of ``(id, wav_path)`` tuples.

    Each non-empty line must have the form ``<id><whitespace><wav_path>``.
    Only the first run of whitespace (spaces or tabs) separates the two
    fields, so a path that itself contains spaces is kept intact.

    Args:
        wav_scp_file: Path to the ``wav.scp`` file, read as UTF-8.

    Returns:
        A list of ``(id, wav_path)`` string tuples in file order.

    Raises:
        ValueError: If a non-empty line does not contain both an id and a path.
    """
    wav_files = []
    with open(wav_scp_file, "r", encoding="utf-8") as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # Blank lines (typically a trailing newline at end of file)
                # carry no entry; skip them instead of failing to unpack.
                continue
            # Split on the first run of any whitespace so tab-separated and
            # multi-space files do not leave stray whitespace in the path.
            fields = line.split(maxsplit=1)
            if len(fields) != 2:
                raise ValueError(
                    f"{wav_scp_file}:{line_no}: expected '<id> <wav_path>', "
                    f"got {line!r}"
                )
            utt_id, wav_path = fields
            wav_files.append((utt_id, wav_path))
    return wav_files
def save_results(results, output_file: str):
    """Write inference results to ``output_file``, one ``key text`` per line.

    Args:
        results: Iterable of dicts. The ``"key"`` and ``"text"`` entries are
            written; a missing entry is written as an empty string.
        output_file: Destination path. An existing file is overwritten.
    """
    # Pin UTF-8 so transcripts with non-ASCII characters are written the same
    # way regardless of the platform's locale default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        for result in results:
            key = result.get("key", "")
            text = result.get("text", "")
            f.write(f"{key} {text}\n")
def main():
    """Command-line entry point: transcribe every entry of a wav.scp file.

    Parses the command-line arguments, loads the model with ``AutoModel``,
    calls ``model.generate`` on each wav path listed in ``--wav_scp_file``,
    and writes one ``<id> <text>`` line per non-empty result to
    ``--output_file``.

    NOTE(review): ``--output_dir`` is a required argument but is never read
    here; it is kept so existing invocations continue to work.
    """
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(description="Run speech recognition inference")
    parser.add_argument('--model', type=str, required=True, help="Model name or path")
    parser.add_argument('--wav_scp_file', type=str, required=True, help="Path to wav.scp file")
    parser.add_argument('--output_dir', type=str, required=True, help="Directory to save inference results")
    parser.add_argument('--device', type=str, default="cpu", choices=["cpu", "cuda"], help="Device to run inference on")
    parser.add_argument('--output_file', type=str, required=True, help="File to save the inference results")
    args = parser.parse_args()

    # Results are only written after the whole loop finishes, so make sure the
    # destination directory exists *before* the expensive inference run;
    # otherwise a missing directory would discard every result at the end.
    output_parent = os.path.dirname(os.path.abspath(args.output_file))
    os.makedirs(output_parent, exist_ok=True)

    # Initialize the model.
    print(f"Initializing model {args.model}...")
    model = AutoModel(model=args.model, device=args.device)

    # Read the wav.scp file.
    wav_files = read_wav_scp(args.wav_scp_file)

    # Collected results, one {"key", "text"} dict per recognized utterance.
    all_results = []

    # Run inference on each audio file in turn.
    for utt_id, wav_path in wav_files:
        print(f"正在处理音频文件 {utt_id}: {wav_path}")
        res = model.generate(wav_path)
        print(f"推理结果: {res}")
        if res:
            # Only the first returned item is used; the id from wav.scp is the
            # output key.
            text = res[0].get("text", "")
            all_results.append({"key": utt_id, "text": text})

    # Save all results to the output file.
    save_results(all_results, args.output_file)
    print(f"推理结果已保存到 {args.output_file}")
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()