ASLP-lab
/

WSChuan-ASR

Automatic Speech Recognition

Model card Files Files and versions

WSChuan-ASR / infer_paraformer.py

ASLP-lab's picture

Upload 6 files

0741e0c verified 3 months ago

2.28 kB

	import argparse
	import json
	import os
	from funasr import AutoModel


	def read_wav_scp(wav_scp_file: str) -> list:
	    """Read a wav.scp file and return a list of ``(id, wav_path)`` tuples.

	    Each non-blank line must hold an utterance id followed by an audio path,
	    separated by whitespace (spaces or tabs). Only the first run of whitespace
	    is used as the separator, so paths that themselves contain spaces are
	    preserved. Blank lines are skipped.

	    Args:
	        wav_scp_file: Path to the wav.scp file (read as UTF-8; a leading BOM
	            is tolerated).

	    Returns:
	        A list of ``(utt_id, wav_path)`` tuples in file order.

	    Raises:
	        ValueError: If a non-blank line does not contain both an id and a path.
	    """
	    wav_files = []
	    # "utf-8-sig" decodes plain UTF-8 and also strips a BOM, which would
	    # otherwise end up glued to the first utterance id.
	    with open(wav_scp_file, 'r', encoding='utf-8-sig') as f:
	        for line_no, line in enumerate(f, start=1):
	            # Split on the first run of any whitespace only.
	            fields = line.strip().split(maxsplit=1)
	            if not fields:
	                # Blank or whitespace-only line: nothing to parse.
	                continue
	            if len(fields) != 2:
	                raise ValueError(
	                    f"{wav_scp_file}:{line_no}: expected '<id> <wav_path>', "
	                    f"got {line.rstrip()!r}"
	                )
	            utt_id, wav_path = fields
	            wav_files.append((utt_id, wav_path))
	    return wav_files


	def save_results(results, output_file: str):
	    """Write inference results to ``output_file``, one ``key text`` pair per line.

	    Args:
	        results: Iterable of dicts; the ``"key"`` and ``"text"`` entries of each
	            dict are written, with a missing entry replaced by an empty string.
	        output_file: Destination path. Missing parent directories are created,
	            and an existing file is overwritten.
	    """
	    parent_dir = os.path.dirname(output_file)
	    if parent_dir:
	        # Create the destination directory so a fresh output location
	        # does not fail with FileNotFoundError.
	        os.makedirs(parent_dir, exist_ok=True)
	    # Write UTF-8 explicitly: transcripts contain non-ASCII text and the
	    # locale default encoding may be unable to represent it.
	    with open(output_file, 'w', encoding='utf-8') as f:
	        for result in results:
	            key = result.get("key", "")
	            text = result.get("text", "")
	            f.write(f"{key} {text}\n")


	def main():
	    """Command-line entry point: transcribe every entry of a wav.scp file.

	    Parses the command-line arguments, loads the model through ``AutoModel``,
	    runs ``model.generate`` on each audio path listed in the wav.scp file and
	    writes the collected ``key text`` lines to ``--output_file``.

	    Entries for which ``model.generate`` returns an empty result are left out
	    of the output file.
	    """
	    # Parse command-line arguments.
	    parser = argparse.ArgumentParser(description="Run speech recognition inference")
	    parser.add_argument('--model', type=str, required=True, help="Model name or path")
	    parser.add_argument('--wav_scp_file', type=str, required=True, help="Path to wav.scp file")
	    parser.add_argument('--output_dir', type=str, required=True, help="Directory to save inference results")
	    parser.add_argument('--device', type=str, default="cpu", choices=["cpu", "cuda"], help="Device to run inference on")
	    parser.add_argument('--output_file', type=str, required=True, help="File to save the inference results")

	    args = parser.parse_args()

	    # Create the output locations up front so that a missing directory is
	    # reported before the expensive inference loop, not after it.
	    os.makedirs(args.output_dir, exist_ok=True)
	    output_parent = os.path.dirname(args.output_file)
	    if output_parent:
	        os.makedirs(output_parent, exist_ok=True)

	    # Initialize the model.
	    print(f"Initializing model {args.model}...")
	    model = AutoModel(model=args.model, device=args.device)

	    # Read the wav.scp file.
	    wav_files = read_wav_scp(args.wav_scp_file)

	    # Accumulates one {"key", "text"} dict per successfully decoded utterance.
	    all_results = []

	    # Run inference on each audio file in turn.
	    for utt_id, wav_path in wav_files:
	        print(f"正在处理音频文件 {utt_id}: {wav_path}")
	        res = model.generate(wav_path)
	        print(f"推理结果: {res}")

	        if res:
	            # Keep the wav.scp id as the key and take the text of the
	            # first returned item.
	            text = res[0].get("text", "")
	            all_results.append({"key": utt_id, "text": text})

	    # Save the inference results to the output file.
	    save_results(all_results, args.output_file)
	    print(f"推理结果已保存到 {args.output_file}")


	# Run the command-line entry point only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()