Luigi committed on
Commit
cd4de4c
·
1 Parent(s): 1a23ac4

faster-whisper improvement: 1. load the WAV file into memory to speed up diarization 2. show diarization progress

Browse files
Files changed (2) hide show
  1. app.py +11 -8
  2. requirements.txt +1 -0
app.py CHANGED
@@ -15,6 +15,8 @@ from funasr.utils.postprocess_utils import rich_transcription_postprocess
15
 
16
  from termcolor import cprint
17
  import time
 
 
18
 
19
  # —————— Model Lists ——————
20
  WHISPER_MODELS = [
@@ -159,11 +161,11 @@ def _transcribe_fwhisper_cpu(model_id, language, audio_path, enable_diar):
159
  # Diarization-only branch
160
  if enable_diar:
161
  diarizer = get_diarization_pipe()
 
 
162
  diarizer.to(torch.device('cpu'))
163
- diary_starttime=time.time()
164
- diary = diarizer(audio_path)
165
- diary_endtime=time.time()
166
- cprint(f'[_transcribe_fwhisper_cpu] Time spent in CPU diarization: {diary_endtime - diary_starttime} seconds', 'yellow')
167
  snippets = []
168
  for turn, _, speaker in diary.itertracks(yield_label=True):
169
  start_ms = int(turn.start * 1000)
@@ -190,10 +192,11 @@ def _transcribe_fwhisper_gpu(model_id, language, audio_path, enable_diar):
190
  if enable_diar:
191
  diarizer = get_diarization_pipe()
192
  diarizer.to(torch.device('cuda'))
193
- diary_starttime=time.time()
194
- diary = diarizer(audio_path)
195
- diary_endtime=time.time()
196
- cprint(f'[_transcribe_fwhisper_gpu] Time spent in CUDA diarization: {diary_endtime - diary_starttime} seconds', 'yellow')
 
197
  snippets = []
198
  for turn, _, speaker in diary.itertracks(yield_label=True):
199
  start_ms = int(turn.start * 1000)
 
15
 
16
  from termcolor import cprint
17
  import time
18
+ import torchaudio
19
+ from pyannote.audio.pipelines.utils.hook import ProgressHook
20
 
21
  # —————— Model Lists ——————
22
  WHISPER_MODELS = [
 
161
  # Diarization-only branch
162
  if enable_diar:
163
  diarizer = get_diarization_pipe()
164
+ # Pre-loading audio files in memory may result in faster processing
165
+ waveform, sample_rate = torchaudio.load(audio_path)
166
  diarizer.to(torch.device('cpu'))
167
+ with ProgressHook() as hook:
168
+ diary = diarizer({"waveform": waveform, "sample_rate": sample_rate}, hook=hook)
 
 
169
  snippets = []
170
  for turn, _, speaker in diary.itertracks(yield_label=True):
171
  start_ms = int(turn.start * 1000)
 
192
  if enable_diar:
193
  diarizer = get_diarization_pipe()
194
  diarizer.to(torch.device('cuda'))
195
+ # Pre-loading audio files in memory may result in faster processing
196
+ waveform, sample_rate = torchaudio.load(audio_path)
197
+ waveform.to(torch.device('cuda'))
198
+ with ProgressHook() as hook:
199
+ diary = diarizer({"waveform": waveform, "sample_rate": sample_rate}, hook=hook)
200
  snippets = []
201
  for turn, _, speaker in diary.itertracks(yield_label=True):
202
  start_ms = int(turn.start * 1000)
requirements.txt CHANGED
@@ -3,6 +3,7 @@ gradio>=3.39.0
3
 
4
  # Core ASR
5
  torch>=2.0.0
 
6
  transformers>=4.35.0
7
 
8
  # FunASR SenseVoice
 
3
 
4
  # Core ASR
5
  torch>=2.0.0
6
+ torchaudio
7
  transformers>=4.35.0
8
 
9
  # FunASR SenseVoice