Spaces:
Runtime error
Runtime error
faster-whisper improvement: 1. load the WAV file into memory to speed up processing; 2. show diarization progress
Browse files
- app.py +11 -8
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -15,6 +15,8 @@ from funasr.utils.postprocess_utils import rich_transcription_postprocess
|
|
| 15 |
|
| 16 |
from termcolor import cprint
|
| 17 |
import time
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# —————— Model Lists ——————
|
| 20 |
WHISPER_MODELS = [
|
|
@@ -159,11 +161,11 @@ def _transcribe_fwhisper_cpu(model_id, language, audio_path, enable_diar):
|
|
| 159 |
# Diarization-only branch
|
| 160 |
if enable_diar:
|
| 161 |
diarizer = get_diarization_pipe()
|
|
|
|
|
|
|
| 162 |
diarizer.to(torch.device('cpu'))
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
diary_endtime=time.time()
|
| 166 |
-
cprint(f'[_transcribe_fwhisper_cpu] Time spent in CPU diarization: {diary_endtime - diary_starttime} seconds', 'yellow')
|
| 167 |
snippets = []
|
| 168 |
for turn, _, speaker in diary.itertracks(yield_label=True):
|
| 169 |
start_ms = int(turn.start * 1000)
|
|
@@ -190,10 +192,11 @@ def _transcribe_fwhisper_gpu(model_id, language, audio_path, enable_diar):
|
|
| 190 |
if enable_diar:
|
| 191 |
diarizer = get_diarization_pipe()
|
| 192 |
diarizer.to(torch.device('cuda'))
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
| 197 |
snippets = []
|
| 198 |
for turn, _, speaker in diary.itertracks(yield_label=True):
|
| 199 |
start_ms = int(turn.start * 1000)
|
|
|
|
| 15 |
|
| 16 |
from termcolor import cprint
|
| 17 |
import time
|
| 18 |
+
import torchaudio
|
| 19 |
+
from pyannote.audio.pipelines.utils.hook import ProgressHook
|
| 20 |
|
| 21 |
# —————— Model Lists ——————
|
| 22 |
WHISPER_MODELS = [
|
|
|
|
| 161 |
# Diarization-only branch
|
| 162 |
if enable_diar:
|
| 163 |
diarizer = get_diarization_pipe()
|
| 164 |
+
# Pre-loading audio files in memory may result in faster processing
|
| 165 |
+
waveform, sample_rate = torchaudio.load(audio_path)
|
| 166 |
diarizer.to(torch.device('cpu'))
|
| 167 |
+
with ProgressHook() as hook:
|
| 168 |
+
diary = diarizer({"waveform": waveform, "sample_rate": sample_rate}, hook=hook)
|
|
|
|
|
|
|
| 169 |
snippets = []
|
| 170 |
for turn, _, speaker in diary.itertracks(yield_label=True):
|
| 171 |
start_ms = int(turn.start * 1000)
|
|
|
|
| 192 |
if enable_diar:
|
| 193 |
diarizer = get_diarization_pipe()
|
| 194 |
diarizer.to(torch.device('cuda'))
|
| 195 |
+
# Pre-loading audio files in memory may result in faster processing
|
| 196 |
+
waveform, sample_rate = torchaudio.load(audio_path)
|
| 197 |
+
waveform.to(torch.device('cuda'))
|
| 198 |
+
with ProgressHook() as hook:
|
| 199 |
+
diary = diarizer({"waveform": waveform, "sample_rate": sample_rate}, hook=hook)
|
| 200 |
snippets = []
|
| 201 |
for turn, _, speaker in diary.itertracks(yield_label=True):
|
| 202 |
start_ms = int(turn.start * 1000)
|
requirements.txt
CHANGED
|
@@ -3,6 +3,7 @@ gradio>=3.39.0
|
|
| 3 |
|
| 4 |
# Core ASR
|
| 5 |
torch>=2.0.0
|
|
|
|
| 6 |
transformers>=4.35.0
|
| 7 |
|
| 8 |
# FunASR SenseVoice
|
|
|
|
| 3 |
|
| 4 |
# Core ASR
|
| 5 |
torch>=2.0.0
|
| 6 |
+
torchaudio
|
| 7 |
transformers>=4.35.0
|
| 8 |
|
| 9 |
# FunASR SenseVoice
|