Upload feature extractor
Browse files- feature_extraction_maest.py +15 -15
- preprocessor_config.json +0 -0
feature_extraction_maest.py
CHANGED
|
@@ -99,12 +99,21 @@ class MAESTFeatureExtractor(SequenceFeatureExtractor):
|
|
| 99 |
self.std = std
|
| 100 |
self.return_attention_mask = return_attention_mask
|
| 101 |
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
window_length=self.n_fft,
|
| 104 |
name="hann",
|
| 105 |
-
)
|
| 106 |
|
| 107 |
-
|
| 108 |
num_frequency_bins=self.n_fft // 2 + 1,
|
| 109 |
num_mel_filters=self.num_mel_bins,
|
| 110 |
min_frequency=0,
|
|
@@ -112,24 +121,15 @@ class MAESTFeatureExtractor(SequenceFeatureExtractor):
|
|
| 112 |
sampling_rate=self.sampling_rate,
|
| 113 |
norm="slaney",
|
| 114 |
mel_scale="slaney",
|
| 115 |
-
)
|
| 116 |
-
|
| 117 |
-
def _extract_fbank_features(
|
| 118 |
-
self,
|
| 119 |
-
waveform: np.ndarray,
|
| 120 |
-
max_length: int,
|
| 121 |
-
) -> np.ndarray:
|
| 122 |
-
"""
|
| 123 |
-
Get mel-spectrogram features using audio_utils.
|
| 124 |
-
"""
|
| 125 |
|
| 126 |
melspec = spectrogram(
|
| 127 |
waveform,
|
| 128 |
-
window=
|
| 129 |
frame_length=self.n_fft,
|
| 130 |
hop_length=self.hop_length,
|
| 131 |
power=2,
|
| 132 |
-
mel_filters=
|
| 133 |
min_value=1e-30,
|
| 134 |
mel_floor=1e-30,
|
| 135 |
pad_mode="constant",
|
|
|
|
| 99 |
self.std = std
|
| 100 |
self.return_attention_mask = return_attention_mask
|
| 101 |
|
| 102 |
+
def _extract_fbank_features(
|
| 103 |
+
self,
|
| 104 |
+
waveform: np.ndarray,
|
| 105 |
+
max_length: int,
|
| 106 |
+
) -> np.ndarray:
|
| 107 |
+
"""
|
| 108 |
+
Get mel-spectrogram features using audio_utils.
|
| 109 |
+
"""
|
| 110 |
+
|
| 111 |
+
window = window_function(
|
| 112 |
window_length=self.n_fft,
|
| 113 |
name="hann",
|
| 114 |
+
)
|
| 115 |
|
| 116 |
+
mel_fb = mel_filter_bank(
|
| 117 |
num_frequency_bins=self.n_fft // 2 + 1,
|
| 118 |
num_mel_filters=self.num_mel_bins,
|
| 119 |
min_frequency=0,
|
|
|
|
| 121 |
sampling_rate=self.sampling_rate,
|
| 122 |
norm="slaney",
|
| 123 |
mel_scale="slaney",
|
| 124 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
melspec = spectrogram(
|
| 127 |
waveform,
|
| 128 |
+
window=window,
|
| 129 |
frame_length=self.n_fft,
|
| 130 |
hop_length=self.hop_length,
|
| 131 |
power=2,
|
| 132 |
+
mel_filters=mel_fb,
|
| 133 |
min_value=1e-30,
|
| 134 |
mel_floor=1e-30,
|
| 135 |
pad_mode="constant",
|
preprocessor_config.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|