Upload 6 files
Browse files
Paraformer-large-Chuan/am.mvn
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<Nnet>
|
| 2 |
+
<Splice> 560 560
|
| 3 |
+
[ 0 ]
|
| 4 |
+
<AddShift> 560 560
|
| 5 |
+
<LearnRateCoef> 0 [ -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 
-13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 
-8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 ]
|
| 6 |
+
<Rescale> 560 560
|
| 7 |
+
<LearnRateCoef> 0 [ 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 
0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 
0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 ]
|
| 8 |
+
</Nnet>
|
Paraformer-large-Chuan/config.yaml
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model: Paraformer
|
| 2 |
+
model_conf:
|
| 3 |
+
ctc_weight: 0.0
|
| 4 |
+
lsm_weight: 0.1
|
| 5 |
+
length_normalized_loss: true
|
| 6 |
+
predictor_weight: 1.0
|
| 7 |
+
predictor_bias: 1
|
| 8 |
+
sampling_ratio: 0.75
|
| 9 |
+
encoder: SANMEncoder
|
| 10 |
+
encoder_conf:
|
| 11 |
+
output_size: 512
|
| 12 |
+
attention_heads: 4
|
| 13 |
+
linear_units: 2048
|
| 14 |
+
num_blocks: 50
|
| 15 |
+
dropout_rate: 0.1
|
| 16 |
+
positional_dropout_rate: 0.1
|
| 17 |
+
attention_dropout_rate: 0.1
|
| 18 |
+
input_layer: pe
|
| 19 |
+
pos_enc_class: SinusoidalPositionEncoder
|
| 20 |
+
normalize_before: true
|
| 21 |
+
kernel_size: 11
|
| 22 |
+
sanm_shfit: 0
|
| 23 |
+
selfattention_layer_type: sanm
|
| 24 |
+
decoder: ParaformerSANMDecoder
|
| 25 |
+
decoder_conf:
|
| 26 |
+
attention_heads: 4
|
| 27 |
+
linear_units: 2048
|
| 28 |
+
num_blocks: 16
|
| 29 |
+
dropout_rate: 0.1
|
| 30 |
+
positional_dropout_rate: 0.1
|
| 31 |
+
self_attention_dropout_rate: 0.1
|
| 32 |
+
src_attention_dropout_rate: 0.1
|
| 33 |
+
att_layer_num: 16
|
| 34 |
+
kernel_size: 11
|
| 35 |
+
sanm_shfit: 0
|
| 36 |
+
predictor: CifPredictorV2
|
| 37 |
+
predictor_conf:
|
| 38 |
+
idim: 512
|
| 39 |
+
threshold: 1.0
|
| 40 |
+
l_order: 1
|
| 41 |
+
r_order: 1
|
| 42 |
+
tail_threshold: 0.45
|
| 43 |
+
frontend: WavFrontend
|
| 44 |
+
frontend_conf:
|
| 45 |
+
fs: 16000
|
| 46 |
+
window: hamming
|
| 47 |
+
n_mels: 80
|
| 48 |
+
frame_length: 25
|
| 49 |
+
frame_shift: 10
|
| 50 |
+
lfr_m: 7
|
| 51 |
+
lfr_n: 6
|
| 52 |
+
cmvn_file: ./speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/am.mvn
|
| 53 |
+
specaug: SpecAugLFR
|
| 54 |
+
specaug_conf:
|
| 55 |
+
apply_time_warp: false
|
| 56 |
+
time_warp_window: 5
|
| 57 |
+
time_warp_mode: bicubic
|
| 58 |
+
apply_freq_mask: true
|
| 59 |
+
freq_mask_width_range:
|
| 60 |
+
- 0
|
| 61 |
+
- 30
|
| 62 |
+
lfr_rate: 6
|
| 63 |
+
num_freq_mask: 1
|
| 64 |
+
apply_time_mask: true
|
| 65 |
+
time_mask_width_range:
|
| 66 |
+
- 0
|
| 67 |
+
- 12
|
| 68 |
+
num_time_mask: 1
|
| 69 |
+
train_conf:
|
| 70 |
+
accum_grad: 1
|
| 71 |
+
grad_clip: 5
|
| 72 |
+
max_epoch: 5
|
| 73 |
+
val_scheduler_criterion:
|
| 74 |
+
- valid
|
| 75 |
+
- acc
|
| 76 |
+
best_model_criterion:
|
| 77 |
+
- - valid
|
| 78 |
+
- acc
|
| 79 |
+
- max
|
| 80 |
+
keep_nbest_models: 100
|
| 81 |
+
log_interval: 500
|
| 82 |
+
resume: true
|
| 83 |
+
validate_interval: 5000
|
| 84 |
+
save_checkpoint_interval: 5000
|
| 85 |
+
avg_nbest_model: 10
|
| 86 |
+
use_deepspeed: false
|
| 87 |
+
deepspeed_config: ./config/ds_stage1.json
|
| 88 |
+
optim: adam
|
| 89 |
+
optim_conf:
|
| 90 |
+
lr: 0.0002
|
| 91 |
+
scheduler: warmuplr
|
| 92 |
+
scheduler_conf:
|
| 93 |
+
warmup_steps: 30000
|
| 94 |
+
dataset: AudioDataset
|
| 95 |
+
dataset_conf:
|
| 96 |
+
index_ds: IndexDSJsonl
|
| 97 |
+
batch_sampler: BatchSampler
|
| 98 |
+
batch_type: token
|
| 99 |
+
batch_size: 300
|
| 100 |
+
max_token_length: 2048
|
| 101 |
+
buffer_size: 500
|
| 102 |
+
shuffle: true
|
| 103 |
+
num_workers: 4
|
| 104 |
+
data_split_num: 1
|
| 105 |
+
sort_size: 1024
|
| 106 |
+
tokenizer: CharTokenizer
|
| 107 |
+
tokenizer_conf:
|
| 108 |
+
unk_symbol: <unk>
|
| 109 |
+
split_with_space: true
|
| 110 |
+
token_list: ./speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/tokens.json
|
| 111 |
+
seg_dict_file: ./speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/seg_dict
|
| 112 |
+
input_size: 560
|
| 113 |
+
ctc_conf:
|
| 114 |
+
dropout_rate: 0.0
|
| 115 |
+
ctc_type: builtin
|
| 116 |
+
reduce: true
|
| 117 |
+
ignore_nan_grad: true
|
| 118 |
+
normalize: null
|
| 119 |
+
init_param: /home/work_nfs9/sywang/code/paraformer/outputs/model.pt
|
| 120 |
+
config: /home/work_nfs9/sywang/code/paraformer/outputs/config.yaml
|
| 121 |
+
is_training: true
|
| 122 |
+
train_data_set_list: data/train.jsonl
|
| 123 |
+
valid_data_set_list: data/val.jsonl
|
| 124 |
+
output_dir: ./outputs
|
| 125 |
+
model_path: /home/work_nfs9/sywang/code/paraformer/outputs
|
| 126 |
+
device: cpu
|
Paraformer-large-Chuan/model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cb8f6c514ada6029c3504f6629a4756a583fe801e56a383a095636dc4c3ee77
|
| 3 |
+
size 2642208221
|
Paraformer-large-Chuan/seg_dict
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Paraformer-large-Chuan/tokens.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
infer_paraformer.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from funasr import AutoModel
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def read_wav_scp(wav_scp_file: str):
    """Read a wav.scp file and return a list of (id, wav_path) tuples.

    Each non-blank line is expected to hold an utterance id, whitespace,
    and then the audio path. Only the first run of whitespace separates
    the two fields, so the path itself may contain spaces.

    Args:
        wav_scp_file: Path to the wav.scp file (read as UTF-8).

    Returns:
        A list of ``(utt_id, wav_path)`` tuples in file order.

    Raises:
        ValueError: If a non-blank line does not contain both an id and
            a path.
    """
    wav_files = []
    # Explicit UTF-8 so ids/paths with non-ASCII characters do not depend
    # on the platform's locale encoding.
    with open(wav_scp_file, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            # Split on any whitespace (space or tab), first separator only.
            fields = line.strip().split(None, 1)
            if not fields:
                # Blank lines (e.g. a trailing newline) are skipped.
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{wav_scp_file}:{line_no}: expected '<id> <wav_path>', "
                    f"got {line.strip()!r}"
                )
            utt_id, wav_path = fields
            wav_files.append((utt_id, wav_path))
    return wav_files
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def save_results(results, output_file: str):
    """Write inference results to a file, one 'key text' pair per line.

    Args:
        results: Iterable of dicts; the "key" and "text" entries are
            written, each defaulting to an empty string when missing.
        output_file: Destination path. Its parent directory is created
            if it does not exist; an existing file is overwritten.
    """
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Explicit UTF-8: the recognized text may be non-ASCII, and the locale
    # default encoding may be unable to encode it.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(
            f"{result.get('key', '')} {result.get('text', '')}\n"
            for result in results
        )
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def main():
    """Command-line entry point: transcribe every entry of a wav.scp file.

    Parses the CLI arguments, builds the model with ``AutoModel``, calls
    ``model.generate`` once per audio path, and writes the collected
    ``key text`` lines to ``--output_file`` via ``save_results``.
    """
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(description="Run speech recognition inference")
    parser.add_argument('--model', type=str, required=True, help="Model name or path")
    parser.add_argument('--wav_scp_file', type=str, required=True, help="Path to wav.scp file")
    # NOTE(review): --output_dir is required but not read anywhere below;
    # results are written to --output_file only. Kept for CLI compatibility.
    parser.add_argument('--output_dir', type=str, required=True, help="Directory to save inference results")
    parser.add_argument('--device', type=str, default="cpu", choices=["cpu", "cuda"], help="Device to run inference on")
    parser.add_argument('--output_file', type=str, required=True, help="File to save the inference results")

    args = parser.parse_args()

    # Initialize the model.
    print(f"Initializing model {args.model}...")
    model = AutoModel(model=args.model, device=args.device)

    # Read the wav.scp file.
    wav_files = read_wav_scp(args.wav_scp_file)

    # Collected results for all utterances.
    all_results = []

    # Run inference on each audio file in turn.
    for utt_id, wav_path in wav_files:
        print(f"正在处理音频文件 {utt_id}: {wav_path}")
        res = model.generate(wav_path)
        print(f"推理结果: {res}")

        if not res:
            # Make dropped utterances visible instead of skipping silently.
            print(f"Warning: empty inference result for {utt_id}; skipping")
            continue

        # Key the output on the id from wav.scp; take the text of the
        # first returned item.
        text = res[0].get("text", "")
        all_results.append({"key": utt_id, "text": text})

    # Save the results to the output file.
    save_results(all_results, args.output_file)
    print(f"推理结果已保存到 {args.output_file}")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|