This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +0 -36
  2. xtts-v2/eng/AsmrRacoon/AsmrRacoon.wav +0 -3
  3. xtts-v2/eng/AsmrRacoon/AsmrRacoon_16000.wav +0 -3
  4. xtts-v2/eng/AsmrRacoon/AsmrRacoon_24000.wav +0 -3
  5. xtts-v2/eng/AsmrRacoon/AsmrRacoon_generated_example.wav +0 -3
  6. xtts-v2/eng/AsmrRacoon/config.json +0 -158
  7. xtts-v2/eng/AsmrRacoon/dataset.zip +0 -3
  8. xtts-v2/eng/AsmrRacoon/model.pth +0 -3
  9. xtts-v2/eng/AsmrRacoon/speakers_xtts.pth +0 -3
  10. xtts-v2/eng/AsmrRacoon/vocab.json +0 -0
  11. xtts-v2/eng/Awkwafina/Awkwafina_16000.wav +0 -3
  12. xtts-v2/eng/Awkwafina/Awkwafina_24000.wav +0 -3
  13. xtts-v2/eng/Awkwafina/Awkwafina_generated_example.wav +0 -3
  14. xtts-v2/eng/Awkwafina/config.json +0 -158
  15. xtts-v2/eng/Awkwafina/dataset.zip +0 -3
  16. xtts-v2/eng/Awkwafina/model.pth +0 -3
  17. xtts-v2/eng/Awkwafina/quiet_ref.mp3 +0 -3
  18. xtts-v2/eng/Awkwafina/speakers_xtts.pth +0 -3
  19. xtts-v2/eng/Awkwafina/vocab.json +0 -0
  20. xtts-v2/eng/BadCartmanSouthPark/BadCartmanSouthPark.zip +0 -3
  21. xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_22khz.wav +0 -3
  22. xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_24khz.wav +0 -3
  23. xtts-v2/eng/JohnButlerASMR/config.json +0 -159
  24. xtts-v2/eng/JohnButlerASMR/dataset.zip +0 -3
  25. xtts-v2/eng/JohnButlerASMR/model.pth +0 -3
  26. xtts-v2/eng/JohnButlerASMR/ref.wav +0 -3
  27. xtts-v2/eng/JohnButlerASMR/vocab.json +0 -0
  28. xtts-v2/eng/JohnMulaney/JohnMulaney_22khz.wav +0 -3
  29. xtts-v2/eng/JohnMulaney/JohnMulaney_24khz.wav +0 -3
  30. xtts-v2/eng/JohnMulaney/config.json +0 -159
  31. xtts-v2/eng/JohnMulaney/dataset.zip +0 -3
  32. xtts-v2/eng/JohnMulaney/model.pth +0 -3
  33. xtts-v2/eng/JohnMulaney/ref.wav +0 -3
  34. xtts-v2/eng/JohnMulaney/vocab.json +0 -0
  35. xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy.wav +0 -3
  36. xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_16000.wav +0 -3
  37. xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_24000.wav +0 -3
  38. xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_generated_example.wav +0 -3
  39. xtts-v2/eng/PeterGriffinFamilyGuy/config.json +0 -158
  40. xtts-v2/eng/PeterGriffinFamilyGuy/dataset.zip +0 -3
  41. xtts-v2/eng/PeterGriffinFamilyGuy/model.pth +0 -3
  42. xtts-v2/eng/PeterGriffinFamilyGuy/speakers_xtts.pth +0 -3
  43. xtts-v2/eng/PeterGriffinFamilyGuy/vocab.json +0 -0
  44. xtts-v2/eng/RafeBeckley/RafeBeckley.wav +0 -3
  45. xtts-v2/eng/RafeBeckley/RafeBeckley_16000.wav +0 -3
  46. xtts-v2/eng/RafeBeckley/RafeBeckley_24000.wav +0 -3
  47. xtts-v2/eng/RafeBeckley/RafeBeckley_generated_example.wav +0 -3
  48. xtts-v2/eng/RafeBeckley/config.json +0 -158
  49. xtts-v2/eng/RafeBeckley/dataset.zip +0 -3
  50. xtts-v2/eng/RafeBeckley/model.pth +0 -3
.gitattributes CHANGED
@@ -125,39 +125,3 @@ xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_16000.wav filter=lfs diff=lfs merge=lf
125
  xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_24000.wav filter=lfs diff=lfs merge=lfs -text
126
  xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_generated_example.wav filter=lfs diff=lfs merge=lfs -text
127
  xtts-v2/eng/RelaxForAWhile/RelaxForAWhile.wav filter=lfs diff=lfs merge=lfs -text
128
- xtts-v2/eng/AsmrRacoon/AsmrRacoon_16000.wav filter=lfs diff=lfs merge=lfs -text
129
- xtts-v2/eng/AsmrRacoon/AsmrRacoon_24000.wav filter=lfs diff=lfs merge=lfs -text
130
- xtts-v2/eng/AsmrRacoon/AsmrRacoon_generated_example.wav filter=lfs diff=lfs merge=lfs -text
131
- xtts-v2/eng/AsmrRacoon/AsmrRacoon.wav filter=lfs diff=lfs merge=lfs -text
132
- xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_16000.wav filter=lfs diff=lfs merge=lfs -text
133
- xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_24000.wav filter=lfs diff=lfs merge=lfs -text
134
- xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_converted_00000025.wav filter=lfs diff=lfs merge=lfs -text
135
- xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_generated_example.wav filter=lfs diff=lfs merge=lfs -text
136
- xtts-v2/eng/SladeTeenTitans/SladeTeenTitans.wav filter=lfs diff=lfs merge=lfs -text
137
- xtts-v2/rus/Konishev/ref.wav filter=lfs diff=lfs merge=lfs -text
138
- xtts-v2/eng/Awkwafina/Awkwafina_16000.wav filter=lfs diff=lfs merge=lfs -text
139
- xtts-v2/eng/Awkwafina/Awkwafina_24000.wav filter=lfs diff=lfs merge=lfs -text
140
- xtts-v2/eng/Awkwafina/Awkwafina_generated_example.wav filter=lfs diff=lfs merge=lfs -text
141
- xtts-v2/eng/Awkwafina/quiet_ref.mp3 filter=lfs diff=lfs merge=lfs -text
142
- xtts-v2/eng/SubZeroMKX/SubZeroMKX_24000.wav filter=lfs diff=lfs merge=lfs -text
143
- xtts-v2/eng/SubZeroMKX/SubZeroMKX_generated_example.wav filter=lfs diff=lfs merge=lfs -text
144
- xtts-v2/eng/SubZeroMKX/SubZeroMKX.wav filter=lfs diff=lfs merge=lfs -text
145
- xtts-v2/eng/Top15s/Top15s_16000.wav filter=lfs diff=lfs merge=lfs -text
146
- xtts-v2/eng/Top15s/Top15s_24000.wav filter=lfs diff=lfs merge=lfs -text
147
- xtts-v2/eng/Top15s/Top15s_generated_example.wav filter=lfs diff=lfs merge=lfs -text
148
- xtts-v2/eng/Top15s/Top15s.wav filter=lfs diff=lfs merge=lfs -text
149
- xtts-v2/eng/Top15s/Top15sTrainingData.mp3 filter=lfs diff=lfs merge=lfs -text
150
- xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_22khz.wav filter=lfs diff=lfs merge=lfs -text
151
- xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_24khz.wav filter=lfs diff=lfs merge=lfs -text
152
- xtts-v2/eng/JohnButlerASMR/ref.wav filter=lfs diff=lfs merge=lfs -text
153
- xtts-v2/eng/JohnMulaney/JohnMulaney_22khz.wav filter=lfs diff=lfs merge=lfs -text
154
- xtts-v2/eng/JohnMulaney/JohnMulaney_24khz.wav filter=lfs diff=lfs merge=lfs -text
155
- xtts-v2/eng/JohnMulaney/ref.wav filter=lfs diff=lfs merge=lfs -text
156
- xtts-v2/eng/RafeBeckley/RafeBeckley_16000.wav filter=lfs diff=lfs merge=lfs -text
157
- xtts-v2/eng/RafeBeckley/RafeBeckley_24000.wav filter=lfs diff=lfs merge=lfs -text
158
- xtts-v2/eng/RafeBeckley/RafeBeckley_generated_example.wav filter=lfs diff=lfs merge=lfs -text
159
- xtts-v2/eng/RafeBeckley/RafeBeckley.wav filter=lfs diff=lfs merge=lfs -text
160
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_16000.wav filter=lfs diff=lfs merge=lfs -text
161
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_24000.wav filter=lfs diff=lfs merge=lfs -text
162
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_generated_example.wav filter=lfs diff=lfs merge=lfs -text
163
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy.wav filter=lfs diff=lfs merge=lfs -text
 
125
  xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_24000.wav filter=lfs diff=lfs merge=lfs -text
126
  xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_generated_example.wav filter=lfs diff=lfs merge=lfs -text
127
  xtts-v2/eng/RelaxForAWhile/RelaxForAWhile.wav filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xtts-v2/eng/AsmrRacoon/AsmrRacoon.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a31ad6fbba04eb20f9ccbd34c8b78fbf5d697036035af37b18a66281316daf24
3
- size 3752718
 
 
 
 
xtts-v2/eng/AsmrRacoon/AsmrRacoon_16000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed0022ad554d5c064c8a9a59932f2d76c731d91c75d97c36c18088d1685b9fb9
3
- size 1250958
 
 
 
 
xtts-v2/eng/AsmrRacoon/AsmrRacoon_24000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e550baa3825adc6a67ff6ecff23127046db1bf0e66f7e693e8c909217b3d03f3
3
- size 1876398
 
 
 
 
xtts-v2/eng/AsmrRacoon/AsmrRacoon_generated_example.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cce4fbe854e4f5022163bdf60fa2c4e1ad36cb721e9804069c04fb157f3b0b0
3
- size 389710
 
 
 
 
xtts-v2/eng/AsmrRacoon/config.json DELETED
@@ -1,158 +0,0 @@
1
- {
2
- "output_path": "output",
3
- "logger_uri": null,
4
- "run_name": "run",
5
- "project_name": null,
6
- "run_description": "\ud83d\udc38Coqui trainer run.",
7
- "print_step": 25,
8
- "plot_step": 100,
9
- "model_param_stats": false,
10
- "wandb_entity": null,
11
- "dashboard_logger": "tensorboard",
12
- "save_on_interrupt": true,
13
- "log_model_step": null,
14
- "save_step": 10000,
15
- "save_n_checkpoints": 5,
16
- "save_checkpoints": true,
17
- "save_all_best": false,
18
- "save_best_after": 10000,
19
- "target_loss": null,
20
- "print_eval": false,
21
- "test_delay_epochs": 0,
22
- "run_eval": true,
23
- "run_eval_steps": null,
24
- "distributed_backend": "nccl",
25
- "distributed_url": "tcp://localhost:54321",
26
- "mixed_precision": false,
27
- "precision": "fp16",
28
- "epochs": 1000,
29
- "batch_size": 32,
30
- "eval_batch_size": 16,
31
- "grad_clip": 0.0,
32
- "scheduler_after_epoch": true,
33
- "lr": 0.001,
34
- "optimizer": "radam",
35
- "optimizer_params": null,
36
- "lr_scheduler": null,
37
- "lr_scheduler_params": {},
38
- "use_grad_scaler": false,
39
- "allow_tf32": false,
40
- "cudnn_enable": true,
41
- "cudnn_deterministic": false,
42
- "cudnn_benchmark": false,
43
- "training_seed": 54321,
44
- "model": "xtts",
45
- "num_loader_workers": 0,
46
- "num_eval_loader_workers": 0,
47
- "use_noise_augment": false,
48
- "audio": {
49
- "sample_rate": 22050,
50
- "output_sample_rate": 24000
51
- },
52
- "use_phonemes": false,
53
- "phonemizer": null,
54
- "phoneme_language": null,
55
- "compute_input_seq_cache": false,
56
- "text_cleaner": null,
57
- "enable_eos_bos_chars": false,
58
- "test_sentences_file": "",
59
- "phoneme_cache_path": null,
60
- "characters": null,
61
- "add_blank": false,
62
- "batch_group_size": 0,
63
- "loss_masking": null,
64
- "min_audio_len": 1,
65
- "max_audio_len": Infinity,
66
- "min_text_len": 1,
67
- "max_text_len": Infinity,
68
- "compute_f0": false,
69
- "compute_energy": false,
70
- "compute_linear_spec": false,
71
- "precompute_num_workers": 0,
72
- "start_by_longest": false,
73
- "shuffle": false,
74
- "drop_last": false,
75
- "datasets": [
76
- {
77
- "formatter": "",
78
- "dataset_name": "",
79
- "path": "",
80
- "meta_file_train": "",
81
- "ignored_speakers": null,
82
- "language": "",
83
- "phonemizer": "",
84
- "meta_file_val": "",
85
- "meta_file_attn_mask": ""
86
- }
87
- ],
88
- "test_sentences": [],
89
- "eval_split_max_size": null,
90
- "eval_split_size": 0.01,
91
- "use_speaker_weighted_sampler": false,
92
- "speaker_weighted_sampler_alpha": 1.0,
93
- "use_language_weighted_sampler": false,
94
- "language_weighted_sampler_alpha": 1.0,
95
- "use_length_weighted_sampler": false,
96
- "length_weighted_sampler_alpha": 1.0,
97
- "model_args": {
98
- "gpt_batch_size": 1,
99
- "enable_redaction": false,
100
- "kv_cache": true,
101
- "gpt_checkpoint": null,
102
- "clvp_checkpoint": null,
103
- "decoder_checkpoint": null,
104
- "num_chars": 255,
105
- "tokenizer_file": "",
106
- "gpt_max_audio_tokens": 605,
107
- "gpt_max_text_tokens": 402,
108
- "gpt_max_prompt_tokens": 70,
109
- "gpt_layers": 30,
110
- "gpt_n_model_channels": 1024,
111
- "gpt_n_heads": 16,
112
- "gpt_number_text_tokens": 6153,
113
- "gpt_start_text_token": null,
114
- "gpt_stop_text_token": null,
115
- "gpt_num_audio_tokens": 1026,
116
- "gpt_start_audio_token": 1024,
117
- "gpt_stop_audio_token": 1025,
118
- "gpt_code_stride_len": 1024,
119
- "gpt_use_masking_gt_prompt_approach": true,
120
- "gpt_use_perceiver_resampler": true,
121
- "input_sample_rate": 22050,
122
- "output_sample_rate": 24000,
123
- "output_hop_length": 256,
124
- "decoder_input_dim": 1024,
125
- "d_vector_dim": 512,
126
- "cond_d_vector_in_each_upsampling_layer": true,
127
- "duration_const": 102400
128
- },
129
- "model_dir": null,
130
- "languages": [
131
- "en",
132
- "es",
133
- "fr",
134
- "de",
135
- "it",
136
- "pt",
137
- "pl",
138
- "tr",
139
- "ru",
140
- "nl",
141
- "cs",
142
- "ar",
143
- "zh-cn",
144
- "hu",
145
- "ko",
146
- "ja"
147
- ],
148
- "temperature": 0.75,
149
- "length_penalty": 1.0,
150
- "repetition_penalty": 10.0,
151
- "top_k": 50,
152
- "top_p": 0.85,
153
- "num_gpt_outputs": 1,
154
- "gpt_cond_len": 30,
155
- "gpt_cond_chunk_len": 4,
156
- "max_ref_len": 30,
157
- "sound_norm_refs": false
158
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xtts-v2/eng/AsmrRacoon/dataset.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c3af20563020c8b71da9e1b8aab2302b1d9b52dd606f0bc7b9d7d2a6227c505
3
- size 110685528
 
 
 
 
xtts-v2/eng/AsmrRacoon/model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:530ead1f38e8aa28ba686d70758615f9dcdc2cc985a36c225e38021eaea5f1d4
3
- size 1863948438
 
 
 
 
xtts-v2/eng/AsmrRacoon/speakers_xtts.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f6137c19a4eab0cbbe4c99b5babacf68b1746e50da90807708c10e645b943b
3
- size 7754818
 
 
 
 
xtts-v2/eng/AsmrRacoon/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
xtts-v2/eng/Awkwafina/Awkwafina_16000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d22b3d88f634befaa0e287446cb6906784563f13b7602df0733dade14934b00
3
- size 385038
 
 
 
 
xtts-v2/eng/Awkwafina/Awkwafina_24000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb0e790689d74d7b618589d26ab0d5a999ff1b7b255845c5f9102b51d930ec0f
3
- size 577518
 
 
 
 
xtts-v2/eng/Awkwafina/Awkwafina_generated_example.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf03b297299ba1f98c323308b98979788f7e9d86cd6879519f4e2046dbee0e6e
3
- size 378960
 
 
 
 
xtts-v2/eng/Awkwafina/config.json DELETED
@@ -1,158 +0,0 @@
1
- {
2
- "output_path": "output",
3
- "logger_uri": null,
4
- "run_name": "run",
5
- "project_name": null,
6
- "run_description": "\ud83d\udc38Coqui trainer run.",
7
- "print_step": 25,
8
- "plot_step": 100,
9
- "model_param_stats": false,
10
- "wandb_entity": null,
11
- "dashboard_logger": "tensorboard",
12
- "save_on_interrupt": true,
13
- "log_model_step": null,
14
- "save_step": 10000,
15
- "save_n_checkpoints": 5,
16
- "save_checkpoints": true,
17
- "save_all_best": false,
18
- "save_best_after": 10000,
19
- "target_loss": null,
20
- "print_eval": false,
21
- "test_delay_epochs": 0,
22
- "run_eval": true,
23
- "run_eval_steps": null,
24
- "distributed_backend": "nccl",
25
- "distributed_url": "tcp://localhost:54321",
26
- "mixed_precision": false,
27
- "precision": "fp16",
28
- "epochs": 1000,
29
- "batch_size": 32,
30
- "eval_batch_size": 16,
31
- "grad_clip": 0.0,
32
- "scheduler_after_epoch": true,
33
- "lr": 0.001,
34
- "optimizer": "radam",
35
- "optimizer_params": null,
36
- "lr_scheduler": null,
37
- "lr_scheduler_params": {},
38
- "use_grad_scaler": false,
39
- "allow_tf32": false,
40
- "cudnn_enable": true,
41
- "cudnn_deterministic": false,
42
- "cudnn_benchmark": false,
43
- "training_seed": 54321,
44
- "model": "xtts",
45
- "num_loader_workers": 0,
46
- "num_eval_loader_workers": 0,
47
- "use_noise_augment": false,
48
- "audio": {
49
- "sample_rate": 22050,
50
- "output_sample_rate": 24000
51
- },
52
- "use_phonemes": false,
53
- "phonemizer": null,
54
- "phoneme_language": null,
55
- "compute_input_seq_cache": false,
56
- "text_cleaner": null,
57
- "enable_eos_bos_chars": false,
58
- "test_sentences_file": "",
59
- "phoneme_cache_path": null,
60
- "characters": null,
61
- "add_blank": false,
62
- "batch_group_size": 0,
63
- "loss_masking": null,
64
- "min_audio_len": 1,
65
- "max_audio_len": Infinity,
66
- "min_text_len": 1,
67
- "max_text_len": Infinity,
68
- "compute_f0": false,
69
- "compute_energy": false,
70
- "compute_linear_spec": false,
71
- "precompute_num_workers": 0,
72
- "start_by_longest": false,
73
- "shuffle": false,
74
- "drop_last": false,
75
- "datasets": [
76
- {
77
- "formatter": "",
78
- "dataset_name": "",
79
- "path": "",
80
- "meta_file_train": "",
81
- "ignored_speakers": null,
82
- "language": "",
83
- "phonemizer": "",
84
- "meta_file_val": "",
85
- "meta_file_attn_mask": ""
86
- }
87
- ],
88
- "test_sentences": [],
89
- "eval_split_max_size": null,
90
- "eval_split_size": 0.01,
91
- "use_speaker_weighted_sampler": false,
92
- "speaker_weighted_sampler_alpha": 1.0,
93
- "use_language_weighted_sampler": false,
94
- "language_weighted_sampler_alpha": 1.0,
95
- "use_length_weighted_sampler": false,
96
- "length_weighted_sampler_alpha": 1.0,
97
- "model_args": {
98
- "gpt_batch_size": 1,
99
- "enable_redaction": false,
100
- "kv_cache": true,
101
- "gpt_checkpoint": null,
102
- "clvp_checkpoint": null,
103
- "decoder_checkpoint": null,
104
- "num_chars": 255,
105
- "tokenizer_file": "",
106
- "gpt_max_audio_tokens": 605,
107
- "gpt_max_text_tokens": 402,
108
- "gpt_max_prompt_tokens": 70,
109
- "gpt_layers": 30,
110
- "gpt_n_model_channels": 1024,
111
- "gpt_n_heads": 16,
112
- "gpt_number_text_tokens": 6153,
113
- "gpt_start_text_token": null,
114
- "gpt_stop_text_token": null,
115
- "gpt_num_audio_tokens": 1026,
116
- "gpt_start_audio_token": 1024,
117
- "gpt_stop_audio_token": 1025,
118
- "gpt_code_stride_len": 1024,
119
- "gpt_use_masking_gt_prompt_approach": true,
120
- "gpt_use_perceiver_resampler": true,
121
- "input_sample_rate": 22050,
122
- "output_sample_rate": 24000,
123
- "output_hop_length": 256,
124
- "decoder_input_dim": 1024,
125
- "d_vector_dim": 512,
126
- "cond_d_vector_in_each_upsampling_layer": true,
127
- "duration_const": 102400
128
- },
129
- "model_dir": null,
130
- "languages": [
131
- "en",
132
- "es",
133
- "fr",
134
- "de",
135
- "it",
136
- "pt",
137
- "pl",
138
- "tr",
139
- "ru",
140
- "nl",
141
- "cs",
142
- "ar",
143
- "zh-cn",
144
- "hu",
145
- "ko",
146
- "ja"
147
- ],
148
- "temperature": 0.75,
149
- "length_penalty": 1.0,
150
- "repetition_penalty": 10.0,
151
- "top_k": 50,
152
- "top_p": 0.85,
153
- "num_gpt_outputs": 1,
154
- "gpt_cond_len": 30,
155
- "gpt_cond_chunk_len": 4,
156
- "max_ref_len": 30,
157
- "sound_norm_refs": false
158
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xtts-v2/eng/Awkwafina/dataset.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:08eff786cda00924fd248d0597c90d7e1f2a654d5051e2a7e16c7e732e2a6431
3
- size 96880139
 
 
 
 
xtts-v2/eng/Awkwafina/model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbcd3da027ec9815f78dc7b1fffa133caa2b3ebe2cd9ac3a72bb26bf55b27062
3
- size 1863948630
 
 
 
 
xtts-v2/eng/Awkwafina/quiet_ref.mp3 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fb7602046d5fef3ba3becd314c18542cfba29d225899d26e86ac06d7d7d2fb9
3
- size 117152
 
 
 
 
xtts-v2/eng/Awkwafina/speakers_xtts.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f6137c19a4eab0cbbe4c99b5babacf68b1746e50da90807708c10e645b943b
3
- size 7754818
 
 
 
 
xtts-v2/eng/Awkwafina/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
xtts-v2/eng/BadCartmanSouthPark/BadCartmanSouthPark.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:78caaab499455586889a50e68148105fb739bb7a9463ae820e57a870a92f2b52
3
- size 1727537542
 
 
 
 
xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_22khz.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:771650b040d485e0687fe40379472d27c4a5c5ef154acfc8f0ae696bba48df7a
3
- size 1063770
 
 
 
 
xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_24khz.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbe36977362327bc9fd04cbbd14e40b9a79c9740f8dda4eb5bbdbe444b9f98b1
3
- size 1157838
 
 
 
 
xtts-v2/eng/JohnButlerASMR/config.json DELETED
@@ -1,159 +0,0 @@
1
- {
2
- "output_path": "output",
3
- "logger_uri": null,
4
- "run_name": "run",
5
- "project_name": null,
6
- "run_description": "\ud83d\udc38Coqui trainer run.",
7
- "print_step": 25,
8
- "plot_step": 100,
9
- "model_param_stats": false,
10
- "wandb_entity": null,
11
- "dashboard_logger": "tensorboard",
12
- "save_on_interrupt": true,
13
- "log_model_step": null,
14
- "save_step": 10000,
15
- "save_n_checkpoints": 5,
16
- "save_checkpoints": true,
17
- "save_all_best": false,
18
- "save_best_after": 10000,
19
- "target_loss": null,
20
- "print_eval": false,
21
- "test_delay_epochs": 0,
22
- "run_eval": true,
23
- "run_eval_steps": null,
24
- "distributed_backend": "nccl",
25
- "distributed_url": "tcp://localhost:54321",
26
- "mixed_precision": false,
27
- "precision": "fp16",
28
- "epochs": 1000,
29
- "batch_size": 32,
30
- "eval_batch_size": 16,
31
- "grad_clip": 0.0,
32
- "scheduler_after_epoch": true,
33
- "lr": 0.001,
34
- "optimizer": "radam",
35
- "optimizer_params": null,
36
- "lr_scheduler": null,
37
- "lr_scheduler_params": {},
38
- "use_grad_scaler": false,
39
- "allow_tf32": false,
40
- "cudnn_enable": true,
41
- "cudnn_deterministic": false,
42
- "cudnn_benchmark": false,
43
- "training_seed": 54321,
44
- "model": "xtts",
45
- "num_loader_workers": 0,
46
- "num_eval_loader_workers": 0,
47
- "use_noise_augment": false,
48
- "audio": {
49
- "sample_rate": 22050,
50
- "output_sample_rate": 24000
51
- },
52
- "use_phonemes": false,
53
- "phonemizer": null,
54
- "phoneme_language": null,
55
- "compute_input_seq_cache": false,
56
- "text_cleaner": null,
57
- "enable_eos_bos_chars": false,
58
- "test_sentences_file": "",
59
- "phoneme_cache_path": null,
60
- "characters": null,
61
- "add_blank": false,
62
- "batch_group_size": 0,
63
- "loss_masking": null,
64
- "min_audio_len": 1,
65
- "max_audio_len": Infinity,
66
- "min_text_len": 1,
67
- "max_text_len": Infinity,
68
- "compute_f0": false,
69
- "compute_energy": false,
70
- "compute_linear_spec": false,
71
- "precompute_num_workers": 0,
72
- "start_by_longest": false,
73
- "shuffle": false,
74
- "drop_last": false,
75
- "datasets": [
76
- {
77
- "formatter": "",
78
- "dataset_name": "",
79
- "path": "",
80
- "meta_file_train": "",
81
- "ignored_speakers": null,
82
- "language": "",
83
- "phonemizer": "",
84
- "meta_file_val": "",
85
- "meta_file_attn_mask": ""
86
- }
87
- ],
88
- "test_sentences": [],
89
- "eval_split_max_size": null,
90
- "eval_split_size": 0.01,
91
- "use_speaker_weighted_sampler": false,
92
- "speaker_weighted_sampler_alpha": 1.0,
93
- "use_language_weighted_sampler": false,
94
- "language_weighted_sampler_alpha": 1.0,
95
- "use_length_weighted_sampler": false,
96
- "length_weighted_sampler_alpha": 1.0,
97
- "model_args": {
98
- "gpt_batch_size": 1,
99
- "enable_redaction": false,
100
- "kv_cache": true,
101
- "gpt_checkpoint": null,
102
- "clvp_checkpoint": null,
103
- "decoder_checkpoint": null,
104
- "num_chars": 255,
105
- "tokenizer_file": "",
106
- "gpt_max_audio_tokens": 605,
107
- "gpt_max_text_tokens": 402,
108
- "gpt_max_prompt_tokens": 70,
109
- "gpt_layers": 30,
110
- "gpt_n_model_channels": 1024,
111
- "gpt_n_heads": 16,
112
- "gpt_number_text_tokens": 6681,
113
- "gpt_start_text_token": null,
114
- "gpt_stop_text_token": null,
115
- "gpt_num_audio_tokens": 1026,
116
- "gpt_start_audio_token": 1024,
117
- "gpt_stop_audio_token": 1025,
118
- "gpt_code_stride_len": 1024,
119
- "gpt_use_masking_gt_prompt_approach": true,
120
- "gpt_use_perceiver_resampler": true,
121
- "input_sample_rate": 22050,
122
- "output_sample_rate": 24000,
123
- "output_hop_length": 256,
124
- "decoder_input_dim": 1024,
125
- "d_vector_dim": 512,
126
- "cond_d_vector_in_each_upsampling_layer": true,
127
- "duration_const": 102400
128
- },
129
- "model_dir": null,
130
- "languages": [
131
- "en",
132
- "es",
133
- "fr",
134
- "de",
135
- "it",
136
- "pt",
137
- "pl",
138
- "tr",
139
- "ru",
140
- "nl",
141
- "cs",
142
- "ar",
143
- "zh-cn",
144
- "hu",
145
- "ko",
146
- "ja",
147
- "hi"
148
- ],
149
- "temperature": 0.75,
150
- "length_penalty": 1.0,
151
- "repetition_penalty": 5.0,
152
- "top_k": 50,
153
- "top_p": 0.85,
154
- "num_gpt_outputs": 1,
155
- "gpt_cond_len": 30,
156
- "gpt_cond_chunk_len": 4,
157
- "max_ref_len": 30,
158
- "sound_norm_refs": false
159
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xtts-v2/eng/JohnButlerASMR/dataset.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb851a8dc075d06d22822c31fa1f06392a101d57afc2f2f44a476707221a6111
3
- size 228344815
 
 
 
 
xtts-v2/eng/JohnButlerASMR/model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a44f502429e143b80da7578178f6bddd900795631dd5306720627342a96eb86
3
- size 1868275926
 
 
 
 
xtts-v2/eng/JohnButlerASMR/ref.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:143cb2e0835c1edbde82c650fdab003b783711d7bce7ae6b0b668a02de60b0ad
3
- size 2315598
 
 
 
 
xtts-v2/eng/JohnButlerASMR/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
xtts-v2/eng/JohnMulaney/JohnMulaney_22khz.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:87e3c129eb3c165cb201768f2da76a6a444fbcc090b1149b4f91dfbd746da290
3
- size 788586
 
 
 
 
xtts-v2/eng/JohnMulaney/JohnMulaney_24khz.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab22fbabfcbc4195ce80f8a9cc04e17e043f96e76cbc81965d287afd639223dd
3
- size 858318
 
 
 
 
xtts-v2/eng/JohnMulaney/config.json DELETED
@@ -1,159 +0,0 @@
1
- {
2
- "output_path": "output",
3
- "logger_uri": null,
4
- "run_name": "run",
5
- "project_name": null,
6
- "run_description": "\ud83d\udc38Coqui trainer run.",
7
- "print_step": 25,
8
- "plot_step": 100,
9
- "model_param_stats": false,
10
- "wandb_entity": null,
11
- "dashboard_logger": "tensorboard",
12
- "save_on_interrupt": true,
13
- "log_model_step": null,
14
- "save_step": 10000,
15
- "save_n_checkpoints": 5,
16
- "save_checkpoints": true,
17
- "save_all_best": false,
18
- "save_best_after": 10000,
19
- "target_loss": null,
20
- "print_eval": false,
21
- "test_delay_epochs": 0,
22
- "run_eval": true,
23
- "run_eval_steps": null,
24
- "distributed_backend": "nccl",
25
- "distributed_url": "tcp://localhost:54321",
26
- "mixed_precision": false,
27
- "precision": "fp16",
28
- "epochs": 1000,
29
- "batch_size": 32,
30
- "eval_batch_size": 16,
31
- "grad_clip": 0.0,
32
- "scheduler_after_epoch": true,
33
- "lr": 0.001,
34
- "optimizer": "radam",
35
- "optimizer_params": null,
36
- "lr_scheduler": null,
37
- "lr_scheduler_params": {},
38
- "use_grad_scaler": false,
39
- "allow_tf32": false,
40
- "cudnn_enable": true,
41
- "cudnn_deterministic": false,
42
- "cudnn_benchmark": false,
43
- "training_seed": 54321,
44
- "model": "xtts",
45
- "num_loader_workers": 0,
46
- "num_eval_loader_workers": 0,
47
- "use_noise_augment": false,
48
- "audio": {
49
- "sample_rate": 22050,
50
- "output_sample_rate": 24000
51
- },
52
- "use_phonemes": false,
53
- "phonemizer": null,
54
- "phoneme_language": null,
55
- "compute_input_seq_cache": false,
56
- "text_cleaner": null,
57
- "enable_eos_bos_chars": false,
58
- "test_sentences_file": "",
59
- "phoneme_cache_path": null,
60
- "characters": null,
61
- "add_blank": false,
62
- "batch_group_size": 0,
63
- "loss_masking": null,
64
- "min_audio_len": 1,
65
- "max_audio_len": Infinity,
66
- "min_text_len": 1,
67
- "max_text_len": Infinity,
68
- "compute_f0": false,
69
- "compute_energy": false,
70
- "compute_linear_spec": false,
71
- "precompute_num_workers": 0,
72
- "start_by_longest": false,
73
- "shuffle": false,
74
- "drop_last": false,
75
- "datasets": [
76
- {
77
- "formatter": "",
78
- "dataset_name": "",
79
- "path": "",
80
- "meta_file_train": "",
81
- "ignored_speakers": null,
82
- "language": "",
83
- "phonemizer": "",
84
- "meta_file_val": "",
85
- "meta_file_attn_mask": ""
86
- }
87
- ],
88
- "test_sentences": [],
89
- "eval_split_max_size": null,
90
- "eval_split_size": 0.01,
91
- "use_speaker_weighted_sampler": false,
92
- "speaker_weighted_sampler_alpha": 1.0,
93
- "use_language_weighted_sampler": false,
94
- "language_weighted_sampler_alpha": 1.0,
95
- "use_length_weighted_sampler": false,
96
- "length_weighted_sampler_alpha": 1.0,
97
- "model_args": {
98
- "gpt_batch_size": 1,
99
- "enable_redaction": false,
100
- "kv_cache": true,
101
- "gpt_checkpoint": null,
102
- "clvp_checkpoint": null,
103
- "decoder_checkpoint": null,
104
- "num_chars": 255,
105
- "tokenizer_file": "",
106
- "gpt_max_audio_tokens": 605,
107
- "gpt_max_text_tokens": 402,
108
- "gpt_max_prompt_tokens": 70,
109
- "gpt_layers": 30,
110
- "gpt_n_model_channels": 1024,
111
- "gpt_n_heads": 16,
112
- "gpt_number_text_tokens": 6681,
113
- "gpt_start_text_token": null,
114
- "gpt_stop_text_token": null,
115
- "gpt_num_audio_tokens": 1026,
116
- "gpt_start_audio_token": 1024,
117
- "gpt_stop_audio_token": 1025,
118
- "gpt_code_stride_len": 1024,
119
- "gpt_use_masking_gt_prompt_approach": true,
120
- "gpt_use_perceiver_resampler": true,
121
- "input_sample_rate": 22050,
122
- "output_sample_rate": 24000,
123
- "output_hop_length": 256,
124
- "decoder_input_dim": 1024,
125
- "d_vector_dim": 512,
126
- "cond_d_vector_in_each_upsampling_layer": true,
127
- "duration_const": 102400
128
- },
129
- "model_dir": null,
130
- "languages": [
131
- "en",
132
- "es",
133
- "fr",
134
- "de",
135
- "it",
136
- "pt",
137
- "pl",
138
- "tr",
139
- "ru",
140
- "nl",
141
- "cs",
142
- "ar",
143
- "zh-cn",
144
- "hu",
145
- "ko",
146
- "ja",
147
- "hi"
148
- ],
149
- "temperature": 0.75,
150
- "length_penalty": 1.0,
151
- "repetition_penalty": 5.0,
152
- "top_k": 50,
153
- "top_p": 0.85,
154
- "num_gpt_outputs": 1,
155
- "gpt_cond_len": 30,
156
- "gpt_cond_chunk_len": 4,
157
- "max_ref_len": 30,
158
- "sound_norm_refs": false
159
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xtts-v2/eng/JohnMulaney/dataset.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:89d24c8c24bc3841d83ca2d1d721b9dde87ba4a592a114dcced545968e58dd2d
3
- size 110231783
 
 
 
 
xtts-v2/eng/JohnMulaney/model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f8db4ed5378d504c1212dd0ff63cb3fb8785b8567cabb5724849f6ed701b6eb
3
- size 1868275926
 
 
 
 
xtts-v2/eng/JohnMulaney/ref.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f605584ae157c40e54e8b8145267a290da705704f1e731a190ac4033dca7263
3
- size 3433040
 
 
 
 
xtts-v2/eng/JohnMulaney/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7822016434b50ea91251c4341bbbe4dbb360b1c8626f86d29bacc120fa779cd
3
- size 5963600
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_16000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e7f8116a4b19442c6ae394edda90802285a1137c2d476423cad7ec41b3c4e1e
3
- size 993998
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_24000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d8a0143cc5d730b5762cd52bb0139a620fd19c0a4d8b9a518a041c0a4b20ee8
3
- size 1490958
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_generated_example.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2b56eda560d6a72cdd3b0a6fd096daa0ffb27056eb53707686b8e94d48cbdd4
3
- size 405584
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/config.json DELETED
@@ -1,158 +0,0 @@
1
- {
2
- "output_path": "output",
3
- "logger_uri": null,
4
- "run_name": "run",
5
- "project_name": null,
6
- "run_description": "\ud83d\udc38Coqui trainer run.",
7
- "print_step": 25,
8
- "plot_step": 100,
9
- "model_param_stats": false,
10
- "wandb_entity": null,
11
- "dashboard_logger": "tensorboard",
12
- "save_on_interrupt": true,
13
- "log_model_step": null,
14
- "save_step": 10000,
15
- "save_n_checkpoints": 5,
16
- "save_checkpoints": true,
17
- "save_all_best": false,
18
- "save_best_after": 10000,
19
- "target_loss": null,
20
- "print_eval": false,
21
- "test_delay_epochs": 0,
22
- "run_eval": true,
23
- "run_eval_steps": null,
24
- "distributed_backend": "nccl",
25
- "distributed_url": "tcp://localhost:54321",
26
- "mixed_precision": false,
27
- "precision": "fp16",
28
- "epochs": 1000,
29
- "batch_size": 32,
30
- "eval_batch_size": 16,
31
- "grad_clip": 0.0,
32
- "scheduler_after_epoch": true,
33
- "lr": 0.001,
34
- "optimizer": "radam",
35
- "optimizer_params": null,
36
- "lr_scheduler": null,
37
- "lr_scheduler_params": {},
38
- "use_grad_scaler": false,
39
- "allow_tf32": false,
40
- "cudnn_enable": true,
41
- "cudnn_deterministic": false,
42
- "cudnn_benchmark": false,
43
- "training_seed": 54321,
44
- "model": "xtts",
45
- "num_loader_workers": 0,
46
- "num_eval_loader_workers": 0,
47
- "use_noise_augment": false,
48
- "audio": {
49
- "sample_rate": 22050,
50
- "output_sample_rate": 24000
51
- },
52
- "use_phonemes": false,
53
- "phonemizer": null,
54
- "phoneme_language": null,
55
- "compute_input_seq_cache": false,
56
- "text_cleaner": null,
57
- "enable_eos_bos_chars": false,
58
- "test_sentences_file": "",
59
- "phoneme_cache_path": null,
60
- "characters": null,
61
- "add_blank": false,
62
- "batch_group_size": 0,
63
- "loss_masking": null,
64
- "min_audio_len": 1,
65
- "max_audio_len": Infinity,
66
- "min_text_len": 1,
67
- "max_text_len": Infinity,
68
- "compute_f0": false,
69
- "compute_energy": false,
70
- "compute_linear_spec": false,
71
- "precompute_num_workers": 0,
72
- "start_by_longest": false,
73
- "shuffle": false,
74
- "drop_last": false,
75
- "datasets": [
76
- {
77
- "formatter": "",
78
- "dataset_name": "",
79
- "path": "",
80
- "meta_file_train": "",
81
- "ignored_speakers": null,
82
- "language": "",
83
- "phonemizer": "",
84
- "meta_file_val": "",
85
- "meta_file_attn_mask": ""
86
- }
87
- ],
88
- "test_sentences": [],
89
- "eval_split_max_size": null,
90
- "eval_split_size": 0.01,
91
- "use_speaker_weighted_sampler": false,
92
- "speaker_weighted_sampler_alpha": 1.0,
93
- "use_language_weighted_sampler": false,
94
- "language_weighted_sampler_alpha": 1.0,
95
- "use_length_weighted_sampler": false,
96
- "length_weighted_sampler_alpha": 1.0,
97
- "model_args": {
98
- "gpt_batch_size": 1,
99
- "enable_redaction": false,
100
- "kv_cache": true,
101
- "gpt_checkpoint": null,
102
- "clvp_checkpoint": null,
103
- "decoder_checkpoint": null,
104
- "num_chars": 255,
105
- "tokenizer_file": "",
106
- "gpt_max_audio_tokens": 605,
107
- "gpt_max_text_tokens": 402,
108
- "gpt_max_prompt_tokens": 70,
109
- "gpt_layers": 30,
110
- "gpt_n_model_channels": 1024,
111
- "gpt_n_heads": 16,
112
- "gpt_number_text_tokens": 6153,
113
- "gpt_start_text_token": null,
114
- "gpt_stop_text_token": null,
115
- "gpt_num_audio_tokens": 1026,
116
- "gpt_start_audio_token": 1024,
117
- "gpt_stop_audio_token": 1025,
118
- "gpt_code_stride_len": 1024,
119
- "gpt_use_masking_gt_prompt_approach": true,
120
- "gpt_use_perceiver_resampler": true,
121
- "input_sample_rate": 22050,
122
- "output_sample_rate": 24000,
123
- "output_hop_length": 256,
124
- "decoder_input_dim": 1024,
125
- "d_vector_dim": 512,
126
- "cond_d_vector_in_each_upsampling_layer": true,
127
- "duration_const": 102400
128
- },
129
- "model_dir": null,
130
- "languages": [
131
- "en",
132
- "es",
133
- "fr",
134
- "de",
135
- "it",
136
- "pt",
137
- "pl",
138
- "tr",
139
- "ru",
140
- "nl",
141
- "cs",
142
- "ar",
143
- "zh-cn",
144
- "hu",
145
- "ko",
146
- "ja"
147
- ],
148
- "temperature": 0.75,
149
- "length_penalty": 1.0,
150
- "repetition_penalty": 10.0,
151
- "top_k": 50,
152
- "top_p": 0.85,
153
- "num_gpt_outputs": 1,
154
- "gpt_cond_len": 30,
155
- "gpt_cond_chunk_len": 4,
156
- "max_ref_len": 30,
157
- "sound_norm_refs": false
158
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/dataset.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f7ef467663be687b369688f16125090d565a0bb528215607996ca1ead6e1a63
3
- size 78219316
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc8b369e54d7d30a41e56cabbf83b942042546c81f675a72932ec604f11a4cc9
3
- size 1863948630
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/speakers_xtts.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f6137c19a4eab0cbbe4c99b5babacf68b1746e50da90807708c10e645b943b
3
- size 7754818
 
 
 
 
xtts-v2/eng/PeterGriffinFamilyGuy/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
xtts-v2/eng/RafeBeckley/RafeBeckley.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:95d105d1ddc00542c43a7689f45443948e98004c25cdf57d43236914a36762f7
3
- size 5475532
 
 
 
 
xtts-v2/eng/RafeBeckley/RafeBeckley_16000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2560c5c8ed2d87dd9ba9121a3e9f5d03c03acd2c25a35df15192227bdad26fc5
3
- size 993358
 
 
 
 
xtts-v2/eng/RafeBeckley/RafeBeckley_24000.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc090b0b5b91ec4d8665ddc2c945dcfc3c1fea687be84af810846b81074b1fd1
3
- size 1489998
 
 
 
 
xtts-v2/eng/RafeBeckley/RafeBeckley_generated_example.wav DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0db9dda56c2e7729e18046c35b33207dae48f44136ab7c8e38aa5471a9512d35
3
- size 445520
 
 
 
 
xtts-v2/eng/RafeBeckley/config.json DELETED
@@ -1,158 +0,0 @@
1
- {
2
- "output_path": "output",
3
- "logger_uri": null,
4
- "run_name": "run",
5
- "project_name": null,
6
- "run_description": "\ud83d\udc38Coqui trainer run.",
7
- "print_step": 25,
8
- "plot_step": 100,
9
- "model_param_stats": false,
10
- "wandb_entity": null,
11
- "dashboard_logger": "tensorboard",
12
- "save_on_interrupt": true,
13
- "log_model_step": null,
14
- "save_step": 10000,
15
- "save_n_checkpoints": 5,
16
- "save_checkpoints": true,
17
- "save_all_best": false,
18
- "save_best_after": 10000,
19
- "target_loss": null,
20
- "print_eval": false,
21
- "test_delay_epochs": 0,
22
- "run_eval": true,
23
- "run_eval_steps": null,
24
- "distributed_backend": "nccl",
25
- "distributed_url": "tcp://localhost:54321",
26
- "mixed_precision": false,
27
- "precision": "fp16",
28
- "epochs": 1000,
29
- "batch_size": 32,
30
- "eval_batch_size": 16,
31
- "grad_clip": 0.0,
32
- "scheduler_after_epoch": true,
33
- "lr": 0.001,
34
- "optimizer": "radam",
35
- "optimizer_params": null,
36
- "lr_scheduler": null,
37
- "lr_scheduler_params": {},
38
- "use_grad_scaler": false,
39
- "allow_tf32": false,
40
- "cudnn_enable": true,
41
- "cudnn_deterministic": false,
42
- "cudnn_benchmark": false,
43
- "training_seed": 54321,
44
- "model": "xtts",
45
- "num_loader_workers": 0,
46
- "num_eval_loader_workers": 0,
47
- "use_noise_augment": false,
48
- "audio": {
49
- "sample_rate": 22050,
50
- "output_sample_rate": 24000
51
- },
52
- "use_phonemes": false,
53
- "phonemizer": null,
54
- "phoneme_language": null,
55
- "compute_input_seq_cache": false,
56
- "text_cleaner": null,
57
- "enable_eos_bos_chars": false,
58
- "test_sentences_file": "",
59
- "phoneme_cache_path": null,
60
- "characters": null,
61
- "add_blank": false,
62
- "batch_group_size": 0,
63
- "loss_masking": null,
64
- "min_audio_len": 1,
65
- "max_audio_len": Infinity,
66
- "min_text_len": 1,
67
- "max_text_len": Infinity,
68
- "compute_f0": false,
69
- "compute_energy": false,
70
- "compute_linear_spec": false,
71
- "precompute_num_workers": 0,
72
- "start_by_longest": false,
73
- "shuffle": false,
74
- "drop_last": false,
75
- "datasets": [
76
- {
77
- "formatter": "",
78
- "dataset_name": "",
79
- "path": "",
80
- "meta_file_train": "",
81
- "ignored_speakers": null,
82
- "language": "",
83
- "phonemizer": "",
84
- "meta_file_val": "",
85
- "meta_file_attn_mask": ""
86
- }
87
- ],
88
- "test_sentences": [],
89
- "eval_split_max_size": null,
90
- "eval_split_size": 0.01,
91
- "use_speaker_weighted_sampler": false,
92
- "speaker_weighted_sampler_alpha": 1.0,
93
- "use_language_weighted_sampler": false,
94
- "language_weighted_sampler_alpha": 1.0,
95
- "use_length_weighted_sampler": false,
96
- "length_weighted_sampler_alpha": 1.0,
97
- "model_args": {
98
- "gpt_batch_size": 1,
99
- "enable_redaction": false,
100
- "kv_cache": true,
101
- "gpt_checkpoint": null,
102
- "clvp_checkpoint": null,
103
- "decoder_checkpoint": null,
104
- "num_chars": 255,
105
- "tokenizer_file": "",
106
- "gpt_max_audio_tokens": 605,
107
- "gpt_max_text_tokens": 402,
108
- "gpt_max_prompt_tokens": 70,
109
- "gpt_layers": 30,
110
- "gpt_n_model_channels": 1024,
111
- "gpt_n_heads": 16,
112
- "gpt_number_text_tokens": 6153,
113
- "gpt_start_text_token": null,
114
- "gpt_stop_text_token": null,
115
- "gpt_num_audio_tokens": 1026,
116
- "gpt_start_audio_token": 1024,
117
- "gpt_stop_audio_token": 1025,
118
- "gpt_code_stride_len": 1024,
119
- "gpt_use_masking_gt_prompt_approach": true,
120
- "gpt_use_perceiver_resampler": true,
121
- "input_sample_rate": 22050,
122
- "output_sample_rate": 24000,
123
- "output_hop_length": 256,
124
- "decoder_input_dim": 1024,
125
- "d_vector_dim": 512,
126
- "cond_d_vector_in_each_upsampling_layer": true,
127
- "duration_const": 102400
128
- },
129
- "model_dir": null,
130
- "languages": [
131
- "en",
132
- "es",
133
- "fr",
134
- "de",
135
- "it",
136
- "pt",
137
- "pl",
138
- "tr",
139
- "ru",
140
- "nl",
141
- "cs",
142
- "ar",
143
- "zh-cn",
144
- "hu",
145
- "ko",
146
- "ja"
147
- ],
148
- "temperature": 0.75,
149
- "length_penalty": 1.0,
150
- "repetition_penalty": 10.0,
151
- "top_k": 50,
152
- "top_p": 0.85,
153
- "num_gpt_outputs": 1,
154
- "gpt_cond_len": 30,
155
- "gpt_cond_chunk_len": 4,
156
- "max_ref_len": 30,
157
- "sound_norm_refs": false
158
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xtts-v2/eng/RafeBeckley/dataset.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c903976d2309e354ce25588345545d13d6ea3260624507beb689c7a33753dbb5
3
- size 372991588
 
 
 
 
xtts-v2/eng/RafeBeckley/model.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:77394126f73ea8aea62b63c9b521128056cc0d3e1afee8437968133c276a5476
3
- size 1863948630