EADX
#3
by
eadx
- opened
This view is limited to 50 files because it contains too many changes.
See the raw diff here.
- .gitattributes +0 -36
- xtts-v2/eng/AsmrRacoon/AsmrRacoon.wav +0 -3
- xtts-v2/eng/AsmrRacoon/AsmrRacoon_16000.wav +0 -3
- xtts-v2/eng/AsmrRacoon/AsmrRacoon_24000.wav +0 -3
- xtts-v2/eng/AsmrRacoon/AsmrRacoon_generated_example.wav +0 -3
- xtts-v2/eng/AsmrRacoon/config.json +0 -158
- xtts-v2/eng/AsmrRacoon/dataset.zip +0 -3
- xtts-v2/eng/AsmrRacoon/model.pth +0 -3
- xtts-v2/eng/AsmrRacoon/speakers_xtts.pth +0 -3
- xtts-v2/eng/AsmrRacoon/vocab.json +0 -0
- xtts-v2/eng/Awkwafina/Awkwafina_16000.wav +0 -3
- xtts-v2/eng/Awkwafina/Awkwafina_24000.wav +0 -3
- xtts-v2/eng/Awkwafina/Awkwafina_generated_example.wav +0 -3
- xtts-v2/eng/Awkwafina/config.json +0 -158
- xtts-v2/eng/Awkwafina/dataset.zip +0 -3
- xtts-v2/eng/Awkwafina/model.pth +0 -3
- xtts-v2/eng/Awkwafina/quiet_ref.mp3 +0 -3
- xtts-v2/eng/Awkwafina/speakers_xtts.pth +0 -3
- xtts-v2/eng/Awkwafina/vocab.json +0 -0
- xtts-v2/eng/BadCartmanSouthPark/BadCartmanSouthPark.zip +0 -3
- xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_22khz.wav +0 -3
- xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_24khz.wav +0 -3
- xtts-v2/eng/JohnButlerASMR/config.json +0 -159
- xtts-v2/eng/JohnButlerASMR/dataset.zip +0 -3
- xtts-v2/eng/JohnButlerASMR/model.pth +0 -3
- xtts-v2/eng/JohnButlerASMR/ref.wav +0 -3
- xtts-v2/eng/JohnButlerASMR/vocab.json +0 -0
- xtts-v2/eng/JohnMulaney/JohnMulaney_22khz.wav +0 -3
- xtts-v2/eng/JohnMulaney/JohnMulaney_24khz.wav +0 -3
- xtts-v2/eng/JohnMulaney/config.json +0 -159
- xtts-v2/eng/JohnMulaney/dataset.zip +0 -3
- xtts-v2/eng/JohnMulaney/model.pth +0 -3
- xtts-v2/eng/JohnMulaney/ref.wav +0 -3
- xtts-v2/eng/JohnMulaney/vocab.json +0 -0
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy.wav +0 -3
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_16000.wav +0 -3
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_24000.wav +0 -3
- xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_generated_example.wav +0 -3
- xtts-v2/eng/PeterGriffinFamilyGuy/config.json +0 -158
- xtts-v2/eng/PeterGriffinFamilyGuy/dataset.zip +0 -3
- xtts-v2/eng/PeterGriffinFamilyGuy/model.pth +0 -3
- xtts-v2/eng/PeterGriffinFamilyGuy/speakers_xtts.pth +0 -3
- xtts-v2/eng/PeterGriffinFamilyGuy/vocab.json +0 -0
- xtts-v2/eng/RafeBeckley/RafeBeckley.wav +0 -3
- xtts-v2/eng/RafeBeckley/RafeBeckley_16000.wav +0 -3
- xtts-v2/eng/RafeBeckley/RafeBeckley_24000.wav +0 -3
- xtts-v2/eng/RafeBeckley/RafeBeckley_generated_example.wav +0 -3
- xtts-v2/eng/RafeBeckley/config.json +0 -158
- xtts-v2/eng/RafeBeckley/dataset.zip +0 -3
- xtts-v2/eng/RafeBeckley/model.pth +0 -3
.gitattributes
CHANGED
|
@@ -125,39 +125,3 @@ xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_16000.wav filter=lfs diff=lfs merge=lf
|
|
| 125 |
xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 126 |
xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 127 |
xtts-v2/eng/RelaxForAWhile/RelaxForAWhile.wav filter=lfs diff=lfs merge=lfs -text
|
| 128 |
-
xtts-v2/eng/AsmrRacoon/AsmrRacoon_16000.wav filter=lfs diff=lfs merge=lfs -text
|
| 129 |
-
xtts-v2/eng/AsmrRacoon/AsmrRacoon_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 130 |
-
xtts-v2/eng/AsmrRacoon/AsmrRacoon_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 131 |
-
xtts-v2/eng/AsmrRacoon/AsmrRacoon.wav filter=lfs diff=lfs merge=lfs -text
|
| 132 |
-
xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_16000.wav filter=lfs diff=lfs merge=lfs -text
|
| 133 |
-
xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 134 |
-
xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_converted_00000025.wav filter=lfs diff=lfs merge=lfs -text
|
| 135 |
-
xtts-v2/eng/SladeTeenTitans/SladeTeenTitans_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 136 |
-
xtts-v2/eng/SladeTeenTitans/SladeTeenTitans.wav filter=lfs diff=lfs merge=lfs -text
|
| 137 |
-
xtts-v2/rus/Konishev/ref.wav filter=lfs diff=lfs merge=lfs -text
|
| 138 |
-
xtts-v2/eng/Awkwafina/Awkwafina_16000.wav filter=lfs diff=lfs merge=lfs -text
|
| 139 |
-
xtts-v2/eng/Awkwafina/Awkwafina_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 140 |
-
xtts-v2/eng/Awkwafina/Awkwafina_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 141 |
-
xtts-v2/eng/Awkwafina/quiet_ref.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 142 |
-
xtts-v2/eng/SubZeroMKX/SubZeroMKX_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 143 |
-
xtts-v2/eng/SubZeroMKX/SubZeroMKX_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 144 |
-
xtts-v2/eng/SubZeroMKX/SubZeroMKX.wav filter=lfs diff=lfs merge=lfs -text
|
| 145 |
-
xtts-v2/eng/Top15s/Top15s_16000.wav filter=lfs diff=lfs merge=lfs -text
|
| 146 |
-
xtts-v2/eng/Top15s/Top15s_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 147 |
-
xtts-v2/eng/Top15s/Top15s_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 148 |
-
xtts-v2/eng/Top15s/Top15s.wav filter=lfs diff=lfs merge=lfs -text
|
| 149 |
-
xtts-v2/eng/Top15s/Top15sTrainingData.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 150 |
-
xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_22khz.wav filter=lfs diff=lfs merge=lfs -text
|
| 151 |
-
xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_24khz.wav filter=lfs diff=lfs merge=lfs -text
|
| 152 |
-
xtts-v2/eng/JohnButlerASMR/ref.wav filter=lfs diff=lfs merge=lfs -text
|
| 153 |
-
xtts-v2/eng/JohnMulaney/JohnMulaney_22khz.wav filter=lfs diff=lfs merge=lfs -text
|
| 154 |
-
xtts-v2/eng/JohnMulaney/JohnMulaney_24khz.wav filter=lfs diff=lfs merge=lfs -text
|
| 155 |
-
xtts-v2/eng/JohnMulaney/ref.wav filter=lfs diff=lfs merge=lfs -text
|
| 156 |
-
xtts-v2/eng/RafeBeckley/RafeBeckley_16000.wav filter=lfs diff=lfs merge=lfs -text
|
| 157 |
-
xtts-v2/eng/RafeBeckley/RafeBeckley_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 158 |
-
xtts-v2/eng/RafeBeckley/RafeBeckley_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 159 |
-
xtts-v2/eng/RafeBeckley/RafeBeckley.wav filter=lfs diff=lfs merge=lfs -text
|
| 160 |
-
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_16000.wav filter=lfs diff=lfs merge=lfs -text
|
| 161 |
-
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 162 |
-
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 163 |
-
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 125 |
xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_24000.wav filter=lfs diff=lfs merge=lfs -text
|
| 126 |
xtts-v2/eng/RelaxForAWhile/RelaxForAWhile_generated_example.wav filter=lfs diff=lfs merge=lfs -text
|
| 127 |
xtts-v2/eng/RelaxForAWhile/RelaxForAWhile.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/AsmrRacoon.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a31ad6fbba04eb20f9ccbd34c8b78fbf5d697036035af37b18a66281316daf24
|
| 3 |
-
size 3752718
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/AsmrRacoon_16000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ed0022ad554d5c064c8a9a59932f2d76c731d91c75d97c36c18088d1685b9fb9
|
| 3 |
-
size 1250958
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/AsmrRacoon_24000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e550baa3825adc6a67ff6ecff23127046db1bf0e66f7e693e8c909217b3d03f3
|
| 3 |
-
size 1876398
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/AsmrRacoon_generated_example.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:8cce4fbe854e4f5022163bdf60fa2c4e1ad36cb721e9804069c04fb157f3b0b0
|
| 3 |
-
size 389710
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/config.json
DELETED
|
@@ -1,158 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"output_path": "output",
|
| 3 |
-
"logger_uri": null,
|
| 4 |
-
"run_name": "run",
|
| 5 |
-
"project_name": null,
|
| 6 |
-
"run_description": "\ud83d\udc38Coqui trainer run.",
|
| 7 |
-
"print_step": 25,
|
| 8 |
-
"plot_step": 100,
|
| 9 |
-
"model_param_stats": false,
|
| 10 |
-
"wandb_entity": null,
|
| 11 |
-
"dashboard_logger": "tensorboard",
|
| 12 |
-
"save_on_interrupt": true,
|
| 13 |
-
"log_model_step": null,
|
| 14 |
-
"save_step": 10000,
|
| 15 |
-
"save_n_checkpoints": 5,
|
| 16 |
-
"save_checkpoints": true,
|
| 17 |
-
"save_all_best": false,
|
| 18 |
-
"save_best_after": 10000,
|
| 19 |
-
"target_loss": null,
|
| 20 |
-
"print_eval": false,
|
| 21 |
-
"test_delay_epochs": 0,
|
| 22 |
-
"run_eval": true,
|
| 23 |
-
"run_eval_steps": null,
|
| 24 |
-
"distributed_backend": "nccl",
|
| 25 |
-
"distributed_url": "tcp://localhost:54321",
|
| 26 |
-
"mixed_precision": false,
|
| 27 |
-
"precision": "fp16",
|
| 28 |
-
"epochs": 1000,
|
| 29 |
-
"batch_size": 32,
|
| 30 |
-
"eval_batch_size": 16,
|
| 31 |
-
"grad_clip": 0.0,
|
| 32 |
-
"scheduler_after_epoch": true,
|
| 33 |
-
"lr": 0.001,
|
| 34 |
-
"optimizer": "radam",
|
| 35 |
-
"optimizer_params": null,
|
| 36 |
-
"lr_scheduler": null,
|
| 37 |
-
"lr_scheduler_params": {},
|
| 38 |
-
"use_grad_scaler": false,
|
| 39 |
-
"allow_tf32": false,
|
| 40 |
-
"cudnn_enable": true,
|
| 41 |
-
"cudnn_deterministic": false,
|
| 42 |
-
"cudnn_benchmark": false,
|
| 43 |
-
"training_seed": 54321,
|
| 44 |
-
"model": "xtts",
|
| 45 |
-
"num_loader_workers": 0,
|
| 46 |
-
"num_eval_loader_workers": 0,
|
| 47 |
-
"use_noise_augment": false,
|
| 48 |
-
"audio": {
|
| 49 |
-
"sample_rate": 22050,
|
| 50 |
-
"output_sample_rate": 24000
|
| 51 |
-
},
|
| 52 |
-
"use_phonemes": false,
|
| 53 |
-
"phonemizer": null,
|
| 54 |
-
"phoneme_language": null,
|
| 55 |
-
"compute_input_seq_cache": false,
|
| 56 |
-
"text_cleaner": null,
|
| 57 |
-
"enable_eos_bos_chars": false,
|
| 58 |
-
"test_sentences_file": "",
|
| 59 |
-
"phoneme_cache_path": null,
|
| 60 |
-
"characters": null,
|
| 61 |
-
"add_blank": false,
|
| 62 |
-
"batch_group_size": 0,
|
| 63 |
-
"loss_masking": null,
|
| 64 |
-
"min_audio_len": 1,
|
| 65 |
-
"max_audio_len": Infinity,
|
| 66 |
-
"min_text_len": 1,
|
| 67 |
-
"max_text_len": Infinity,
|
| 68 |
-
"compute_f0": false,
|
| 69 |
-
"compute_energy": false,
|
| 70 |
-
"compute_linear_spec": false,
|
| 71 |
-
"precompute_num_workers": 0,
|
| 72 |
-
"start_by_longest": false,
|
| 73 |
-
"shuffle": false,
|
| 74 |
-
"drop_last": false,
|
| 75 |
-
"datasets": [
|
| 76 |
-
{
|
| 77 |
-
"formatter": "",
|
| 78 |
-
"dataset_name": "",
|
| 79 |
-
"path": "",
|
| 80 |
-
"meta_file_train": "",
|
| 81 |
-
"ignored_speakers": null,
|
| 82 |
-
"language": "",
|
| 83 |
-
"phonemizer": "",
|
| 84 |
-
"meta_file_val": "",
|
| 85 |
-
"meta_file_attn_mask": ""
|
| 86 |
-
}
|
| 87 |
-
],
|
| 88 |
-
"test_sentences": [],
|
| 89 |
-
"eval_split_max_size": null,
|
| 90 |
-
"eval_split_size": 0.01,
|
| 91 |
-
"use_speaker_weighted_sampler": false,
|
| 92 |
-
"speaker_weighted_sampler_alpha": 1.0,
|
| 93 |
-
"use_language_weighted_sampler": false,
|
| 94 |
-
"language_weighted_sampler_alpha": 1.0,
|
| 95 |
-
"use_length_weighted_sampler": false,
|
| 96 |
-
"length_weighted_sampler_alpha": 1.0,
|
| 97 |
-
"model_args": {
|
| 98 |
-
"gpt_batch_size": 1,
|
| 99 |
-
"enable_redaction": false,
|
| 100 |
-
"kv_cache": true,
|
| 101 |
-
"gpt_checkpoint": null,
|
| 102 |
-
"clvp_checkpoint": null,
|
| 103 |
-
"decoder_checkpoint": null,
|
| 104 |
-
"num_chars": 255,
|
| 105 |
-
"tokenizer_file": "",
|
| 106 |
-
"gpt_max_audio_tokens": 605,
|
| 107 |
-
"gpt_max_text_tokens": 402,
|
| 108 |
-
"gpt_max_prompt_tokens": 70,
|
| 109 |
-
"gpt_layers": 30,
|
| 110 |
-
"gpt_n_model_channels": 1024,
|
| 111 |
-
"gpt_n_heads": 16,
|
| 112 |
-
"gpt_number_text_tokens": 6153,
|
| 113 |
-
"gpt_start_text_token": null,
|
| 114 |
-
"gpt_stop_text_token": null,
|
| 115 |
-
"gpt_num_audio_tokens": 1026,
|
| 116 |
-
"gpt_start_audio_token": 1024,
|
| 117 |
-
"gpt_stop_audio_token": 1025,
|
| 118 |
-
"gpt_code_stride_len": 1024,
|
| 119 |
-
"gpt_use_masking_gt_prompt_approach": true,
|
| 120 |
-
"gpt_use_perceiver_resampler": true,
|
| 121 |
-
"input_sample_rate": 22050,
|
| 122 |
-
"output_sample_rate": 24000,
|
| 123 |
-
"output_hop_length": 256,
|
| 124 |
-
"decoder_input_dim": 1024,
|
| 125 |
-
"d_vector_dim": 512,
|
| 126 |
-
"cond_d_vector_in_each_upsampling_layer": true,
|
| 127 |
-
"duration_const": 102400
|
| 128 |
-
},
|
| 129 |
-
"model_dir": null,
|
| 130 |
-
"languages": [
|
| 131 |
-
"en",
|
| 132 |
-
"es",
|
| 133 |
-
"fr",
|
| 134 |
-
"de",
|
| 135 |
-
"it",
|
| 136 |
-
"pt",
|
| 137 |
-
"pl",
|
| 138 |
-
"tr",
|
| 139 |
-
"ru",
|
| 140 |
-
"nl",
|
| 141 |
-
"cs",
|
| 142 |
-
"ar",
|
| 143 |
-
"zh-cn",
|
| 144 |
-
"hu",
|
| 145 |
-
"ko",
|
| 146 |
-
"ja"
|
| 147 |
-
],
|
| 148 |
-
"temperature": 0.75,
|
| 149 |
-
"length_penalty": 1.0,
|
| 150 |
-
"repetition_penalty": 10.0,
|
| 151 |
-
"top_k": 50,
|
| 152 |
-
"top_p": 0.85,
|
| 153 |
-
"num_gpt_outputs": 1,
|
| 154 |
-
"gpt_cond_len": 30,
|
| 155 |
-
"gpt_cond_chunk_len": 4,
|
| 156 |
-
"max_ref_len": 30,
|
| 157 |
-
"sound_norm_refs": false
|
| 158 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/dataset.zip
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:3c3af20563020c8b71da9e1b8aab2302b1d9b52dd606f0bc7b9d7d2a6227c505
|
| 3 |
-
size 110685528
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/model.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:530ead1f38e8aa28ba686d70758615f9dcdc2cc985a36c225e38021eaea5f1d4
|
| 3 |
-
size 1863948438
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/speakers_xtts.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:f0f6137c19a4eab0cbbe4c99b5babacf68b1746e50da90807708c10e645b943b
|
| 3 |
-
size 7754818
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/AsmrRacoon/vocab.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
xtts-v2/eng/Awkwafina/Awkwafina_16000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:8d22b3d88f634befaa0e287446cb6906784563f13b7602df0733dade14934b00
|
| 3 |
-
size 385038
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/Awkwafina_24000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:eb0e790689d74d7b618589d26ab0d5a999ff1b7b255845c5f9102b51d930ec0f
|
| 3 |
-
size 577518
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/Awkwafina_generated_example.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:cf03b297299ba1f98c323308b98979788f7e9d86cd6879519f4e2046dbee0e6e
|
| 3 |
-
size 378960
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/config.json
DELETED
|
@@ -1,158 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"output_path": "output",
|
| 3 |
-
"logger_uri": null,
|
| 4 |
-
"run_name": "run",
|
| 5 |
-
"project_name": null,
|
| 6 |
-
"run_description": "\ud83d\udc38Coqui trainer run.",
|
| 7 |
-
"print_step": 25,
|
| 8 |
-
"plot_step": 100,
|
| 9 |
-
"model_param_stats": false,
|
| 10 |
-
"wandb_entity": null,
|
| 11 |
-
"dashboard_logger": "tensorboard",
|
| 12 |
-
"save_on_interrupt": true,
|
| 13 |
-
"log_model_step": null,
|
| 14 |
-
"save_step": 10000,
|
| 15 |
-
"save_n_checkpoints": 5,
|
| 16 |
-
"save_checkpoints": true,
|
| 17 |
-
"save_all_best": false,
|
| 18 |
-
"save_best_after": 10000,
|
| 19 |
-
"target_loss": null,
|
| 20 |
-
"print_eval": false,
|
| 21 |
-
"test_delay_epochs": 0,
|
| 22 |
-
"run_eval": true,
|
| 23 |
-
"run_eval_steps": null,
|
| 24 |
-
"distributed_backend": "nccl",
|
| 25 |
-
"distributed_url": "tcp://localhost:54321",
|
| 26 |
-
"mixed_precision": false,
|
| 27 |
-
"precision": "fp16",
|
| 28 |
-
"epochs": 1000,
|
| 29 |
-
"batch_size": 32,
|
| 30 |
-
"eval_batch_size": 16,
|
| 31 |
-
"grad_clip": 0.0,
|
| 32 |
-
"scheduler_after_epoch": true,
|
| 33 |
-
"lr": 0.001,
|
| 34 |
-
"optimizer": "radam",
|
| 35 |
-
"optimizer_params": null,
|
| 36 |
-
"lr_scheduler": null,
|
| 37 |
-
"lr_scheduler_params": {},
|
| 38 |
-
"use_grad_scaler": false,
|
| 39 |
-
"allow_tf32": false,
|
| 40 |
-
"cudnn_enable": true,
|
| 41 |
-
"cudnn_deterministic": false,
|
| 42 |
-
"cudnn_benchmark": false,
|
| 43 |
-
"training_seed": 54321,
|
| 44 |
-
"model": "xtts",
|
| 45 |
-
"num_loader_workers": 0,
|
| 46 |
-
"num_eval_loader_workers": 0,
|
| 47 |
-
"use_noise_augment": false,
|
| 48 |
-
"audio": {
|
| 49 |
-
"sample_rate": 22050,
|
| 50 |
-
"output_sample_rate": 24000
|
| 51 |
-
},
|
| 52 |
-
"use_phonemes": false,
|
| 53 |
-
"phonemizer": null,
|
| 54 |
-
"phoneme_language": null,
|
| 55 |
-
"compute_input_seq_cache": false,
|
| 56 |
-
"text_cleaner": null,
|
| 57 |
-
"enable_eos_bos_chars": false,
|
| 58 |
-
"test_sentences_file": "",
|
| 59 |
-
"phoneme_cache_path": null,
|
| 60 |
-
"characters": null,
|
| 61 |
-
"add_blank": false,
|
| 62 |
-
"batch_group_size": 0,
|
| 63 |
-
"loss_masking": null,
|
| 64 |
-
"min_audio_len": 1,
|
| 65 |
-
"max_audio_len": Infinity,
|
| 66 |
-
"min_text_len": 1,
|
| 67 |
-
"max_text_len": Infinity,
|
| 68 |
-
"compute_f0": false,
|
| 69 |
-
"compute_energy": false,
|
| 70 |
-
"compute_linear_spec": false,
|
| 71 |
-
"precompute_num_workers": 0,
|
| 72 |
-
"start_by_longest": false,
|
| 73 |
-
"shuffle": false,
|
| 74 |
-
"drop_last": false,
|
| 75 |
-
"datasets": [
|
| 76 |
-
{
|
| 77 |
-
"formatter": "",
|
| 78 |
-
"dataset_name": "",
|
| 79 |
-
"path": "",
|
| 80 |
-
"meta_file_train": "",
|
| 81 |
-
"ignored_speakers": null,
|
| 82 |
-
"language": "",
|
| 83 |
-
"phonemizer": "",
|
| 84 |
-
"meta_file_val": "",
|
| 85 |
-
"meta_file_attn_mask": ""
|
| 86 |
-
}
|
| 87 |
-
],
|
| 88 |
-
"test_sentences": [],
|
| 89 |
-
"eval_split_max_size": null,
|
| 90 |
-
"eval_split_size": 0.01,
|
| 91 |
-
"use_speaker_weighted_sampler": false,
|
| 92 |
-
"speaker_weighted_sampler_alpha": 1.0,
|
| 93 |
-
"use_language_weighted_sampler": false,
|
| 94 |
-
"language_weighted_sampler_alpha": 1.0,
|
| 95 |
-
"use_length_weighted_sampler": false,
|
| 96 |
-
"length_weighted_sampler_alpha": 1.0,
|
| 97 |
-
"model_args": {
|
| 98 |
-
"gpt_batch_size": 1,
|
| 99 |
-
"enable_redaction": false,
|
| 100 |
-
"kv_cache": true,
|
| 101 |
-
"gpt_checkpoint": null,
|
| 102 |
-
"clvp_checkpoint": null,
|
| 103 |
-
"decoder_checkpoint": null,
|
| 104 |
-
"num_chars": 255,
|
| 105 |
-
"tokenizer_file": "",
|
| 106 |
-
"gpt_max_audio_tokens": 605,
|
| 107 |
-
"gpt_max_text_tokens": 402,
|
| 108 |
-
"gpt_max_prompt_tokens": 70,
|
| 109 |
-
"gpt_layers": 30,
|
| 110 |
-
"gpt_n_model_channels": 1024,
|
| 111 |
-
"gpt_n_heads": 16,
|
| 112 |
-
"gpt_number_text_tokens": 6153,
|
| 113 |
-
"gpt_start_text_token": null,
|
| 114 |
-
"gpt_stop_text_token": null,
|
| 115 |
-
"gpt_num_audio_tokens": 1026,
|
| 116 |
-
"gpt_start_audio_token": 1024,
|
| 117 |
-
"gpt_stop_audio_token": 1025,
|
| 118 |
-
"gpt_code_stride_len": 1024,
|
| 119 |
-
"gpt_use_masking_gt_prompt_approach": true,
|
| 120 |
-
"gpt_use_perceiver_resampler": true,
|
| 121 |
-
"input_sample_rate": 22050,
|
| 122 |
-
"output_sample_rate": 24000,
|
| 123 |
-
"output_hop_length": 256,
|
| 124 |
-
"decoder_input_dim": 1024,
|
| 125 |
-
"d_vector_dim": 512,
|
| 126 |
-
"cond_d_vector_in_each_upsampling_layer": true,
|
| 127 |
-
"duration_const": 102400
|
| 128 |
-
},
|
| 129 |
-
"model_dir": null,
|
| 130 |
-
"languages": [
|
| 131 |
-
"en",
|
| 132 |
-
"es",
|
| 133 |
-
"fr",
|
| 134 |
-
"de",
|
| 135 |
-
"it",
|
| 136 |
-
"pt",
|
| 137 |
-
"pl",
|
| 138 |
-
"tr",
|
| 139 |
-
"ru",
|
| 140 |
-
"nl",
|
| 141 |
-
"cs",
|
| 142 |
-
"ar",
|
| 143 |
-
"zh-cn",
|
| 144 |
-
"hu",
|
| 145 |
-
"ko",
|
| 146 |
-
"ja"
|
| 147 |
-
],
|
| 148 |
-
"temperature": 0.75,
|
| 149 |
-
"length_penalty": 1.0,
|
| 150 |
-
"repetition_penalty": 10.0,
|
| 151 |
-
"top_k": 50,
|
| 152 |
-
"top_p": 0.85,
|
| 153 |
-
"num_gpt_outputs": 1,
|
| 154 |
-
"gpt_cond_len": 30,
|
| 155 |
-
"gpt_cond_chunk_len": 4,
|
| 156 |
-
"max_ref_len": 30,
|
| 157 |
-
"sound_norm_refs": false
|
| 158 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/dataset.zip
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:08eff786cda00924fd248d0597c90d7e1f2a654d5051e2a7e16c7e732e2a6431
|
| 3 |
-
size 96880139
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/model.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:fbcd3da027ec9815f78dc7b1fffa133caa2b3ebe2cd9ac3a72bb26bf55b27062
|
| 3 |
-
size 1863948630
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/quiet_ref.mp3
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:1fb7602046d5fef3ba3becd314c18542cfba29d225899d26e86ac06d7d7d2fb9
|
| 3 |
-
size 117152
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/speakers_xtts.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:f0f6137c19a4eab0cbbe4c99b5babacf68b1746e50da90807708c10e645b943b
|
| 3 |
-
size 7754818
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/Awkwafina/vocab.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
xtts-v2/eng/BadCartmanSouthPark/BadCartmanSouthPark.zip
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:78caaab499455586889a50e68148105fb739bb7a9463ae820e57a870a92f2b52
|
| 3 |
-
size 1727537542
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_22khz.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:771650b040d485e0687fe40379472d27c4a5c5ef154acfc8f0ae696bba48df7a
|
| 3 |
-
size 1063770
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnButlerASMR/JohnButlerASMR_24khz.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:fbe36977362327bc9fd04cbbd14e40b9a79c9740f8dda4eb5bbdbe444b9f98b1
|
| 3 |
-
size 1157838
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnButlerASMR/config.json
DELETED
|
@@ -1,159 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"output_path": "output",
|
| 3 |
-
"logger_uri": null,
|
| 4 |
-
"run_name": "run",
|
| 5 |
-
"project_name": null,
|
| 6 |
-
"run_description": "\ud83d\udc38Coqui trainer run.",
|
| 7 |
-
"print_step": 25,
|
| 8 |
-
"plot_step": 100,
|
| 9 |
-
"model_param_stats": false,
|
| 10 |
-
"wandb_entity": null,
|
| 11 |
-
"dashboard_logger": "tensorboard",
|
| 12 |
-
"save_on_interrupt": true,
|
| 13 |
-
"log_model_step": null,
|
| 14 |
-
"save_step": 10000,
|
| 15 |
-
"save_n_checkpoints": 5,
|
| 16 |
-
"save_checkpoints": true,
|
| 17 |
-
"save_all_best": false,
|
| 18 |
-
"save_best_after": 10000,
|
| 19 |
-
"target_loss": null,
|
| 20 |
-
"print_eval": false,
|
| 21 |
-
"test_delay_epochs": 0,
|
| 22 |
-
"run_eval": true,
|
| 23 |
-
"run_eval_steps": null,
|
| 24 |
-
"distributed_backend": "nccl",
|
| 25 |
-
"distributed_url": "tcp://localhost:54321",
|
| 26 |
-
"mixed_precision": false,
|
| 27 |
-
"precision": "fp16",
|
| 28 |
-
"epochs": 1000,
|
| 29 |
-
"batch_size": 32,
|
| 30 |
-
"eval_batch_size": 16,
|
| 31 |
-
"grad_clip": 0.0,
|
| 32 |
-
"scheduler_after_epoch": true,
|
| 33 |
-
"lr": 0.001,
|
| 34 |
-
"optimizer": "radam",
|
| 35 |
-
"optimizer_params": null,
|
| 36 |
-
"lr_scheduler": null,
|
| 37 |
-
"lr_scheduler_params": {},
|
| 38 |
-
"use_grad_scaler": false,
|
| 39 |
-
"allow_tf32": false,
|
| 40 |
-
"cudnn_enable": true,
|
| 41 |
-
"cudnn_deterministic": false,
|
| 42 |
-
"cudnn_benchmark": false,
|
| 43 |
-
"training_seed": 54321,
|
| 44 |
-
"model": "xtts",
|
| 45 |
-
"num_loader_workers": 0,
|
| 46 |
-
"num_eval_loader_workers": 0,
|
| 47 |
-
"use_noise_augment": false,
|
| 48 |
-
"audio": {
|
| 49 |
-
"sample_rate": 22050,
|
| 50 |
-
"output_sample_rate": 24000
|
| 51 |
-
},
|
| 52 |
-
"use_phonemes": false,
|
| 53 |
-
"phonemizer": null,
|
| 54 |
-
"phoneme_language": null,
|
| 55 |
-
"compute_input_seq_cache": false,
|
| 56 |
-
"text_cleaner": null,
|
| 57 |
-
"enable_eos_bos_chars": false,
|
| 58 |
-
"test_sentences_file": "",
|
| 59 |
-
"phoneme_cache_path": null,
|
| 60 |
-
"characters": null,
|
| 61 |
-
"add_blank": false,
|
| 62 |
-
"batch_group_size": 0,
|
| 63 |
-
"loss_masking": null,
|
| 64 |
-
"min_audio_len": 1,
|
| 65 |
-
"max_audio_len": Infinity,
|
| 66 |
-
"min_text_len": 1,
|
| 67 |
-
"max_text_len": Infinity,
|
| 68 |
-
"compute_f0": false,
|
| 69 |
-
"compute_energy": false,
|
| 70 |
-
"compute_linear_spec": false,
|
| 71 |
-
"precompute_num_workers": 0,
|
| 72 |
-
"start_by_longest": false,
|
| 73 |
-
"shuffle": false,
|
| 74 |
-
"drop_last": false,
|
| 75 |
-
"datasets": [
|
| 76 |
-
{
|
| 77 |
-
"formatter": "",
|
| 78 |
-
"dataset_name": "",
|
| 79 |
-
"path": "",
|
| 80 |
-
"meta_file_train": "",
|
| 81 |
-
"ignored_speakers": null,
|
| 82 |
-
"language": "",
|
| 83 |
-
"phonemizer": "",
|
| 84 |
-
"meta_file_val": "",
|
| 85 |
-
"meta_file_attn_mask": ""
|
| 86 |
-
}
|
| 87 |
-
],
|
| 88 |
-
"test_sentences": [],
|
| 89 |
-
"eval_split_max_size": null,
|
| 90 |
-
"eval_split_size": 0.01,
|
| 91 |
-
"use_speaker_weighted_sampler": false,
|
| 92 |
-
"speaker_weighted_sampler_alpha": 1.0,
|
| 93 |
-
"use_language_weighted_sampler": false,
|
| 94 |
-
"language_weighted_sampler_alpha": 1.0,
|
| 95 |
-
"use_length_weighted_sampler": false,
|
| 96 |
-
"length_weighted_sampler_alpha": 1.0,
|
| 97 |
-
"model_args": {
|
| 98 |
-
"gpt_batch_size": 1,
|
| 99 |
-
"enable_redaction": false,
|
| 100 |
-
"kv_cache": true,
|
| 101 |
-
"gpt_checkpoint": null,
|
| 102 |
-
"clvp_checkpoint": null,
|
| 103 |
-
"decoder_checkpoint": null,
|
| 104 |
-
"num_chars": 255,
|
| 105 |
-
"tokenizer_file": "",
|
| 106 |
-
"gpt_max_audio_tokens": 605,
|
| 107 |
-
"gpt_max_text_tokens": 402,
|
| 108 |
-
"gpt_max_prompt_tokens": 70,
|
| 109 |
-
"gpt_layers": 30,
|
| 110 |
-
"gpt_n_model_channels": 1024,
|
| 111 |
-
"gpt_n_heads": 16,
|
| 112 |
-
"gpt_number_text_tokens": 6681,
|
| 113 |
-
"gpt_start_text_token": null,
|
| 114 |
-
"gpt_stop_text_token": null,
|
| 115 |
-
"gpt_num_audio_tokens": 1026,
|
| 116 |
-
"gpt_start_audio_token": 1024,
|
| 117 |
-
"gpt_stop_audio_token": 1025,
|
| 118 |
-
"gpt_code_stride_len": 1024,
|
| 119 |
-
"gpt_use_masking_gt_prompt_approach": true,
|
| 120 |
-
"gpt_use_perceiver_resampler": true,
|
| 121 |
-
"input_sample_rate": 22050,
|
| 122 |
-
"output_sample_rate": 24000,
|
| 123 |
-
"output_hop_length": 256,
|
| 124 |
-
"decoder_input_dim": 1024,
|
| 125 |
-
"d_vector_dim": 512,
|
| 126 |
-
"cond_d_vector_in_each_upsampling_layer": true,
|
| 127 |
-
"duration_const": 102400
|
| 128 |
-
},
|
| 129 |
-
"model_dir": null,
|
| 130 |
-
"languages": [
|
| 131 |
-
"en",
|
| 132 |
-
"es",
|
| 133 |
-
"fr",
|
| 134 |
-
"de",
|
| 135 |
-
"it",
|
| 136 |
-
"pt",
|
| 137 |
-
"pl",
|
| 138 |
-
"tr",
|
| 139 |
-
"ru",
|
| 140 |
-
"nl",
|
| 141 |
-
"cs",
|
| 142 |
-
"ar",
|
| 143 |
-
"zh-cn",
|
| 144 |
-
"hu",
|
| 145 |
-
"ko",
|
| 146 |
-
"ja",
|
| 147 |
-
"hi"
|
| 148 |
-
],
|
| 149 |
-
"temperature": 0.75,
|
| 150 |
-
"length_penalty": 1.0,
|
| 151 |
-
"repetition_penalty": 5.0,
|
| 152 |
-
"top_k": 50,
|
| 153 |
-
"top_p": 0.85,
|
| 154 |
-
"num_gpt_outputs": 1,
|
| 155 |
-
"gpt_cond_len": 30,
|
| 156 |
-
"gpt_cond_chunk_len": 4,
|
| 157 |
-
"max_ref_len": 30,
|
| 158 |
-
"sound_norm_refs": false
|
| 159 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnButlerASMR/dataset.zip
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:fb851a8dc075d06d22822c31fa1f06392a101d57afc2f2f44a476707221a6111
|
| 3 |
-
size 228344815
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnButlerASMR/model.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:8a44f502429e143b80da7578178f6bddd900795631dd5306720627342a96eb86
|
| 3 |
-
size 1868275926
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnButlerASMR/ref.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:143cb2e0835c1edbde82c650fdab003b783711d7bce7ae6b0b668a02de60b0ad
|
| 3 |
-
size 2315598
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnButlerASMR/vocab.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
xtts-v2/eng/JohnMulaney/JohnMulaney_22khz.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:87e3c129eb3c165cb201768f2da76a6a444fbcc090b1149b4f91dfbd746da290
|
| 3 |
-
size 788586
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnMulaney/JohnMulaney_24khz.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ab22fbabfcbc4195ce80f8a9cc04e17e043f96e76cbc81965d287afd639223dd
|
| 3 |
-
size 858318
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnMulaney/config.json
DELETED
|
@@ -1,159 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"output_path": "output",
|
| 3 |
-
"logger_uri": null,
|
| 4 |
-
"run_name": "run",
|
| 5 |
-
"project_name": null,
|
| 6 |
-
"run_description": "\ud83d\udc38Coqui trainer run.",
|
| 7 |
-
"print_step": 25,
|
| 8 |
-
"plot_step": 100,
|
| 9 |
-
"model_param_stats": false,
|
| 10 |
-
"wandb_entity": null,
|
| 11 |
-
"dashboard_logger": "tensorboard",
|
| 12 |
-
"save_on_interrupt": true,
|
| 13 |
-
"log_model_step": null,
|
| 14 |
-
"save_step": 10000,
|
| 15 |
-
"save_n_checkpoints": 5,
|
| 16 |
-
"save_checkpoints": true,
|
| 17 |
-
"save_all_best": false,
|
| 18 |
-
"save_best_after": 10000,
|
| 19 |
-
"target_loss": null,
|
| 20 |
-
"print_eval": false,
|
| 21 |
-
"test_delay_epochs": 0,
|
| 22 |
-
"run_eval": true,
|
| 23 |
-
"run_eval_steps": null,
|
| 24 |
-
"distributed_backend": "nccl",
|
| 25 |
-
"distributed_url": "tcp://localhost:54321",
|
| 26 |
-
"mixed_precision": false,
|
| 27 |
-
"precision": "fp16",
|
| 28 |
-
"epochs": 1000,
|
| 29 |
-
"batch_size": 32,
|
| 30 |
-
"eval_batch_size": 16,
|
| 31 |
-
"grad_clip": 0.0,
|
| 32 |
-
"scheduler_after_epoch": true,
|
| 33 |
-
"lr": 0.001,
|
| 34 |
-
"optimizer": "radam",
|
| 35 |
-
"optimizer_params": null,
|
| 36 |
-
"lr_scheduler": null,
|
| 37 |
-
"lr_scheduler_params": {},
|
| 38 |
-
"use_grad_scaler": false,
|
| 39 |
-
"allow_tf32": false,
|
| 40 |
-
"cudnn_enable": true,
|
| 41 |
-
"cudnn_deterministic": false,
|
| 42 |
-
"cudnn_benchmark": false,
|
| 43 |
-
"training_seed": 54321,
|
| 44 |
-
"model": "xtts",
|
| 45 |
-
"num_loader_workers": 0,
|
| 46 |
-
"num_eval_loader_workers": 0,
|
| 47 |
-
"use_noise_augment": false,
|
| 48 |
-
"audio": {
|
| 49 |
-
"sample_rate": 22050,
|
| 50 |
-
"output_sample_rate": 24000
|
| 51 |
-
},
|
| 52 |
-
"use_phonemes": false,
|
| 53 |
-
"phonemizer": null,
|
| 54 |
-
"phoneme_language": null,
|
| 55 |
-
"compute_input_seq_cache": false,
|
| 56 |
-
"text_cleaner": null,
|
| 57 |
-
"enable_eos_bos_chars": false,
|
| 58 |
-
"test_sentences_file": "",
|
| 59 |
-
"phoneme_cache_path": null,
|
| 60 |
-
"characters": null,
|
| 61 |
-
"add_blank": false,
|
| 62 |
-
"batch_group_size": 0,
|
| 63 |
-
"loss_masking": null,
|
| 64 |
-
"min_audio_len": 1,
|
| 65 |
-
"max_audio_len": Infinity,
|
| 66 |
-
"min_text_len": 1,
|
| 67 |
-
"max_text_len": Infinity,
|
| 68 |
-
"compute_f0": false,
|
| 69 |
-
"compute_energy": false,
|
| 70 |
-
"compute_linear_spec": false,
|
| 71 |
-
"precompute_num_workers": 0,
|
| 72 |
-
"start_by_longest": false,
|
| 73 |
-
"shuffle": false,
|
| 74 |
-
"drop_last": false,
|
| 75 |
-
"datasets": [
|
| 76 |
-
{
|
| 77 |
-
"formatter": "",
|
| 78 |
-
"dataset_name": "",
|
| 79 |
-
"path": "",
|
| 80 |
-
"meta_file_train": "",
|
| 81 |
-
"ignored_speakers": null,
|
| 82 |
-
"language": "",
|
| 83 |
-
"phonemizer": "",
|
| 84 |
-
"meta_file_val": "",
|
| 85 |
-
"meta_file_attn_mask": ""
|
| 86 |
-
}
|
| 87 |
-
],
|
| 88 |
-
"test_sentences": [],
|
| 89 |
-
"eval_split_max_size": null,
|
| 90 |
-
"eval_split_size": 0.01,
|
| 91 |
-
"use_speaker_weighted_sampler": false,
|
| 92 |
-
"speaker_weighted_sampler_alpha": 1.0,
|
| 93 |
-
"use_language_weighted_sampler": false,
|
| 94 |
-
"language_weighted_sampler_alpha": 1.0,
|
| 95 |
-
"use_length_weighted_sampler": false,
|
| 96 |
-
"length_weighted_sampler_alpha": 1.0,
|
| 97 |
-
"model_args": {
|
| 98 |
-
"gpt_batch_size": 1,
|
| 99 |
-
"enable_redaction": false,
|
| 100 |
-
"kv_cache": true,
|
| 101 |
-
"gpt_checkpoint": null,
|
| 102 |
-
"clvp_checkpoint": null,
|
| 103 |
-
"decoder_checkpoint": null,
|
| 104 |
-
"num_chars": 255,
|
| 105 |
-
"tokenizer_file": "",
|
| 106 |
-
"gpt_max_audio_tokens": 605,
|
| 107 |
-
"gpt_max_text_tokens": 402,
|
| 108 |
-
"gpt_max_prompt_tokens": 70,
|
| 109 |
-
"gpt_layers": 30,
|
| 110 |
-
"gpt_n_model_channels": 1024,
|
| 111 |
-
"gpt_n_heads": 16,
|
| 112 |
-
"gpt_number_text_tokens": 6681,
|
| 113 |
-
"gpt_start_text_token": null,
|
| 114 |
-
"gpt_stop_text_token": null,
|
| 115 |
-
"gpt_num_audio_tokens": 1026,
|
| 116 |
-
"gpt_start_audio_token": 1024,
|
| 117 |
-
"gpt_stop_audio_token": 1025,
|
| 118 |
-
"gpt_code_stride_len": 1024,
|
| 119 |
-
"gpt_use_masking_gt_prompt_approach": true,
|
| 120 |
-
"gpt_use_perceiver_resampler": true,
|
| 121 |
-
"input_sample_rate": 22050,
|
| 122 |
-
"output_sample_rate": 24000,
|
| 123 |
-
"output_hop_length": 256,
|
| 124 |
-
"decoder_input_dim": 1024,
|
| 125 |
-
"d_vector_dim": 512,
|
| 126 |
-
"cond_d_vector_in_each_upsampling_layer": true,
|
| 127 |
-
"duration_const": 102400
|
| 128 |
-
},
|
| 129 |
-
"model_dir": null,
|
| 130 |
-
"languages": [
|
| 131 |
-
"en",
|
| 132 |
-
"es",
|
| 133 |
-
"fr",
|
| 134 |
-
"de",
|
| 135 |
-
"it",
|
| 136 |
-
"pt",
|
| 137 |
-
"pl",
|
| 138 |
-
"tr",
|
| 139 |
-
"ru",
|
| 140 |
-
"nl",
|
| 141 |
-
"cs",
|
| 142 |
-
"ar",
|
| 143 |
-
"zh-cn",
|
| 144 |
-
"hu",
|
| 145 |
-
"ko",
|
| 146 |
-
"ja",
|
| 147 |
-
"hi"
|
| 148 |
-
],
|
| 149 |
-
"temperature": 0.75,
|
| 150 |
-
"length_penalty": 1.0,
|
| 151 |
-
"repetition_penalty": 5.0,
|
| 152 |
-
"top_k": 50,
|
| 153 |
-
"top_p": 0.85,
|
| 154 |
-
"num_gpt_outputs": 1,
|
| 155 |
-
"gpt_cond_len": 30,
|
| 156 |
-
"gpt_cond_chunk_len": 4,
|
| 157 |
-
"max_ref_len": 30,
|
| 158 |
-
"sound_norm_refs": false
|
| 159 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnMulaney/dataset.zip
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:89d24c8c24bc3841d83ca2d1d721b9dde87ba4a592a114dcced545968e58dd2d
|
| 3 |
-
size 110231783
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnMulaney/model.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:5f8db4ed5378d504c1212dd0ff63cb3fb8785b8567cabb5724849f6ed701b6eb
|
| 3 |
-
size 1868275926
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnMulaney/ref.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7f605584ae157c40e54e8b8145267a290da705704f1e731a190ac4033dca7263
|
| 3 |
-
size 3433040
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/JohnMulaney/vocab.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:e7822016434b50ea91251c4341bbbe4dbb360b1c8626f86d29bacc120fa779cd
|
| 3 |
-
size 5963600
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_16000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:2e7f8116a4b19442c6ae394edda90802285a1137c2d476423cad7ec41b3c4e1e
|
| 3 |
-
size 993998
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_24000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:1d8a0143cc5d730b5762cd52bb0139a620fd19c0a4d8b9a518a041c0a4b20ee8
|
| 3 |
-
size 1490958
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/PeterGriffinFamilyGuy_generated_example.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:d2b56eda560d6a72cdd3b0a6fd096daa0ffb27056eb53707686b8e94d48cbdd4
|
| 3 |
-
size 405584
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/config.json
DELETED
|
@@ -1,158 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"output_path": "output",
|
| 3 |
-
"logger_uri": null,
|
| 4 |
-
"run_name": "run",
|
| 5 |
-
"project_name": null,
|
| 6 |
-
"run_description": "\ud83d\udc38Coqui trainer run.",
|
| 7 |
-
"print_step": 25,
|
| 8 |
-
"plot_step": 100,
|
| 9 |
-
"model_param_stats": false,
|
| 10 |
-
"wandb_entity": null,
|
| 11 |
-
"dashboard_logger": "tensorboard",
|
| 12 |
-
"save_on_interrupt": true,
|
| 13 |
-
"log_model_step": null,
|
| 14 |
-
"save_step": 10000,
|
| 15 |
-
"save_n_checkpoints": 5,
|
| 16 |
-
"save_checkpoints": true,
|
| 17 |
-
"save_all_best": false,
|
| 18 |
-
"save_best_after": 10000,
|
| 19 |
-
"target_loss": null,
|
| 20 |
-
"print_eval": false,
|
| 21 |
-
"test_delay_epochs": 0,
|
| 22 |
-
"run_eval": true,
|
| 23 |
-
"run_eval_steps": null,
|
| 24 |
-
"distributed_backend": "nccl",
|
| 25 |
-
"distributed_url": "tcp://localhost:54321",
|
| 26 |
-
"mixed_precision": false,
|
| 27 |
-
"precision": "fp16",
|
| 28 |
-
"epochs": 1000,
|
| 29 |
-
"batch_size": 32,
|
| 30 |
-
"eval_batch_size": 16,
|
| 31 |
-
"grad_clip": 0.0,
|
| 32 |
-
"scheduler_after_epoch": true,
|
| 33 |
-
"lr": 0.001,
|
| 34 |
-
"optimizer": "radam",
|
| 35 |
-
"optimizer_params": null,
|
| 36 |
-
"lr_scheduler": null,
|
| 37 |
-
"lr_scheduler_params": {},
|
| 38 |
-
"use_grad_scaler": false,
|
| 39 |
-
"allow_tf32": false,
|
| 40 |
-
"cudnn_enable": true,
|
| 41 |
-
"cudnn_deterministic": false,
|
| 42 |
-
"cudnn_benchmark": false,
|
| 43 |
-
"training_seed": 54321,
|
| 44 |
-
"model": "xtts",
|
| 45 |
-
"num_loader_workers": 0,
|
| 46 |
-
"num_eval_loader_workers": 0,
|
| 47 |
-
"use_noise_augment": false,
|
| 48 |
-
"audio": {
|
| 49 |
-
"sample_rate": 22050,
|
| 50 |
-
"output_sample_rate": 24000
|
| 51 |
-
},
|
| 52 |
-
"use_phonemes": false,
|
| 53 |
-
"phonemizer": null,
|
| 54 |
-
"phoneme_language": null,
|
| 55 |
-
"compute_input_seq_cache": false,
|
| 56 |
-
"text_cleaner": null,
|
| 57 |
-
"enable_eos_bos_chars": false,
|
| 58 |
-
"test_sentences_file": "",
|
| 59 |
-
"phoneme_cache_path": null,
|
| 60 |
-
"characters": null,
|
| 61 |
-
"add_blank": false,
|
| 62 |
-
"batch_group_size": 0,
|
| 63 |
-
"loss_masking": null,
|
| 64 |
-
"min_audio_len": 1,
|
| 65 |
-
"max_audio_len": Infinity,
|
| 66 |
-
"min_text_len": 1,
|
| 67 |
-
"max_text_len": Infinity,
|
| 68 |
-
"compute_f0": false,
|
| 69 |
-
"compute_energy": false,
|
| 70 |
-
"compute_linear_spec": false,
|
| 71 |
-
"precompute_num_workers": 0,
|
| 72 |
-
"start_by_longest": false,
|
| 73 |
-
"shuffle": false,
|
| 74 |
-
"drop_last": false,
|
| 75 |
-
"datasets": [
|
| 76 |
-
{
|
| 77 |
-
"formatter": "",
|
| 78 |
-
"dataset_name": "",
|
| 79 |
-
"path": "",
|
| 80 |
-
"meta_file_train": "",
|
| 81 |
-
"ignored_speakers": null,
|
| 82 |
-
"language": "",
|
| 83 |
-
"phonemizer": "",
|
| 84 |
-
"meta_file_val": "",
|
| 85 |
-
"meta_file_attn_mask": ""
|
| 86 |
-
}
|
| 87 |
-
],
|
| 88 |
-
"test_sentences": [],
|
| 89 |
-
"eval_split_max_size": null,
|
| 90 |
-
"eval_split_size": 0.01,
|
| 91 |
-
"use_speaker_weighted_sampler": false,
|
| 92 |
-
"speaker_weighted_sampler_alpha": 1.0,
|
| 93 |
-
"use_language_weighted_sampler": false,
|
| 94 |
-
"language_weighted_sampler_alpha": 1.0,
|
| 95 |
-
"use_length_weighted_sampler": false,
|
| 96 |
-
"length_weighted_sampler_alpha": 1.0,
|
| 97 |
-
"model_args": {
|
| 98 |
-
"gpt_batch_size": 1,
|
| 99 |
-
"enable_redaction": false,
|
| 100 |
-
"kv_cache": true,
|
| 101 |
-
"gpt_checkpoint": null,
|
| 102 |
-
"clvp_checkpoint": null,
|
| 103 |
-
"decoder_checkpoint": null,
|
| 104 |
-
"num_chars": 255,
|
| 105 |
-
"tokenizer_file": "",
|
| 106 |
-
"gpt_max_audio_tokens": 605,
|
| 107 |
-
"gpt_max_text_tokens": 402,
|
| 108 |
-
"gpt_max_prompt_tokens": 70,
|
| 109 |
-
"gpt_layers": 30,
|
| 110 |
-
"gpt_n_model_channels": 1024,
|
| 111 |
-
"gpt_n_heads": 16,
|
| 112 |
-
"gpt_number_text_tokens": 6153,
|
| 113 |
-
"gpt_start_text_token": null,
|
| 114 |
-
"gpt_stop_text_token": null,
|
| 115 |
-
"gpt_num_audio_tokens": 1026,
|
| 116 |
-
"gpt_start_audio_token": 1024,
|
| 117 |
-
"gpt_stop_audio_token": 1025,
|
| 118 |
-
"gpt_code_stride_len": 1024,
|
| 119 |
-
"gpt_use_masking_gt_prompt_approach": true,
|
| 120 |
-
"gpt_use_perceiver_resampler": true,
|
| 121 |
-
"input_sample_rate": 22050,
|
| 122 |
-
"output_sample_rate": 24000,
|
| 123 |
-
"output_hop_length": 256,
|
| 124 |
-
"decoder_input_dim": 1024,
|
| 125 |
-
"d_vector_dim": 512,
|
| 126 |
-
"cond_d_vector_in_each_upsampling_layer": true,
|
| 127 |
-
"duration_const": 102400
|
| 128 |
-
},
|
| 129 |
-
"model_dir": null,
|
| 130 |
-
"languages": [
|
| 131 |
-
"en",
|
| 132 |
-
"es",
|
| 133 |
-
"fr",
|
| 134 |
-
"de",
|
| 135 |
-
"it",
|
| 136 |
-
"pt",
|
| 137 |
-
"pl",
|
| 138 |
-
"tr",
|
| 139 |
-
"ru",
|
| 140 |
-
"nl",
|
| 141 |
-
"cs",
|
| 142 |
-
"ar",
|
| 143 |
-
"zh-cn",
|
| 144 |
-
"hu",
|
| 145 |
-
"ko",
|
| 146 |
-
"ja"
|
| 147 |
-
],
|
| 148 |
-
"temperature": 0.75,
|
| 149 |
-
"length_penalty": 1.0,
|
| 150 |
-
"repetition_penalty": 10.0,
|
| 151 |
-
"top_k": 50,
|
| 152 |
-
"top_p": 0.85,
|
| 153 |
-
"num_gpt_outputs": 1,
|
| 154 |
-
"gpt_cond_len": 30,
|
| 155 |
-
"gpt_cond_chunk_len": 4,
|
| 156 |
-
"max_ref_len": 30,
|
| 157 |
-
"sound_norm_refs": false
|
| 158 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/dataset.zip
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:5f7ef467663be687b369688f16125090d565a0bb528215607996ca1ead6e1a63
|
| 3 |
-
size 78219316
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/model.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:bc8b369e54d7d30a41e56cabbf83b942042546c81f675a72932ec604f11a4cc9
|
| 3 |
-
size 1863948630
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/speakers_xtts.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:f0f6137c19a4eab0cbbe4c99b5babacf68b1746e50da90807708c10e645b943b
|
| 3 |
-
size 7754818
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/PeterGriffinFamilyGuy/vocab.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
xtts-v2/eng/RafeBeckley/RafeBeckley.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:95d105d1ddc00542c43a7689f45443948e98004c25cdf57d43236914a36762f7
|
| 3 |
-
size 5475532
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/RafeBeckley/RafeBeckley_16000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:2560c5c8ed2d87dd9ba9121a3e9f5d03c03acd2c25a35df15192227bdad26fc5
|
| 3 |
-
size 993358
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/RafeBeckley/RafeBeckley_24000.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:fc090b0b5b91ec4d8665ddc2c945dcfc3c1fea687be84af810846b81074b1fd1
|
| 3 |
-
size 1489998
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/RafeBeckley/RafeBeckley_generated_example.wav
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:0db9dda56c2e7729e18046c35b33207dae48f44136ab7c8e38aa5471a9512d35
|
| 3 |
-
size 445520
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/RafeBeckley/config.json
DELETED
|
@@ -1,158 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"output_path": "output",
|
| 3 |
-
"logger_uri": null,
|
| 4 |
-
"run_name": "run",
|
| 5 |
-
"project_name": null,
|
| 6 |
-
"run_description": "\ud83d\udc38Coqui trainer run.",
|
| 7 |
-
"print_step": 25,
|
| 8 |
-
"plot_step": 100,
|
| 9 |
-
"model_param_stats": false,
|
| 10 |
-
"wandb_entity": null,
|
| 11 |
-
"dashboard_logger": "tensorboard",
|
| 12 |
-
"save_on_interrupt": true,
|
| 13 |
-
"log_model_step": null,
|
| 14 |
-
"save_step": 10000,
|
| 15 |
-
"save_n_checkpoints": 5,
|
| 16 |
-
"save_checkpoints": true,
|
| 17 |
-
"save_all_best": false,
|
| 18 |
-
"save_best_after": 10000,
|
| 19 |
-
"target_loss": null,
|
| 20 |
-
"print_eval": false,
|
| 21 |
-
"test_delay_epochs": 0,
|
| 22 |
-
"run_eval": true,
|
| 23 |
-
"run_eval_steps": null,
|
| 24 |
-
"distributed_backend": "nccl",
|
| 25 |
-
"distributed_url": "tcp://localhost:54321",
|
| 26 |
-
"mixed_precision": false,
|
| 27 |
-
"precision": "fp16",
|
| 28 |
-
"epochs": 1000,
|
| 29 |
-
"batch_size": 32,
|
| 30 |
-
"eval_batch_size": 16,
|
| 31 |
-
"grad_clip": 0.0,
|
| 32 |
-
"scheduler_after_epoch": true,
|
| 33 |
-
"lr": 0.001,
|
| 34 |
-
"optimizer": "radam",
|
| 35 |
-
"optimizer_params": null,
|
| 36 |
-
"lr_scheduler": null,
|
| 37 |
-
"lr_scheduler_params": {},
|
| 38 |
-
"use_grad_scaler": false,
|
| 39 |
-
"allow_tf32": false,
|
| 40 |
-
"cudnn_enable": true,
|
| 41 |
-
"cudnn_deterministic": false,
|
| 42 |
-
"cudnn_benchmark": false,
|
| 43 |
-
"training_seed": 54321,
|
| 44 |
-
"model": "xtts",
|
| 45 |
-
"num_loader_workers": 0,
|
| 46 |
-
"num_eval_loader_workers": 0,
|
| 47 |
-
"use_noise_augment": false,
|
| 48 |
-
"audio": {
|
| 49 |
-
"sample_rate": 22050,
|
| 50 |
-
"output_sample_rate": 24000
|
| 51 |
-
},
|
| 52 |
-
"use_phonemes": false,
|
| 53 |
-
"phonemizer": null,
|
| 54 |
-
"phoneme_language": null,
|
| 55 |
-
"compute_input_seq_cache": false,
|
| 56 |
-
"text_cleaner": null,
|
| 57 |
-
"enable_eos_bos_chars": false,
|
| 58 |
-
"test_sentences_file": "",
|
| 59 |
-
"phoneme_cache_path": null,
|
| 60 |
-
"characters": null,
|
| 61 |
-
"add_blank": false,
|
| 62 |
-
"batch_group_size": 0,
|
| 63 |
-
"loss_masking": null,
|
| 64 |
-
"min_audio_len": 1,
|
| 65 |
-
"max_audio_len": Infinity,
|
| 66 |
-
"min_text_len": 1,
|
| 67 |
-
"max_text_len": Infinity,
|
| 68 |
-
"compute_f0": false,
|
| 69 |
-
"compute_energy": false,
|
| 70 |
-
"compute_linear_spec": false,
|
| 71 |
-
"precompute_num_workers": 0,
|
| 72 |
-
"start_by_longest": false,
|
| 73 |
-
"shuffle": false,
|
| 74 |
-
"drop_last": false,
|
| 75 |
-
"datasets": [
|
| 76 |
-
{
|
| 77 |
-
"formatter": "",
|
| 78 |
-
"dataset_name": "",
|
| 79 |
-
"path": "",
|
| 80 |
-
"meta_file_train": "",
|
| 81 |
-
"ignored_speakers": null,
|
| 82 |
-
"language": "",
|
| 83 |
-
"phonemizer": "",
|
| 84 |
-
"meta_file_val": "",
|
| 85 |
-
"meta_file_attn_mask": ""
|
| 86 |
-
}
|
| 87 |
-
],
|
| 88 |
-
"test_sentences": [],
|
| 89 |
-
"eval_split_max_size": null,
|
| 90 |
-
"eval_split_size": 0.01,
|
| 91 |
-
"use_speaker_weighted_sampler": false,
|
| 92 |
-
"speaker_weighted_sampler_alpha": 1.0,
|
| 93 |
-
"use_language_weighted_sampler": false,
|
| 94 |
-
"language_weighted_sampler_alpha": 1.0,
|
| 95 |
-
"use_length_weighted_sampler": false,
|
| 96 |
-
"length_weighted_sampler_alpha": 1.0,
|
| 97 |
-
"model_args": {
|
| 98 |
-
"gpt_batch_size": 1,
|
| 99 |
-
"enable_redaction": false,
|
| 100 |
-
"kv_cache": true,
|
| 101 |
-
"gpt_checkpoint": null,
|
| 102 |
-
"clvp_checkpoint": null,
|
| 103 |
-
"decoder_checkpoint": null,
|
| 104 |
-
"num_chars": 255,
|
| 105 |
-
"tokenizer_file": "",
|
| 106 |
-
"gpt_max_audio_tokens": 605,
|
| 107 |
-
"gpt_max_text_tokens": 402,
|
| 108 |
-
"gpt_max_prompt_tokens": 70,
|
| 109 |
-
"gpt_layers": 30,
|
| 110 |
-
"gpt_n_model_channels": 1024,
|
| 111 |
-
"gpt_n_heads": 16,
|
| 112 |
-
"gpt_number_text_tokens": 6153,
|
| 113 |
-
"gpt_start_text_token": null,
|
| 114 |
-
"gpt_stop_text_token": null,
|
| 115 |
-
"gpt_num_audio_tokens": 1026,
|
| 116 |
-
"gpt_start_audio_token": 1024,
|
| 117 |
-
"gpt_stop_audio_token": 1025,
|
| 118 |
-
"gpt_code_stride_len": 1024,
|
| 119 |
-
"gpt_use_masking_gt_prompt_approach": true,
|
| 120 |
-
"gpt_use_perceiver_resampler": true,
|
| 121 |
-
"input_sample_rate": 22050,
|
| 122 |
-
"output_sample_rate": 24000,
|
| 123 |
-
"output_hop_length": 256,
|
| 124 |
-
"decoder_input_dim": 1024,
|
| 125 |
-
"d_vector_dim": 512,
|
| 126 |
-
"cond_d_vector_in_each_upsampling_layer": true,
|
| 127 |
-
"duration_const": 102400
|
| 128 |
-
},
|
| 129 |
-
"model_dir": null,
|
| 130 |
-
"languages": [
|
| 131 |
-
"en",
|
| 132 |
-
"es",
|
| 133 |
-
"fr",
|
| 134 |
-
"de",
|
| 135 |
-
"it",
|
| 136 |
-
"pt",
|
| 137 |
-
"pl",
|
| 138 |
-
"tr",
|
| 139 |
-
"ru",
|
| 140 |
-
"nl",
|
| 141 |
-
"cs",
|
| 142 |
-
"ar",
|
| 143 |
-
"zh-cn",
|
| 144 |
-
"hu",
|
| 145 |
-
"ko",
|
| 146 |
-
"ja"
|
| 147 |
-
],
|
| 148 |
-
"temperature": 0.75,
|
| 149 |
-
"length_penalty": 1.0,
|
| 150 |
-
"repetition_penalty": 10.0,
|
| 151 |
-
"top_k": 50,
|
| 152 |
-
"top_p": 0.85,
|
| 153 |
-
"num_gpt_outputs": 1,
|
| 154 |
-
"gpt_cond_len": 30,
|
| 155 |
-
"gpt_cond_chunk_len": 4,
|
| 156 |
-
"max_ref_len": 30,
|
| 157 |
-
"sound_norm_refs": false
|
| 158 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/RafeBeckley/dataset.zip
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c903976d2309e354ce25588345545d13d6ea3260624507beb689c7a33753dbb5
|
| 3 |
-
size 372991588
|
|
|
|
|
|
|
|
|
|
|
|
xtts-v2/eng/RafeBeckley/model.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:77394126f73ea8aea62b63c9b521128056cc0d3e1afee8437968133c276a5476
|
| 3 |
-
size 1863948630
|
|
|
|
|
|
|
|
|
|
|
|