Spaces:

Warholt
/

CaroTTS-DE

Running on Zero

App Files Files Community

Warholt committed 25 days ago

Commit

822739a

1 Parent(s): d5fd481

add character counter

Browse files

Files changed (1) hide show

app.py +20 -12

app.py CHANGED Viewed

@@ -123,7 +123,7 @@ def synthesize_speech_cpu(text: str, voice: str, pace: float = 1.0):
     Synthesize speech using ONNX models on CPU.
     """
     if not text.strip():
-        return None, ""
     # Preprocess text
     preprocessed_text = preprocess_german_text(text)
@@ -156,7 +156,7 @@ def synthesize_speech_cpu(text: str, voice: str, pace: float = 1.0):
     sample_rate = 44100
     audio_array = audio.squeeze()
-    return (sample_rate, audio_array), preprocessed_text
 # --- 4. GPU Inference Function ---
@@ -167,7 +167,7 @@ def synthesize_speech(text: str, voice: str, pace: float = 1.0):
     for the duration of this function.
     """
     if not text.strip():
-        return None, ""
     # Preprocess text: convert numbers, dates, decimals to spoken form
     preprocessed_text = preprocess_german_text(text)
@@ -202,7 +202,7 @@ def synthesize_speech(text: str, voice: str, pace: float = 1.0):
     sample_rate = 44100
     audio_array = audio.squeeze().cpu().numpy()
-    return (sample_rate, audio_array), preprocessed_text
 # --- 5. Combined Inference Function ---
@@ -232,9 +232,11 @@ with gr.Blocks(title="German TTS - Caro & Karlsson") as demo:
         with gr.Column():
             text_input = gr.Textbox(
                 label="Text to synthesize",
-                value="Guten Tag. Herzlich Willkommen zu dieser Demonstration deutscher Sprachsynthese-Modelle. Es stehen Ihnen zwei Stimmen zur Auswahl: Caro und Karlsson. Probieren Sie es aus!",
                 lines=3,
             )
             voice_dropdown = gr.Dropdown(
                 choices=["Caro", "Karlsson"], label="Voice", value="Karlsson"
             )
@@ -243,17 +245,12 @@ with gr.Blocks(title="German TTS - Caro & Karlsson") as demo:
             )
             use_gpu_checkbox = gr.Checkbox(
                 label="Use GPU (ZeroGPU)",
-                value=False,
                 info="Enable for faster inference on GPU. Disable for CPU inference (slower but always available).",
             )
             generate_btn = gr.Button("Generate Speech 🔊", variant="primary")
         with gr.Column():
-            preprocessed_output = gr.Textbox(
-                label="Preprocessed Text (what will be spoken)",
-                lines=3,
-                interactive=False,
-            )
             audio_output = gr.Audio(label="Generated Audio", type="numpy")
     # Example sentences section
@@ -292,10 +289,21 @@ with gr.Blocks(title="German TTS - Caro & Karlsson") as demo:
         label="Try these examples:",
     )
     generate_btn.click(
         fn=synthesize_speech_combined,
         inputs=[text_input, voice_dropdown, pace_slider, use_gpu_checkbox],
-        outputs=[audio_output, preprocessed_output],
     )
 if __name__ == "__main__":

     Synthesize speech using ONNX models on CPU.
     """
     if not text.strip():
+        return None
     # Preprocess text
     preprocessed_text = preprocess_german_text(text)
     sample_rate = 44100
     audio_array = audio.squeeze()
+    return (sample_rate, audio_array)
 # --- 4. GPU Inference Function ---
     for the duration of this function.
     """
     if not text.strip():
+        return None
     # Preprocess text: convert numbers, dates, decimals to spoken form
     preprocessed_text = preprocess_german_text(text)
     sample_rate = 44100
     audio_array = audio.squeeze().cpu().numpy()
+    return (sample_rate, audio_array)
 # --- 5. Combined Inference Function ---
         with gr.Column():
             text_input = gr.Textbox(
                 label="Text to synthesize",
+                value="Guten Tag. Herzlich Willkommen zu dieser Demonstration. Es stehen Ihnen zwei Stimmen zur Auswahl: Caro und Karlsson. Sie können außerdem die Sprechgeschwindigkeit anpassen. Unten finden Sie ein Paar Beispielsätze. Probieren Sie es aus!",
                 lines=3,
+                max_length=1024,
             )
+            char_counter = gr.Markdown("**Characters: 0 / 1024**")
             voice_dropdown = gr.Dropdown(
                 choices=["Caro", "Karlsson"], label="Voice", value="Karlsson"
             )
             )
             use_gpu_checkbox = gr.Checkbox(
                 label="Use GPU (ZeroGPU)",
+                value=True,
                 info="Enable for faster inference on GPU. Disable for CPU inference (slower but always available).",
             )
             generate_btn = gr.Button("Generate Speech 🔊", variant="primary")
         with gr.Column():
             audio_output = gr.Audio(label="Generated Audio", type="numpy")
     # Example sentences section
         label="Try these examples:",
     )
+    # Update character counter
+    def update_char_count(text):
+        count = len(text)
+        return f"**Characters: {count} / 1024**"
+    text_input.change(
+        fn=update_char_count,
+        inputs=text_input,
+        outputs=char_counter,
+    )
     generate_btn.click(
         fn=synthesize_speech_combined,
         inputs=[text_input, voice_dropdown, pace_slider, use_gpu_checkbox],
+        outputs=audio_output,
     )
 if __name__ == "__main__":