Warholt committed on
Commit
822739a
·
1 Parent(s): d5fd481

add character counter

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -123,7 +123,7 @@ def synthesize_speech_cpu(text: str, voice: str, pace: float = 1.0):
123
  Synthesize speech using ONNX models on CPU.
124
  """
125
  if not text.strip():
126
- return None, ""
127
 
128
  # Preprocess text
129
  preprocessed_text = preprocess_german_text(text)
@@ -156,7 +156,7 @@ def synthesize_speech_cpu(text: str, voice: str, pace: float = 1.0):
156
  sample_rate = 44100
157
  audio_array = audio.squeeze()
158
 
159
- return (sample_rate, audio_array), preprocessed_text
160
 
161
 
162
  # --- 4. GPU Inference Function ---
@@ -167,7 +167,7 @@ def synthesize_speech(text: str, voice: str, pace: float = 1.0):
167
  for the duration of this function.
168
  """
169
  if not text.strip():
170
- return None, ""
171
 
172
  # Preprocess text: convert numbers, dates, decimals to spoken form
173
  preprocessed_text = preprocess_german_text(text)
@@ -202,7 +202,7 @@ def synthesize_speech(text: str, voice: str, pace: float = 1.0):
202
  sample_rate = 44100
203
  audio_array = audio.squeeze().cpu().numpy()
204
 
205
- return (sample_rate, audio_array), preprocessed_text
206
 
207
 
208
  # --- 5. Combined Inference Function ---
@@ -232,9 +232,11 @@ with gr.Blocks(title="German TTS - Caro & Karlsson") as demo:
232
  with gr.Column():
233
  text_input = gr.Textbox(
234
  label="Text to synthesize",
235
- value="Guten Tag. Herzlich Willkommen zu dieser Demonstration deutscher Sprachsynthese-Modelle. Es stehen Ihnen zwei Stimmen zur Auswahl: Caro und Karlsson. Probieren Sie es aus!",
236
  lines=3,
 
237
  )
 
238
  voice_dropdown = gr.Dropdown(
239
  choices=["Caro", "Karlsson"], label="Voice", value="Karlsson"
240
  )
@@ -243,17 +245,12 @@ with gr.Blocks(title="German TTS - Caro & Karlsson") as demo:
243
  )
244
  use_gpu_checkbox = gr.Checkbox(
245
  label="Use GPU (ZeroGPU)",
246
- value=False,
247
  info="Enable for faster inference on GPU. Disable for CPU inference (slower but always available).",
248
  )
249
  generate_btn = gr.Button("Generate Speech 🔊", variant="primary")
250
 
251
  with gr.Column():
252
- preprocessed_output = gr.Textbox(
253
- label="Preprocessed Text (what will be spoken)",
254
- lines=3,
255
- interactive=False,
256
- )
257
  audio_output = gr.Audio(label="Generated Audio", type="numpy")
258
 
259
  # Example sentences section
@@ -292,10 +289,21 @@ with gr.Blocks(title="German TTS - Caro & Karlsson") as demo:
292
  label="Try these examples:",
293
  )
294
 
 
 
 
 
 
 
 
 
 
 
 
295
  generate_btn.click(
296
  fn=synthesize_speech_combined,
297
  inputs=[text_input, voice_dropdown, pace_slider, use_gpu_checkbox],
298
- outputs=[audio_output, preprocessed_output],
299
  )
300
 
301
  if __name__ == "__main__":
 
123
  Synthesize speech using ONNX models on CPU.
124
  """
125
  if not text.strip():
126
+ return None
127
 
128
  # Preprocess text
129
  preprocessed_text = preprocess_german_text(text)
 
156
  sample_rate = 44100
157
  audio_array = audio.squeeze()
158
 
159
+ return (sample_rate, audio_array)
160
 
161
 
162
  # --- 4. GPU Inference Function ---
 
167
  for the duration of this function.
168
  """
169
  if not text.strip():
170
+ return None
171
 
172
  # Preprocess text: convert numbers, dates, decimals to spoken form
173
  preprocessed_text = preprocess_german_text(text)
 
202
  sample_rate = 44100
203
  audio_array = audio.squeeze().cpu().numpy()
204
 
205
+ return (sample_rate, audio_array)
206
 
207
 
208
  # --- 5. Combined Inference Function ---
 
232
  with gr.Column():
233
  text_input = gr.Textbox(
234
  label="Text to synthesize",
235
+ value="Guten Tag. Herzlich Willkommen zu dieser Demonstration. Es stehen Ihnen zwei Stimmen zur Auswahl: Caro und Karlsson. Sie können außerdem die Sprechgeschwindigkeit anpassen. Unten finden Sie ein Paar Beispielsätze. Probieren Sie es aus!",
236
  lines=3,
237
+ max_length=1024,
238
  )
239
+ char_counter = gr.Markdown("**Characters: 0 / 1024**")
240
  voice_dropdown = gr.Dropdown(
241
  choices=["Caro", "Karlsson"], label="Voice", value="Karlsson"
242
  )
 
245
  )
246
  use_gpu_checkbox = gr.Checkbox(
247
  label="Use GPU (ZeroGPU)",
248
+ value=True,
249
  info="Enable for faster inference on GPU. Disable for CPU inference (slower but always available).",
250
  )
251
  generate_btn = gr.Button("Generate Speech 🔊", variant="primary")
252
 
253
  with gr.Column():
 
 
 
 
 
254
  audio_output = gr.Audio(label="Generated Audio", type="numpy")
255
 
256
  # Example sentences section
 
289
  label="Try these examples:",
290
  )
291
 
292
+ # Update character counter
293
+ def update_char_count(text):
294
+ count = len(text)
295
+ return f"**Characters: {count} / 1024**"
296
+
297
+ text_input.change(
298
+ fn=update_char_count,
299
+ inputs=text_input,
300
+ outputs=char_counter,
301
+ )
302
+
303
  generate_btn.click(
304
  fn=synthesize_speech_combined,
305
  inputs=[text_input, voice_dropdown, pace_slider, use_gpu_checkbox],
306
+ outputs=audio_output,
307
  )
308
 
309
  if __name__ == "__main__":