Y Phung Nguyen committed on
Commit
b61cc05
·
1 Parent(s): ff07c94

Upd concise prompt. Upd TTS loader

Browse files
Files changed (5) hide show
  1. pipeline.py +1 -1
  2. search.py +1 -1
  3. supervisor.py +2 -2
  4. ui.py +1 -1
  5. voice.py +50 -18
pipeline.py CHANGED
@@ -706,7 +706,7 @@ def stream_chat(
706
 
707
  medical_model_obj, medical_tokenizer = initialize_medical_model(medical_model)
708
 
709
- base_system_prompt = system_prompt if system_prompt else "As a medical specialist, provide clinical and concise answers. Use Markdown format with bullet points. Do not use tables."
710
 
711
  context_sections = []
712
  if clinical_intake_context_block:
 
706
 
707
  medical_model_obj, medical_tokenizer = initialize_medical_model(medical_model)
708
 
709
+ base_system_prompt = system_prompt if system_prompt else "As a medical specialist, provide clinical and concise answers. Use Markdown format with bullet points. Do not use tables. Provide answers directly without conversational prefixes like 'Here is...', 'This is...'. Start with the actual content immediately."
710
 
711
  context_sections = []
712
  if clinical_intake_context_block:
search.py CHANGED
@@ -208,7 +208,7 @@ Search Results:
208
  {combined_content}
209
  Summary:"""
210
 
211
- system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
212
 
213
  result = await call_agent(
214
  user_prompt=user_prompt,
 
208
  {combined_content}
209
  Summary:"""
210
 
211
+ system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately. Provide the summary directly without conversational prefixes like 'Here is...', 'This is...', or 'To summarize...'. Start with the actual content immediately."
212
 
213
  result = await call_agent(
214
  user_prompt=user_prompt,
supervisor.py CHANGED
@@ -739,7 +739,7 @@ Your task:
739
 
740
  Return the final synthesized answer in Markdown format. Do not add meta-commentary or explanations - just provide the final answer."""
741
 
742
- system_prompt = "You are a medical answer synthesis supervisor. Create comprehensive, well-structured final answers from multiple specialist responses."
743
 
744
  result = await call_agent(
745
  user_prompt=prompt,
@@ -861,7 +861,7 @@ Create an enhanced version of the answer that:
861
 
862
  Return the enhanced answer in Markdown format. Do not add meta-commentary."""
863
 
864
- system_prompt = "You are a medical answer enhancement supervisor. Improve answers based on evaluation feedback while maintaining accuracy."
865
 
866
  result = await call_agent(
867
  user_prompt=prompt,
 
739
 
740
  Return the final synthesized answer in Markdown format. Do not add meta-commentary or explanations - just provide the final answer."""
741
 
742
+ system_prompt = "You are a medical answer synthesis supervisor. Create comprehensive, well-structured final answers from multiple specialist responses. Provide the answer directly without conversational prefixes like 'Here is...', 'This is...'. Start with the actual content immediately."
743
 
744
  result = await call_agent(
745
  user_prompt=prompt,
 
861
 
862
  Return the enhanced answer in Markdown format. Do not add meta-commentary."""
863
 
864
+ system_prompt = "You are a medical answer enhancement supervisor. Improve answers based on evaluation feedback while maintaining accuracy. Provide the enhanced answer directly without conversational prefixes like 'Here is...', 'This is...'. Start with the actual content immediately."
865
 
866
  result = await call_agent(
867
  user_prompt=prompt,
ui.py CHANGED
@@ -217,7 +217,7 @@ def create_demo():
217
  )
218
 
219
  system_prompt = gr.Textbox(
220
- value="As a medical specialist, provide detailed and accurate answers based on the provided medical documents and context. Ensure all information is clinically accurate and cite sources when available.",
221
  label="System Prompt",
222
  lines=3
223
  )
 
217
  )
218
 
219
  system_prompt = gr.Textbox(
220
+ value="As a medical specialist, provide detailed and accurate answers based on the provided medical documents and context. Ensure all information is clinically accurate and cite sources when available. Provide answers directly without conversational prefixes like 'Here is...', 'This is...', or 'To answer your question...'. Start with the actual content immediately.",
221
  label="System Prompt",
222
  lines=3
223
  )
voice.py CHANGED
@@ -408,13 +408,42 @@ def _generate_speech_via_mcp(text: str):
408
  logger.warning(f"MCP TTS error (sync wrapper): {e}")
409
  return None
410
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  @spaces.GPU(max_duration=120)
 
 
 
 
412
  def generate_speech(text: str):
413
  """Generate speech from text using local maya1 TTS model (with MCP fallback).
414
 
415
  The primary path uses the local TTS model (maya-research/maya1). MCP-based
416
  TTS is only used as a last-resort fallback if the local model is unavailable
417
  or fails.
 
 
418
  """
419
  if not text or len(text.strip()) == 0:
420
  logger.warning("[TTS] Empty text provided")
@@ -422,29 +451,32 @@ def generate_speech(text: str):
422
 
423
  logger.info(f"[TTS] Generating speech for text: {text[:50]}...")
424
 
 
425
  if not TTS_AVAILABLE:
426
- logger.error("[TTS] TTS library not installed. Please install TTS to use voice generation.")
427
- # As a last resort, try MCP-based TTS if available
428
- return _generate_speech_via_mcp(text)
429
-
430
- if config.global_tts_model is None:
431
- logger.info("[TTS] TTS model not loaded, initializing...")
432
- initialize_tts_model()
433
-
434
- if config.global_tts_model is None:
435
- logger.error("[TTS] TTS model not available. Please check dependencies.")
436
- return _generate_speech_via_mcp(text)
437
 
 
438
  try:
439
- logger.info("[TTS] Running TTS generation...")
440
- wav = config.global_tts_model.tts(text)
441
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
442
- sf.write(tmp_file.name, wav, samplerate=22050)
443
- logger.info(f"[TTS] Speech generated successfully: {tmp_file.name}")
444
- return tmp_file.name
 
445
  except Exception as e:
446
- logger.error(f"[TTS] TTS error (local maya1): {e}")
447
  import traceback
448
  logger.debug(f"[TTS] Full traceback: {traceback.format_exc()}")
 
 
449
  return _generate_speech_via_mcp(text)
450
 
 
408
  logger.warning(f"MCP TTS error (sync wrapper): {e}")
409
  return None
410
 
411
def _generate_speech_with_gpu(text: str):
    """Synthesize *text* with the local maya1 TTS model.

    Lazily initializes the process-wide model (held on ``config``) on first
    use, writes the synthesized audio to a persistent temporary ``.wav``
    file, and returns that file's path. Returns ``None`` when the model
    cannot be loaded or synthesis raises, so the caller can fall back to MCP.
    """
    # Lazy initialization: the model object is shared via config.
    if config.global_tts_model is None:
        logger.info("[TTS] TTS model not loaded, initializing...")
        initialize_tts_model()

    # Still unset after the init attempt — give up on the local path.
    if config.global_tts_model is None:
        logger.error("[TTS] TTS model not available. Please check dependencies.")
        return None

    try:
        logger.info("[TTS] Running TTS generation...")
        audio = config.global_tts_model.tts(text)
        # delete=False so the path stays valid after the handle closes;
        # the UI layer consumes the file by path.
        out = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        with out:
            sf.write(out.name, audio, samplerate=22050)
        logger.info(f"[TTS] ✅ Speech generated successfully: {out.name}")
        return out.name
    except Exception as e:
        logger.error(f"[TTS] TTS error (local maya1): {e}")
        import traceback
        logger.debug(f"[TTS] Full traceback: {traceback.format_exc()}")
        return None
434
@spaces.GPU(max_duration=120)
def _generate_speech_gpu_wrapper(text: str):
    """GPU-decorated delegate for :func:`_generate_speech_with_gpu`.

    Kept as a separate thin wrapper so that GPU time is requested only after
    the caller has already confirmed the TTS library is available.
    """
    return _generate_speech_with_gpu(text)
439
def generate_speech(text: str):
    """Generate speech from text using local maya1 TTS model (with MCP fallback).

    The primary path uses the local TTS model (maya-research/maya1). MCP-based
    TTS is only used as a last-resort fallback if the local model is unavailable
    or fails.

    This function checks TTS availability before attempting GPU allocation.
    """
    # Guard clause: nothing to synthesize.
    if not text or not text.strip():
        logger.warning("[TTS] Empty text provided")
        return None

    logger.info(f"[TTS] Generating speech for text: {text[:50]}...")

    # Without the local TTS library there is no point requesting a GPU —
    # go straight to the MCP service, which runs off-GPU.
    if not TTS_AVAILABLE:
        logger.warning("[TTS] TTS library not installed. Trying MCP fallback...")
        mcp_path = _generate_speech_via_mcp(text)
        if not mcp_path:
            logger.error("[TTS] ❌ TTS library not installed and MCP fallback failed. Please install TTS: pip install TTS --no-deps && pip install coqui-tts")
            return None
        logger.info(f"[TTS] Generated via MCP fallback: {mcp_path}")
        return mcp_path

    # Local TTS is importable: synthesize on the GPU, falling back to MCP
    # on either a raised error or a soft failure (None return).
    try:
        local_path = _generate_speech_gpu_wrapper(text)
    except Exception as e:
        logger.error(f"[TTS] GPU TTS generation error: {e}")
        import traceback
        logger.debug(f"[TTS] Full traceback: {traceback.format_exc()}")
        logger.info("[TTS] Attempting MCP fallback after error...")
        return _generate_speech_via_mcp(text)

    if local_path:
        return local_path
    logger.warning("[TTS] Local TTS generation failed, trying MCP fallback...")
    return _generate_speech_via_mcp(text)
482