LiamKhoaLe committed on
Commit fce8688 · 1 parent: 6e8bf5a

Remove DeepSeek in favor of Gemini MCP, remove the PDF reader library in favor of Gemini OCR, and remove Whisper in favor of Gemini MCP audio transcription

Files changed (2)
  1. app.py +369 -333
  2. requirements.txt +1 -3
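For reference, every external dependency the updated app.py touches is configured through environment variables. The sketch below is illustrative only (not part of this commit); it collects the variable names and defaults that appear in the diff, with placeholder values for the secrets.

```python
# Minimal environment sketch for the post-commit app.py.
# Variable names and defaults are taken from the diff; the secret values are placeholders.
import os

os.environ.setdefault("HF_TOKEN", "hf_...")                           # required; app.py raises without it
os.environ.setdefault("GEMINI_API_KEY", "...")                        # read by app.py (Gemini MCP configuration)
os.environ.setdefault("GEMINI_MODEL", "gemini-2.5-flash")             # default for harder tasks
os.environ.setdefault("GEMINI_MODEL_LITE", "gemini-2.5-flash-lite")   # parsing and simple tasks
os.environ.setdefault("MCP_SERVER_COMMAND", "npx")
os.environ.setdefault("MCP_SERVER_ARGS", "-y @aistudio-mcp/server")
```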
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import os
3
- import PyPDF2
4
  import logging
5
  import torch
6
  import threading
@@ -33,7 +33,6 @@ from llama_index.llms.huggingface import HuggingFaceLLM
33
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
34
  from tqdm import tqdm
35
  from langdetect import detect, LangDetectException
36
- import whisper
37
  # MCP imports
38
  try:
39
  from mcp import ClientSession, StdioServerParameters
@@ -67,7 +66,6 @@ logger = logging.getLogger(__name__)
67
  hf_logging.set_verbosity_error()
68
 
69
  # Model configurations
70
- TRANSLATION_MODEL = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
71
  MEDSWIN_MODELS = {
72
  "MedSwin SFT": "MedSwin/MedSwin-7B-SFT",
73
  "MedSwin KD": "MedSwin/MedSwin-7B-KD",
@@ -75,12 +73,16 @@ MEDSWIN_MODELS = {
75
  }
76
  DEFAULT_MEDICAL_MODEL = "MedSwin TA"
77
  EMBEDDING_MODEL = "abhinand/MedEmbed-large-v0.1" # Domain-tuned medical embedding model
78
- WHISPER_MODEL = "openai/whisper-large-v3-turbo"
79
  TTS_MODEL = "maya-research/maya1"
80
  HF_TOKEN = os.environ.get("HF_TOKEN")
81
  if not HF_TOKEN:
82
  raise ValueError("HF_TOKEN not found in environment variables")
83
 
84
  # Custom UI
85
  TITLE = "<h1><center>🩺 MedLLM Agent - Medical RAG & Web Search System</center></h1>"
86
  DESCRIPTION = """
@@ -188,12 +190,9 @@ CSS = """
188
  """
189
 
190
  # Global model storage
191
- global_translation_model = None
192
- global_translation_tokenizer = None
193
  global_medical_models = {}
194
  global_medical_tokenizers = {}
195
  global_file_info = {}
196
- global_whisper_model = None
197
  global_tts_model = None
198
 
199
  # MCP client storage
@@ -201,10 +200,10 @@ global_mcp_session = None
201
  global_mcp_stdio_ctx = None # Store stdio context to keep it alive
202
  global_mcp_lock = threading.Lock() # Lock for thread-safe session access
203
  # MCP server configuration via environment variables
204
- # Example: MCP_SERVER_COMMAND="python" MCP_SERVER_ARGS="-m duckduckgo_mcp_server"
205
- # Or: MCP_SERVER_COMMAND="npx" MCP_SERVER_ARGS="-y @modelcontextprotocol/server-duckduckgo"
206
- MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "python")
207
- MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", "-m duckduckgo_mcp_server").split() if os.environ.get("MCP_SERVER_ARGS") else ["-m", "duckduckgo_mcp_server"]
208
 
209
  async def get_mcp_session():
210
  """Get or create MCP client session with proper context management"""
@@ -265,20 +264,55 @@ async def get_mcp_session():
265
  global_mcp_stdio_ctx = None
266
  return None
267
 
268
- def initialize_translation_model():
269
- """Initialize DeepSeek-R1 model for translation purposes"""
270
- global global_translation_model, global_translation_tokenizer
271
- if global_translation_model is None or global_translation_tokenizer is None:
272
- logger.info("Initializing translation model (DeepSeek-R1-8B)...")
273
- global_translation_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL, token=HF_TOKEN)
274
- global_translation_model = AutoModelForCausalLM.from_pretrained(
275
- TRANSLATION_MODEL,
276
- device_map="auto",
277
- trust_remote_code=True,
278
- token=HF_TOKEN,
279
- torch_dtype=torch.float16
280
- )
281
- logger.info("Translation model initialized successfully")
282
 
283
  def initialize_medical_model(model_name: str):
284
  """Initialize medical model (MedSwin) - download on demand"""
@@ -299,57 +333,6 @@ def initialize_medical_model(model_name: str):
299
  logger.info(f"Medical model {model_name} initialized successfully")
300
  return global_medical_models[model_name], global_medical_tokenizers[model_name]
301
 
302
- def initialize_whisper_model():
303
- """Initialize Whisper model for speech-to-text"""
304
- global global_whisper_model
305
- if global_whisper_model is None:
306
- logger.info("Initializing Whisper model for speech transcription...")
307
- try:
308
- # Check if we're in a spaces environment (has spaces patching)
309
- in_spaces_env = hasattr(torch, '_spaces_patched') or 'spaces' in str(type(torch.Tensor.to))
310
-
311
- # Try loading from HuggingFace with device handling for spaces compatibility
312
- try:
313
- if in_spaces_env:
314
- # In spaces environment, load on CPU and don't move to device
315
- logger.info("Detected spaces environment, loading Whisper on CPU")
316
- global_whisper_model = whisper.load_model("large-v3-turbo", device="cpu")
317
- # Don't move to GPU in spaces environment
318
- else:
319
- # Normal environment, let whisper handle device
320
- global_whisper_model = whisper.load_model("large-v3-turbo")
321
- except NotImplementedError as e:
322
- # Handle sparse tensor error from spaces library
323
- if "SparseTensorImpl" in str(e) or "storage" in str(e).lower():
324
- logger.warning(f"Spaces library compatibility issue: {e}")
325
- logger.info("Trying to load Whisper model with workaround...")
326
- try:
327
- # Try loading on CPU explicitly
328
- global_whisper_model = whisper.load_model("base", device="cpu")
329
- except Exception as e2:
330
- logger.error(f"Failed to load Whisper with workaround: {e2}")
331
- global_whisper_model = None
332
- return None
333
- else:
334
- raise
335
- except Exception as e1:
336
- logger.warning(f"Failed to load large-v3-turbo: {e1}")
337
- try:
338
- # Fallback to base model with CPU
339
- global_whisper_model = whisper.load_model("base", device="cpu")
340
- except Exception as e2:
341
- logger.error(f"Failed to load Whisper base model: {e2}")
342
- # Set to None to indicate failure - will use MCP or skip transcription
343
- global_whisper_model = None
344
- return None
345
- except Exception as e:
346
- logger.error(f"Whisper model initialization error: {e}")
347
- import traceback
348
- logger.debug(traceback.format_exc())
349
- global_whisper_model = None
350
- return None
351
- logger.info("Whisper model initialized successfully")
352
- return global_whisper_model
353
 
354
  def initialize_tts_model():
355
  """Initialize TTS model for text-to-speech"""
@@ -368,46 +351,41 @@ def initialize_tts_model():
368
  global_tts_model = None
369
  return global_tts_model
370
 
371
- async def transcribe_audio_mcp(audio_path: str) -> str:
372
- """Transcribe audio using MCP Whisper tool"""
373
  if not MCP_AVAILABLE:
374
  return ""
375
 
376
  try:
377
- # Get MCP session
378
- session = await get_mcp_session()
379
- if session is None:
380
- return ""
381
 
382
- # Find Whisper tool
383
- tools = await session.list_tools()
384
- whisper_tool = None
385
- for tool in tools.tools:
386
- if "whisper" in tool.name.lower() or "transcribe" in tool.name.lower() or "speech" in tool.name.lower():
387
- whisper_tool = tool
388
- logger.info(f"Found MCP Whisper tool: {tool.name}")
389
- break
390
 
391
- if whisper_tool:
392
- result = await session.call_tool(
393
- whisper_tool.name,
394
- arguments={"audio_path": audio_path, "language": "en"}
395
- )
396
-
397
- # Parse result
398
- if hasattr(result, 'content') and result.content:
399
- for item in result.content:
400
- if hasattr(item, 'text'):
401
- return item.text.strip()
402
- return ""
 
403
  except Exception as e:
404
- logger.debug(f"MCP transcription error: {e}")
 
 
405
  return ""
406
 
407
  def transcribe_audio(audio):
408
- """Transcribe audio to text using Whisper (with MCP fallback)"""
409
- global global_whisper_model
410
-
411
  if audio is None:
412
  return ""
413
 
@@ -425,40 +403,29 @@ def transcribe_audio(audio):
425
  else:
426
  audio_path = audio
427
 
428
- # Try MCP first if available
429
  if MCP_AVAILABLE:
430
  try:
431
  loop = asyncio.get_event_loop()
432
  if loop.is_running():
433
  try:
434
  import nest_asyncio
435
- transcribed = nest_asyncio.run(transcribe_audio_mcp(audio_path))
436
  if transcribed:
437
- logger.info(f"Transcribed via MCP: {transcribed}")
438
  return transcribed
439
- except:
440
- pass
441
  else:
442
- transcribed = loop.run_until_complete(transcribe_audio_mcp(audio_path))
443
  if transcribed:
444
- logger.info(f"Transcribed via MCP: {transcribed}")
445
  return transcribed
446
  except Exception as e:
447
- logger.debug(f"MCP transcription not available: {e}")
448
 
449
- # Fallback to local Whisper model
450
- if global_whisper_model is None:
451
- initialize_whisper_model()
452
-
453
- if global_whisper_model is None:
454
- logger.warning("Whisper model not available and MCP not working")
455
- return ""
456
-
457
- # Transcribe
458
- result = global_whisper_model.transcribe(audio_path, language="en")
459
- transcribed_text = result["text"].strip()
460
- logger.info(f"Transcribed: {transcribed_text}")
461
- return transcribed_text
462
  except Exception as e:
463
  logger.error(f"Transcription error: {e}")
464
  return ""
@@ -594,103 +561,52 @@ def detect_language(text: str) -> str:
594
  except LangDetectException:
595
  return "en" # Default to English if detection fails
596
 
597
- async def translate_text_mcp(text: str, target_lang: str = "en", source_lang: str = None) -> str:
598
- """Translate text using MCP translation tool"""
599
- if not MCP_AVAILABLE:
600
- return ""
601
-
602
- try:
603
- # Get MCP session
604
- session = await get_mcp_session()
605
- if session is None:
606
- return ""
607
-
608
- # Find translation tool
609
- tools = await session.list_tools()
610
- translate_tool = None
611
- for tool in tools.tools:
612
- if "translate" in tool.name.lower() or "translation" in tool.name.lower():
613
- translate_tool = tool
614
- logger.info(f"Found MCP translation tool: {tool.name}")
615
- break
616
-
617
- if translate_tool:
618
- args = {"text": text, "target_language": target_lang}
619
- if source_lang:
620
- args["source_language"] = source_lang
621
-
622
- result = await session.call_tool(
623
- translate_tool.name,
624
- arguments=args
625
- )
626
-
627
- # Parse result
628
- if hasattr(result, 'content') and result.content:
629
- for item in result.content:
630
- if hasattr(item, 'text'):
631
- return item.text.strip()
632
- return ""
633
- except Exception as e:
634
- logger.debug(f"MCP translation error: {e}")
635
- return ""
636
-
637
- def translate_text(text: str, target_lang: str = "en", source_lang: str = None) -> str:
638
- """Translate text using DeepSeek-R1 model (with MCP fallback)"""
639
- # Try MCP first if available
640
- if MCP_AVAILABLE:
641
- try:
642
- loop = asyncio.get_event_loop()
643
- if loop.is_running():
644
- try:
645
- import nest_asyncio
646
- translated = nest_asyncio.run(translate_text_mcp(text, target_lang, source_lang))
647
- if translated:
648
- logger.info(f"Translated via MCP: {translated[:50]}...")
649
- return translated
650
- except:
651
- pass
652
- else:
653
- translated = loop.run_until_complete(translate_text_mcp(text, target_lang, source_lang))
654
- if translated:
655
- logger.info(f"Translated via MCP: {translated[:50]}...")
656
- return translated
657
- except Exception as e:
658
- logger.debug(f"MCP translation not available: {e}")
659
-
660
- # Fallback to local translation model
661
- global global_translation_model, global_translation_tokenizer
662
- if global_translation_model is None or global_translation_tokenizer is None:
663
- initialize_translation_model()
664
-
665
  if source_lang:
666
- prompt = f"Translate the following {source_lang} text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
667
  else:
668
- prompt = f"Translate the following text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
669
 
670
- messages = [
671
- {"role": "system", "content": "You are a professional translator. Translate accurately and concisely."},
672
- {"role": "user", "content": prompt}
673
- ]
674
 
675
- prompt_text = global_translation_tokenizer.apply_chat_template(
676
- messages,
677
- tokenize=False,
678
- add_generation_prompt=True
 
679
  )
680
 
681
- inputs = global_translation_tokenizer(prompt_text, return_tensors="pt").to(global_translation_model.device)
682
 
683
- with torch.no_grad():
684
- outputs = global_translation_model.generate(
685
- **inputs,
686
- max_new_tokens=512,
687
- temperature=0.3,
688
- do_sample=True,
689
- pad_token_id=global_translation_tokenizer.eos_token_id
690
- )
691
 
692
- response = global_translation_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
693
- return response.strip()
694
 
695
  async def search_web_mcp(query: str, max_results: int = 5) -> list:
696
  """Search web using MCP tools"""
@@ -898,15 +814,11 @@ def search_web(query: str, max_results: int = 5) -> list:
898
  else:
899
  return search_web_fallback(query, max_results)
900
 
901
- def summarize_web_content(content_list: list, query: str) -> str:
902
- """Summarize web search results using DeepSeek-R1 model"""
903
- global global_translation_model, global_translation_tokenizer
904
- if global_translation_model is None or global_translation_tokenizer is None:
905
- initialize_translation_model()
906
-
907
  combined_content = "\n\n".join([f"Source: {item['title']}\n{item['content']}" for item in content_list[:3]])
908
 
909
- prompt = f"""Summarize the following web search results related to the query: "{query}"
910
 
911
  Extract key medical information, facts, and insights. Be concise and focus on reliable information.
912
 
@@ -915,41 +827,59 @@ Search Results:
915
 
916
  Summary:"""
917
 
918
- messages = [
919
- {"role": "system", "content": "You are a medical information summarizer. Extract and summarize key medical facts accurately."},
920
- {"role": "user", "content": prompt}
921
- ]
922
 
923
- prompt_text = global_translation_tokenizer.apply_chat_template(
924
- messages,
925
- tokenize=False,
926
- add_generation_prompt=True
 
927
  )
928
 
929
- inputs = global_translation_tokenizer(prompt_text, return_tensors="pt").to(global_translation_model.device)
930
 
931
- with torch.no_grad():
932
- outputs = global_translation_model.generate(
933
- **inputs,
934
- max_new_tokens=512,
935
- temperature=0.5,
936
- do_sample=True,
937
- pad_token_id=global_translation_tokenizer.eos_token_id
938
- )
939
 
940
- summary = global_translation_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
941
- return summary.strip()
 
 
942
 
943
  def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
944
- """Get LLM for RAG indexing (uses translation model)"""
945
- if global_translation_model is None or global_translation_tokenizer is None:
946
- initialize_translation_model()
947
 
948
  return HuggingFaceLLM(
949
  context_window=4096,
950
  max_new_tokens=max_new_tokens,
951
- tokenizer=global_translation_tokenizer,
952
- model=global_translation_model,
953
  generate_kwargs={
954
  "do_sample": True,
955
  "temperature": temperature,
@@ -958,15 +888,8 @@ def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
958
  }
959
  )
960
 
961
- def autonomous_reasoning(query: str, history: list) -> dict:
962
- """
963
- Autonomous reasoning: Analyze query complexity, intent, and information needs.
964
- Returns reasoning analysis with query type, complexity, and required information sources.
965
- """
966
- global global_translation_model, global_translation_tokenizer
967
- if global_translation_model is None or global_translation_tokenizer is None:
968
- initialize_translation_model()
969
-
970
  reasoning_prompt = f"""Analyze this medical query and provide structured reasoning:
971
 
972
  Query: "{query}"
@@ -989,30 +912,16 @@ Respond in JSON format:
989
  "sub_questions": ["..."]
990
  }}"""
991
 
992
- messages = [
993
- {"role": "system", "content": "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."},
994
- {"role": "user", "content": reasoning_prompt}
995
- ]
996
 
997
- prompt_text = global_translation_tokenizer.apply_chat_template(
998
- messages,
999
- tokenize=False,
1000
- add_generation_prompt=True
 
1001
  )
1002
 
1003
- inputs = global_translation_tokenizer(prompt_text, return_tensors="pt").to(global_translation_model.device)
1004
-
1005
- with torch.no_grad():
1006
- outputs = global_translation_model.generate(
1007
- **inputs,
1008
- max_new_tokens=512,
1009
- temperature=0.3,
1010
- do_sample=True,
1011
- pad_token_id=global_translation_tokenizer.eos_token_id
1012
- )
1013
-
1014
- response = global_translation_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
1015
-
1016
  # Parse JSON response (with fallback)
1017
  try:
1018
  # Extract JSON from response
@@ -1036,6 +945,46 @@ Respond in JSON format:
1036
  logger.info(f"Reasoning analysis: {reasoning}")
1037
  return reasoning
1038
 
1039
  def create_execution_plan(reasoning: dict, query: str, has_rag_index: bool) -> dict:
1040
  """
1041
  Planning: Create multi-step execution plan based on reasoning analysis.
@@ -1132,15 +1081,8 @@ def autonomous_execution_strategy(reasoning: dict, plan: dict, use_rag: bool, us
1132
 
1133
  return strategy
1134
 
1135
- def self_reflection(answer: str, query: str, reasoning: dict) -> dict:
1136
- """
1137
- Self-reflection: Evaluate answer quality and completeness.
1138
- Returns reflection with quality score and improvement suggestions.
1139
- """
1140
- global global_translation_model, global_translation_tokenizer
1141
- if global_translation_model is None or global_translation_tokenizer is None:
1142
- initialize_translation_model()
1143
-
1144
  reflection_prompt = f"""Evaluate this medical answer for quality and completeness:
1145
 
1146
  Query: "{query}"
@@ -1163,31 +1105,16 @@ Respond in JSON:
1163
  "improvement_suggestions": ["..."]
1164
  }}"""
1165
 
1166
- messages = [
1167
- {"role": "system", "content": "You are a medical answer quality evaluator. Provide honest, constructive feedback."},
1168
- {"role": "user", "content": reflection_prompt}
1169
- ]
1170
 
1171
- prompt_text = global_translation_tokenizer.apply_chat_template(
1172
- messages,
1173
- tokenize=False,
1174
- add_generation_prompt=True
 
1175
  )
1176
 
1177
- inputs = global_translation_tokenizer(prompt_text, return_tensors="pt").to(global_translation_model.device)
1178
-
1179
- with torch.no_grad():
1180
- outputs = global_translation_model.generate(
1181
- **inputs,
1182
- max_new_tokens=256,
1183
- temperature=0.3,
1184
- do_sample=True,
1185
- pad_token_id=global_translation_tokenizer.eos_token_id
1186
- )
1187
-
1188
- response = global_translation_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
1189
-
1190
- import json
1191
  try:
1192
  json_start = response.find('{')
1193
  json_end = response.rfind('}') + 1
@@ -1201,19 +1128,139 @@ Respond in JSON:
1201
  logger.info(f"Self-reflection score: {reflection.get('overall_score', 'N/A')}")
1202
  return reflection
1203
 
1204
  def extract_text_from_document(file):
 
1205
  file_name = file.name
1206
  file_extension = os.path.splitext(file_name)[1].lower()
1207
 
 
1208
  if file_extension == '.txt':
1209
  text = file.read().decode('utf-8')
1210
  return text, len(text.split()), None
1211
- elif file_extension == '.pdf':
1212
- pdf_reader = PyPDF2.PdfReader(file)
1213
- text = "\n\n".join(page.extract_text() for page in pdf_reader.pages)
1214
- return text, len(text.split()), None
1215
- else:
1216
- return None, 0, ValueError(f"Unsupported file format: {file_extension}")
1217
 
1218
  @spaces.GPU(max_duration=120)
1219
  def create_or_update_index(files, request: gr.Request):
@@ -1611,7 +1658,7 @@ def create_demo():
1611
  file_upload = gr.File(
1612
  file_count="multiple",
1613
  label="Drag and Drop Files Here",
1614
- file_types=[".pdf", ".txt"],
1615
  elem_id="file-upload"
1616
  )
1617
  upload_button = gr.Button("Upload & Index", elem_classes="upload-button")
@@ -1859,26 +1906,8 @@ def create_demo():
1859
  if __name__ == "__main__":
1860
  # Preload models on startup
1861
  logger.info("Preloading models on startup...")
1862
- logger.info("Initializing translation model (DeepSeek-R1)...")
1863
- try:
1864
- initialize_translation_model()
1865
- logger.info("Translation model (DeepSeek-R1) preloaded successfully!")
1866
- except Exception as e:
1867
- logger.error(f"Translation model preloading failed: {e}")
1868
- logger.warning("Translation features may be limited")
1869
  logger.info("Initializing default medical model (MedSwin TA)...")
1870
  initialize_medical_model(DEFAULT_MEDICAL_MODEL)
1871
- logger.info("Preloading Whisper model...")
1872
- try:
1873
- initialize_whisper_model()
1874
- if global_whisper_model is not None:
1875
- logger.info("Whisper model preloaded successfully!")
1876
- else:
1877
- logger.warning("Whisper model not available - will use MCP or disable transcription")
1878
- except Exception as e:
1879
- logger.warning(f"Whisper model preloading failed: {e}")
1880
- logger.warning("Speech-to-text will use MCP or be disabled")
1881
- global_whisper_model = None
1882
  logger.info("Preloading TTS model...")
1883
  try:
1884
  initialize_tts_model()
@@ -1889,6 +1918,13 @@ if __name__ == "__main__":
1889
  except Exception as e:
1890
  logger.warning(f"TTS model preloading failed: {e}")
1891
  logger.warning("Text-to-speech will use MCP or be disabled")
1892
  logger.info("Model preloading complete!")
1893
  demo = create_demo()
1894
  demo.launch()
 
1
  import gradio as gr
2
  import os
3
+ import base64
4
  import logging
5
  import torch
6
  import threading
 
33
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
34
  from tqdm import tqdm
35
  from langdetect import detect, LangDetectException
 
36
  # MCP imports
37
  try:
38
  from mcp import ClientSession, StdioServerParameters
 
66
  hf_logging.set_verbosity_error()
67
 
68
  # Model configurations
 
69
  MEDSWIN_MODELS = {
70
  "MedSwin SFT": "MedSwin/MedSwin-7B-SFT",
71
  "MedSwin KD": "MedSwin/MedSwin-7B-KD",
 
73
  }
74
  DEFAULT_MEDICAL_MODEL = "MedSwin TA"
75
  EMBEDDING_MODEL = "abhinand/MedEmbed-large-v0.1" # Domain-tuned medical embedding model
 
76
  TTS_MODEL = "maya-research/maya1"
77
  HF_TOKEN = os.environ.get("HF_TOKEN")
78
  if not HF_TOKEN:
79
  raise ValueError("HF_TOKEN not found in environment variables")
80
 
81
+ # Gemini MCP configuration
82
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
83
+ GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash") # Default for harder tasks
84
+ GEMINI_MODEL_LITE = os.environ.get("GEMINI_MODEL_LITE", "gemini-2.5-flash-lite") # For parsing and simple tasks
85
+
86
  # Custom UI
87
  TITLE = "<h1><center>🩺 MedLLM Agent - Medical RAG & Web Search System</center></h1>"
88
  DESCRIPTION = """
 
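The GEMINI_MODEL / GEMINI_MODEL_LITE split introduced above is applied call by call later in this diff: the full model handles reasoning, self-reflection, and summarization, while the lite model handles transcription, translation, and document parsing. A hypothetical helper (not in the commit) that expresses the same routing:

```python
# Illustrative routing helper; mirrors the per-call model choices made in this commit.
# Assumes the GEMINI_MODEL and GEMINI_MODEL_LITE constants defined just above.
HEAVY_TASKS = {"reasoning", "reflection", "summarization"}

def pick_gemini_model(task: str) -> str:
    """Return the full Gemini model for heavy tasks, the lite model otherwise."""
    return GEMINI_MODEL if task in HEAVY_TASKS else GEMINI_MODEL_LITE
```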
190
  """
191
 
192
  # Global model storage
 
 
193
  global_medical_models = {}
194
  global_medical_tokenizers = {}
195
  global_file_info = {}
 
196
  global_tts_model = None
197
 
198
  # MCP client storage
 
200
  global_mcp_stdio_ctx = None # Store stdio context to keep it alive
201
  global_mcp_lock = threading.Lock() # Lock for thread-safe session access
202
  # MCP server configuration via environment variables
203
+ # Gemini MCP server: aistudio-mcp-server
204
+ # Example: MCP_SERVER_COMMAND="npx" MCP_SERVER_ARGS="-y @aistudio-mcp/server"
205
+ MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "npx")
206
+ MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", "-y @aistudio-mcp/server").split() if os.environ.get("MCP_SERVER_ARGS") else ["-y", "@aistudio-mcp/server"]
207
 
208
  async def get_mcp_session():
209
  """Get or create MCP client session with proper context management"""
 
264
  global_mcp_stdio_ctx = None
265
  return None
266
 
267
+ async def call_gemini_mcp(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
268
+ """Call Gemini MCP generate_content tool"""
269
+ if not MCP_AVAILABLE:
270
+ return ""
271
+
272
+ try:
273
+ session = await get_mcp_session()
274
+ if session is None:
275
+ return ""
276
+
277
+ # Find generate_content tool
278
+ tools = await session.list_tools()
279
+ generate_tool = None
280
+ for tool in tools.tools:
281
+ if "generate_content" in tool.name.lower() or "generate" in tool.name.lower():
282
+ generate_tool = tool
283
+ logger.info(f"Found Gemini MCP tool: {tool.name}")
284
+ break
285
+
286
+ if not generate_tool:
287
+ logger.warning("Gemini MCP generate_content tool not found")
288
+ return ""
289
+
290
+ # Prepare arguments
291
+ arguments = {
292
+ "user_prompt": user_prompt
293
+ }
294
+ if system_prompt:
295
+ arguments["system_prompt"] = system_prompt
296
+ if files:
297
+ arguments["files"] = files
298
+ if model:
299
+ arguments["model"] = model
300
+ if temperature is not None:
301
+ arguments["temperature"] = temperature
302
+
303
+ result = await session.call_tool(generate_tool.name, arguments=arguments)
304
+
305
+ # Parse result
306
+ if hasattr(result, 'content') and result.content:
307
+ for item in result.content:
308
+ if hasattr(item, 'text'):
309
+ return item.text.strip()
310
+ return ""
311
+ except Exception as e:
312
+ logger.error(f"Gemini MCP call error: {e}")
313
+ import traceback
314
+ logger.debug(traceback.format_exc())
315
+ return ""
316
 
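call_gemini_mcp above resolves the tool dynamically: it lists the server's tools, picks the first whose name contains "generate_content" (or "generate"), and then calls it with the prepared arguments. A hedged sketch of that lookup factored into a reusable coroutine (hypothetical; the commit inlines it instead):

```python
# Hypothetical helper: the list_tools / substring-match lookup used by call_gemini_mcp.
async def find_mcp_tool(session, *keywords):
    """Return the first MCP tool whose name contains any of the given keywords, else None."""
    tools = await session.list_tools()
    for tool in tools.tools:
        name = tool.name.lower()
        if any(keyword in name for keyword in keywords):
            return tool
    return None

# Usage inside call_gemini_mcp would then reduce to:
#   generate_tool = await find_mcp_tool(session, "generate_content", "generate")
```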
317
  def initialize_medical_model(model_name: str):
318
  """Initialize medical model (MedSwin) - download on demand"""
 
333
  logger.info(f"Medical model {model_name} initialized successfully")
334
  return global_medical_models[model_name], global_medical_tokenizers[model_name]
335
 
336
 
337
  def initialize_tts_model():
338
  """Initialize TTS model for text-to-speech"""
 
351
  global_tts_model = None
352
  return global_tts_model
353
 
354
+ async def transcribe_audio_gemini(audio_path: str) -> str:
355
+ """Transcribe audio using Gemini MCP"""
356
  if not MCP_AVAILABLE:
357
  return ""
358
 
359
  try:
360
+ # Ensure we have an absolute path
361
+ audio_path_abs = os.path.abspath(audio_path)
 
 
362
 
363
+ # Prepare file object for Gemini MCP using path (as per Gemini MCP documentation)
364
+ files = [{
365
+ "path": audio_path_abs
366
+ }]
367
 
368
+ # Use exact prompts from Gemini MCP documentation
369
+ system_prompt = "You are a professional transcription service. Provide accurate, well-formatted transcripts."
370
+ user_prompt = "Please transcribe this audio file. Include speaker identification if multiple speakers are present, and format it with proper punctuation and paragraphs, remove mumble, ignore non-verbal noises."
371
+
372
+ result = await call_gemini_mcp(
373
+ user_prompt=user_prompt,
374
+ system_prompt=system_prompt,
375
+ files=files,
376
+ model=GEMINI_MODEL_LITE, # Use lite model for transcription
377
+ temperature=0.2
378
+ )
379
+
380
+ return result.strip()
381
  except Exception as e:
382
+ logger.error(f"Gemini transcription error: {e}")
383
+ import traceback
384
+ logger.debug(traceback.format_exc())
385
  return ""
386
 
387
  def transcribe_audio(audio):
388
+ """Transcribe audio to text using Gemini MCP"""
 
 
389
  if audio is None:
390
  return ""
391
 
 
403
  else:
404
  audio_path = audio
405
 
406
+ # Use Gemini MCP for transcription
407
  if MCP_AVAILABLE:
408
  try:
409
  loop = asyncio.get_event_loop()
410
  if loop.is_running():
411
  try:
412
  import nest_asyncio
413
+ transcribed = nest_asyncio.run(transcribe_audio_gemini(audio_path))
414
  if transcribed:
415
+ logger.info(f"Transcribed via Gemini MCP: {transcribed[:50]}...")
416
  return transcribed
417
+ except Exception as e:
418
+ logger.error(f"Error in nested async transcription: {e}")
419
  else:
420
+ transcribed = loop.run_until_complete(transcribe_audio_gemini(audio_path))
421
  if transcribed:
422
+ logger.info(f"Transcribed via Gemini MCP: {transcribed[:50]}...")
423
  return transcribed
424
  except Exception as e:
425
+ logger.error(f"Gemini MCP transcription error: {e}")
426
 
427
+ logger.warning("Gemini MCP transcription not available")
428
+ return ""
429
  except Exception as e:
430
  logger.error(f"Transcription error: {e}")
431
  return ""
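Usage note (illustrative, not from the commit): with the local Whisper path gone, transcribe_audio returns an empty string whenever Gemini MCP is unavailable or the call fails, so callers should treat "" as "no transcript".

```python
# Hypothetical caller; "consult.wav" is a placeholder recording path.
text = transcribe_audio("consult.wav")
if not text:
    print("No transcript available (Gemini MCP unreachable or transcription failed)")
else:
    print(text)
```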
 
561
  except LangDetectException:
562
  return "en" # Default to English if detection fails
563
 
564
+ async def translate_text_gemini(text: str, target_lang: str = "en", source_lang: str = None) -> str:
565
+ """Translate text using Gemini MCP"""
566
  if source_lang:
567
+ user_prompt = f"Translate the following {source_lang} text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
568
  else:
569
+ user_prompt = f"Translate the following text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
570
 
571
+ # Use concise system prompt
572
+ system_prompt = "You are a professional translator. Translate accurately and concisely."
 
 
573
 
574
+ result = await call_gemini_mcp(
575
+ user_prompt=user_prompt,
576
+ system_prompt=system_prompt,
577
+ model=GEMINI_MODEL_LITE, # Use lite model for translation
578
+ temperature=0.2
579
  )
580
 
581
+ return result.strip()
582
+
583
+ def translate_text(text: str, target_lang: str = "en", source_lang: str = None) -> str:
584
+ """Translate text using Gemini MCP"""
585
+ if not MCP_AVAILABLE:
586
+ logger.warning("Gemini MCP not available for translation")
587
+ return text # Return original text if translation fails
588
 
589
+ try:
590
+ loop = asyncio.get_event_loop()
591
+ if loop.is_running():
592
+ try:
593
+ import nest_asyncio
594
+ translated = nest_asyncio.run(translate_text_gemini(text, target_lang, source_lang))
595
+ if translated:
596
+ logger.info(f"Translated via Gemini MCP: {translated[:50]}...")
597
+ return translated
598
+ except Exception as e:
599
+ logger.error(f"Error in nested async translation: {e}")
600
+ else:
601
+ translated = loop.run_until_complete(translate_text_gemini(text, target_lang, source_lang))
602
+ if translated:
603
+ logger.info(f"Translated via Gemini MCP: {translated[:50]}...")
604
+ return translated
605
+ except Exception as e:
606
+ logger.error(f"Gemini MCP translation error: {e}")
607
 
608
+ # Return original text if translation fails
609
+ return text
610
 
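translate_text keeps its old signature but now degrades differently: if Gemini MCP is unavailable or the call fails, it returns the input text unchanged instead of falling back to a local model. A short illustrative call (not part of the commit):

```python
# Returns the English translation via Gemini MCP, or the original string on failure.
print(translate_text("Bonjour, j'ai mal à la tête", target_lang="en", source_lang="fr"))
```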
611
  async def search_web_mcp(query: str, max_results: int = 5) -> list:
612
  """Search web using MCP tools"""
 
814
  else:
815
  return search_web_fallback(query, max_results)
816
 
817
+ async def summarize_web_content_gemini(content_list: list, query: str) -> str:
818
+ """Summarize web search results using Gemini MCP"""
819
  combined_content = "\n\n".join([f"Source: {item['title']}\n{item['content']}" for item in content_list[:3]])
820
 
821
+ user_prompt = f"""Summarize the following web search results related to the query: "{query}"
822
 
823
  Extract key medical information, facts, and insights. Be concise and focus on reliable information.
824
 
 
827
 
828
  Summary:"""
829
 
830
+ # Use concise system prompt
831
+ system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
 
 
832
 
833
+ result = await call_gemini_mcp(
834
+ user_prompt=user_prompt,
835
+ system_prompt=system_prompt,
836
+ model=GEMINI_MODEL, # Use full model for summarization
837
+ temperature=0.5
838
  )
839
 
840
+ return result.strip()
841
+
842
+ def summarize_web_content(content_list: list, query: str) -> str:
843
+ """Summarize web search results using Gemini MCP"""
844
+ if not MCP_AVAILABLE:
845
+ logger.warning("Gemini MCP not available for summarization")
846
+ # Fallback: return first result's content
847
+ if content_list:
848
+ return content_list[0].get('content', '')[:500]
849
+ return ""
850
 
851
+ try:
852
+ loop = asyncio.get_event_loop()
853
+ if loop.is_running():
854
+ try:
855
+ import nest_asyncio
856
+ summary = nest_asyncio.run(summarize_web_content_gemini(content_list, query))
857
+ if summary:
858
+ return summary
859
+ except Exception as e:
860
+ logger.error(f"Error in nested async summarization: {e}")
861
+ else:
862
+ summary = loop.run_until_complete(summarize_web_content_gemini(content_list, query))
863
+ if summary:
864
+ return summary
865
+ except Exception as e:
866
+ logger.error(f"Gemini MCP summarization error: {e}")
867
 
868
+ # Fallback: return first result's content
869
+ if content_list:
870
+ return content_list[0].get('content', '')[:500]
871
+ return ""
872
 
873
  def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
874
+ """Get LLM for RAG indexing (uses medical model)"""
875
+ # Use medical model for RAG indexing instead of translation model
876
+ medical_model_obj, medical_tokenizer = initialize_medical_model(DEFAULT_MEDICAL_MODEL)
877
 
878
  return HuggingFaceLLM(
879
  context_window=4096,
880
  max_new_tokens=max_new_tokens,
881
+ tokenizer=medical_tokenizer,
882
+ model=medical_model_obj,
883
  generate_kwargs={
884
  "do_sample": True,
885
  "temperature": temperature,
 
888
  }
889
  )
890
 
891
+ async def autonomous_reasoning_gemini(query: str) -> dict:
892
+ """Autonomous reasoning using Gemini MCP"""
893
  reasoning_prompt = f"""Analyze this medical query and provide structured reasoning:
894
 
895
  Query: "{query}"
 
912
  "sub_questions": ["..."]
913
  }}"""
914
 
915
+ # Use concise system prompt
916
+ system_prompt = "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."
 
 
917
 
918
+ response = await call_gemini_mcp(
919
+ user_prompt=reasoning_prompt,
920
+ system_prompt=system_prompt,
921
+ model=GEMINI_MODEL, # Use full model for reasoning
922
+ temperature=0.3
923
  )
924
 
 
925
  # Parse JSON response (with fallback)
926
  try:
927
  # Extract JSON from response
 
945
  logger.info(f"Reasoning analysis: {reasoning}")
946
  return reasoning
947
 
948
+ def autonomous_reasoning(query: str, history: list) -> dict:
949
+ """
950
+ Autonomous reasoning: Analyze query complexity, intent, and information needs.
951
+ Returns reasoning analysis with query type, complexity, and required information sources.
952
+ """
953
+ if not MCP_AVAILABLE:
954
+ logger.warning("Gemini MCP not available for reasoning, using fallback")
955
+ # Fallback reasoning
956
+ return {
957
+ "query_type": "general_info",
958
+ "complexity": "moderate",
959
+ "information_needs": ["medical information"],
960
+ "requires_rag": True,
961
+ "requires_web_search": False,
962
+ "sub_questions": [query]
963
+ }
964
+
965
+ try:
966
+ loop = asyncio.get_event_loop()
967
+ if loop.is_running():
968
+ try:
969
+ import nest_asyncio
970
+ return nest_asyncio.run(autonomous_reasoning_gemini(query))
971
+ except Exception as e:
972
+ logger.error(f"Error in nested async reasoning: {e}")
973
+ else:
974
+ return loop.run_until_complete(autonomous_reasoning_gemini(query))
975
+ except Exception as e:
976
+ logger.error(f"Gemini MCP reasoning error: {e}")
977
+
978
+ # Fallback reasoning
979
+ return {
980
+ "query_type": "general_info",
981
+ "complexity": "moderate",
982
+ "information_needs": ["medical information"],
983
+ "requires_rag": True,
984
+ "requires_web_search": False,
985
+ "sub_questions": [query]
986
+ }
987
+
988
  def create_execution_plan(reasoning: dict, query: str, has_rag_index: bool) -> dict:
989
  """
990
  Planning: Create multi-step execution plan based on reasoning analysis.
 
1081
 
1082
  return strategy
1083
 
1084
+ async def self_reflection_gemini(answer: str, query: str) -> dict:
1085
+ """Self-reflection using Gemini MCP"""
1086
  reflection_prompt = f"""Evaluate this medical answer for quality and completeness:
1087
 
1088
  Query: "{query}"
 
1105
  "improvement_suggestions": ["..."]
1106
  }}"""
1107
 
1108
+ # Use concise system prompt
1109
+ system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback."
 
 
1110
 
1111
+ response = await call_gemini_mcp(
1112
+ user_prompt=reflection_prompt,
1113
+ system_prompt=system_prompt,
1114
+ model=GEMINI_MODEL, # Use full model for reflection
1115
+ temperature=0.3
1116
  )
1117
 
 
1118
  try:
1119
  json_start = response.find('{')
1120
  json_end = response.rfind('}') + 1
 
1128
  logger.info(f"Self-reflection score: {reflection.get('overall_score', 'N/A')}")
1129
  return reflection
1130
 
1131
+ def self_reflection(answer: str, query: str, reasoning: dict) -> dict:
1132
+ """
1133
+ Self-reflection: Evaluate answer quality and completeness.
1134
+ Returns reflection with quality score and improvement suggestions.
1135
+ """
1136
+ if not MCP_AVAILABLE:
1137
+ logger.warning("Gemini MCP not available for reflection, using fallback")
1138
+ return {"overall_score": 7, "improvement_suggestions": []}
1139
+
1140
+ try:
1141
+ loop = asyncio.get_event_loop()
1142
+ if loop.is_running():
1143
+ try:
1144
+ import nest_asyncio
1145
+ return nest_asyncio.run(self_reflection_gemini(answer, query))
1146
+ except Exception as e:
1147
+ logger.error(f"Error in nested async reflection: {e}")
1148
+ else:
1149
+ return loop.run_until_complete(self_reflection_gemini(answer, query))
1150
+ except Exception as e:
1151
+ logger.error(f"Gemini MCP reflection error: {e}")
1152
+
1153
+ return {"overall_score": 7, "improvement_suggestions": []}
1154
+
1155
+ async def parse_document_gemini(file_path: str, file_extension: str) -> str:
1156
+ """Parse document using Gemini MCP"""
1157
+ if not MCP_AVAILABLE:
1158
+ return ""
1159
+
1160
+ try:
1161
+ # Read file and encode to base64
1162
+ with open(file_path, 'rb') as f:
1163
+ file_content = base64.b64encode(f.read()).decode('utf-8')
1164
+
1165
+ # Determine MIME type from file extension
1166
+ mime_type_map = {
1167
+ '.pdf': 'application/pdf',
1168
+ '.doc': 'application/msword',
1169
+ '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
1170
+ '.txt': 'text/plain',
1171
+ '.md': 'text/markdown',
1172
+ '.json': 'application/json',
1173
+ '.xml': 'application/xml',
1174
+ '.csv': 'text/csv'
1175
+ }
1176
+ mime_type = mime_type_map.get(file_extension, 'application/octet-stream')
1177
+
1178
+ # Prepare file object for Gemini MCP (use content for base64)
1179
+ files = [{
1180
+ "content": file_content,
1181
+ "type": mime_type
1182
+ }]
1183
+
1184
+ # Use concise system prompt
1185
+ system_prompt = "Extract all text content from the document accurately."
1186
+ user_prompt = "Extract all text content from this document. Return only the extracted text, preserving structure and formatting where possible."
1187
+
1188
+ result = await call_gemini_mcp(
1189
+ user_prompt=user_prompt,
1190
+ system_prompt=system_prompt,
1191
+ files=files,
1192
+ model=GEMINI_MODEL_LITE, # Use lite model for parsing
1193
+ temperature=0.2
1194
+ )
1195
+
1196
+ return result.strip()
1197
+ except Exception as e:
1198
+ logger.error(f"Gemini document parsing error: {e}")
1199
+ import traceback
1200
+ logger.debug(traceback.format_exc())
1201
+ return ""
1202
+
1203
  def extract_text_from_document(file):
1204
+ """Extract text from document using Gemini MCP"""
1205
  file_name = file.name
1206
  file_extension = os.path.splitext(file_name)[1].lower()
1207
 
1208
+ # Handle text files directly
1209
  if file_extension == '.txt':
1210
  text = file.read().decode('utf-8')
1211
  return text, len(text.split()), None
1212
+
1213
+ # For PDF, Word, and other documents, use Gemini MCP
1214
+ # Save file to temporary location for processing
1215
+ try:
1216
+ with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
1217
+ # Write file content to temp file
1218
+ file.seek(0) # Reset file pointer
1219
+ tmp_file.write(file.read())
1220
+ tmp_file_path = tmp_file.name
1221
+
1222
+ # Use Gemini MCP to parse document
1223
+ if MCP_AVAILABLE:
1224
+ try:
1225
+ loop = asyncio.get_event_loop()
1226
+ if loop.is_running():
1227
+ try:
1228
+ import nest_asyncio
1229
+ text = nest_asyncio.run(parse_document_gemini(tmp_file_path, file_extension))
1230
+ except Exception as e:
1231
+ logger.error(f"Error in nested async document parsing: {e}")
1232
+ text = ""
1233
+ else:
1234
+ text = loop.run_until_complete(parse_document_gemini(tmp_file_path, file_extension))
1235
+
1236
+ # Clean up temp file
1237
+ try:
1238
+ os.unlink(tmp_file_path)
1239
+ except:
1240
+ pass
1241
+
1242
+ if text:
1243
+ return text, len(text.split()), None
1244
+ else:
1245
+ return None, 0, ValueError(f"Failed to extract text from {file_extension} file using Gemini MCP")
1246
+ except Exception as e:
1247
+ logger.error(f"Gemini MCP document parsing error: {e}")
1248
+ # Clean up temp file
1249
+ try:
1250
+ os.unlink(tmp_file_path)
1251
+ except:
1252
+ pass
1253
+ return None, 0, ValueError(f"Error parsing {file_extension} file: {str(e)}")
1254
+ else:
1255
+ # Clean up temp file
1256
+ try:
1257
+ os.unlink(tmp_file_path)
1258
+ except:
1259
+ pass
1260
+ return None, 0, ValueError(f"Gemini MCP not available. Cannot parse {file_extension} files.")
1261
+ except Exception as e:
1262
+ logger.error(f"Error processing document: {e}")
1263
+ return None, 0, ValueError(f"Error processing {file_extension} file: {str(e)}")
1264
 
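With PyPDF2 removed, extract_text_from_document handles .txt directly and routes every other supported format through a temp file and parse_document_gemini. An illustrative direct call (the file name is hypothetical):

```python
# extract_text_from_document returns (text, word_count, error); error is a ValueError
# when Gemini MCP is unavailable or parsing fails.
with open("discharge_summary.pdf", "rb") as f:
    text, word_count, error = extract_text_from_document(f)

if error:
    print(f"Extraction failed: {error}")
else:
    print(f"Extracted {word_count} words")
```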
1265
  @spaces.GPU(max_duration=120)
1266
  def create_or_update_index(files, request: gr.Request):
 
1658
  file_upload = gr.File(
1659
  file_count="multiple",
1660
  label="Drag and Drop Files Here",
1661
+ file_types=[".pdf", ".txt", ".doc", ".docx", ".md", ".json", ".xml", ".csv"],
1662
  elem_id="file-upload"
1663
  )
1664
  upload_button = gr.Button("Upload & Index", elem_classes="upload-button")
 
1906
  if __name__ == "__main__":
1907
  # Preload models on startup
1908
  logger.info("Preloading models on startup...")
1909
  logger.info("Initializing default medical model (MedSwin TA)...")
1910
  initialize_medical_model(DEFAULT_MEDICAL_MODEL)
1911
  logger.info("Preloading TTS model...")
1912
  try:
1913
  initialize_tts_model()
 
1918
  except Exception as e:
1919
  logger.warning(f"TTS model preloading failed: {e}")
1920
  logger.warning("Text-to-speech will use MCP or be disabled")
1921
+
1922
+ # Check Gemini MCP availability
1923
+ if MCP_AVAILABLE:
1924
+ logger.info("Gemini MCP is available for translation, summarization, document parsing, and transcription")
1925
+ else:
1926
+ logger.warning("Gemini MCP not available - translation, summarization, document parsing, and transcription features will be limited")
1927
+
1928
  logger.info("Model preloading complete!")
1929
  demo = create_demo()
1930
  demo.launch()
requirements.txt CHANGED
@@ -3,7 +3,6 @@ llama-index
3
  transformers
4
  torch
5
  sentence-transformers
6
- PyPDF2
7
  python-docx
8
  llama_index.llms.huggingface
9
  llama_index.embeddings.huggingface
@@ -13,7 +12,7 @@ sentencepiece>=0.1.99
13
  google-genai
14
  langdetect
15
  gradio
16
- # MCP dependencies (preferred)
17
  mcp
18
  nest-asyncio
19
  # Fallback dependencies (used if MCP is not available)
@@ -21,7 +20,6 @@ requests
21
  beautifulsoup4
22
  ddgs
23
  spaces
24
- openai-whisper
25
  soundfile
26
  numpy<2.0.0
27
  setuptools>=65.0.0
 
3
  transformers
4
  torch
5
  sentence-transformers
 
6
  python-docx
7
  llama_index.llms.huggingface
8
  llama_index.embeddings.huggingface
 
12
  google-genai
13
  langdetect
14
  gradio
15
+ # MCP dependencies (required for Gemini MCP)
16
  mcp
17
  nest-asyncio
18
  # Fallback dependencies (used if MCP is not available)
 
20
  beautifulsoup4
21
  ddgs
22
  spaces
 
23
  soundfile
24
  numpy<2.0.0
25
  setuptools>=65.0.0