Spaces: Running on Zero

Commit fce8688 · Parent: 6e8bf5a
Rm Deepseek for Gemini MCP, Remove PDF reader lib for Gemini OCR, Rm Whisper for Gemini MCP transcribe audio

Files changed:
- app.py (+369 -333)
- requirements.txt (+1 -3)
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
-
import
|
| 4 |
import logging
|
| 5 |
import torch
|
| 6 |
import threading
|
|
@@ -33,7 +33,6 @@ from llama_index.llms.huggingface import HuggingFaceLLM
|
|
| 33 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 34 |
from tqdm import tqdm
|
| 35 |
from langdetect import detect, LangDetectException
|
| 36 |
-
import whisper
|
| 37 |
# MCP imports
|
| 38 |
try:
|
| 39 |
from mcp import ClientSession, StdioServerParameters
|
|
@@ -67,7 +66,6 @@ logger = logging.getLogger(__name__)
|
|
| 67 |
hf_logging.set_verbosity_error()
|
| 68 |
|
| 69 |
# Model configurations
|
| 70 |
-
TRANSLATION_MODEL = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
|
| 71 |
MEDSWIN_MODELS = {
|
| 72 |
"MedSwin SFT": "MedSwin/MedSwin-7B-SFT",
|
| 73 |
"MedSwin KD": "MedSwin/MedSwin-7B-KD",
|
|
@@ -75,12 +73,16 @@ MEDSWIN_MODELS = {
|
|
| 75 |
}
|
| 76 |
DEFAULT_MEDICAL_MODEL = "MedSwin TA"
|
| 77 |
EMBEDDING_MODEL = "abhinand/MedEmbed-large-v0.1" # Domain-tuned medical embedding model
|
| 78 |
-
WHISPER_MODEL = "openai/whisper-large-v3-turbo"
|
| 79 |
TTS_MODEL = "maya-research/maya1"
|
| 80 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 81 |
if not HF_TOKEN:
|
| 82 |
raise ValueError("HF_TOKEN not found in environment variables")
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
# Custom UI
|
| 85 |
TITLE = "<h1><center>🩺 MedLLM Agent - Medical RAG & Web Search System</center></h1>"
|
| 86 |
DESCRIPTION = """
|
|
@@ -188,12 +190,9 @@ CSS = """
|
|
| 188 |
"""
|
| 189 |
|
| 190 |
# Global model storage
|
| 191 |
-
global_translation_model = None
|
| 192 |
-
global_translation_tokenizer = None
|
| 193 |
global_medical_models = {}
|
| 194 |
global_medical_tokenizers = {}
|
| 195 |
global_file_info = {}
|
| 196 |
-
global_whisper_model = None
|
| 197 |
global_tts_model = None
|
| 198 |
|
| 199 |
# MCP client storage
|
|
@@ -201,10 +200,10 @@ global_mcp_session = None
|
|
| 201 |
global_mcp_stdio_ctx = None # Store stdio context to keep it alive
|
| 202 |
global_mcp_lock = threading.Lock() # Lock for thread-safe session access
|
| 203 |
# MCP server configuration via environment variables
|
| 204 |
-
#
|
| 205 |
-
#
|
| 206 |
-
MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "
|
| 207 |
-
MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", "-
|
| 208 |
|
| 209 |
async def get_mcp_session():
|
| 210 |
"""Get or create MCP client session with proper context management"""
|
|
@@ -265,20 +264,55 @@ async def get_mcp_session():
|
|
| 265 |
global_mcp_stdio_ctx = None
|
| 266 |
return None
|
| 267 |
| 268-281 |
-def ...                      # removed function definition (14 lines; body not rendered in this diff view)
-    """..."""
-    ...
|
|
| 282 |
|
| 283 |
def initialize_medical_model(model_name: str):
|
| 284 |
"""Initialize medical model (MedSwin) - download on demand"""
|
|
@@ -299,57 +333,6 @@ def initialize_medical_model(model_name: str):
|
|
| 299 |
logger.info(f"Medical model {model_name} initialized successfully")
|
| 300 |
return global_medical_models[model_name], global_medical_tokenizers[model_name]
|
| 301 |
|
| 302 |
-
def initialize_whisper_model():
|
| 303 |
-
"""Initialize Whisper model for speech-to-text"""
|
| 304 |
-
global global_whisper_model
|
| 305 |
-
if global_whisper_model is None:
|
| 306 |
-
logger.info("Initializing Whisper model for speech transcription...")
|
| 307 |
-
try:
|
| 308 |
-
# Check if we're in a spaces environment (has spaces patching)
|
| 309 |
-
in_spaces_env = hasattr(torch, '_spaces_patched') or 'spaces' in str(type(torch.Tensor.to))
|
| 310 |
-
|
| 311 |
-
# Try loading from HuggingFace with device handling for spaces compatibility
|
| 312 |
-
try:
|
| 313 |
-
if in_spaces_env:
|
| 314 |
-
# In spaces environment, load on CPU and don't move to device
|
| 315 |
-
logger.info("Detected spaces environment, loading Whisper on CPU")
|
| 316 |
-
global_whisper_model = whisper.load_model("large-v3-turbo", device="cpu")
|
| 317 |
-
# Don't move to GPU in spaces environment
|
| 318 |
-
else:
|
| 319 |
-
# Normal environment, let whisper handle device
|
| 320 |
-
global_whisper_model = whisper.load_model("large-v3-turbo")
|
| 321 |
-
except NotImplementedError as e:
|
| 322 |
-
# Handle sparse tensor error from spaces library
|
| 323 |
-
if "SparseTensorImpl" in str(e) or "storage" in str(e).lower():
|
| 324 |
-
logger.warning(f"Spaces library compatibility issue: {e}")
|
| 325 |
-
logger.info("Trying to load Whisper model with workaround...")
|
| 326 |
-
try:
|
| 327 |
-
# Try loading on CPU explicitly
|
| 328 |
-
global_whisper_model = whisper.load_model("base", device="cpu")
|
| 329 |
-
except Exception as e2:
|
| 330 |
-
logger.error(f"Failed to load Whisper with workaround: {e2}")
|
| 331 |
-
global_whisper_model = None
|
| 332 |
-
return None
|
| 333 |
-
else:
|
| 334 |
-
raise
|
| 335 |
-
except Exception as e1:
|
| 336 |
-
logger.warning(f"Failed to load large-v3-turbo: {e1}")
|
| 337 |
-
try:
|
| 338 |
-
# Fallback to base model with CPU
|
| 339 |
-
global_whisper_model = whisper.load_model("base", device="cpu")
|
| 340 |
-
except Exception as e2:
|
| 341 |
-
logger.error(f"Failed to load Whisper base model: {e2}")
|
| 342 |
-
# Set to None to indicate failure - will use MCP or skip transcription
|
| 343 |
-
global_whisper_model = None
|
| 344 |
-
return None
|
| 345 |
-
except Exception as e:
|
| 346 |
-
logger.error(f"Whisper model initialization error: {e}")
|
| 347 |
-
import traceback
|
| 348 |
-
logger.debug(traceback.format_exc())
|
| 349 |
-
global_whisper_model = None
|
| 350 |
-
return None
|
| 351 |
-
logger.info("Whisper model initialized successfully")
|
| 352 |
-
return global_whisper_model
|
| 353 |
|
| 354 |
def initialize_tts_model():
|
| 355 |
"""Initialize TTS model for text-to-speech"""
|
|
@@ -368,46 +351,41 @@ def initialize_tts_model():
|
|
| 368 |
global_tts_model = None
|
| 369 |
return global_tts_model
|
| 370 |
|
| 371 |
-
async def
|
| 372 |
-
"""Transcribe audio using MCP
|
| 373 |
if not MCP_AVAILABLE:
|
| 374 |
return ""
|
| 375 |
|
| 376 |
try:
|
| 377 |
-
#
|
| 378 |
-
|
| 379 |
-
if session is None:
|
| 380 |
-
return ""
|
| 381 |
|
| 382 |
-
#
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
if "whisper" in tool.name.lower() or "transcribe" in tool.name.lower() or "speech" in tool.name.lower():
|
| 387 |
-
whisper_tool = tool
|
| 388 |
-
logger.info(f"Found MCP Whisper tool: {tool.name}")
|
| 389 |
-
break
|
| 390 |
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
|
|
|
| 403 |
except Exception as e:
|
| 404 |
-
logger.
|
|
|
|
|
|
|
| 405 |
return ""
|
| 406 |
|
| 407 |
def transcribe_audio(audio):
|
| 408 |
-
"""Transcribe audio to text using
|
| 409 |
-
global global_whisper_model
|
| 410 |
-
|
| 411 |
if audio is None:
|
| 412 |
return ""
|
| 413 |
|
|
@@ -425,40 +403,29 @@ def transcribe_audio(audio):
|
|
| 425 |
else:
|
| 426 |
audio_path = audio
|
| 427 |
|
| 428 |
-
#
|
| 429 |
if MCP_AVAILABLE:
|
| 430 |
try:
|
| 431 |
loop = asyncio.get_event_loop()
|
| 432 |
if loop.is_running():
|
| 433 |
try:
|
| 434 |
import nest_asyncio
|
| 435 |
-
transcribed = nest_asyncio.run(
|
| 436 |
if transcribed:
|
| 437 |
-
logger.info(f"Transcribed via MCP: {transcribed}")
|
| 438 |
return transcribed
|
| 439 |
-
except:
|
| 440 |
-
|
| 441 |
else:
|
| 442 |
-
transcribed = loop.run_until_complete(
|
| 443 |
if transcribed:
|
| 444 |
-
logger.info(f"Transcribed via MCP: {transcribed}")
|
| 445 |
return transcribed
|
| 446 |
except Exception as e:
|
| 447 |
-
logger.
|
| 448 |
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
initialize_whisper_model()
|
| 452 |
-
|
| 453 |
-
if global_whisper_model is None:
|
| 454 |
-
logger.warning("Whisper model not available and MCP not working")
|
| 455 |
-
return ""
|
| 456 |
-
|
| 457 |
-
# Transcribe
|
| 458 |
-
result = global_whisper_model.transcribe(audio_path, language="en")
|
| 459 |
-
transcribed_text = result["text"].strip()
|
| 460 |
-
logger.info(f"Transcribed: {transcribed_text}")
|
| 461 |
-
return transcribed_text
|
| 462 |
except Exception as e:
|
| 463 |
logger.error(f"Transcription error: {e}")
|
| 464 |
return ""
|
|
@@ -594,103 +561,52 @@ def detect_language(text: str) -> str:
|
|
| 594 |
except LangDetectException:
|
| 595 |
return "en" # Default to English if detection fails
|
| 596 |
|
| 597 |
-
async def
|
| 598 |
-
"""Translate text using MCP
|
| 599 |
-
if not MCP_AVAILABLE:
|
| 600 |
-
return ""
|
| 601 |
-
|
| 602 |
-
try:
|
| 603 |
-
# Get MCP session
|
| 604 |
-
session = await get_mcp_session()
|
| 605 |
-
if session is None:
|
| 606 |
-
return ""
|
| 607 |
-
|
| 608 |
-
# Find translation tool
|
| 609 |
-
tools = await session.list_tools()
|
| 610 |
-
translate_tool = None
|
| 611 |
-
for tool in tools.tools:
|
| 612 |
-
if "translate" in tool.name.lower() or "translation" in tool.name.lower():
|
| 613 |
-
translate_tool = tool
|
| 614 |
-
logger.info(f"Found MCP translation tool: {tool.name}")
|
| 615 |
-
break
|
| 616 |
-
|
| 617 |
-
if translate_tool:
|
| 618 |
-
args = {"text": text, "target_language": target_lang}
|
| 619 |
-
if source_lang:
|
| 620 |
-
args["source_language"] = source_lang
|
| 621 |
-
|
| 622 |
-
result = await session.call_tool(
|
| 623 |
-
translate_tool.name,
|
| 624 |
-
arguments=args
|
| 625 |
-
)
|
| 626 |
-
|
| 627 |
-
# Parse result
|
| 628 |
-
if hasattr(result, 'content') and result.content:
|
| 629 |
-
for item in result.content:
|
| 630 |
-
if hasattr(item, 'text'):
|
| 631 |
-
return item.text.strip()
|
| 632 |
-
return ""
|
| 633 |
-
except Exception as e:
|
| 634 |
-
logger.debug(f"MCP translation error: {e}")
|
| 635 |
-
return ""
|
| 636 |
-
|
| 637 |
-
def translate_text(text: str, target_lang: str = "en", source_lang: str = None) -> str:
|
| 638 |
-
"""Translate text using DeepSeek-R1 model (with MCP fallback)"""
|
| 639 |
-
# Try MCP first if available
|
| 640 |
-
if MCP_AVAILABLE:
|
| 641 |
-
try:
|
| 642 |
-
loop = asyncio.get_event_loop()
|
| 643 |
-
if loop.is_running():
|
| 644 |
-
try:
|
| 645 |
-
import nest_asyncio
|
| 646 |
-
translated = nest_asyncio.run(translate_text_mcp(text, target_lang, source_lang))
|
| 647 |
-
if translated:
|
| 648 |
-
logger.info(f"Translated via MCP: {translated[:50]}...")
|
| 649 |
-
return translated
|
| 650 |
-
except:
|
| 651 |
-
pass
|
| 652 |
-
else:
|
| 653 |
-
translated = loop.run_until_complete(translate_text_mcp(text, target_lang, source_lang))
|
| 654 |
-
if translated:
|
| 655 |
-
logger.info(f"Translated via MCP: {translated[:50]}...")
|
| 656 |
-
return translated
|
| 657 |
-
except Exception as e:
|
| 658 |
-
logger.debug(f"MCP translation not available: {e}")
|
| 659 |
-
|
| 660 |
-
# Fallback to local translation model
|
| 661 |
-
global global_translation_model, global_translation_tokenizer
|
| 662 |
-
if global_translation_model is None or global_translation_tokenizer is None:
|
| 663 |
-
initialize_translation_model()
|
| 664 |
-
|
| 665 |
if source_lang:
|
| 666 |
-
|
| 667 |
else:
|
| 668 |
-
|
| 669 |
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
{"role": "user", "content": prompt}
|
| 673 |
-
]
|
| 674 |
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
|
|
|
| 679 |
)
|
| 680 |
|
| 681 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
|
| 692 |
-
|
| 693 |
-
return
|
| 694 |
|
| 695 |
async def search_web_mcp(query: str, max_results: int = 5) -> list:
|
| 696 |
"""Search web using MCP tools"""
|
|
@@ -898,15 +814,11 @@ def search_web(query: str, max_results: int = 5) -> list:
|
|
| 898 |
else:
|
| 899 |
return search_web_fallback(query, max_results)
|
| 900 |
|
| 901 |
-
def
|
| 902 |
-
"""Summarize web search results using
|
| 903 |
-
global global_translation_model, global_translation_tokenizer
|
| 904 |
-
if global_translation_model is None or global_translation_tokenizer is None:
|
| 905 |
-
initialize_translation_model()
|
| 906 |
-
|
| 907 |
combined_content = "\n\n".join([f"Source: {item['title']}\n{item['content']}" for item in content_list[:3]])
|
| 908 |
|
| 909 |
-
|
| 910 |
|
| 911 |
Extract key medical information, facts, and insights. Be concise and focus on reliable information.
|
| 912 |
|
|
@@ -915,41 +827,59 @@ Search Results:
|
|
| 915 |
|
| 916 |
Summary:"""
|
| 917 |
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
{"role": "user", "content": prompt}
|
| 921 |
-
]
|
| 922 |
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
|
|
|
| 927 |
)
|
| 928 |
|
| 929 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 930 |
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 939 |
|
| 940 |
-
|
| 941 |
-
|
|
|
|
|
|
|
| 942 |
|
| 943 |
def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
|
| 944 |
-
"""Get LLM for RAG indexing (uses
|
| 945 |
-
|
| 946 |
-
|
| 947 |
|
| 948 |
return HuggingFaceLLM(
|
| 949 |
context_window=4096,
|
| 950 |
max_new_tokens=max_new_tokens,
|
| 951 |
-
tokenizer=
|
| 952 |
-
model=
|
| 953 |
generate_kwargs={
|
| 954 |
"do_sample": True,
|
| 955 |
"temperature": temperature,
|
|
@@ -958,15 +888,8 @@ def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
|
|
| 958 |
}
|
| 959 |
)
|
| 960 |
|
| 961 |
-
def
|
| 962 |
-
"""
|
| 963 |
-
Autonomous reasoning: Analyze query complexity, intent, and information needs.
|
| 964 |
-
Returns reasoning analysis with query type, complexity, and required information sources.
|
| 965 |
-
"""
|
| 966 |
-
global global_translation_model, global_translation_tokenizer
|
| 967 |
-
if global_translation_model is None or global_translation_tokenizer is None:
|
| 968 |
-
initialize_translation_model()
|
| 969 |
-
|
| 970 |
reasoning_prompt = f"""Analyze this medical query and provide structured reasoning:
|
| 971 |
|
| 972 |
Query: "{query}"
|
|
@@ -989,30 +912,16 @@ Respond in JSON format:
|
|
| 989 |
"sub_questions": ["..."]
|
| 990 |
}}"""
|
| 991 |
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
{"role": "user", "content": reasoning_prompt}
|
| 995 |
-
]
|
| 996 |
|
| 997 |
-
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
|
|
|
| 1001 |
)
|
| 1002 |
|
| 1003 |
-
inputs = global_translation_tokenizer(prompt_text, return_tensors="pt").to(global_translation_model.device)
|
| 1004 |
-
|
| 1005 |
-
with torch.no_grad():
|
| 1006 |
-
outputs = global_translation_model.generate(
|
| 1007 |
-
**inputs,
|
| 1008 |
-
max_new_tokens=512,
|
| 1009 |
-
temperature=0.3,
|
| 1010 |
-
do_sample=True,
|
| 1011 |
-
pad_token_id=global_translation_tokenizer.eos_token_id
|
| 1012 |
-
)
|
| 1013 |
-
|
| 1014 |
-
response = global_translation_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
|
| 1015 |
-
|
| 1016 |
# Parse JSON response (with fallback)
|
| 1017 |
try:
|
| 1018 |
# Extract JSON from response
|
|
@@ -1036,6 +945,46 @@ Respond in JSON format:
|
|
| 1036 |
logger.info(f"Reasoning analysis: {reasoning}")
|
| 1037 |
return reasoning
|
| 1038 |
|
|
|
|
|
|
|
|
|
|
| 1039 |
def create_execution_plan(reasoning: dict, query: str, has_rag_index: bool) -> dict:
|
| 1040 |
"""
|
| 1041 |
Planning: Create multi-step execution plan based on reasoning analysis.
|
|
@@ -1132,15 +1081,8 @@ def autonomous_execution_strategy(reasoning: dict, plan: dict, use_rag: bool, us
|
|
| 1132 |
|
| 1133 |
return strategy
|
| 1134 |
|
| 1135 |
-
def
|
| 1136 |
-
"""
|
| 1137 |
-
Self-reflection: Evaluate answer quality and completeness.
|
| 1138 |
-
Returns reflection with quality score and improvement suggestions.
|
| 1139 |
-
"""
|
| 1140 |
-
global global_translation_model, global_translation_tokenizer
|
| 1141 |
-
if global_translation_model is None or global_translation_tokenizer is None:
|
| 1142 |
-
initialize_translation_model()
|
| 1143 |
-
|
| 1144 |
reflection_prompt = f"""Evaluate this medical answer for quality and completeness:
|
| 1145 |
|
| 1146 |
Query: "{query}"
|
|
@@ -1163,31 +1105,16 @@ Respond in JSON:
|
|
| 1163 |
"improvement_suggestions": ["..."]
|
| 1164 |
}}"""
|
| 1165 |
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
{"role": "user", "content": reflection_prompt}
|
| 1169 |
-
]
|
| 1170 |
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
|
| 1174 |
-
|
|
|
|
| 1175 |
)
|
| 1176 |
|
| 1177 |
-
inputs = global_translation_tokenizer(prompt_text, return_tensors="pt").to(global_translation_model.device)
|
| 1178 |
-
|
| 1179 |
-
with torch.no_grad():
|
| 1180 |
-
outputs = global_translation_model.generate(
|
| 1181 |
-
**inputs,
|
| 1182 |
-
max_new_tokens=256,
|
| 1183 |
-
temperature=0.3,
|
| 1184 |
-
do_sample=True,
|
| 1185 |
-
pad_token_id=global_translation_tokenizer.eos_token_id
|
| 1186 |
-
)
|
| 1187 |
-
|
| 1188 |
-
response = global_translation_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
|
| 1189 |
-
|
| 1190 |
-
import json
|
| 1191 |
try:
|
| 1192 |
json_start = response.find('{')
|
| 1193 |
json_end = response.rfind('}') + 1
|
|
@@ -1201,19 +1128,139 @@ Respond in JSON:
|
|
| 1201 |
logger.info(f"Self-reflection score: {reflection.get('overall_score', 'N/A')}")
|
| 1202 |
return reflection
|
| 1203 |
|
|
|
|
|
|
|
|
|
|
|
| 1204 |
def extract_text_from_document(file):
|
|
|
|
| 1205 |
file_name = file.name
|
| 1206 |
file_extension = os.path.splitext(file_name)[1].lower()
|
| 1207 |
|
|
|
|
| 1208 |
if file_extension == '.txt':
|
| 1209 |
text = file.read().decode('utf-8')
|
| 1210 |
return text, len(text.split()), None
|
| 1211 |
-
|
| 1212 |
-
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
| 1216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1217 |
|
| 1218 |
@spaces.GPU(max_duration=120)
|
| 1219 |
def create_or_update_index(files, request: gr.Request):
|
|
@@ -1611,7 +1658,7 @@ def create_demo():
|
|
| 1611 |
file_upload = gr.File(
|
| 1612 |
file_count="multiple",
|
| 1613 |
label="Drag and Drop Files Here",
|
| 1614 |
-
file_types=[".pdf", ".txt"],
|
| 1615 |
elem_id="file-upload"
|
| 1616 |
)
|
| 1617 |
upload_button = gr.Button("Upload & Index", elem_classes="upload-button")
|
|
@@ -1859,26 +1906,8 @@ def create_demo():
|
|
| 1859 |
if __name__ == "__main__":
|
| 1860 |
# Preload models on startup
|
| 1861 |
logger.info("Preloading models on startup...")
|
| 1862 |
-
logger.info("Initializing translation model (DeepSeek-R1)...")
|
| 1863 |
-
try:
|
| 1864 |
-
initialize_translation_model()
|
| 1865 |
-
logger.info("Translation model (DeepSeek-R1) preloaded successfully!")
|
| 1866 |
-
except Exception as e:
|
| 1867 |
-
logger.error(f"Translation model preloading failed: {e}")
|
| 1868 |
-
logger.warning("Translation features may be limited")
|
| 1869 |
logger.info("Initializing default medical model (MedSwin TA)...")
|
| 1870 |
initialize_medical_model(DEFAULT_MEDICAL_MODEL)
|
| 1871 |
-
logger.info("Preloading Whisper model...")
|
| 1872 |
-
try:
|
| 1873 |
-
initialize_whisper_model()
|
| 1874 |
-
if global_whisper_model is not None:
|
| 1875 |
-
logger.info("Whisper model preloaded successfully!")
|
| 1876 |
-
else:
|
| 1877 |
-
logger.warning("Whisper model not available - will use MCP or disable transcription")
|
| 1878 |
-
except Exception as e:
|
| 1879 |
-
logger.warning(f"Whisper model preloading failed: {e}")
|
| 1880 |
-
logger.warning("Speech-to-text will use MCP or be disabled")
|
| 1881 |
-
global_whisper_model = None
|
| 1882 |
logger.info("Preloading TTS model...")
|
| 1883 |
try:
|
| 1884 |
initialize_tts_model()
|
|
@@ -1889,6 +1918,13 @@ if __name__ == "__main__":
|
|
| 1889 |
except Exception as e:
|
| 1890 |
logger.warning(f"TTS model preloading failed: {e}")
|
| 1891 |
logger.warning("Text-to-speech will use MCP or be disabled")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1892 |
logger.info("Model preloading complete!")
|
| 1893 |
demo = create_demo()
|
| 1894 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
+
import base64
|
| 4 |
import logging
|
| 5 |
import torch
|
| 6 |
import threading
|
|
|
|
| 33 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 34 |
from tqdm import tqdm
|
| 35 |
from langdetect import detect, LangDetectException
|
|
|
|
| 36 |
# MCP imports
|
| 37 |
try:
|
| 38 |
from mcp import ClientSession, StdioServerParameters
|
|
|
|
| 66 |
hf_logging.set_verbosity_error()
|
| 67 |
|
| 68 |
# Model configurations
|
|
|
|
| 69 |
MEDSWIN_MODELS = {
|
| 70 |
"MedSwin SFT": "MedSwin/MedSwin-7B-SFT",
|
| 71 |
"MedSwin KD": "MedSwin/MedSwin-7B-KD",
|
|
|
|
| 73 |
}
|
| 74 |
DEFAULT_MEDICAL_MODEL = "MedSwin TA"
|
| 75 |
EMBEDDING_MODEL = "abhinand/MedEmbed-large-v0.1" # Domain-tuned medical embedding model
|
|
|
|
| 76 |
TTS_MODEL = "maya-research/maya1"
|
| 77 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 78 |
if not HF_TOKEN:
|
| 79 |
raise ValueError("HF_TOKEN not found in environment variables")
|
| 80 |
|
| 81 |
+
# Gemini MCP configuration
|
| 82 |
+
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
|
| 83 |
+
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash") # Default for harder tasks
|
| 84 |
+
GEMINI_MODEL_LITE = os.environ.get("GEMINI_MODEL_LITE", "gemini-2.5-flash-lite") # For parsing and simple tasks
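A minimal sketch of the environment this two-tier setup expects (variable names as defined above; the key value and model IDs are illustrative defaults, presumably consumed by the aistudio-mcp-server):

    import os

    os.environ.setdefault("GEMINI_API_KEY", "<your-api-key>")              # assumed to be read by the MCP server
    os.environ.setdefault("GEMINI_MODEL", "gemini-2.5-flash")              # heavier tasks: reasoning, reflection, summarization
    os.environ.setdefault("GEMINI_MODEL_LITE", "gemini-2.5-flash-lite")    # lighter tasks: parsing, translation, transcription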
|
| 85 |
+
|
| 86 |
# Custom UI
|
| 87 |
TITLE = "<h1><center>🩺 MedLLM Agent - Medical RAG & Web Search System</center></h1>"
|
| 88 |
DESCRIPTION = """
|
|
|
|
| 190 |
"""
|
| 191 |
|
| 192 |
# Global model storage
|
|
|
|
|
|
|
| 193 |
global_medical_models = {}
|
| 194 |
global_medical_tokenizers = {}
|
| 195 |
global_file_info = {}
|
|
|
|
| 196 |
global_tts_model = None
|
| 197 |
|
| 198 |
# MCP client storage
|
|
|
|
| 200 |
global_mcp_stdio_ctx = None # Store stdio context to keep it alive
|
| 201 |
global_mcp_lock = threading.Lock() # Lock for thread-safe session access
|
| 202 |
# MCP server configuration via environment variables
|
| 203 |
+
# Gemini MCP server: aistudio-mcp-server
|
| 204 |
+
# Example: MCP_SERVER_COMMAND="npx" MCP_SERVER_ARGS="-y @aistudio-mcp/server"
|
| 205 |
+
MCP_SERVER_COMMAND = os.environ.get("MCP_SERVER_COMMAND", "npx")
|
| 206 |
+
MCP_SERVER_ARGS = os.environ.get("MCP_SERVER_ARGS", "-y @aistudio-mcp/server").split() if os.environ.get("MCP_SERVER_ARGS") else ["-y", "@aistudio-mcp/server"]
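The command/args pair can be overridden per deployment; note that an MCP_SERVER_ARGS override is a single space-separated string that the line above .split()s. A hedged sketch:

    import os

    os.environ["MCP_SERVER_COMMAND"] = "npx"
    os.environ["MCP_SERVER_ARGS"] = "-y @aistudio-mcp/server"   # split on whitespace into ["-y", "@aistudio-mcp/server"]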
|
| 207 |
|
| 208 |
async def get_mcp_session():
|
| 209 |
"""Get or create MCP client session with proper context management"""
|
|
|
|
| 264 |
global_mcp_stdio_ctx = None
|
| 265 |
return None
|
| 266 |
|
| 267 |
+
async def call_gemini_mcp(user_prompt: str, system_prompt: str = None, files: list = None, model: str = None, temperature: float = 0.2) -> str:
|
| 268 |
+
"""Call Gemini MCP generate_content tool"""
|
| 269 |
+
if not MCP_AVAILABLE:
|
| 270 |
+
return ""
|
| 271 |
+
|
| 272 |
+
try:
|
| 273 |
+
session = await get_mcp_session()
|
| 274 |
+
if session is None:
|
| 275 |
+
return ""
|
| 276 |
+
|
| 277 |
+
# Find generate_content tool
|
| 278 |
+
tools = await session.list_tools()
|
| 279 |
+
generate_tool = None
|
| 280 |
+
for tool in tools.tools:
|
| 281 |
+
if "generate_content" in tool.name.lower() or "generate" in tool.name.lower():
|
| 282 |
+
generate_tool = tool
|
| 283 |
+
logger.info(f"Found Gemini MCP tool: {tool.name}")
|
| 284 |
+
break
|
| 285 |
+
|
| 286 |
+
if not generate_tool:
|
| 287 |
+
logger.warning("Gemini MCP generate_content tool not found")
|
| 288 |
+
return ""
|
| 289 |
+
|
| 290 |
+
# Prepare arguments
|
| 291 |
+
arguments = {
|
| 292 |
+
"user_prompt": user_prompt
|
| 293 |
+
}
|
| 294 |
+
if system_prompt:
|
| 295 |
+
arguments["system_prompt"] = system_prompt
|
| 296 |
+
if files:
|
| 297 |
+
arguments["files"] = files
|
| 298 |
+
if model:
|
| 299 |
+
arguments["model"] = model
|
| 300 |
+
if temperature is not None:
|
| 301 |
+
arguments["temperature"] = temperature
|
| 302 |
+
|
| 303 |
+
result = await session.call_tool(generate_tool.name, arguments=arguments)
|
| 304 |
+
|
| 305 |
+
# Parse result
|
| 306 |
+
if hasattr(result, 'content') and result.content:
|
| 307 |
+
for item in result.content:
|
| 308 |
+
if hasattr(item, 'text'):
|
| 309 |
+
return item.text.strip()
|
| 310 |
+
return ""
|
| 311 |
+
except Exception as e:
|
| 312 |
+
logger.error(f"Gemini MCP call error: {e}")
|
| 313 |
+
import traceback
|
| 314 |
+
logger.debug(traceback.format_exc())
|
| 315 |
+
return ""
|
| 316 |
|
| 317 |
def initialize_medical_model(model_name: str):
|
| 318 |
"""Initialize medical model (MedSwin) - download on demand"""
|
|
|
|
| 333 |
logger.info(f"Medical model {model_name} initialized successfully")
|
| 334 |
return global_medical_models[model_name], global_medical_tokenizers[model_name]
|
| 335 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
def initialize_tts_model():
|
| 338 |
"""Initialize TTS model for text-to-speech"""
|
|
|
|
| 351 |
global_tts_model = None
|
| 352 |
return global_tts_model
|
| 353 |
|
| 354 |
+
async def transcribe_audio_gemini(audio_path: str) -> str:
|
| 355 |
+
"""Transcribe audio using Gemini MCP"""
|
| 356 |
if not MCP_AVAILABLE:
|
| 357 |
return ""
|
| 358 |
|
| 359 |
try:
|
| 360 |
+
# Ensure we have an absolute path
|
| 361 |
+
audio_path_abs = os.path.abspath(audio_path)
|
|
|
|
|
|
|
| 362 |
|
| 363 |
+
# Prepare file object for Gemini MCP using path (as per Gemini MCP documentation)
|
| 364 |
+
files = [{
|
| 365 |
+
"path": audio_path_abs
|
| 366 |
+
}]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
|
| 368 |
+
# Use exact prompts from Gemini MCP documentation
|
| 369 |
+
system_prompt = "You are a professional transcription service. Provide accurate, well-formatted transcripts."
|
| 370 |
+
user_prompt = "Please transcribe this audio file. Include speaker identification if multiple speakers are present, and format it with proper punctuation and paragraphs, remove mumble, ignore non-verbal noises."
|
| 371 |
+
|
| 372 |
+
result = await call_gemini_mcp(
|
| 373 |
+
user_prompt=user_prompt,
|
| 374 |
+
system_prompt=system_prompt,
|
| 375 |
+
files=files,
|
| 376 |
+
model=GEMINI_MODEL_LITE, # Use lite model for transcription
|
| 377 |
+
temperature=0.2
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
+
return result.strip()
|
| 381 |
except Exception as e:
|
| 382 |
+
logger.error(f"Gemini transcription error: {e}")
|
| 383 |
+
import traceback
|
| 384 |
+
logger.debug(traceback.format_exc())
|
| 385 |
return ""
|
| 386 |
|
| 387 |
def transcribe_audio(audio):
|
| 388 |
+
"""Transcribe audio to text using Gemini MCP"""
|
|
|
|
|
|
|
| 389 |
if audio is None:
|
| 390 |
return ""
|
| 391 |
|
|
|
|
| 403 |
else:
|
| 404 |
audio_path = audio
|
| 405 |
|
| 406 |
+
# Use Gemini MCP for transcription
|
| 407 |
if MCP_AVAILABLE:
|
| 408 |
try:
|
| 409 |
loop = asyncio.get_event_loop()
|
| 410 |
if loop.is_running():
|
| 411 |
try:
|
| 412 |
import nest_asyncio
|
| 413 |
+
transcribed = nest_asyncio.run(transcribe_audio_gemini(audio_path))
|
| 414 |
if transcribed:
|
| 415 |
+
logger.info(f"Transcribed via Gemini MCP: {transcribed[:50]}...")
|
| 416 |
return transcribed
|
| 417 |
+
except Exception as e:
|
| 418 |
+
logger.error(f"Error in nested async transcription: {e}")
|
| 419 |
else:
|
| 420 |
+
transcribed = loop.run_until_complete(transcribe_audio_gemini(audio_path))
|
| 421 |
if transcribed:
|
| 422 |
+
logger.info(f"Transcribed via Gemini MCP: {transcribed[:50]}...")
|
| 423 |
return transcribed
|
| 424 |
except Exception as e:
|
| 425 |
+
logger.error(f"Gemini MCP transcription error: {e}")
|
| 426 |
|
| 427 |
+
logger.warning("Gemini MCP transcription not available")
|
| 428 |
+
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
except Exception as e:
|
| 430 |
logger.error(f"Transcription error: {e}")
|
| 431 |
return ""
|
|
|
|
| 561 |
except LangDetectException:
|
| 562 |
return "en" # Default to English if detection fails
|
| 563 |
|
| 564 |
+
async def translate_text_gemini(text: str, target_lang: str = "en", source_lang: str = None) -> str:
|
| 565 |
+
"""Translate text using Gemini MCP"""
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
if source_lang:
|
| 567 |
+
user_prompt = f"Translate the following {source_lang} text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
|
| 568 |
else:
|
| 569 |
+
user_prompt = f"Translate the following text to {target_lang}. Only provide the translation, no explanations:\n\n{text}"
|
| 570 |
|
| 571 |
+
# Use concise system prompt
|
| 572 |
+
system_prompt = "You are a professional translator. Translate accurately and concisely."
|
|
|
|
|
|
|
| 573 |
|
| 574 |
+
result = await call_gemini_mcp(
|
| 575 |
+
user_prompt=user_prompt,
|
| 576 |
+
system_prompt=system_prompt,
|
| 577 |
+
model=GEMINI_MODEL_LITE, # Use lite model for translation
|
| 578 |
+
temperature=0.2
|
| 579 |
)
|
| 580 |
|
| 581 |
+
return result.strip()
|
| 582 |
+
|
| 583 |
+
def translate_text(text: str, target_lang: str = "en", source_lang: str = None) -> str:
|
| 584 |
+
"""Translate text using Gemini MCP"""
|
| 585 |
+
if not MCP_AVAILABLE:
|
| 586 |
+
logger.warning("Gemini MCP not available for translation")
|
| 587 |
+
return text # Return original text if translation fails
|
| 588 |
|
| 589 |
+
try:
|
| 590 |
+
loop = asyncio.get_event_loop()
|
| 591 |
+
if loop.is_running():
|
| 592 |
+
try:
|
| 593 |
+
import nest_asyncio
|
| 594 |
+
translated = nest_asyncio.run(translate_text_gemini(text, target_lang, source_lang))
|
| 595 |
+
if translated:
|
| 596 |
+
logger.info(f"Translated via Gemini MCP: {translated[:50]}...")
|
| 597 |
+
return translated
|
| 598 |
+
except Exception as e:
|
| 599 |
+
logger.error(f"Error in nested async translation: {e}")
|
| 600 |
+
else:
|
| 601 |
+
translated = loop.run_until_complete(translate_text_gemini(text, target_lang, source_lang))
|
| 602 |
+
if translated:
|
| 603 |
+
logger.info(f"Translated via Gemini MCP: {translated[:50]}...")
|
| 604 |
+
return translated
|
| 605 |
+
except Exception as e:
|
| 606 |
+
logger.error(f"Gemini MCP translation error: {e}")
|
| 607 |
|
| 608 |
+
# Return original text if translation fails
|
| 609 |
+
return text
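A hedged example of the wrapper in a typical flow, where non-English user input is normalized to English before retrieval (text is illustrative):

    query_en = translate_text("¿Cuáles son los síntomas de la neumonía?", target_lang="en", source_lang="es")
    # If the Gemini MCP server is unreachable, translate_text returns the input unchanged.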
|
| 610 |
|
| 611 |
async def search_web_mcp(query: str, max_results: int = 5) -> list:
|
| 612 |
"""Search web using MCP tools"""
|
|
|
|
| 814 |
else:
|
| 815 |
return search_web_fallback(query, max_results)
|
| 816 |
|
| 817 |
+
async def summarize_web_content_gemini(content_list: list, query: str) -> str:
|
| 818 |
+
"""Summarize web search results using Gemini MCP"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 819 |
combined_content = "\n\n".join([f"Source: {item['title']}\n{item['content']}" for item in content_list[:3]])
|
| 820 |
|
| 821 |
+
user_prompt = f"""Summarize the following web search results related to the query: "{query}"
|
| 822 |
|
| 823 |
Extract key medical information, facts, and insights. Be concise and focus on reliable information.
|
| 824 |
|
|
|
|
| 827 |
|
| 828 |
Summary:"""
|
| 829 |
|
| 830 |
+
# Use concise system prompt
|
| 831 |
+
system_prompt = "You are a medical information summarizer. Extract and summarize key medical facts accurately."
|
|
|
|
|
|
|
| 832 |
|
| 833 |
+
result = await call_gemini_mcp(
|
| 834 |
+
user_prompt=user_prompt,
|
| 835 |
+
system_prompt=system_prompt,
|
| 836 |
+
model=GEMINI_MODEL, # Use full model for summarization
|
| 837 |
+
temperature=0.5
|
| 838 |
)
|
| 839 |
|
| 840 |
+
return result.strip()
|
| 841 |
+
|
| 842 |
+
def summarize_web_content(content_list: list, query: str) -> str:
|
| 843 |
+
"""Summarize web search results using Gemini MCP"""
|
| 844 |
+
if not MCP_AVAILABLE:
|
| 845 |
+
logger.warning("Gemini MCP not available for summarization")
|
| 846 |
+
# Fallback: return first result's content
|
| 847 |
+
if content_list:
|
| 848 |
+
return content_list[0].get('content', '')[:500]
|
| 849 |
+
return ""
|
| 850 |
|
| 851 |
+
try:
|
| 852 |
+
loop = asyncio.get_event_loop()
|
| 853 |
+
if loop.is_running():
|
| 854 |
+
try:
|
| 855 |
+
import nest_asyncio
|
| 856 |
+
summary = nest_asyncio.run(summarize_web_content_gemini(content_list, query))
|
| 857 |
+
if summary:
|
| 858 |
+
return summary
|
| 859 |
+
except Exception as e:
|
| 860 |
+
logger.error(f"Error in nested async summarization: {e}")
|
| 861 |
+
else:
|
| 862 |
+
summary = loop.run_until_complete(summarize_web_content_gemini(content_list, query))
|
| 863 |
+
if summary:
|
| 864 |
+
return summary
|
| 865 |
+
except Exception as e:
|
| 866 |
+
logger.error(f"Gemini MCP summarization error: {e}")
|
| 867 |
|
| 868 |
+
# Fallback: return first result's content
|
| 869 |
+
if content_list:
|
| 870 |
+
return content_list[0].get('content', '')[:500]
|
| 871 |
+
return ""
|
| 872 |
|
| 873 |
def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
|
| 874 |
+
"""Get LLM for RAG indexing (uses medical model)"""
|
| 875 |
+
# Use medical model for RAG indexing instead of translation model
|
| 876 |
+
medical_model_obj, medical_tokenizer = initialize_medical_model(DEFAULT_MEDICAL_MODEL)
|
| 877 |
|
| 878 |
return HuggingFaceLLM(
|
| 879 |
context_window=4096,
|
| 880 |
max_new_tokens=max_new_tokens,
|
| 881 |
+
tokenizer=medical_tokenizer,
|
| 882 |
+
model=medical_model_obj,
|
| 883 |
generate_kwargs={
|
| 884 |
"do_sample": True,
|
| 885 |
"temperature": temperature,
|
|
|
|
| 888 |
}
|
| 889 |
)
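A sketch of wiring this LLM into llama-index, assuming the llama-index >= 0.10 Settings API (index construction happens elsewhere in app.py):

    from llama_index.core import Settings

    Settings.llm = get_llm_for_rag(temperature=0.3, max_new_tokens=256)
    Settings.embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL)
    # RAG queries then run on the local MedSwin model rather than a remote API.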
|
| 890 |
|
| 891 |
+
async def autonomous_reasoning_gemini(query: str) -> dict:
|
| 892 |
+
"""Autonomous reasoning using Gemini MCP"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 893 |
reasoning_prompt = f"""Analyze this medical query and provide structured reasoning:
|
| 894 |
|
| 895 |
Query: "{query}"
|
|
|
|
| 912 |
"sub_questions": ["..."]
|
| 913 |
}}"""
|
| 914 |
|
| 915 |
+
# Use concise system prompt
|
| 916 |
+
system_prompt = "You are a medical reasoning system. Analyze queries systematically and provide structured JSON responses."
|
|
|
|
|
|
|
| 917 |
|
| 918 |
+
response = await call_gemini_mcp(
|
| 919 |
+
user_prompt=reasoning_prompt,
|
| 920 |
+
system_prompt=system_prompt,
|
| 921 |
+
model=GEMINI_MODEL, # Use full model for reasoning
|
| 922 |
+
temperature=0.3
|
| 923 |
)
|
| 924 |
|
|
|
|
|
|
|
|
|
| 925 |
# Parse JSON response (with fallback)
|
| 926 |
try:
|
| 927 |
# Extract JSON from response
|
|
|
|
| 945 |
logger.info(f"Reasoning analysis: {reasoning}")
|
| 946 |
return reasoning
|
| 947 |
|
| 948 |
+
def autonomous_reasoning(query: str, history: list) -> dict:
|
| 949 |
+
"""
|
| 950 |
+
Autonomous reasoning: Analyze query complexity, intent, and information needs.
|
| 951 |
+
Returns reasoning analysis with query type, complexity, and required information sources.
|
| 952 |
+
"""
|
| 953 |
+
if not MCP_AVAILABLE:
|
| 954 |
+
logger.warning("Gemini MCP not available for reasoning, using fallback")
|
| 955 |
+
# Fallback reasoning
|
| 956 |
+
return {
|
| 957 |
+
"query_type": "general_info",
|
| 958 |
+
"complexity": "moderate",
|
| 959 |
+
"information_needs": ["medical information"],
|
| 960 |
+
"requires_rag": True,
|
| 961 |
+
"requires_web_search": False,
|
| 962 |
+
"sub_questions": [query]
|
| 963 |
+
}
|
| 964 |
+
|
| 965 |
+
try:
|
| 966 |
+
loop = asyncio.get_event_loop()
|
| 967 |
+
if loop.is_running():
|
| 968 |
+
try:
|
| 969 |
+
import nest_asyncio
|
| 970 |
+
return nest_asyncio.run(autonomous_reasoning_gemini(query))
|
| 971 |
+
except Exception as e:
|
| 972 |
+
logger.error(f"Error in nested async reasoning: {e}")
|
| 973 |
+
else:
|
| 974 |
+
return loop.run_until_complete(autonomous_reasoning_gemini(query))
|
| 975 |
+
except Exception as e:
|
| 976 |
+
logger.error(f"Gemini MCP reasoning error: {e}")
|
| 977 |
+
|
| 978 |
+
# Fallback reasoning
|
| 979 |
+
return {
|
| 980 |
+
"query_type": "general_info",
|
| 981 |
+
"complexity": "moderate",
|
| 982 |
+
"information_needs": ["medical information"],
|
| 983 |
+
"requires_rag": True,
|
| 984 |
+
"requires_web_search": False,
|
| 985 |
+
"sub_questions": [query]
|
| 986 |
+
}
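A hedged example of how the reasoning dict feeds the planner defined just below (query text is illustrative):

    reasoning = autonomous_reasoning("Does ibuprofen interact with warfarin?", history=[])
    plan = create_execution_plan(reasoning, "Does ibuprofen interact with warfarin?", has_rag_index=False)
    print(reasoning.get("query_type"), reasoning.get("requires_web_search"))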
|
| 987 |
+
|
| 988 |
def create_execution_plan(reasoning: dict, query: str, has_rag_index: bool) -> dict:
|
| 989 |
"""
|
| 990 |
Planning: Create multi-step execution plan based on reasoning analysis.
|
|
|
|
| 1081 |
|
| 1082 |
return strategy
|
| 1083 |
|
| 1084 |
+
async def self_reflection_gemini(answer: str, query: str) -> dict:
|
| 1085 |
+
"""Self-reflection using Gemini MCP"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1086 |
reflection_prompt = f"""Evaluate this medical answer for quality and completeness:
|
| 1087 |
|
| 1088 |
Query: "{query}"
|
|
|
|
| 1105 |
"improvement_suggestions": ["..."]
|
| 1106 |
}}"""
|
| 1107 |
|
| 1108 |
+
# Use concise system prompt
|
| 1109 |
+
system_prompt = "You are a medical answer quality evaluator. Provide honest, constructive feedback."
|
|
|
|
|
|
|
| 1110 |
|
| 1111 |
+
response = await call_gemini_mcp(
|
| 1112 |
+
user_prompt=reflection_prompt,
|
| 1113 |
+
system_prompt=system_prompt,
|
| 1114 |
+
model=GEMINI_MODEL, # Use full model for reflection
|
| 1115 |
+
temperature=0.3
|
| 1116 |
)
|
| 1117 |
|
|
|
|
|
|
|
|
|
|
| 1118 |
try:
|
| 1119 |
json_start = response.find('{')
|
| 1120 |
json_end = response.rfind('}') + 1
|
|
|
|
| 1128 |
logger.info(f"Self-reflection score: {reflection.get('overall_score', 'N/A')}")
|
| 1129 |
return reflection
|
| 1130 |
|
| 1131 |
+
def self_reflection(answer: str, query: str, reasoning: dict) -> dict:
|
| 1132 |
+
"""
|
| 1133 |
+
Self-reflection: Evaluate answer quality and completeness.
|
| 1134 |
+
Returns reflection with quality score and improvement suggestions.
|
| 1135 |
+
"""
|
| 1136 |
+
if not MCP_AVAILABLE:
|
| 1137 |
+
logger.warning("Gemini MCP not available for reflection, using fallback")
|
| 1138 |
+
return {"overall_score": 7, "improvement_suggestions": []}
|
| 1139 |
+
|
| 1140 |
+
try:
|
| 1141 |
+
loop = asyncio.get_event_loop()
|
| 1142 |
+
if loop.is_running():
|
| 1143 |
+
try:
|
| 1144 |
+
import nest_asyncio
|
| 1145 |
+
return nest_asyncio.run(self_reflection_gemini(answer, query))
|
| 1146 |
+
except Exception as e:
|
| 1147 |
+
logger.error(f"Error in nested async reflection: {e}")
|
| 1148 |
+
else:
|
| 1149 |
+
return loop.run_until_complete(self_reflection_gemini(answer, query))
|
| 1150 |
+
except Exception as e:
|
| 1151 |
+
logger.error(f"Gemini MCP reflection error: {e}")
|
| 1152 |
+
|
| 1153 |
+
return {"overall_score": 7, "improvement_suggestions": []}
|
| 1154 |
+
|
| 1155 |
+
async def parse_document_gemini(file_path: str, file_extension: str) -> str:
|
| 1156 |
+
"""Parse document using Gemini MCP"""
|
| 1157 |
+
if not MCP_AVAILABLE:
|
| 1158 |
+
return ""
|
| 1159 |
+
|
| 1160 |
+
try:
|
| 1161 |
+
# Read file and encode to base64
|
| 1162 |
+
with open(file_path, 'rb') as f:
|
| 1163 |
+
file_content = base64.b64encode(f.read()).decode('utf-8')
|
| 1164 |
+
|
| 1165 |
+
# Determine MIME type from file extension
|
| 1166 |
+
mime_type_map = {
|
| 1167 |
+
'.pdf': 'application/pdf',
|
| 1168 |
+
'.doc': 'application/msword',
|
| 1169 |
+
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
| 1170 |
+
'.txt': 'text/plain',
|
| 1171 |
+
'.md': 'text/markdown',
|
| 1172 |
+
'.json': 'application/json',
|
| 1173 |
+
'.xml': 'application/xml',
|
| 1174 |
+
'.csv': 'text/csv'
|
| 1175 |
+
}
|
| 1176 |
+
mime_type = mime_type_map.get(file_extension, 'application/octet-stream')
|
| 1177 |
+
|
| 1178 |
+
# Prepare file object for Gemini MCP (use content for base64)
|
| 1179 |
+
files = [{
|
| 1180 |
+
"content": file_content,
|
| 1181 |
+
"type": mime_type
|
| 1182 |
+
}]
|
| 1183 |
+
|
| 1184 |
+
# Use concise system prompt
|
| 1185 |
+
system_prompt = "Extract all text content from the document accurately."
|
| 1186 |
+
user_prompt = "Extract all text content from this document. Return only the extracted text, preserving structure and formatting where possible."
|
| 1187 |
+
|
| 1188 |
+
result = await call_gemini_mcp(
|
| 1189 |
+
user_prompt=user_prompt,
|
| 1190 |
+
system_prompt=system_prompt,
|
| 1191 |
+
files=files,
|
| 1192 |
+
model=GEMINI_MODEL_LITE, # Use lite model for parsing
|
| 1193 |
+
temperature=0.2
|
| 1194 |
+
)
|
| 1195 |
+
|
| 1196 |
+
return result.strip()
|
| 1197 |
+
except Exception as e:
|
| 1198 |
+
logger.error(f"Gemini document parsing error: {e}")
|
| 1199 |
+
import traceback
|
| 1200 |
+
logger.debug(traceback.format_exc())
|
| 1201 |
+
return ""
|
| 1202 |
+
|
| 1203 |
def extract_text_from_document(file):
|
| 1204 |
+
"""Extract text from document using Gemini MCP"""
|
| 1205 |
file_name = file.name
|
| 1206 |
file_extension = os.path.splitext(file_name)[1].lower()
|
| 1207 |
|
| 1208 |
+
# Handle text files directly
|
| 1209 |
if file_extension == '.txt':
|
| 1210 |
text = file.read().decode('utf-8')
|
| 1211 |
return text, len(text.split()), None
|
| 1212 |
+
|
| 1213 |
+
# For PDF, Word, and other documents, use Gemini MCP
|
| 1214 |
+
# Save file to temporary location for processing
|
| 1215 |
+
try:
|
| 1216 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
|
| 1217 |
+
# Write file content to temp file
|
| 1218 |
+
file.seek(0) # Reset file pointer
|
| 1219 |
+
tmp_file.write(file.read())
|
| 1220 |
+
tmp_file_path = tmp_file.name
|
| 1221 |
+
|
| 1222 |
+
# Use Gemini MCP to parse document
|
| 1223 |
+
if MCP_AVAILABLE:
|
| 1224 |
+
try:
|
| 1225 |
+
loop = asyncio.get_event_loop()
|
| 1226 |
+
if loop.is_running():
|
| 1227 |
+
try:
|
| 1228 |
+
import nest_asyncio
|
| 1229 |
+
text = nest_asyncio.run(parse_document_gemini(tmp_file_path, file_extension))
|
| 1230 |
+
except Exception as e:
|
| 1231 |
+
logger.error(f"Error in nested async document parsing: {e}")
|
| 1232 |
+
text = ""
|
| 1233 |
+
else:
|
| 1234 |
+
text = loop.run_until_complete(parse_document_gemini(tmp_file_path, file_extension))
|
| 1235 |
+
|
| 1236 |
+
# Clean up temp file
|
| 1237 |
+
try:
|
| 1238 |
+
os.unlink(tmp_file_path)
|
| 1239 |
+
except:
|
| 1240 |
+
pass
|
| 1241 |
+
|
| 1242 |
+
if text:
|
| 1243 |
+
return text, len(text.split()), None
|
| 1244 |
+
else:
|
| 1245 |
+
return None, 0, ValueError(f"Failed to extract text from {file_extension} file using Gemini MCP")
|
| 1246 |
+
except Exception as e:
|
| 1247 |
+
logger.error(f"Gemini MCP document parsing error: {e}")
|
| 1248 |
+
# Clean up temp file
|
| 1249 |
+
try:
|
| 1250 |
+
os.unlink(tmp_file_path)
|
| 1251 |
+
except:
|
| 1252 |
+
pass
|
| 1253 |
+
return None, 0, ValueError(f"Error parsing {file_extension} file: {str(e)}")
|
| 1254 |
+
else:
|
| 1255 |
+
# Clean up temp file
|
| 1256 |
+
try:
|
| 1257 |
+
os.unlink(tmp_file_path)
|
| 1258 |
+
except:
|
| 1259 |
+
pass
|
| 1260 |
+
return None, 0, ValueError(f"Gemini MCP not available. Cannot parse {file_extension} files.")
|
| 1261 |
+
except Exception as e:
|
| 1262 |
+
logger.error(f"Error processing document: {e}")
|
| 1263 |
+
return None, 0, ValueError(f"Error processing {file_extension} file: {str(e)}")
|
| 1264 |
|
| 1265 |
@spaces.GPU(max_duration=120)
|
| 1266 |
def create_or_update_index(files, request: gr.Request):
|
|
|
|
| 1658 |
file_upload = gr.File(
|
| 1659 |
file_count="multiple",
|
| 1660 |
label="Drag and Drop Files Here",
|
| 1661 |
+
file_types=[".pdf", ".txt", ".doc", ".docx", ".md", ".json", ".xml", ".csv"],
|
| 1662 |
elem_id="file-upload"
|
| 1663 |
)
|
| 1664 |
upload_button = gr.Button("Upload & Index", elem_classes="upload-button")
|
|
|
|
| 1906 |
if __name__ == "__main__":
|
| 1907 |
# Preload models on startup
|
| 1908 |
logger.info("Preloading models on startup...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1909 |
logger.info("Initializing default medical model (MedSwin TA)...")
|
| 1910 |
initialize_medical_model(DEFAULT_MEDICAL_MODEL)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1911 |
logger.info("Preloading TTS model...")
|
| 1912 |
try:
|
| 1913 |
initialize_tts_model()
|
|
|
|
| 1918 |
except Exception as e:
|
| 1919 |
logger.warning(f"TTS model preloading failed: {e}")
|
| 1920 |
logger.warning("Text-to-speech will use MCP or be disabled")
|
| 1921 |
+
|
| 1922 |
+
# Check Gemini MCP availability
|
| 1923 |
+
if MCP_AVAILABLE:
|
| 1924 |
+
logger.info("Gemini MCP is available for translation, summarization, document parsing, and transcription")
|
| 1925 |
+
else:
|
| 1926 |
+
logger.warning("Gemini MCP not available - translation, summarization, document parsing, and transcription features will be limited")
|
| 1927 |
+
|
| 1928 |
logger.info("Model preloading complete!")
|
| 1929 |
demo = create_demo()
|
| 1930 |
demo.launch()
|
requirements.txt
CHANGED
|
@@ -3,7 +3,6 @@ llama-index
|
|
| 3 |
transformers
|
| 4 |
torch
|
| 5 |
sentence-transformers
|
| 6 |
-
PyPDF2
|
| 7 |
python-docx
|
| 8 |
llama_index.llms.huggingface
|
| 9 |
llama_index.embeddings.huggingface
|
|
@@ -13,7 +12,7 @@ sentencepiece>=0.1.99
|
|
| 13 |
google-genai
|
| 14 |
langdetect
|
| 15 |
gradio
|
| 16 |
-
# MCP dependencies (
|
| 17 |
mcp
|
| 18 |
nest-asyncio
|
| 19 |
# Fallback dependencies (used if MCP is not available)
|
|
@@ -21,7 +20,6 @@ requests
|
|
| 21 |
beautifulsoup4
|
| 22 |
ddgs
|
| 23 |
spaces
|
| 24 |
-
openai-whisper
|
| 25 |
soundfile
|
| 26 |
numpy<2.0.0
|
| 27 |
setuptools>=65.0.0
|
|
|
|
| 3 |
transformers
|
| 4 |
torch
|
| 5 |
sentence-transformers
|
|
|
|
| 6 |
python-docx
|
| 7 |
llama_index.llms.huggingface
|
| 8 |
llama_index.embeddings.huggingface
|
|
|
|
| 12 |
google-genai
|
| 13 |
langdetect
|
| 14 |
gradio
|
| 15 |
+
# MCP dependencies (required for Gemini MCP)
|
| 16 |
mcp
|
| 17 |
nest-asyncio
|
| 18 |
# Fallback dependencies (used if MCP is not available)
|
|
|
|
| 20 |
beautifulsoup4
|
| 21 |
ddgs
|
| 22 |
spaces
|
|
|
|
| 23 |
soundfile
|
| 24 |
numpy<2.0.0
|
| 25 |
setuptools>=65.0.0
|