Commit dadfb77 (parent: d0e0256)
Enhance multi-step reasoning, challengers and MAC

README.md CHANGED
@@ -72,7 +72,6 @@ tags:
   - Enable/disable Document RAG
   - Enable/disable Web Search (MCP)
   - Select medical model (MedSwin SFT/KD/TA)
-  - Toggle agentic reasoning on/off for MedSwin-only responses
 3. **Ask Questions**: Type your medical question in any language
 4. **Get Answers**: Receive comprehensive answers based on:
   - Your uploaded documents (if RAG enabled)
@@ -103,33 +102,9 @@ See `requirements.txt` for full dependency list. Key dependencies:
 - **Utilities**: `langdetect`, `gradio`, `spaces`, `soundfile`
 - **TTS**: Optional - `TTS` package (voice features work with MCP fallback if unavailable)
 
-### 🔑 Access Tokens
-
-- **Hugging Face**: Set `HF_TOKEN` so Transformers can download the MedSwin checkpoints. Generate a read-only token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) and export it:
-  ```bash
-  export HF_TOKEN="hf_your_token"
-  ```
-- **Gemini**: `GEMINI_API_KEY` remains required for MCP translation, parsing, transcription, and summarization (see MCP configuration below).
-
 ### 🔌 MCP Configuration
 
-The application uses a bundled Gemini MCP server (agent.py) for translation, document parsing, transcription, and summarization. Configure via environment variables
-
-```bash
-# Required: Gemini API Key
-export GEMINI_API_KEY="your-gemini-api-key"
-
-# Optional: Gemini MCP Server Configuration (defaults to bundled agent.py)
-export MCP_SERVER_COMMAND="python"
-export MCP_SERVER_ARGS="/path/to/agent.py"  # Default: bundled agent.py
-
-# Optional: Gemini Model Configuration
-export GEMINI_MODEL="gemini-2.5-flash"  # For complex tasks (default)
-export GEMINI_MODEL_LITE="gemini-2.5-flash-lite"  # For simple tasks (default)
-export GEMINI_TIMEOUT=300000  # Request timeout in milliseconds (default: 5 minutes)
-export GEMINI_MAX_OUTPUT_TOKENS=8192  # Maximum output tokens (default)
-export GEMINI_TEMPERATURE=0.2  # Temperature for generation, 0-2 (default: 0.2)
-```
+The application uses a bundled Gemini MCP server (agent.py) for translation, document parsing, transcription, and summarization. Configure it via environment variables.
 
 **Setup Steps:**
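For reference, a minimal sketch of how these settings could be read at startup; the variable names and defaults mirror the block removed above, while the lookup code itself is an assumption rather than the app's actual implementation:

```python
import os

# Sketch only: names and defaults follow the documented variables;
# app.py/agent.py may resolve them differently.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]  # required; raises KeyError if unset
MCP_SERVER_COMMAND = os.getenv("MCP_SERVER_COMMAND", "python")
MCP_SERVER_ARGS = os.getenv("MCP_SERVER_ARGS", "agent.py")  # bundled server
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
GEMINI_MODEL_LITE = os.getenv("GEMINI_MODEL_LITE", "gemini-2.5-flash-lite")
GEMINI_TIMEOUT = int(os.getenv("GEMINI_TIMEOUT", "300000"))  # milliseconds
GEMINI_MAX_OUTPUT_TOKENS = int(os.getenv("GEMINI_MAX_OUTPUT_TOKENS", "8192"))
GEMINI_TEMPERATURE = float(os.getenv("GEMINI_TEMPERATURE", "0.2"))  # range 0-2
```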
app.py CHANGED
@@ -1240,6 +1240,138 @@ def self_reflection(answer: str, query: str, reasoning: dict) -> dict:
 
     return {"overall_score": 7, "improvement_suggestions": []}
 
+def _truncate_text(text: str, max_chars: int = 4000) -> str:
+    """Utility to keep prompts within model limits."""
+    if not text:
+        return ""
+    text = text.strip()
+    if len(text) <= max_chars:
+        return text
+    return text[:max_chars] + "... (truncated)"
+
+async def enhance_answer_with_gemini_async(
+    query: str,
+    initial_answer: str,
+    rag_context: str = "",
+    web_context: str = "",
+    reasoning: dict = None,
+    reflection: dict = None
+) -> str:
+    """Use Gemini MCP to expand and validate the medical answer with multi-step reasoning."""
+    if not MCP_AVAILABLE:
+        return ""
+
+    evidence_sections = []
+    if rag_context:
+        evidence_sections.append(f"Document Evidence:\n{_truncate_text(rag_context, 3500)}")
+    if web_context:
+        evidence_sections.append(f"Web Evidence:\n{_truncate_text(web_context, 3500)}")
+    evidence_block = "\n\n".join(evidence_sections) if evidence_sections else "No external evidence provided."
+
+    reasoning_summary = ""
+    if reasoning:
+        reasoning_summary = json.dumps({
+            "query_type": reasoning.get("query_type"),
+            "complexity": reasoning.get("complexity"),
+            "sub_questions": reasoning.get("sub_questions", [])
+        }, ensure_ascii=False)
+
+    reflection_summary = ""
+    if reflection:
+        reflection_summary = json.dumps(reflection, ensure_ascii=False)
+
+    enhancer_prompt = f"""You are the Gemini MCP Response Enhancer working with a medical specialist model that produced a short draft answer.
+Task: Reason through the evidence, validate every claim, challenge gaps, and then produce a substantially more detailed final answer.
+
+Query:
+{query}
+
+Initial Draft Answer:
+{initial_answer}
+
+Reasoning Summary (optional):
+{reasoning_summary or "None"}
+
+Self-Reflection Feedback (optional):
+{reflection_summary or "None"}
+
+Evidence You Can Rely On:
+{evidence_block}
+
+Process:
+1. Think step-by-step about what the query truly needs and whether the draft covers it.
+2. Identify inaccuracies, missing context, or shallow explanations using the evidence.
+3. Produce an enhanced answer that is longer, clinically thorough, and clearly structured.
+4. When citing, refer to Document Evidence or Web Evidence explicitly (e.g., [Document], [Web]).
+5. Keep internal reasoning private; only share the final enhanced answer.
+
+Format:
+- Start with a concise summary paragraph.
+- Follow with detailed sections (Assessment, Supporting Evidence, Recommendations, Monitoring/Risks).
+- Conclude with a short bullet list of key takeaways.
+
+The final answer should be at least 3 paragraphs when evidence exists. Do not mention this instruction."""
+
+    enhanced = await call_agent(
+        user_prompt=enhancer_prompt,
+        system_prompt="You are a diligent medical editor. Deliberate internally, then share only the finalized enhanced answer with structured sections.",
+        model=GEMINI_MODEL,
+        temperature=0.4
+    )
+
+    return enhanced.strip() if enhanced else ""
+
+def enhance_answer_with_gemini(
+    query: str,
+    initial_answer: str,
+    rag_context: str = "",
+    web_context: str = "",
+    reasoning: dict = None,
+    reflection: dict = None
+) -> str:
+    """Sync wrapper for the Gemini response enhancer."""
+    if not MCP_AVAILABLE or not initial_answer:
+        return ""
+
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+    try:
+        if loop.is_running():
+            try:
+                # nest_asyncio exposes apply(), not run(): patch the running
+                # loop so run_until_complete can be re-entered safely.
+                import nest_asyncio
+                nest_asyncio.apply(loop)
+                enhanced = loop.run_until_complete(
+                    enhance_answer_with_gemini_async(
+                        query=query,
+                        initial_answer=initial_answer,
+                        rag_context=rag_context,
+                        web_context=web_context,
+                        reasoning=reasoning,
+                        reflection=reflection
+                    )
+                )
+                return enhanced
+            except Exception as e:
+                logger.error(f"Error in nested async enhancement: {e}")
+                return ""
+        else:
+            return loop.run_until_complete(
+                enhance_answer_with_gemini_async(
+                    query=query,
+                    initial_answer=initial_answer,
+                    rag_context=rag_context,
+                    web_context=web_context,
+                    reasoning=reasoning,
+                    reflection=reflection
+                )
+            )
+    except Exception as e:
+        logger.error(f"Gemini MCP enhancement error: {e}")
+        return ""
+
 async def parse_document_gemini(file_path: str, file_extension: str) -> str:
     """Parse document using Gemini MCP"""
     if not MCP_AVAILABLE:
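As a usage illustration, a hedged sketch of calling the new sync wrapper directly; the query and context strings are invented placeholders, and only the keyword arguments shown in the diff are assumed:

```python
# Hypothetical call site; stream_chat wires these arguments from its own state.
draft = "Metformin is typically first-line for type 2 diabetes."  # placeholder model draft
docs_ctx = "Guideline excerpt: start metformin unless contraindicated."  # placeholder RAG text

enhanced = enhance_answer_with_gemini(
    query="What is first-line treatment for type 2 diabetes?",
    initial_answer=draft,
    rag_context=docs_ctx,
    reasoning={"query_type": "treatment", "complexity": "complex", "sub_questions": []},
)
# An empty string signals MCP unavailable or a failed call, so fall back to the draft.
print(enhanced or draft)
```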
@@ -1477,6 +1609,8 @@ def stream_chat(
         yield history + [{"role": "assistant", "content": "Session initialization failed. Please refresh the page."}]
         return
 
+    chat_start_time = time.time()
+
     user_id = request.session_hash
     index_dir = f"./{user_id}_index"
     has_rag_index = os.path.exists(index_dir)
@@ -1712,6 +1846,9 @@ def stream_chat(
     yield updated_history
 
     partial_response = ""
+    reflection_data = None
+    reflection_note = ""
+    enhancement_note = ""
     try:
         for new_text in streamer:
             partial_response += new_text
@@ -1721,20 +1858,58 @@ def stream_chat(
     # ===== SELF-REFLECTION (Step 6) =====
     if not disable_agentic_reasoning and reasoning and reasoning.get("complexity") in ["complex", "multi_faceted"]:
         logger.info("🔍 Performing self-reflection on answer quality...")
-        reflection = self_reflection(partial_response, message, reasoning)
+        reflection_data = self_reflection(partial_response, message, reasoning)
 
-        if reflection and (reflection.get("overall_score", 10) < 7 or reflection.get("improvement_suggestions")):
-            reflection_note = f"\n\n---\n**Self-Reflection** (Score: {reflection.get('overall_score', 'N/A')}/10)"
-            if reflection.get("improvement_suggestions"):
-                reflection_note += f"\n💡 Suggestions: {', '.join(reflection['improvement_suggestions'][:2])}"
-        partial_response += reflection_note
-        updated_history[-1]["content"] = partial_response
+        if reflection_data and (reflection_data.get("overall_score", 10) < 7 or reflection_data.get("improvement_suggestions")):
+            reflection_note = f"\n\n---\n**Self-Reflection** (Score: {reflection_data.get('overall_score', 'N/A')}/10)"
+            if reflection_data.get("improvement_suggestions"):
+                reflection_note += f"\n💡 Suggestions: {', '.join(reflection_data['improvement_suggestions'][:2])}"
 
     # Add reasoning note if autonomous override occurred
+    base_response = partial_response
+
+    remaining_time = 120 - (time.time() - chat_start_time)
+    should_enhance = (
+        not disable_agentic_reasoning and
+        (
+            final_use_rag or
+            final_use_web_search or
+            (reasoning is not None)
+        )
+    )
+    if remaining_time <= 15:
+        logger.warning("⏱️ Skipping Gemini enhancement to stay within max duration.")
+        should_enhance = False
+
+    if should_enhance:
+        logger.info("🧠 Launching Gemini MCP response enhancer for extended answer...")
+        enhanced_text = enhance_answer_with_gemini(
+            query=message,
+            initial_answer=base_response,
+            rag_context=rag_context,
+            web_context=web_context,
+            reasoning=reasoning,
+            reflection=reflection_data
+        )
+        if enhanced_text:
+            partial_response = enhanced_text
+            enhancement_note = "\n\n_Gemini MCP multi-step reasoning enhancer applied._"
+
+            if reflection_data:
+                partial_response += f"\n\n_Self-reflection score: {reflection_data.get('overall_score', 'N/A')}/10._"
+        else:
+            if reflection_note:
+                partial_response = base_response + reflection_note
+            else:
+                partial_response = base_response
+    else:
+        partial_response = base_response + reflection_note if reflection_note else base_response
+
     if reasoning_note:
         partial_response = reasoning_note + "\n\n" + partial_response
 
+    if enhancement_note:
+        partial_response += enhancement_note
 
     # Translate back if needed
     if needs_translation and partial_response:
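The hard-coded 120-second budget presumably mirrors the Space's maximum request duration, with 15 seconds held back for translation and final streaming. A tiny sketch of the same guard as a standalone helper, with hypothetical names:

```python
import time

def remaining_budget(start: float, max_duration: float = 120.0) -> float:
    """Seconds left before the assumed per-request deadline."""
    return max_duration - (time.time() - start)

chat_start_time = time.time()
# ... generate the draft answer ...
if remaining_budget(chat_start_time) <= 15:
    print("Skipping enhancement to stay within the duration limit.")
```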