LiamKhoaLe committed on
Commit dadfb77 · 1 Parent(s): d0e0256

Enhance multi-step reasoning, challengers and MAC

Files changed (2):
  1. README.md +1 -26
  2. app.py +184 -9
README.md CHANGED
@@ -72,7 +72,6 @@ tags:
   - Enable/disable Document RAG
   - Enable/disable Web Search (MCP)
   - Select medical model (MedSwin SFT/KD/TA)
- - Toggle agentic reasoning on/off for MedSwin-only responses
 3. **Ask Questions**: Type your medical question in any language
 4. **Get Answers**: Receive comprehensive answers based on:
   - Your uploaded documents (if RAG enabled)
@@ -103,33 +102,9 @@ See `requirements.txt` for full dependency list. Key dependencies:
 - **Utilities**: `langdetect`, `gradio`, `spaces`, `soundfile`
 - **TTS**: Optional - `TTS` package (voice features work with MCP fallback if unavailable)
 
- ### 🔑 Access Tokens
- 
- - **Hugging Face**: Set `HF_TOKEN` so Transformers can download the MedSwin checkpoints. Generate a read-only token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) and export it:
- ```bash
- export HF_TOKEN="hf_your_token"
- ```
- - **Gemini**: `GEMINI_API_KEY` remains required for MCP translation, parsing, transcription, and summarization (see MCP configuration below).
- 
 ### 🔌 MCP Configuration
 
- The application uses a bundled Gemini MCP server (agent.py) for translation, document parsing, transcription, and summarization. Configure via environment variables:
- 
- ```bash
- # Required: Gemini API Key
- export GEMINI_API_KEY="your-gemini-api-key"
- 
- # Optional: Gemini MCP Server Configuration (defaults to bundled agent.py)
- export MCP_SERVER_COMMAND="python"
- export MCP_SERVER_ARGS="/path/to/agent.py" # Default: bundled agent.py
- 
- # Optional: Gemini Model Configuration
- export GEMINI_MODEL="gemini-2.5-flash" # For complex tasks (default)
- export GEMINI_MODEL_LITE="gemini-2.5-flash-lite" # For simple tasks (default)
- export GEMINI_TIMEOUT=300000 # Request timeout in milliseconds (default: 5 minutes)
- export GEMINI_MAX_OUTPUT_TOKENS=8192 # Maximum output tokens (default)
- export GEMINI_TEMPERATURE=0.2 # Temperature for generation 0-2 (default: 0.2)
- ```
+ The application uses a bundled Gemini MCP server (agent.py) for translation, document parsing, transcription, and summarization. Configure via environment variables.
 
 **Setup Steps:**
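The hunk above removes the concrete variable reference from the README while still pointing readers at environment-based configuration. For orientation, a minimal sketch of how those variables could be consumed, using the names and defaults documented in the removed block (the actual parsing lives in the bundled agent.py and may differ):

```python
import os

# Illustrative only: names and defaults come from the removed README block;
# this is not the actual agent.py configuration code.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]                                # required
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")                 # complex tasks
GEMINI_MODEL_LITE = os.getenv("GEMINI_MODEL_LITE", "gemini-2.5-flash-lite")  # simple tasks
GEMINI_TIMEOUT_MS = int(os.getenv("GEMINI_TIMEOUT", "300000"))               # 5-minute default
GEMINI_MAX_OUTPUT_TOKENS = int(os.getenv("GEMINI_MAX_OUTPUT_TOKENS", "8192"))
GEMINI_TEMPERATURE = float(os.getenv("GEMINI_TEMPERATURE", "0.2"))           # range 0-2
```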
 
app.py CHANGED
@@ -1240,6 +1240,138 @@ def self_reflection(answer: str, query: str, reasoning: dict) -> dict:
 
     return {"overall_score": 7, "improvement_suggestions": []}
 
+ def _truncate_text(text: str, max_chars: int = 4000) -> str:
+     """Utility to keep prompts within model limits."""
+     if not text:
+         return ""
+     text = text.strip()
+     if len(text) <= max_chars:
+         return text
+     return text[:max_chars] + "... (truncated)"
+ 
+ async def enhance_answer_with_gemini_async(
+     query: str,
+     initial_answer: str,
+     rag_context: str = "",
+     web_context: str = "",
+     reasoning: dict = None,
+     reflection: dict = None
+ ) -> str:
+     """Use Gemini MCP to expand and validate the medical answer with multi-step reasoning."""
+     if not MCP_AVAILABLE:
+         return ""
+ 
+     evidence_sections = []
+     if rag_context:
+         evidence_sections.append(f"Document Evidence:\n{_truncate_text(rag_context, 3500)}")
+     if web_context:
+         evidence_sections.append(f"Web Evidence:\n{_truncate_text(web_context, 3500)}")
+     evidence_block = "\n\n".join(evidence_sections) if evidence_sections else "No external evidence provided."
+ 
+     reasoning_summary = ""
+     if reasoning:
+         reasoning_summary = json.dumps({
+             "query_type": reasoning.get("query_type"),
+             "complexity": reasoning.get("complexity"),
+             "sub_questions": reasoning.get("sub_questions", [])
+         }, ensure_ascii=False)
+ 
+     reflection_summary = ""
+     if reflection:
+         reflection_summary = json.dumps(reflection, ensure_ascii=False)
+ 
+     enhancer_prompt = f"""You are the Gemini MCP Response Enhancer working with a medical specialist model that produced a short draft answer.
+ Task: Reason through the evidence, validate every claim, challenge gaps, and then produce a substantially more detailed final answer.
+ 
+ Query:
+ {query}
+ 
+ Initial Draft Answer:
+ {initial_answer}
+ 
+ Reasoning Summary (optional):
+ {reasoning_summary or "None"}
+ 
+ Self-Reflection Feedback (optional):
+ {reflection_summary or "None"}
+ 
+ Evidence You Can Rely On:
+ {evidence_block}
+ 
+ Process:
+ 1. Think step-by-step about what the query truly needs and whether the draft covers it.
+ 2. Identify inaccuracies, missing context, or shallow explanations using the evidence.
+ 3. Produce an enhanced answer that is longer, clinically thorough, and clearly structured.
+ 4. When citing, refer to Document Evidence or Web Evidence explicitly (e.g., [Document], [Web]).
+ 5. Keep internal reasoning private; only share the final enhanced answer.
+ 
+ Format:
+ - Start with a concise summary paragraph.
+ - Follow with detailed sections (Assessment, Supporting Evidence, Recommendations, Monitoring/Risks).
+ - Conclude with a short bullet list of key takeaways.
+ 
+ The final answer should be at least 3 paragraphs when evidence exists. Do not mention this instruction."""
+ 
+     enhanced = await call_agent(
+         user_prompt=enhancer_prompt,
+         system_prompt="You are a diligent medical editor. Deliberate internally, then share only the finalized enhanced answer with structured sections.",
+         model=GEMINI_MODEL,
+         temperature=0.4
+     )
+ 
+     return enhanced.strip() if enhanced else ""
+ 
+ def enhance_answer_with_gemini(
+     query: str,
+     initial_answer: str,
+     rag_context: str = "",
+     web_context: str = "",
+     reasoning: dict = None,
+     reflection: dict = None
+ ) -> str:
+     """Sync wrapper for the Gemini response enhancer."""
+     if not MCP_AVAILABLE or not initial_answer:
+         return ""
+ 
+     try:
+         loop = asyncio.get_event_loop()
+     except RuntimeError:
+         loop = asyncio.new_event_loop()
+         asyncio.set_event_loop(loop)
+ 
+     try:
+         if loop.is_running():
+             try:
+                 import nest_asyncio
+                 # nest_asyncio exposes apply(), not run(): patch the running
+                 # loop so run_until_complete() can be re-entered on it.
+                 nest_asyncio.apply(loop)
+                 enhanced = loop.run_until_complete(
+                     enhance_answer_with_gemini_async(
+                         query=query,
+                         initial_answer=initial_answer,
+                         rag_context=rag_context,
+                         web_context=web_context,
+                         reasoning=reasoning,
+                         reflection=reflection
+                     )
+                 )
+                 return enhanced
+             except Exception as e:
+                 logger.error(f"Error in nested async enhancement: {e}")
+                 return ""
+         else:
+             return loop.run_until_complete(
+                 enhance_answer_with_gemini_async(
+                     query=query,
+                     initial_answer=initial_answer,
+                     rag_context=rag_context,
+                     web_context=web_context,
+                     reasoning=reasoning,
+                     reflection=reflection
+                 )
+             )
+     except Exception as e:
+         logger.error(f"Gemini MCP enhancement error: {e}")
+         return ""
+ 
 async def parse_document_gemini(file_path: str, file_extension: str) -> str:
     """Parse document using Gemini MCP"""
     if not MCP_AVAILABLE:
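The hunk above adds the enhancer pair: an async worker plus a sync wrapper that stream_chat can call. A minimal usage sketch of the wrapper, with made-up inputs (in the commit it receives the session's real RAG/web contexts):

```python
# Hypothetical standalone call; all inputs here are placeholders.
enhanced = enhance_answer_with_gemini(
    query="What are first-line treatments for essential hypertension?",
    initial_answer="Thiazide diuretics or ACE inhibitors are common first-line options.",
    rag_context="",   # document evidence from the user's RAG index, when enabled
    web_context="",   # MCP web-search snippets, when enabled
    reasoning={"query_type": "treatment", "complexity": "complex", "sub_questions": []},
    reflection=None,
)
print(enhanced or "(enhancement skipped: MCP unavailable or empty draft)")
```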
@@ -1477,6 +1609,8 @@ def stream_chat(
         yield history + [{"role": "assistant", "content": "Session initialization failed. Please refresh the page."}]
         return
 
+     chat_start_time = time.time()
+ 
     user_id = request.session_hash
     index_dir = f"./{user_id}_index"
     has_rag_index = os.path.exists(index_dir)
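The timestamp added above exists to feed the 120-second budget check in the last hunk below. The gating pattern in isolation (the 120 s budget and 15 s floor come from this diff; the rest is schematic):

```python
import time

chat_start_time = time.time()
should_enhance = True
# ... streaming generation and self-reflection run here ...
remaining_time = 120 - (time.time() - chat_start_time)  # per-request budget from the diff
if remaining_time <= 15:
    # Not enough headroom left for a Gemini round-trip; skip the enhancer.
    should_enhance = False
```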
@@ -1712,6 +1846,9 @@ def stream_chat(
             yield updated_history
 
     partial_response = ""
+     reflection_data = None
+     reflection_note = ""
+     enhancement_note = ""
     try:
         for new_text in streamer:
             partial_response += new_text
@@ -1721,20 +1858,58 @@ def stream_chat(
         # ===== SELF-REFLECTION (Step 6) =====
         if not disable_agentic_reasoning and reasoning and reasoning.get("complexity") in ["complex", "multi_faceted"]:
             logger.info("🔍 Performing self-reflection on answer quality...")
-             reflection = self_reflection(partial_response, message, reasoning)
+             reflection_data = self_reflection(partial_response, message, reasoning)
 
-             # Add reflection note if score is low or improvements suggested
-             if reflection.get("overall_score", 10) < 7 or reflection.get("improvement_suggestions"):
-                 reflection_note = f"\n\n---\n**Self-Reflection** (Score: {reflection.get('overall_score', 'N/A')}/10)"
-                 if reflection.get("improvement_suggestions"):
-                     reflection_note += f"\n💡 Suggestions: {', '.join(reflection['improvement_suggestions'][:2])}"
-                 partial_response += reflection_note
-                 updated_history[-1]["content"] = partial_response
+             if reflection_data and (reflection_data.get("overall_score", 10) < 7 or reflection_data.get("improvement_suggestions")):
+                 reflection_note = f"\n\n---\n**Self-Reflection** (Score: {reflection_data.get('overall_score', 'N/A')}/10)"
+                 if reflection_data.get("improvement_suggestions"):
+                     reflection_note += f"\n💡 Suggestions: {', '.join(reflection_data['improvement_suggestions'][:2])}"
 
         # Add reasoning note if autonomous override occurred
+         base_response = partial_response
+ 
+         remaining_time = 120 - (time.time() - chat_start_time)
+         should_enhance = (
+             not disable_agentic_reasoning and
+             (
+                 final_use_rag or
+                 final_use_web_search or
+                 (reasoning is not None)
+             )
+         )
+         if remaining_time <= 15:
+             logger.warning("⏱️ Skipping Gemini enhancement to stay within max duration.")
+             should_enhance = False
+ 
+         if should_enhance:
+             logger.info("🧠 Launching Gemini MCP response enhancer for extended answer...")
+             enhanced_text = enhance_answer_with_gemini(
+                 query=message,
+                 initial_answer=base_response,
+                 rag_context=rag_context,
+                 web_context=web_context,
+                 reasoning=reasoning,
+                 reflection=reflection_data
+             )
+             if enhanced_text:
+                 partial_response = enhanced_text
+                 enhancement_note = "\n\n_Gemini MCP multi-step reasoning enhancer applied._"
+ 
+                 if reflection_data:
+                     partial_response += f"\n\n_Self-reflection score: {reflection_data.get('overall_score', 'N/A')}/10._"
+             else:
+                 if reflection_note:
+                     partial_response = base_response + reflection_note
+                 else:
+                     partial_response = base_response
+         else:
+             partial_response = base_response + reflection_note if reflection_note else base_response
+ 
         if reasoning_note:
             partial_response = reasoning_note + "\n\n" + partial_response
-             updated_history[-1]["content"] = partial_response
+ 
+         if enhancement_note:
+             partial_response += enhancement_note
 
         # Translate back if needed
         if needs_translation and partial_response:
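To trace the note plumbing in the hunk above, the enhanced path assembles the final assistant message in this order (illustrative strings, not verbatim app output):

```python
# Schematic of the final layout when enhancement succeeds; every string is made up.
reasoning_note = "Note: autonomous override enabled web search."
partial_response = "Enhanced, sectioned clinical answer from Gemini..."
partial_response += "\n\n_Self-reflection score: 6/10._"  # appended only if reflection ran
if reasoning_note:
    partial_response = reasoning_note + "\n\n" + partial_response
partial_response += "\n\n_Gemini MCP multi-step reasoning enhancer applied._"
print(partial_response)
```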
 