LiamKhoaLe committed
Commit 410be5e · 1 Parent(s): 2a31cee

Enhance UI

Files changed (4)
  1. app.py +55 -4
  2. llama_integration.py +122 -0
  3. requirements.txt +3 -0
  4. search.py +134 -0
app.py CHANGED
@@ -13,6 +13,8 @@ from sentence_transformers.util import cos_sim
 from memory import MemoryManager
 from translation import translate_query
 from vlm import process_medical_image
+from search import search_web
+from llama_integration import process_search_query
 
 # ✅ Enable Logging for Debugging
 import logging
@@ -221,7 +223,7 @@ class RAGMedicalChatbot:
         self.model_name = model_name
         self.retrieve = retrieve_function
 
-    def chat(self, user_id: str, user_query: str, lang: str = "EN", image_diagnosis: str = "") -> str:
+    def chat(self, user_id: str, user_query: str, lang: str = "EN", image_diagnosis: str = "", search_mode: bool = False) -> str:
         # 0. Translate query if not EN, this help our RAG system
         if lang.upper() in {"VI", "ZH"}:
             user_query = translate_query(user_query, lang.lower())
@@ -232,6 +234,24 @@ class RAGMedicalChatbot:
         knowledge_base = "\n".join(retrieved_info)
         ## b. Diagnosis RAG from symptom query
         diagnosis_guides = retrieve_diagnosis_from_symptoms(user_query) # smart matcher
+
+        # 1.5. Search mode - web search and Llama processing
+        search_context = ""
+        url_mapping = {}
+        if search_mode:
+            logger.info("[SEARCH] Starting web search mode")
+            try:
+                # Search the web
+                search_results = search_web(user_query, num_results=5)
+                if search_results:
+                    # Process with Llama
+                    search_context, url_mapping = process_search_query(user_query, search_results)
+                    logger.info(f"[SEARCH] Found {len(search_results)} results, processed with Llama")
+                else:
+                    logger.warning("[SEARCH] No search results found")
+            except Exception as e:
+                logger.error(f"[SEARCH] Search failed: {e}")
+                search_context = ""
 
         # 2. Hybrid Context Retrieval: RAG + Recent History + Intelligent Selection
         contextual_chunks = memory.get_contextual_chunks(user_id, user_query, lang)
@@ -258,16 +278,46 @@ class RAGMedicalChatbot:
         # Symptom-Diagnosis prediction RAG
         if diagnosis_guides:
             parts.append("Symptom-based diagnosis guidance (if applicable):\n" + "\n".join(diagnosis_guides))
+
+        # 5. Search context with citation instructions
+        if search_context:
+            parts.append("Additional information from web search:\n" + search_context)
+            parts.append("IMPORTANT: When you use information from the web search results above, you MUST add a citation tag <#ID> immediately after the relevant content, where ID is the document number (1, 2, 3, etc.). For example: 'According to recent studies <#1>, this condition affects...'")
+
         parts.append(f"User's question: {user_query}")
         parts.append(f"Language to generate answer: {lang}")
         prompt = "\n\n".join(parts)
         logger.info(f"[LLM] Question query in `prompt`: {prompt}") # Debug out checking RAG on kb and history
         response = gemini_flash_completion(prompt, model=self.model_name, temperature=0.7)
+
+        # 6. Process citations and replace with URLs
+        if search_mode and url_mapping:
+            response = self._process_citations(response, url_mapping)
+
         # Store exchange + chunking
         if user_id:
             memory.add_exchange(user_id, user_query, response, lang=lang)
         logger.info(f"[LLM] Response on `prompt`: {response.strip()}") # Debug out base response
         return response.strip()
+
+    def _process_citations(self, response: str, url_mapping: Dict[int, str]) -> str:
+        """Replace citation tags with actual URLs"""
+        import re
+
+        # Find all citation tags like <#1>, <#2>, etc.
+        citation_pattern = r'<#(\d+)>'
+
+        def replace_citation(match):
+            doc_id = int(match.group(1))
+            if doc_id in url_mapping:
+                return f'<{url_mapping[doc_id]}>'
+            return match.group(0) # Keep original if URL not found
+
+        # Replace citations with URLs
+        processed_response = re.sub(citation_pattern, replace_citation, response)
+
+        logger.info(f"[CITATION] Processed citations, found {len(re.findall(citation_pattern, response))} citations")
+        return processed_response
 
 # ✅ Initialize Chatbot
 chatbot = RAGMedicalChatbot(model_name="gemini-2.5-flash", retrieve_function=retrieve_medical_info)
@@ -280,23 +330,24 @@ async def chat_endpoint(req: Request):
     query_raw = body.get("query")
     query = query_raw.strip() if isinstance(query_raw, str) else ""
     lang = body.get("lang", "EN")
+    search_mode = body.get("search", False)
     image_base64 = body.get("image_base64", None)
     img_desc = body.get("img_desc", "Describe and investigate any clinical findings from this medical image.")
     start = time.time()
     image_diagnosis = ""
     # LLM Only
     if not image_base64:
-        logger.info("[BOT] LLM scenario.")
+        logger.info(f"[BOT] LLM scenario. Search mode: {search_mode}")
     # LLM+VLM
     else:
         # If image is present → diagnose first
        safe_load = len(image_base64.encode("utf-8"))
         if safe_load > 5_000_000: # Img size safe processor
             return JSONResponse({"response": "⚠️ Image too large. Please upload smaller images (<5MB)."})
-        logger.info("[BOT] VLM+LLM scenario.")
+        logger.info(f"[BOT] VLM+LLM scenario. Search mode: {search_mode}")
         logger.info(f"[VLM] Process medical image size: {safe_load}, desc: {img_desc}, {lang}.")
         image_diagnosis = process_medical_image(image_base64, img_desc, lang)
-    answer = chatbot.chat(user_id, query, lang, image_diagnosis)
+    answer = chatbot.chat(user_id, query, lang, image_diagnosis, search_mode)
     elapsed = time.time() - start
     # Final
     return JSONResponse({"response": f"{answer}\n\n(Response time: {elapsed:.2f}s)"})
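As a quick reference, a minimal client-side sketch of exercising the new search flag. It is an illustration only: the route path ("/chat") and the "user_id" body field are assumptions, since neither appears in this diff.

```python
# Hypothetical request against the updated endpoint (path and user_id field assumed).
import requests

payload = {
    "user_id": "demo-user",  # assumed field name, not shown in this diff
    "query": "What are early symptoms of type 2 diabetes?",
    "lang": "EN",
    "search": True,  # read by the handler via body.get("search", False)
}
resp = requests.post("http://localhost:8000/chat", json=payload, timeout=120)
print(resp.json()["response"])
```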
 
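End to end, the citation flow works like this: the Gemini prompt instructs the model to emit `<#ID>` tags after any claim drawn from the web context, and `_process_citations` then rewrites each tag to the URL recorded in `url_mapping`. A standalone sketch of that rewrite, with an assumed mapping and response:

```python
# Standalone illustration of the <#ID> -> <URL> rewrite in _process_citations.
import re

url_mapping = {1: "https://example.org/diabetes-overview"}  # assumed example mapping
response = "Early symptoms include fatigue and increased thirst <#1>."

def replace_citation(match):
    doc_id = int(match.group(1))
    # Unknown document ids keep their original tag, mirroring the diff's fallback
    return f"<{url_mapping[doc_id]}>" if doc_id in url_mapping else match.group(0)

print(re.sub(r"<#(\d+)>", replace_citation, response))
# Early symptoms include fatigue and increased thirst <https://example.org/diabetes-overview>.
```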
llama_integration.py ADDED
@@ -0,0 +1,122 @@
+import os
+import requests
+import json
+import logging
+from typing import List, Dict, Tuple
+
+logger = logging.getLogger(__name__)
+
+class NVIDIALLamaClient:
+    def __init__(self):
+        self.api_key = os.getenv("NVIDIA_URI")
+        if not self.api_key:
+            raise ValueError("NVIDIA_URI environment variable not set")
+
+        self.base_url = "https://api.nvcf.nvidia.com/v2/nvcf/chat/completions"
+        self.model = "meta/llama-3.1-8b-instruct"
+
+    def generate_keywords(self, user_query: str) -> List[str]:
+        """Use Llama to generate search keywords from user query"""
+        try:
+            prompt = f"""Given this medical question: "{user_query}"
+
+Generate 3-5 specific search keywords that would help find relevant medical information online.
+Focus on medical terms, symptoms, conditions, treatments, or procedures mentioned.
+Return only the keywords separated by commas, no explanations.
+
+Keywords:"""
+
+            response = self._call_llama(prompt)
+
+            # Extract keywords from response
+            keywords = [kw.strip() for kw in response.split(',') if kw.strip()]
+            logger.info(f"Generated keywords: {keywords}")
+            return keywords[:5] # Limit to 5 keywords
+
+        except Exception as e:
+            logger.error(f"Failed to generate keywords: {e}")
+            return [user_query] # Fallback to original query
+
+    def summarize_documents(self, documents: List[Dict], user_query: str) -> Tuple[str, Dict[int, str]]:
+        """Use Llama to summarize documents and return summary with URL mapping"""
+        try:
+            # Create document summaries
+            doc_summaries = []
+            url_mapping = {}
+
+            for doc in documents:
+                doc_id = doc['id']
+                url_mapping[doc_id] = doc['url']
+
+                # Create a summary prompt for each document
+                summary_prompt = f"""Summarize this medical information in 2-3 sentences, focusing on details relevant to: "{user_query}"
+
+Document: {doc['title']}
+Content: {doc['content'][:1000]}...
+
+Summary:"""
+
+                summary = self._call_llama(summary_prompt)
+                doc_summaries.append(f"Document {doc_id}: {summary}")
+
+            # Combine all summaries
+            combined_summary = "\n\n".join(doc_summaries)
+
+            return combined_summary, url_mapping
+
+        except Exception as e:
+            logger.error(f"Failed to summarize documents: {e}")
+            return "", {}
+
+    def _call_llama(self, prompt: str) -> str:
+        """Make API call to NVIDIA Llama model"""
+        try:
+            headers = {
+                "Authorization": f"Bearer {self.api_key}",
+                "Content-Type": "application/json"
+            }
+
+            payload = {
+                "model": self.model,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": prompt
+                    }
+                ],
+                "temperature": 0.7,
+                "max_tokens": 1000
+            }
+
+            response = requests.post(
+                self.base_url,
+                headers=headers,
+                json=payload,
+                timeout=30
+            )
+
+            response.raise_for_status()
+            result = response.json()
+
+            return result['choices'][0]['message']['content'].strip()
+
+        except Exception as e:
+            logger.error(f"Llama API call failed: {e}")
+            raise
+
+def process_search_query(user_query: str, search_results: List[Dict]) -> Tuple[str, Dict[int, str]]:
+    """Process search results using Llama model"""
+    try:
+        llama_client = NVIDIALLamaClient()
+
+        # Generate search keywords
+        keywords = llama_client.generate_keywords(user_query)
+
+        # Summarize documents
+        summary, url_mapping = llama_client.summarize_documents(search_results, user_query)
+
+        return summary, url_mapping
+
+    except Exception as e:
+        logger.error(f"Failed to process search query: {e}")
+        return "", {}
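A hedged smoke test for the new module, feeding process_search_query canned results so no live web search is involved (the underlying Llama calls still require the NVIDIA_URI environment variable):

```python
# Sketch: exercise process_search_query with canned search results.
# The example URL and content are placeholders; NVIDIA_URI must be set.
from llama_integration import process_search_query

canned_results = [
    {"id": 1, "url": "https://example.org/flu", "title": "Influenza overview",
     "content": "Influenza is a contagious respiratory illness caused by viruses..."},
]
summary, url_mapping = process_search_query("how is the flu treated", canned_results)
print(summary)      # "Document 1: <2-3 sentence summary>"
print(url_mapping)  # {1: "https://example.org/flu"}
```

Note that process_search_query also calls generate_keywords, but its return value is currently unused; only the summary and URL mapping flow back into the chat prompt.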
requirements.txt CHANGED
@@ -21,3 +21,6 @@ uvicorn
 fastapi
 torch # Reduce model load with half-precision (float16) to reduce RAM usage
 psutil # CPU/RAM logger
+# **Web Search**
+requests
+beautifulsoup4
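Both additions are pure-Python packages, so they install cleanly alongside the existing stack with `pip install -r requirements.txt` (or directly via `pip install requests beautifulsoup4`).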
search.py ADDED
@@ -0,0 +1,134 @@
+import requests
+from bs4 import BeautifulSoup
+import re
+from urllib.parse import urljoin, urlparse
+import time
+import logging
+from typing import List, Dict, Tuple
+import os
+
+logger = logging.getLogger(__name__)
+
+class WebSearcher:
+    def __init__(self):
+        self.session = requests.Session()
+        self.session.headers.update({
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        })
+        self.max_results = 10
+        self.timeout = 10
+
+    def search_google(self, query: str, num_results: int = 10) -> List[Dict]:
+        """Search Google and return results with URLs and titles"""
+        try:
+            # Use DuckDuckGo as it's more reliable for scraping
+            return self.search_duckduckgo(query, num_results)
+        except Exception as e:
+            logger.error(f"Google search failed: {e}")
+            return []
+
+    def search_duckduckgo(self, query: str, num_results: int = 10) -> List[Dict]:
+        """Search DuckDuckGo and return results"""
+        try:
+            url = "https://html.duckduckgo.com/html/"
+            params = {
+                'q': query,
+                'kl': 'us-en'
+            }
+
+            response = self.session.get(url, params=params, timeout=self.timeout)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.content, 'html.parser')
+            results = []
+
+            # Find result links
+            result_links = soup.find_all('a', class_='result__a')
+
+            for link in result_links[:num_results]:
+                try:
+                    href = link.get('href')
+                    if href and href.startswith('http'):
+                        title = link.get_text(strip=True)
+                        if title and href:
+                            results.append({
+                                'url': href,
+                                'title': title,
+                                'content': '' # Will be filled later
+                            })
+                except Exception as e:
+                    logger.warning(f"Error parsing result: {e}")
+                    continue
+
+            return results
+
+        except Exception as e:
+            logger.error(f"DuckDuckGo search failed: {e}")
+            return []
+
+    def extract_content(self, url: str) -> str:
+        """Extract text content from a webpage"""
+        try:
+            response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.content, 'html.parser')
+
+            # Remove script and style elements
+            for script in soup(["script", "style"]):
+                script.decompose()
+
+            # Get text content
+            text = soup.get_text()
+
+            # Clean up text
+            lines = (line.strip() for line in text.splitlines())
+            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+            text = ' '.join(chunk for chunk in chunks if chunk)
+
+            # Limit content length
+            if len(text) > 2000:
+                text = text[:2000] + "..."
+
+            return text
+
+        except Exception as e:
+            logger.warning(f"Failed to extract content from {url}: {e}")
+            return ""
+
+    def search_and_extract(self, query: str, num_results: int = 5) -> List[Dict]:
+        """Search for query and extract content from top results"""
+        logger.info(f"Searching for: {query}")
+
+        # Get search results
+        search_results = self.search_duckduckgo(query, num_results)
+
+        # Extract content from each result
+        enriched_results = []
+        for i, result in enumerate(search_results):
+            try:
+                logger.info(f"Extracting content from {result['url']}")
+                content = self.extract_content(result['url'])
+
+                if content:
+                    enriched_results.append({
+                        'id': i + 1,
+                        'url': result['url'],
+                        'title': result['title'],
+                        'content': content
+                    })
+
+                # Add delay to be respectful
+                time.sleep(1)
+
+            except Exception as e:
+                logger.warning(f"Failed to process {result['url']}: {e}")
+                continue
+
+        logger.info(f"Successfully processed {len(enriched_results)} results")
+        return enriched_results
+
+def search_web(query: str, num_results: int = 5) -> List[Dict]:
+    """Main function to search the web and return enriched results"""
+    searcher = WebSearcher()
+    return searcher.search_and_extract(query, num_results)
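A quick manual check of the new search path (this performs live HTTP requests against DuckDuckGo's HTML endpoint, so it depends on network access and on that page's markup staying stable):

```python
# Sketch: live smoke test of search_web; requires network access.
from search import search_web

results = search_web("metformin common side effects", num_results=3)
for doc in results:
    print(doc["id"], doc["title"], doc["url"])
    print(doc["content"][:200], "...")
```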