Spaces: Running on Zero

Fix indent
Y Phung Nguyen committed · Commit 6ae14bf · Parent(s): 5d5697b
ui.py CHANGED

@@ -645,9 +645,9 @@ def create_demo():
 
                 # If request is None, create a mock request for compatibility
                 if request is None:
-                class MockRequest:
-                    session_hash = "anonymous"
-                request = MockRequest()
+                    class MockRequest:
+                        session_hash = "anonymous"
+                    request = MockRequest()
 
                 # Let stream_chat handle model loading (it's GPU-decorated and can load on-demand)
                 for result in stream_chat(
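The re-indented lines move the MockRequest shim into the body of the `if request is None:` check, where it was meant to live. A minimal sketch of the pattern, assuming a reduced handler signature (the real stream_chat_with_model_check in ui.py also takes the chat history and other UI state):

import gradio as gr

def stream_chat_with_model_check(message, request: gr.Request = None):
    # Gradio injects a gr.Request for events that arrive through the UI,
    # but the handler may also be called with request=None. The shim (as in
    # the diff) gives downstream code a uniform object exposing .session_hash.
    if request is None:
        class MockRequest:
            session_hash = "anonymous"
        request = MockRequest()
    # Hypothetical use: key per-session state by the session hash.
    return f"[{request.session_hash}] {message}"

Before the fix, the class body sat at the same level as the `if`, which is an IndentationError at import time; the commit restores the nesting.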
@@ -661,32 +661,32 @@ def create_demo():
                 return
 
             except Exception as e:
-            error_msg_lower = str(e).lower()
-            is_gpu_error = 'gpu task aborted' in error_msg_lower or 'gpu' in error_msg_lower or 'zerogpu' in error_msg_lower
-
-            if is_gpu_error and attempt < max_retries - 1:
-                delay = base_delay * (2 ** attempt)  # Exponential backoff: 2s, 4s
-                logger.warning(f"[STREAM_CHAT] GPU task aborted (attempt {attempt + 1}/{max_retries}), retrying after {delay}s...")
-                # Yield a message to user about retry
-                retry_msg = f"⏳ GPU task was interrupted. Retrying in {delay}s... (attempt {attempt + 1}/{max_retries})"
-                updated_history = history + [{"role": "assistant", "content": retry_msg}]
-                yield updated_history, ""
-                time.sleep(delay)
-                continue
-            else:
-                # Final error handling
-                logger.error(f"[STREAM_CHAT] Error in stream_chat_with_model_check: {e}")
-                import traceback
-                logger.error(f"[STREAM_CHAT] Full traceback: {traceback.format_exc()}")
-
-                if is_gpu_error:
-                    error_msg = f"⚠️ GPU task was aborted. This can happen if:\n- The request took too long\n- Multiple GPU requests conflicted\n- GPU quota was exceeded\n\nPlease try again or select a different model."
-                else:
-                    error_msg = f"⚠️ An error occurred: {str(e)[:200]}"
-
-                updated_history = history + [{"role": "assistant", "content": error_msg}]
-                yield updated_history, ""
-                return
+                error_msg_lower = str(e).lower()
+                is_gpu_error = 'gpu task aborted' in error_msg_lower or 'gpu' in error_msg_lower or 'zerogpu' in error_msg_lower
+
+                if is_gpu_error and attempt < max_retries - 1:
+                    delay = base_delay * (2 ** attempt)  # Exponential backoff: 2s, 4s
+                    logger.warning(f"[STREAM_CHAT] GPU task aborted (attempt {attempt + 1}/{max_retries}), retrying after {delay}s...")
+                    # Yield a message to user about retry
+                    retry_msg = f"⏳ GPU task was interrupted. Retrying in {delay}s... (attempt {attempt + 1}/{max_retries})"
+                    updated_history = history + [{"role": "assistant", "content": retry_msg}]
+                    yield updated_history, ""
+                    time.sleep(delay)
+                    continue
+                else:
+                    # Final error handling
+                    logger.error(f"[STREAM_CHAT] Error in stream_chat_with_model_check: {e}")
+                    import traceback
+                    logger.error(f"[STREAM_CHAT] Full traceback: {traceback.format_exc()}")
+
+                    if is_gpu_error:
+                        error_msg = f"⚠️ GPU task was aborted. This can happen if:\n- The request took too long\n- Multiple GPU requests conflicted\n- GPU quota was exceeded\n\nPlease try again or select a different model."
+                    else:
+                        error_msg = f"⚠️ An error occurred: {str(e)[:200]}"
+
+                    updated_history = history + [{"role": "assistant", "content": error_msg}]
+                    yield updated_history, ""
+                    return
 
     submit_button.click(
         fn=stream_chat_with_model_check,
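The except block references attempt, max_retries and base_delay, which are defined outside this hunk, so the retry loop itself is not shown. A minimal sketch of the scaffolding it implies, with the loop shape and default values as assumptions (the real handler is a generator that yields chat history updates rather than returning a value):

import time
import logging

logger = logging.getLogger(__name__)

def run_with_retry(gpu_task, max_retries=3, base_delay=2):
    # Retry wrapper implied by the diff: ZeroGPU can abort a task mid-run,
    # so GPU errors are retried with exponential backoff before giving up.
    for attempt in range(max_retries):
        try:
            return gpu_task()
        except Exception as e:
            is_gpu_error = 'gpu' in str(e).lower()
            if is_gpu_error and attempt < max_retries - 1:
                delay = base_delay * (2 ** attempt)  # 2s, 4s, ...
                logger.warning("GPU task aborted (attempt %d/%d), retrying in %ds",
                               attempt + 1, max_retries, delay)
                time.sleep(delay)
                continue
            raise  # non-GPU error, or retries exhausted

Sleeping between attempts presumably also gives the ZeroGPU scheduler time to release the previous allocation before the decorated function is queued again.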