Spaces: Running on Zero

Fix indent
Y Phung Nguyen committed · Commit 6ae14bf · Parent(s): 5d5697b
ui.py CHANGED

@@ -645,9 +645,9 @@ def create_demo():
 
                 # If request is None, create a mock request for compatibility
                 if request is None:
-                class MockRequest:
-                    session_hash = "anonymous"
-                request = MockRequest()
+                    class MockRequest:
+                        session_hash = "anonymous"
+                    request = MockRequest()
 
                 # Let stream_chat handle model loading (it's GPU-decorated and can load on-demand)
                 for result in stream_chat(
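The re-indented lines move the MockRequest shim into the body of the `if request is None:` check, where it was meant to live. A minimal sketch of the pattern, assuming a reduced handler signature (the real stream_chat_with_model_check in ui.py also takes the chat history and other UI state):

import gradio as gr

def stream_chat_with_model_check(message, request: gr.Request = None):
    # Gradio injects a gr.Request for events that arrive through the UI,
    # but the handler may also be called with request=None. The shim (as in
    # the diff) gives downstream code a uniform object exposing .session_hash.
    if request is None:
        class MockRequest:
            session_hash = "anonymous"
        request = MockRequest()
    # Hypothetical use: key per-session state by the session hash.
    return f"[{request.session_hash}] {message}"

Before the fix, the class body sat at the same level as the `if`, which is an IndentationError at import time; the commit restores the nesting.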
@@ -661,32 +661,32 @@ def create_demo():
                 return
 
             except Exception as e:
-            error_msg_lower = str(e).lower()
-            is_gpu_error = 'gpu task aborted' in error_msg_lower or 'gpu' in error_msg_lower or 'zerogpu' in error_msg_lower
-
-            if is_gpu_error and attempt < max_retries - 1:
-                delay = base_delay * (2 ** attempt)  # Exponential backoff: 2s, 4s
-                logger.warning(f"[STREAM_CHAT] GPU task aborted (attempt {attempt + 1}/{max_retries}), retrying after {delay}s...")
-                # Yield a message to user about retry
-                retry_msg = f"⏳ GPU task was interrupted. Retrying in {delay}s... (attempt {attempt + 1}/{max_retries})"
-                updated_history = history + [{"role": "assistant", "content": retry_msg}]
-                yield updated_history, ""
-                time.sleep(delay)
-                continue
-            else:
-                # Final error handling
-                logger.error(f"[STREAM_CHAT] Error in stream_chat_with_model_check: {e}")
-                import traceback
-                logger.error(f"[STREAM_CHAT] Full traceback: {traceback.format_exc()}")
-
-                if is_gpu_error:
-                    error_msg = f"⚠️ GPU task was aborted. This can happen if:\n- The request took too long\n- Multiple GPU requests conflicted\n- GPU quota was exceeded\n\nPlease try again or select a different model."
-                else:
-                    error_msg = f"⚠️ An error occurred: {str(e)[:200]}"
-
-                updated_history = history + [{"role": "assistant", "content": error_msg}]
-                yield updated_history, ""
-                return
+                error_msg_lower = str(e).lower()
+                is_gpu_error = 'gpu task aborted' in error_msg_lower or 'gpu' in error_msg_lower or 'zerogpu' in error_msg_lower
+
+                if is_gpu_error and attempt < max_retries - 1:
+                    delay = base_delay * (2 ** attempt)  # Exponential backoff: 2s, 4s
+                    logger.warning(f"[STREAM_CHAT] GPU task aborted (attempt {attempt + 1}/{max_retries}), retrying after {delay}s...")
+                    # Yield a message to user about retry
+                    retry_msg = f"⏳ GPU task was interrupted. Retrying in {delay}s... (attempt {attempt + 1}/{max_retries})"
+                    updated_history = history + [{"role": "assistant", "content": retry_msg}]
+                    yield updated_history, ""
+                    time.sleep(delay)
+                    continue
+                else:
+                    # Final error handling
+                    logger.error(f"[STREAM_CHAT] Error in stream_chat_with_model_check: {e}")
+                    import traceback
+                    logger.error(f"[STREAM_CHAT] Full traceback: {traceback.format_exc()}")
+
+                    if is_gpu_error:
+                        error_msg = f"⚠️ GPU task was aborted. This can happen if:\n- The request took too long\n- Multiple GPU requests conflicted\n- GPU quota was exceeded\n\nPlease try again or select a different model."
+                    else:
+                        error_msg = f"⚠️ An error occurred: {str(e)[:200]}"
+
+                    updated_history = history + [{"role": "assistant", "content": error_msg}]
+                    yield updated_history, ""
+                    return
 
     submit_button.click(
         fn=stream_chat_with_model_check,
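The except block references attempt, max_retries and base_delay, which are defined outside this hunk, so the retry loop itself is not shown. A minimal sketch of the scaffolding it implies, with the loop shape and default values as assumptions (the real handler is a generator that yields chat history updates rather than returning a value):

import time
import logging

logger = logging.getLogger(__name__)

def run_with_retry(gpu_task, max_retries=3, base_delay=2):
    # Retry wrapper implied by the diff: ZeroGPU can abort a task mid-run,
    # so GPU errors are retried with exponential backoff before giving up.
    for attempt in range(max_retries):
        try:
            return gpu_task()
        except Exception as e:
            is_gpu_error = 'gpu' in str(e).lower()
            if is_gpu_error and attempt < max_retries - 1:
                delay = base_delay * (2 ** attempt)  # 2s, 4s, ...
                logger.warning("GPU task aborted (attempt %d/%d), retrying in %ds",
                               attempt + 1, max_retries, delay)
                time.sleep(delay)
                continue
            raise  # non-GPU error, or retries exhausted

Sleeping between attempts presumably also gives the ZeroGPU scheduler time to release the previous allocation before the decorated function is queued again.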