import { NextResponse } from 'next/server';
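
// Raw health payload returned by RunPod's serverless /health endpoint:
// job counters plus per-state worker counts.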
export interface RunPodHealth {
  jobs: {
    completed: number;
    failed: number;
    inProgress: number;
    inQueue: number;
    retried: number;
  };
  workers: {
    idle: number;
    // May be absent when no workers are starting; treated as 0 below.
    initializing?: number;
    running: number;
    throttled: number;
  };
}
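
// Simplified status payload returned to the frontend so it can show
// cold-start and queue feedback to the user.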
export interface StatusResponse {
  status: 'ready' | 'cold_start' | 'initializing' | 'processing' | 'unavailable';
  message: string;
  workers: {
    idle: number;
    running: number;
    initializing: number;
  };
  queue: {
    inProgress: number;
    inQueue: number;
  };
  estimatedWait?: number; // seconds
}

/**
 * Check RunPod endpoint health to provide user feedback during cold starts.
 */
export async function GET(): Promise<NextResponse<StatusResponse>> {
  const baseUrl = process.env.DEMO_MODEL_URL || 'http://localhost:8000/v1';
  const apiKey = process.env.DEMO_API_KEY || '';

  // Extract the RunPod endpoint ID from the vLLM base URL.
  // vLLM URL format:   https://api.runpod.ai/v2/{endpoint_id}/openai/v1
  // Health URL format: https://api.runpod.ai/v2/{endpoint_id}/health
  const runpodMatch = baseUrl.match(/https:\/\/api\.runpod\.ai\/v2\/([^/]+)/);
  if (!runpodMatch) {
    // Not a RunPod endpoint; assume it's always ready (local/other provider).
    return NextResponse.json({
      status: 'ready',
      message: 'Model server ready',
      workers: { idle: 1, running: 0, initializing: 0 },
      queue: { inProgress: 0, inQueue: 0 },
    });
  }

  const endpointId = runpodMatch[1];
  const healthUrl = `https://api.runpod.ai/v2/${endpointId}/health`;

  try {
    const response = await fetch(healthUrl, {
      method: 'GET',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      // Short timeout for the health check
      signal: AbortSignal.timeout(5000),
    });
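
    // A non-OK response means we can't determine worker state; report
    // 'unavailable' rather than throwing so the UI can degrade gracefully.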
    if (!response.ok) {
      return NextResponse.json({
        status: 'unavailable',
        message: 'Unable to check model status',
        workers: { idle: 0, running: 0, initializing: 0 },
        queue: { inProgress: 0, inQueue: 0 },
      });
    }

    const health: RunPodHealth = await response.json();

    const totalWorkers =
      health.workers.idle + health.workers.running + (health.workers.initializing || 0);
    const hasActiveWorkers = totalWorkers > 0;
    const hasIdleWorkers = health.workers.idle > 0;
    const isInitializing = (health.workers.initializing || 0) > 0;
    const hasQueuedJobs = health.jobs.inQueue > 0;
    const hasRunningJobs = health.jobs.inProgress > 0;
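
    // Map the raw worker/queue counts to a user-facing status. Order matters:
    // idle workers mean ready, then loading, then busy, then cold start.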
    let status: StatusResponse['status'];
    let message: string;
    let estimatedWait: number | undefined;

    if (hasIdleWorkers) {
      status = 'ready';
      message = 'Model ready';
    } else if (isInitializing) {
      status = 'initializing';
      message = 'Model loading...';
      estimatedWait = 30; // Typical vLLM model load time
    } else if (health.workers.running > 0) {
      status = 'processing';
      message = hasQueuedJobs
        ? `Processing (${health.jobs.inQueue} in queue)`
        : 'Processing request...';
      estimatedWait = hasQueuedJobs ? health.jobs.inQueue * 15 : undefined;
    } else if (!hasActiveWorkers && (hasQueuedJobs || hasRunningJobs)) {
      status = 'cold_start';
      message = 'Starting worker...';
      estimatedWait = 45; // Cold start + model load
    } else if (!hasActiveWorkers) {
      status = 'cold_start';
      message = 'Workers scaled to zero, will start on request';
      estimatedWait = 45;
    } else {
      // Fallback for any worker/queue combination not covered above.
      status = 'ready';
      message = 'Model ready';
    }

    return NextResponse.json({
      status,
      message,
      workers: {
        idle: health.workers.idle,
        running: health.workers.running,
        initializing: health.workers.initializing || 0,
      },
      queue: {
        inProgress: health.jobs.inProgress,
        inQueue: health.jobs.inQueue,
      },
      estimatedWait,
    });
  } catch (error) {
    console.error('Health check error:', error);
    // Network error might indicate cold start
    return NextResponse.json({
      status: 'cold_start',
      message: 'Connecting to model server...',
      workers: { idle: 0, running: 0, initializing: 0 },
      queue: { inProgress: 0, inQueue: 0 },
      estimatedWait: 45,
    });
  }
}
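
// Illustrative client-side usage (not part of this route): poll the status
// endpoint until the model reports ready, backing off using estimatedWait.
// Assumes this file lives at app/api/status/route.ts so it is served at
// /api/status; adjust the path to wherever the route actually lives.
//
//   async function waitForModel(): Promise<void> {
//     for (;;) {
//       const res = await fetch('/api/status');
//       const body: StatusResponse = await res.json();
//       if (body.status === 'ready') return;
//       const delayMs = Math.min((body.estimatedWait ?? 5) * 1000, 10_000);
//       await new Promise((resolve) => setTimeout(resolve, delayMs));
//     }
//   }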