// Test dynamic max_tokens calculation functions
// Mock the functions (copied from openrouter.ts)

/**
 * Compute a safe `max_tokens` value so that input tokens + output tokens +
 * a safety buffer stay within the model's context window.
 *
 * @param {number} contextLength - Model's total context window in tokens.
 * @param {number} estimatedInputTokens - Estimated tokens already consumed by the prompt.
 * @param {number} [maxCompletionTokens] - Model's own completion-token cap, if known.
 * @returns {number} The max_tokens value to request (never below 1000).
 */
function calculateSafeMaxTokens(contextLength, estimatedInputTokens, maxCompletionTokens) {
  // Leave some buffer for safety (10% of context length or minimum 1000 tokens)
  const safetyBuffer = Math.max(Math.floor(contextLength * 0.1), 1000);

  // Calculate available tokens for output
  const availableTokens = contextLength - estimatedInputTokens - safetyBuffer;

  // Respect model's max completion tokens if available.
  // NOTE(review): `||` (not `??`) means a cap of 0 falls back to
  // availableTokens — presumably "0 = unknown"; confirm against openrouter.ts.
  const modelMaxTokens = maxCompletionTokens || availableTokens;

  // Use the smaller of available tokens or model's max completion tokens
  const safeMaxTokens = Math.min(availableTokens, modelMaxTokens);

  // Ensure we don't go below a reasonable minimum.
  // FIXME: when availableTokens is negative (prompt already near the context
  // limit), this 1000-token floor can still overflow the context window.
  // Behavior kept identical here — fix it in openrouter.ts first, then mirror.
  const finalMaxTokens = Math.max(safeMaxTokens, 1000);

  console.log('🧮 Token calculation:', {
    contextLength,
    estimatedInputTokens,
    safetyBuffer,
    availableTokens,
    modelMaxTokens,
    finalMaxTokens
  });

  return finalMaxTokens;
}

/**
 * Rough token estimate using the ~4-characters-per-token heuristic.
 *
 * @param {string} text - Input text to estimate.
 * @returns {number} Estimated token count (rounded up).
 */
function estimateTokenCount(text) {
  return Math.ceil(text.length / 4);
}

console.log("Testing dynamic max_tokens calculation:\n");

// Test the exact failing scenario from the error message
console.log("1. DeepSeek V3 - Failing Scenario (Original):");
const failingContextLength = 131072;
const failingInputTokens = 3499;
const failingMaxTokens = 128000; // What we were requesting before
console.log(` Context Length: ${failingContextLength.toLocaleString()}`);
console.log(` Input Tokens: ${failingInputTokens.toLocaleString()}`);
console.log(` Requested Out: ${failingMaxTokens.toLocaleString()}`);
console.log(` Total: ${(failingInputTokens + failingMaxTokens).toLocaleString()}`);
console.log(` Over Limit: ${(failingInputTokens + failingMaxTokens) - failingContextLength} tokens ❌\n`);

// Test with our new calculation
console.log("2. DeepSeek V3 - Fixed with Dynamic Calculation:");
const dynamicMaxTokens = calculateSafeMaxTokens(failingContextLength, failingInputTokens, 8192);
const newTotal = failingInputTokens + dynamicMaxTokens;
console.log(` Context Length: ${failingContextLength.toLocaleString()}`);
console.log(` Input Tokens: ${failingInputTokens.toLocaleString()}`);
console.log(` Dynamic Max: ${dynamicMaxTokens.toLocaleString()}`);
console.log(` New Total: ${newTotal.toLocaleString()}`);
console.log(` Within Limit: ${newTotal <= failingContextLength ? '✅' : '❌'}`);
console.log(` Safety Margin: ${failingContextLength - newTotal} tokens\n`);

// Test with smart HTML context (reduced input)
console.log("3. DeepSeek V3 - With Smart HTML Context Reduction:");
const reducedInputTokens = Math.floor(failingInputTokens * 0.3); // 70% reduction from smart context
const smartMaxTokens = calculateSafeMaxTokens(failingContextLength, reducedInputTokens, 8192);
const smartTotal = reducedInputTokens + smartMaxTokens;
console.log(` Context Length: ${failingContextLength.toLocaleString()}`);
console.log(` Reduced Input: ${reducedInputTokens.toLocaleString()} (was ${failingInputTokens})`);
console.log(` Smart Max: ${smartMaxTokens.toLocaleString()}`);
console.log(` Smart Total: ${smartTotal.toLocaleString()}`);
console.log(` Within Limit: ${smartTotal <= failingContextLength ? '✅' : '❌'}`);
console.log(` Safety Margin: ${failingContextLength - smartTotal} tokens\n`);

console.log("✅ Dynamic max_tokens calculation successfully prevents context overflow!");