Spaces:
Running
Running
Update api.js
Browse files
api.js
CHANGED
|
@@ -202,97 +202,11 @@ app.post('api/generate/speech', async (req, res) =>{
|
|
| 202 |
})
|
| 203 |
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
const {voice, text, model} = req.query
|
| 209 |
-
console.log("Utterance Params", {voice, text, model})
|
| 210 |
-
//const outputFilename= await generateAudio(voice, text, model || "tts-1")
|
| 211 |
-
|
| 212 |
-
// We want the browser to cache this response, because there's no reason to TTS the same text-voice-model combination more than once
|
| 213 |
-
//await res.sendFile(path.resolve(outputFilename), { headers: { 'Content-Type': 'audio/mpeg', 'Cache-Control':'Max-Age=8640000' } });
|
| 214 |
})
|
| 215 |
-
|
| 216 |
-
// Generates speech for one voice/text/model combination and responds with the MP3 file.
//
// Request body: { voice, text, model } - model falls back to "tts-1" when omitted.
// Responses:    200 audio/mpeg on success, 400 when text is missing, 500 when generation fails.
//
// Express matches route strings against the request path, which always starts with "/",
// so the route must be registered with a leading slash to be reachable.
app.post('/api/generate/utterance', async (req, res) => {
  try {
    // req.body is undefined when no body parser handled the request; default to {} so the
    // destructuring cannot throw.
    const { voice, text, model } = req.body || {};
    if (!text) {
      res.status(400).send('Bad Request: Missing text');
      return;
    }

    const outputFilename = await generateAudio(voice, text, model || "tts-1");

    // We want the browser to cache this response, because there's no reason to TTS the same
    // text-voice-model combination more than once.
    // NOTE(review): browsers generally do not reuse cached responses to POST requests - confirm
    // this header has the intended effect for this route.
    res.sendFile(path.resolve(outputFilename), {
      headers: { 'Content-Type': 'audio/mpeg', 'Cache-Control': 'Max-Age=8640000' },
    });
  } catch (error) {
    // A rejected promise inside an async handler is otherwise left unhandled and the request
    // never receives a response.
    console.error('Error generating utterance:', error);
    if (!res.headersSent) {
      res.status(500).send('Internal Server Error');
    }
  }
});
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
// This returns a stream of SSE (text/event-stream) similar to a streaming response from an LLM.
// See example in public/client for how to consume the stream.
//
// Query:  api_key - optional; see the placeholder check below.
// Body:   { payload } - the script text to turn into speech.
// Events: audio_segment  - URL of each generated segment, in order
//         audio_complete - URL of the combined audio (served from cache when available)
//         error          - generation failed or produced no audio
app.post('/api/generate/speech/stream', async (req, res) => {
  // Writes a single SSE frame to the response.
  const sendEvent = (event, data) => res.write(`event: ${event}\ndata: ${data}\n\n`);
  // Builds the absolute URL under which a generated file path is announced to the client.
  const toUrl = (filePath) => `${req.protocol}://${req.get('host')}/${filePath}`;

  try {
    // NOTE(review): placeholder auth. A request without api_key defaults to the accepted value
    // and is therefore authorized; only a *different* key is rejected.
    const apiKey = req.query.api_key || 'their_api_key';
    if (apiKey !== 'their_api_key') {
      // Replace "their_api_key" with your actual method of managing API keys
      res.status(401).send('Unauthorized');
      return;
    }

    // req.body is undefined when no body parser handled the request; treat that as a missing
    // payload instead of letting the property access throw.
    const script = req.body ? req.body.payload : undefined;
    if (!script) {
      res.status(400).send('Bad Request: Missing payload');
      return;
    }
    // hash.update() below only accepts strings/buffers; reject other JSON values up front so the
    // client gets a 400 rather than a generic SSE error on a 200 response.
    if (typeof script !== 'string') {
      res.status(400).send('Bad Request: payload must be a string');
      return;
    }

    // Set headers for SSE
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    // The SHA-1 of the script text is the cache key for the combined audio file.
    const scriptHash = crypto.createHash('sha1').update(script).digest('hex');

    if (audioCache[scriptHash]) {
      // If audio is cached, send the final SSE with the combined audio URL
      const filePath = audioCache[scriptHash];
      console.log(filePath);

      sendEvent('audio_complete', toUrl(filePath));
      res.end();
      return;
    }

    const parsedSegments = parseScript(script);
    const audioSegments = [];

    // Segments are generated one at a time so the events reach the client in script order.
    for (const segment of parsedSegments) {
      const audioPath = await generateAudio(segment.speaker_name, segment.content);
      audioSegments.push(audioPath);

      // Send SSE with the URL of the generated audio segment
      sendEvent('audio_segment', toUrl(audioPath));
    }

    if (audioSegments.length === 0) {
      sendEvent('error', 'No audio generated');
      res.end();
      return;
    }

    // Concatenate audio files into one using FFmpeg
    const combinedAudioPath = path.join(MEDIA_FOLDER, `combined_${uuidv4()}.mp3`);
    await concatenateAudioFiles(audioSegments, combinedAudioPath);

    audioCache[scriptHash] = combinedAudioPath;
    console.log(combinedAudioPath);

    // Send SSE with the URL of the combined audio
    sendEvent('audio_complete', toUrl(combinedAudioPath));
    res.end();
  } catch (error) {
    console.error('Error generating speech:', error);
    // Failures are reported in-band as an SSE error event.
    sendEvent('error', 'Internal Server Error');
    res.end();
  }
});
|
| 295 |
-
|
| 296 |
|
| 297 |
//Image generation parameters
|
| 298 |
//response_format: image | url
|
|
|
|
| 202 |
})
|
| 203 |
|
| 204 |
|
| 205 |
+
// Simple liveness endpoint: responds 200 with the JSON body {"hello": "world"}.
app.get('/api/hello', (req, res) => {
  // res.send() takes a single body argument, so an options object passed after it is ignored.
  // res.json() serializes the object, sets the JSON Content-Type and ends the response, so no
  // await or explicit res.end() is needed.
  res.status(200).json({ hello: 'world' });
});
|
| 209 |
+
// This is the standard TTS endpoint: specify voice, text, and model. Voices come from OpenAI; use those names or the aliases in the lookup table.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
//Image generation parameters
|
| 212 |
//response_format: image | url
|