from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from diffusers import StableDiffusionPipeline
from huggingface_hub import login
import torch
import io
import os

# Get token from environment (set in Hugging Face Spaces Secrets)
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable not set")

# Log in to Hugging Face
login(token=HF_TOKEN)

# Initialize FastAPI app
app = FastAPI()

# Dynamically select device (GPU if available, otherwise CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Stable Diffusion pipeline; float16 on GPU roughly halves memory usage
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    use_safetensors=True,  # safetensors loads faster and avoids pickle deserialization risks
).to(device)

# Enable memory optimizations when running on GPU
if device == "cuda":
    pipe.enable_attention_slicing()  # Trade a little speed for lower VRAM usage
    try:
        pipe.enable_xformers_memory_efficient_attention()  # Optional; requires xformers
    except Exception:
        pass  # xformers not installed; fall back to the default attention implementation

# Input schema
class PromptRequest(BaseModel):
    prompt: str
    num_inference_steps: int = 20
    guidance_scale: float = 7.5

# Image generation endpoint
@app.post("/generate-image/")
def generate_image(req: PromptRequest):
    try:
        # Run the diffusion pipeline and take the first generated image
        image = pipe(
            prompt=req.prompt,
            num_inference_steps=req.num_inference_steps,
            guidance_scale=req.guidance_scale,
        ).images[0]

        # Encode the PIL image as PNG bytes and stream it back to the client
        img_bytes = io.BytesIO()
        image.save(img_bytes, format="PNG")
        img_bytes.seek(0)
        return StreamingResponse(img_bytes, media_type="image/png")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Generation error: {str(e)}")

# Health check
@app.get("/health/")
def health_check():
    return {"status": "healthy"}

# Note: uvicorn is launched by Hugging Face Spaces automatically,
# so there is no need to start the server manually here.
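
# Example client call: a minimal sketch of how to exercise the /generate-image/
# endpoint from outside the Space. Kept commented out so it does not run at import
# time. SPACE_URL is a hypothetical placeholder (substitute your actual Space URL),
# and the snippet assumes the `requests` package is available on the client side.
#
# import requests
#
# SPACE_URL = "https://your-space.hf.space"  # hypothetical; replace with your Space URL
# resp = requests.post(
#     f"{SPACE_URL}/generate-image/",
#     json={
#         "prompt": "a watercolor painting of a fox",
#         "num_inference_steps": 20,
#         "guidance_scale": 7.5,
#     },
# )
# resp.raise_for_status()
# with open("output.png", "wb") as f:
#     f.write(resp.content)  # response body is the raw PNG bytes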