from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from diffusers import StableDiffusionPipeline
from huggingface_hub import login
import torch
import io
import os

# Get token from environment (set in Hugging Face Spaces Secrets)
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable not set")

# Log in to Hugging Face
login(token=HF_TOKEN)

# Initialize FastAPI app
app = FastAPI()

# Dynamically select device (GPU if available, otherwise CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Stable Diffusion pipeline; float16 on GPU roughly halves memory usage
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    use_safetensors=True,  # safetensors loads faster and avoids pickle deserialization risks
).to(device)

# Enable memory optimizations when running on GPU
if device == "cuda":
    pipe.enable_attention_slicing()  # Trade a little speed for lower VRAM usage
    try:
        pipe.enable_xformers_memory_efficient_attention()  # Optional; requires xformers
    except Exception:
        pass  # xformers not installed; fall back to the default attention implementation

# Input schema
class PromptRequest(BaseModel):
    prompt: str
    num_inference_steps: int = 20
    guidance_scale: float = 7.5

# Image generation endpoint
@app.post("/generate-image/")
def generate_image(req: PromptRequest):
    try:
        # Run the diffusion pipeline and take the first generated image
        image = pipe(
            prompt=req.prompt,
            num_inference_steps=req.num_inference_steps,
            guidance_scale=req.guidance_scale,
        ).images[0]

        # Encode the PIL image as PNG bytes and stream it back to the client
        img_bytes = io.BytesIO()
        image.save(img_bytes, format="PNG")
        img_bytes.seek(0)
        return StreamingResponse(img_bytes, media_type="image/png")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Generation error: {str(e)}")

# Health check
@app.get("/health/")
def health_check():
    return {"status": "healthy"}

# Note: uvicorn is launched by Hugging Face Spaces automatically,
# so there is no need to start the server manually here.
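
# Example client call: a minimal sketch of how to exercise the /generate-image/
# endpoint from outside the Space. Kept commented out so it does not run at import
# time. SPACE_URL is a hypothetical placeholder (substitute your actual Space URL),
# and the snippet assumes the `requests` package is available on the client side.
#
# import requests
#
# SPACE_URL = "https://your-space.hf.space"  # hypothetical; replace with your Space URL
# resp = requests.post(
#     f"{SPACE_URL}/generate-image/",
#     json={
#         "prompt": "a watercolor painting of a fox",
#         "num_inference_steps": 20,
#         "guidance_scale": 7.5,
#     },
# )
# resp.raise_for_status()
# with open("output.png", "wb") as f:
#     f.write(resp.content)  # response body is the raw PNG bytes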