Upload 3 files
Browse files
- handler.py +9 -4
- requirements.txt +7 -3
handler.py
CHANGED
|
@@ -14,9 +14,10 @@ if IS_COMPILE:
|
|
| 14 |
#from huggingface_inference_toolkit.logging import logger
|
| 15 |
|
| 16 |
def compile_pipeline(pipe) -> Any:
|
| 17 |
-
pipe.transformer.fuse_qkv_projections()
|
| 18 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 19 |
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
|
|
|
|
|
|
| 20 |
return pipe
|
| 21 |
|
| 22 |
class EndpointHandler:
|
|
@@ -24,14 +25,15 @@ class EndpointHandler:
|
|
| 24 |
repo_id = "camenduru/FLUX.1-dev-diffusers"
|
| 25 |
#repo_id = "NoMoreCopyright/FLUX.1-dev-test"
|
| 26 |
dtype = torch.bfloat16
|
| 27 |
-
quantization_config = TorchAoConfig("
|
| 28 |
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
|
| 29 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
| 30 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
|
|
|
|
|
|
| 31 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
| 32 |
self.pipeline.to("cuda")
|
| 33 |
|
| 34 |
-
#@torch.inference_mode()
|
| 35 |
def __call__(self, data: Dict[str, Any]) -> Image.Image:
|
| 36 |
#logger.info(f"Received incoming request with {data=}")
|
| 37 |
|
|
@@ -63,4 +65,7 @@ class EndpointHandler:
|
|
| 63 |
guidance_scale=guidance_scale,
|
| 64 |
num_inference_steps=num_inference_steps,
|
| 65 |
generator=generator,
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
#from huggingface_inference_toolkit.logging import logger
|
| 15 |
|
| 16 |
def compile_pipeline(pipe) -> Any:
|
|
|
|
| 17 |
pipe.transformer.to(memory_format=torch.channels_last)
|
| 18 |
pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
| 19 |
+
pipe.vae.to(memory_format=torch.channels_last)
|
| 20 |
+
pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=False, dynamic=False, backend="inductor")
|
| 21 |
return pipe
|
| 22 |
|
| 23 |
class EndpointHandler:
|
|
|
|
| 25 |
repo_id = "camenduru/FLUX.1-dev-diffusers"
|
| 26 |
#repo_id = "NoMoreCopyright/FLUX.1-dev-test"
|
| 27 |
dtype = torch.bfloat16
|
| 28 |
+
quantization_config = TorchAoConfig("int8dq")
|
| 29 |
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=dtype)
|
| 30 |
#transformer = FluxTransformer2DModel.from_pretrained(repo_id, subfolder="transformer", torch_dtype=dtype, quantization_config=quantization_config).to("cuda")
|
| 31 |
self.pipeline = FluxPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=dtype, quantization_config=quantization_config)
|
| 32 |
+
self.pipeline.transformer.fuse_qkv_projections()
|
| 33 |
+
self.pipeline.vae.fuse_qkv_projections()
|
| 34 |
if IS_COMPILE: self.pipeline = compile_pipeline(self.pipeline)
|
| 35 |
self.pipeline.to("cuda")
|
| 36 |
|
|
|
|
| 37 |
def __call__(self, data: Dict[str, Any]) -> Image.Image:
|
| 38 |
#logger.info(f"Received incoming request with {data=}")
|
| 39 |
|
|
|
|
| 65 |
guidance_scale=guidance_scale,
|
| 66 |
num_inference_steps=num_inference_steps,
|
| 67 |
generator=generator,
|
| 68 |
+
output_type="pil",
|
| 69 |
+
).images[0]
|
| 70 |
+
|
| 71 |
+
|
requirements.txt
CHANGED
|
@@ -1,11 +1,15 @@
|
|
| 1 |
huggingface_hub
|
| 2 |
-
torch
|
| 3 |
torchvision
|
| 4 |
-
torchao
|
| 5 |
diffusers
|
| 6 |
peft
|
| 7 |
accelerate
|
| 8 |
transformers
|
| 9 |
numpy
|
| 10 |
scipy
|
| 11 |
-
Pillow
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
huggingface_hub
|
| 2 |
+
torch==2.4.0
|
| 3 |
torchvision
|
| 4 |
+
torchao==0.9.0
|
| 5 |
diffusers
|
| 6 |
peft
|
| 7 |
accelerate
|
| 8 |
transformers
|
| 9 |
numpy
|
| 10 |
scipy
|
| 11 |
+
Pillow
|
| 12 |
+
sentencepiece
|
| 13 |
+
protobuf
|
| 14 |
+
pytorch-lightning
|
| 15 |
+
triton
|