Spaces:

Rubaha
/

LangChain_Text_image

Running

App Files Files Community

Rubaha committed 24 days ago

Commit

f99016e

verified ·

1 Parent(s): 989fcbd

Create app.py

Browse files

Files changed (1) hide show

app.py +163 -0

app.py ADDED Viewed

	@@ -0,0 +1,163 @@

+import os
+import torch
+from PIL import Image
+# -------------------------
+# 1) Secrets (from environment variables)
+# -------------------------
+# Make sure you set these in your environment or a .env file
+# Example in Linux/macOS:
+#   export GROQ_API_KEY="your_key"
+#   export HUGGINGFACEHUB_API_TOKEN="your_token"
+os.environ["HF_TOKEN"] = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "")
+if not os.environ.get("GROQ_API_KEY"):
+    raise ValueError("❌ Missing GROQ_API_KEY in environment variables")
+if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
+    print("⚠️ HUGGINGFACEHUB_API_TOKEN missing. If a model is gated, it may fail to download.")
+# -------------------------
+# 2) Device config
+# -------------------------
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if device == "cuda" else torch.float32
+print("Device:", device)
+torch.backends.cuda.matmul.allow_tf32 = True
+# -------------------------
+# 3) LangChain (Groq)
+# -------------------------
+from langchain_groq import ChatGroq
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0.7)
+prompt_refiner = ChatPromptTemplate.from_template("""
+You are an expert AI prompt engineer for SDXL text-to-image generation.
+Convert the user's idea into a high-quality image prompt.
+Rules:
+- concise (max 60 words)
+- include subject, environment, lighting, composition, style
+- avoid brand names, watermarks, copyrighted characters
+- keep any style the user mentions (anime/realistic/etc.)
+User idea: {text}
+Final image prompt:
+""")
+caption_refiner = ChatPromptTemplate.from_template("""
+You are an expert image caption editor.
+Rewrite the caption in clear, neutral English (1–2 sentences). No identity guessing.
+Raw caption: {caption}
+Final caption:
+""")
+prompt_chain = prompt_refiner | llm | StrOutputParser()
+caption_chain = caption_refiner | llm | StrOutputParser()
+NEG_DEFAULT = "lowres, blurry, bad anatomy, extra fingers, watermark, text, logo, jpeg artifacts, deformed"
+# -------------------------
+# 4) SDXL pipeline
+# -------------------------
+from diffusers import StableDiffusionXLPipeline
+MODEL_ID = "playgroundai/playground-v2.5-1024px-aesthetic"
+pipe = StableDiffusionXLPipeline.from_pretrained(
+    MODEL_ID,
+    torch_dtype=dtype,
+    use_safetensors=True,
+    token=os.environ.get("HUGGINGFACEHUB_API_TOKEN") or None
+).to(device)
+pipe.enable_attention_slicing()
+try:
+    pipe.enable_vae_tiling()
+except Exception:
+    pass
+pipe.safety_checker = None
+def _gen(seed: int):
+    seed = int(seed)
+    return torch.Generator(device="cuda").manual_seed(seed) if device == "cuda" else torch.Generator().manual_seed(seed)
+@torch.inference_mode()
+def text_to_image(user_text, steps=30, guidance=6.5, seed=123, size=1024, negative_prompt=NEG_DEFAULT):
+    if not user_text or not str(user_text).strip():
+        raise ValueError("Please enter a non-empty prompt.")
+    enhanced = prompt_chain.invoke({"text": user_text}).strip()
+    g = _gen(seed)
+    img = pipe(
+        prompt=enhanced,
+        negative_prompt=negative_prompt,
+        num_inference_steps=int(steps),
+        guidance_scale=float(guidance),
+        height=int(size),
+        width=int(size),
+        generator=g
+    ).images[0]
+    return enhanced, img
+# -------------------------
+# 5) Image → Text (BLIP)
+# -------------------------
+from transformers import pipeline as hf_pipeline
+caption_model = hf_pipeline(
+    "image-to-text",
+    model="Salesforce/blip-image-captioning-base",
+    device=0 if device == "cuda" else -1
+)
+def image_to_text(img):
+    if img is None:
+        raise ValueError("Please upload an image.")
+    raw = caption_model(img)[0]["generated_text"].strip()
+    refined = caption_chain.invoke({"caption": raw}).strip()
+    return raw, refined
+# -------------------------
+# 6) Gradio App
+# -------------------------
+import gradio as gr
+with gr.Blocks(title="LangChain Text ↔ Image (SDXL, Secure)") as app:
+    gr.Markdown("## 🔁 LangChain Text ↔ Image (SDXL, Secret Key Based) — Better Quality on T4")
+    with gr.Tab("Text → Image (SDXL)"):
+        txt = gr.Textbox(label="Enter text prompt", placeholder="e.g., A futuristic hospital lab with AI robots, cinematic lighting, ultra-detailed")
+        with gr.Row():
+            size = gr.Radio([512, 1024], value=1024, label="Resolution (Use 512 if OOM)")
+            seed = gr.Number(value=123, label="Seed")
+        with gr.Row():
+            steps = gr.Slider(10, 50, value=30, step=1, label="Steps (Quality ↑ with steps)")
+            guidance = gr.Slider(1.0, 10.0, value=6.5, step=0.1, label="Guidance (5–8 best)")
+        negative = gr.Textbox(value=NEG_DEFAULT, label="Negative prompt (quality control)")
+        btn1 = gr.Button("Generate Image")
+        refined_prompt = gr.Textbox(label="Enhanced Prompt (LangChain)", interactive=False)
+        img = gr.Image(label="Generated Image")
+        btn1.click(text_to_image, [txt, steps, guidance, seed, size, negative], [refined_prompt, img])
+    with gr.Tab("Image → Text"):
+        img_in = gr.Image(type="pil", label="Upload image")
+        btn2 = gr.Button("Generate Caption")
+        raw = gr.Textbox(label="Raw Caption (BLIP)", interactive=False)
+        clean = gr.Textbox(label="Refined Caption (LangChain)", interactive=False)
+        btn2.click(image_to_text, img_in, [raw, clean])
+app.launch(share=True)