Spaces:
Running
Running
File size: 3,140 Bytes
aac5437 95efa40 497b535 95efa40 b8462d5 95efa40 8334aa7 b96a262 cfadd82 dc74825 b96a262 1e53050 e833e6f 497b535 cfadd82 497b535 8334aa7 497b535 8606aa2 6cd4cbd 497b535 102b698 418a286 497b535 95efa40 b96a262 95efa40 102b698 1e53050 497b535 1e53050 6bf02ba b8462d5 102b698 b8462d5 102b698 497b535 95efa40 b96a262 4d2d71e cfadd82 418a286 1e53050 497b535 1e53050 b96a262 95efa40 cfadd82 85f65db 95efa40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import os
import gradio as gr
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack import Pipeline
from haystack.utils import Secret
from image_captioner import ImageCaptioner
# Markdown blurb rendered at the top of the UI (see gr.Markdown below).
description = """
# Captionate 📸
### Create Instagram captions for your pics!
* Upload your photo or select one from the examples
* Choose your model
* ✨ Captionate! ✨
It uses [Salesforce/blip-image-captioning-base](https://huggingface.co/Salesforce/blip-image-captioning-base) model for image-to-text caption generation task.
For Instagrammable captions, try different text-to-text models to see how they react to the same prompt.
Built by [Bilge Yucel](https://twitter.com/bilgeycl) using [Haystack](https://github.com/deepset-ai/haystack) 💙
""" 
# Single user chat message; the {{caption}} Jinja variable is filled by
# ChatPromptBuilder with the output of the image-to-text component.
prompt_template =[ChatMessage.from_user("""
You will receive a descriptive text of a photo.
Try to generate a nice Instagram caption with a phrase rhyming with the text. Include emojis in the caption.
Just return one option without alternatives. Don't use hashtags.
Descriptive text: {{caption}};
Instagram Caption:
""")]
# Hugging Face Inference API token; raises KeyError at import time when the
# HF_API_KEY environment variable is unset (fail-fast by design).
hf_api_key = os.environ["HF_API_KEY"]
def generate_caption(image_file_path, model_name):
    """Produce an Instagram caption for an image via a Haystack pipeline.

    The pipeline chains three components: a BLIP image captioner, a chat
    prompt builder (using the module-level ``prompt_template``), and a chat
    generator backed by the Hugging Face serverless inference API.

    Args:
        image_file_path: Path to the image file to caption.
        model_name: Hugging Face model id used for the text-generation step.

    Returns:
        The generated caption text (first reply of the generator).
    """
    # Components are built fresh on every call so the chat generator always
    # reflects the model chosen in the UI dropdown.
    captioner = ImageCaptioner(model_name="Salesforce/blip-image-captioning-base")
    builder = ChatPromptBuilder(template=prompt_template, required_variables="*")
    chat_generator = HuggingFaceAPIChatGenerator(
        api_type="serverless_inference_api",
        api_params={"model": model_name},
        token=Secret.from_token(hf_api_key),
    )

    pipeline = Pipeline()
    pipeline.add_component("image_to_text", captioner)
    pipeline.add_component("prompt_builder", builder)
    pipeline.add_component("generator", chat_generator)
    # Wire the captioner's output into the prompt, and the prompt into the LLM.
    pipeline.connect("image_to_text.caption", "prompt_builder.caption")
    pipeline.connect("prompt_builder", "generator")

    output = pipeline.run({"image_to_text": {"image_file_path": image_file_path}})
    return output["generator"]["replies"][0].text
# --- Gradio UI -------------------------------------------------------------
# Layout: description on top, then a row with the image on the left and the
# model picker / button / output on the right. Statement order defines the
# on-screen order of the widgets.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown(value=description)
    with gr.Row():
        # type="filepath" makes the component hand a path string to
        # generate_caption rather than a numpy array.
        image = gr.Image(type="filepath")
        with gr.Column():
            model_name = gr.Dropdown(
                ["deepseek-ai/DeepSeek-V3.1-Terminus", "meta-llama/Llama-3.3-70B-Instruct", "openai/gpt-oss-20b", "Qwen/Qwen3-4B-Instruct-2507"],
                value="deepseek-ai/DeepSeek-V3.1-Terminus",
                label="Choose your model!"
            )
            # Clicking an example fills the image input above.
            gr.Examples(["./whale.png", "./rainbow.jpeg", "./selfie.png"], inputs=image, label="Click on any example")
            submit_btn = gr.Button("✨ Captionate ✨")
            caption = gr.Textbox(label="Caption", show_copy_button=True)
    # Wire the button to the pipeline: (image path, model id) -> caption text.
    submit_btn.click(fn=generate_caption, inputs=[image, model_name], outputs=[caption])

if __name__ == "__main__":
    demo.launch()