import os
import gradio as gr

from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack import Pipeline
from haystack.utils import Secret
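# ImageCaptioner is a local custom Haystack component (image_captioner.py) that wraps the BLIP image-to-text model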
from image_captioner import ImageCaptioner

description = """
# Captionate 📸
### Create Instagram captions for your pics!

* Upload your photo or select one from the examples
* Choose your model
* ✨ Captionate! ✨ 

It uses the [Salesforce/blip-image-captioning-base](https://huggingface.co/Salesforce/blip-image-captioning-base) model for the image-to-text caption generation task.

For Instagrammable captions, try different text-to-text models to see how they react to the same prompt.

Built by [Bilge Yucel](https://twitter.com/bilgeycl) using [Haystack](https://github.com/deepset-ai/haystack) 💙
"""

prompt_template = [ChatMessage.from_user("""
You will receive a descriptive text of a photo.
Try to generate a nice Instagram caption with a phrase rhyming with the text. Include emojis in the caption.
Just return one option without alternatives. Don't use hashtags.

Descriptive text: {{caption}};
Instagram Caption:
""")]

# Hugging Face API token, read from the environment (raises KeyError if unset)
hf_api_key = os.environ["HF_API_KEY"]

def generate_caption(image_file_path, model_name):
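    """Caption the image with BLIP, then rewrite the caption with the chosen chat model."""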
    # Image-to-text component (BLIP) that produces a plain description of the photo
    image_to_text = ImageCaptioner(model_name="Salesforce/blip-image-captioning-base")
    # Fills the chat prompt template with the generated description
    prompt_builder = ChatPromptBuilder(template=prompt_template, required_variables="*")
    # Text-to-text model selected in the UI, served via the HF serverless Inference API
    generator = HuggingFaceAPIChatGenerator(
        api_type="serverless_inference_api",
        api_params={"model": model_name},
        token=Secret.from_token(hf_api_key),
    )
    
    captioning_pipeline = Pipeline()
    captioning_pipeline.add_component("image_to_text", image_to_text)
    captioning_pipeline.add_component("prompt_builder", prompt_builder)
    captioning_pipeline.add_component("generator", generator)
    
    captioning_pipeline.connect("image_to_text.caption", "prompt_builder.caption")
    captioning_pipeline.connect("prompt_builder", "generator")

    # Run the pipeline on the uploaded image and return the generated caption text
    result = captioning_pipeline.run({"image_to_text": {"image_file_path": image_file_path}})
    return result["generator"]["replies"][0].text

with gr.Blocks(theme="soft") as demo:
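    # UI layout: description, image upload + model picker + examples, then the Captionate button and caption output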
    gr.Markdown(value=description)
    with gr.Row():
        image = gr.Image(type="filepath")
        with gr.Column():
            model_name = gr.Dropdown(
                ["deepseek-ai/DeepSeek-V3.1-Terminus", "meta-llama/Llama-3.3-70B-Instruct", "openai/gpt-oss-20b", "Qwen/Qwen3-4B-Instruct-2507"],
                value="deepseek-ai/DeepSeek-V3.1-Terminus",
                label="Choose your model!",
            )
            gr.Examples(["./whale.png", "./rainbow.jpeg", "./selfie.png"], inputs=image, label="Click on any example")
    submit_btn = gr.Button("✨ Captionate ✨")
    caption = gr.Textbox(label="Caption", show_copy_button=True)
    submit_btn.click(fn=generate_caption, inputs=[image, model_name], outputs=[caption])

if __name__ == "__main__":
    demo.launch()