|
|
import gradio as gr |
|
|
import tensorflow as tf |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
from io import BytesIO |
|
|
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input |
|
|
from tensorflow.keras.preprocessing.image import load_img, img_to_array |
|
|
from transformers import TFAutoModelWithLMHead, AutoTokenizer |
|
|
from stylegan2.tf_api import G_synthesis as StyleGAN2 |
|
|
|
|
|
|
|
|
|
|
|
# Checkpoint identifiers, named once so the tokenizer and the model are always
# loaded from the same T5 checkpoint.
T5_CHECKPOINT = "t5-large"
STYLEGAN2_WEIGHTS_PATH = 'models/stylegan2-ffhq-config-f.pkl'

# T5 tokenizer/model pair shared by every text-generation call in this file.
# NOTE(review): TFAutoModelWithLMHead is a legacy auto-class in transformers;
# confirm whether the installed version still provides it.
tokenizer = AutoTokenizer.from_pretrained(T5_CHECKPOINT)
model = TFAutoModelWithLMHead.from_pretrained(T5_CHECKPOINT)

# StyleGAN2 synthesis network, populated from the local weights file.
g = StyleGAN2()
g.load_weights(STYLEGAN2_WEIGHTS_PATH)

# InceptionV3 with ImageNet weights, used to analyse uploaded images.
inception_v3 = InceptionV3(weights='imagenet')
|
|
|
|
|
|
|
|
def preprocess_image(image, target_size=(299, 299)):
    """Convert a PIL image into a single-item batch for InceptionV3.

    Args:
        image: A ``PIL.Image.Image`` in any colour mode.
        target_size: ``(width, height)`` the image is resized to. The default
            of 299x299 is the input size required by InceptionV3 when it is
            built with its ImageNet classification head.

    Returns:
        The array produced by ``img_to_array`` for the resized RGB image,
        scaled by InceptionV3's ``preprocess_input`` and given a leading
        batch axis of length 1.
    """
    # Uploads may be grayscale, palettised or RGBA; the network expects
    # exactly three colour channels, so normalise the mode first.
    rgb_image = image.convert("RGB").resize(target_size)
    pixels = preprocess_input(img_to_array(rgb_image))
    return np.expand_dims(pixels, axis=0)
|
|
|
|
|
|
|
|
def generate_image(description):
    """Generate an image from a text description.

    The description is expanded into a caption by the T5 model, and the
    caption is passed together with a random latent vector to the StyleGAN2
    generator ``g``.

    Args:
        description: Free-text description of the desired image. ``None`` or
            a blank string is treated as an empty description.

    Returns:
        A ``PIL.Image.Image`` in RGB mode.
    """
    # Tolerate a missing description so the concatenation below cannot raise
    # TypeError on None.
    description = (description or "").strip()

    # NOTE(review): assumes g.input_shape[1] is the latent dimension -
    # confirm against the stylegan2.tf_api generator.
    z = tf.random.normal([1, g.input_shape[1]])

    # NOTE(review): the UI examples in this file already start with an
    # article ("a cute puppy"), so this prompt reads "... of a a cute puppy" -
    # confirm whether the trailing "a " is intended.
    text = "generate image of a " + description
    input_ids = tokenizer.encode(text, return_tensors='tf')
    output = model.generate(input_ids=input_ids)
    caption = tokenizer.decode(output[0], skip_special_tokens=True)

    # NOTE(review): presumably g accepts (latent, caption) - verify.
    image = g(z, caption)

    # Casting floats to uint8 does not saturate, so clip to the valid pixel
    # range before the cast to avoid corrupted pixels.
    pixels = np.clip(image.numpy()[0] * 255, 0, 255).astype(np.uint8)
    return Image.fromarray(pixels, mode='RGB')
|
|
|
|
|
|
|
|
def _as_pil_image(image_file):
    """Coerce an uploaded image (PIL, numpy array, file-like or path) to PIL."""
    if isinstance(image_file, Image.Image):
        return image_file
    if isinstance(image_file, np.ndarray):
        return Image.fromarray(image_file)
    if hasattr(image_file, "read"):
        return Image.open(BytesIO(image_file.read()))
    return Image.open(image_file)


def generate_description(image_file, top_k=3):
    """Produce a short text description of an uploaded image.

    The image is classified with InceptionV3, and the highest-scoring
    ImageNet labels are inserted into the T5 prompt so that the generated
    caption depends on the image content.

    Args:
        image_file: The uploaded image as a ``PIL.Image.Image``, a numpy
            array, a binary file-like object, or a path.
        top_k: Number of top ImageNet labels to include in the prompt.

    Returns:
        The decoded caption string produced by the T5 model.
    """
    # Function-scope import keeps this block self-contained; the module is
    # already loaded by the file's top-level imports.
    from tensorflow.keras.applications.inception_v3 import decode_predictions

    image = preprocess_image(_as_pil_image(image_file))
    predictions = inception_v3.predict(image)

    # decode_predictions yields (class_id, class_name, score) tuples per
    # sample; class names use underscores ("golden_retriever").
    labels = [
        name.replace("_", " ")
        for _, name, _ in decode_predictions(predictions, top=top_k)[0]
    ]

    prompt = "generate a description of an image of " + ", ".join(labels)
    input_text = tokenizer.encode(prompt, return_tensors="tf")
    output = model.generate(
        input_ids=input_text,
        attention_mask=tf.ones(input_text.shape),
        max_length=50,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
def image_generation(text_input, image_file):
    """Gradio callback: build an image from an upload or from text.

    Args:
        text_input: Text description; used only when no image is supplied.
        image_file: Uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        The image returned by ``generate_image``.
    """
    # No upload: the text prompt drives generation directly.
    if image_file is None:
        return generate_image(text_input)

    # Upload present: describe it first, then generate from that description.
    return generate_image(generate_description(image_file))
|
|
|
|
|
|
|
|
# Input components, in the order the callback receives them:
# (text_input, image_file).
inputs = [
    gr.inputs.Textbox(label="Input text"),
    # The label advertises the upload as optional, so declare it optional:
    # the form can then be submitted without an image and the callback
    # receives None for it.
    gr.inputs.Image(label="Upload an image (optional)", optional=True),
]

# Single output component showing the generated image.
outputs = gr.outputs.Image(label="Generated Image")
|
|
|
|
|
# Sample prompts offered in the UI; each becomes a one-element example row.
EXAMPLE_PROMPTS = [
    "a red sports car on a mountain road",
    "a cute puppy",
    "an elegant woman with a hat and a scarf",
    "a scenic beach with palm trees and blue water",
    "a golden retriever sitting on a couch",
    "a delicious pizza with pepperoni and cheese",
    "a futuristic city with tall buildings and flying cars",
    "an adorable kitten playing with a ball of yarn",
]

# Assemble the interface around the image_generation callback, then start it.
demo = gr.Interface(
    fn=image_generation,
    inputs=inputs,
    outputs=outputs,
    title="Image Generation from Text",
    description="Generate high-quality images from text descriptions.",
    theme="default",
    layout="vertical",
    examples=[[prompt] for prompt in EXAMPLE_PROMPTS],
)
demo.launch(debug=True)