Spaces:

Parth1503
/

GenMatch

Sleeping

App Files Files Community

Parth1503 commited on Oct 6, 2025

Commit

9517f59

verified ·

1 Parent(s): 0c2d792

Upload 3 files

Browse files

Files changed (3) hide show

Clip.py +166 -0
app.py +28 -0
requirements.txt +11 -0

Clip.py ADDED Viewed

	@@ -0,0 +1,166 @@

+from transformers import CLIPModel, CLIPProcessor
+from PIL import Image
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import cv2
+TF_ENABLE_ONEDNN_OPTS=0
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_name = "openai/clip-vit-base-patch32"
+model = CLIPModel.from_pretrained(model_name).to(device)
+processor = CLIPProcessor.from_pretrained(model_name)
+# This function extracts patches from an image and returns them along with their coordinates.
+def image_patch(img, patch_size =(100, 100), stride = 2):
+    img_w, img_h = img.size
+    print(f"Image dimensions: width={img_w}, height={img_h}")
+    patches = []
+    for i in range(0, img_h - patch_size[1] + 1, stride):
+        for j in range(0, img_w - patch_size[0] + 1, stride):
+            patch = img.crop((j, i, j + patch_size[0], i + patch_size[1]))
+            patches.append((patch, (j, i)))
+    return patches
+def bounding_box(img, heatmap):
+    img_copy = np.array(img).copy()
+    found = False
+    normalized = cv2.normalize(heatmap, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
+    _, binary = cv2.threshold(normalized, 200, 255, cv2.THRESH_BINARY)
+    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if contours:
+        largest = max(contours, key=cv2.contourArea)
+        x, y, w, h = cv2.boundingRect(largest)
+        cv2.rectangle(img_copy, (x, y), (x + w, y + h), (255, 0, 0), 2)
+        found = True
+    return img_copy, found
+# def main():
+#     print("Starting the object detection process...")
+#     img_path = r"C:\Users\sahas\OneDrive\Desktop\GenMatch\Photo of a dog.jpg"
+#     score_patches = []
+#     prompt = ["a photo of a human", "a close up of a dog's face"]
+#     try:
+#         # Open the image
+#         img = Image.open(img_path)
+#         print(f"Image opened successfully: {img_path}")
+#         # Extract patches from the image
+#         patches = image_patch(img)
+#         print(f"Extracted {len(patches)} patches from the image.")
+#         # Process all patches with the CLIP model to get the probabilities
+#         patch_batch = [p for p, (x, y) in patches]
+#         input = processor(text=prompt, images=patch_batch, return_tensors="pt", padding=True)
+#         input = {k: v.to(device) for k, v in input.items()}
+#         with torch.no_grad():
+#             output = model(**input)
+#         logits = output.logits_per_image
+#         prob = logits.softmax(dim=1)
+#         for i, (patch, (x, y)) in enumerate(patches):
+#             score = prob[i][0].item()
+#             score_patches.append((patch, (x, y), score))
+#         # Create heatmap based on scores
+#         img_h, img_w = img.size
+#         pat_h, pat_w = patches[0][0].size
+#         heatmap = np.zeros((img_h, img_w))
+#         for _, (x, y), score in score_patches:
+#             heatmap[y:y + pat_h, x:x + pat_w] += score
+#         fig, ax = plt.subplots()
+#         ax.imshow(img)
+#         ax.imshow(heatmap, cmap='viridis', alpha=0.6)
+#         ax.axis('off')
+#         plt.show()
+#         print("Genrating images with bounding box")
+#         box_img = bounding_box(img, heatmap)
+#         plt.imshow(box_img)
+#         plt.axis('off')
+#         plt.show()
+#     except FileNotFoundError:
+#         print(f"Error opening image: {img_path}")
+#         return
+# if __name__ == "__main__":
+#     main()
+def run_detection_pipeline(input_image, text_prompt):
+    print("Starting the object detection process...")
+    img = input_image
+    prompt = [text_prompt, "a photo of a blank background"]
+    score_patches = []
+    all_scores = []
+    patches = image_patch(img)
+    print(f"Extracted {len(patches)} patches from the image.")
+    patch_batch = [p for p, (x, y) in patches]
+    input_data = processor(text=prompt, images=patch_batch, return_tensors="pt", padding=True)
+    input_data = {k: v.to(device) for k, v in input_data.items()}
+    with torch.no_grad():
+        output = model(**input_data)
+    logits = output.logits_per_image
+    prob = logits.softmax(dim=1)
+    for i, (patch, (x, y)) in enumerate(patches):
+        score = prob[i][0].item()
+        score_patches.append((patch, (x, y), score))
+        all_scores.append(score)
+    confidence_threshold = 0.20
+    max_score = max(all_scores) if all_scores else 0
+    print(f"Max confidence score: {max_score:.4f}")
+    if max_score < confidence_threshold:
+        msg = f"Could not find '{text_prompt}' with enough confidence."
+        return msg, input_image
+    img_h, img_w = img.size
+    if not patches:
+        print("Warning: No patches were extracted from the image.")
+        return img
+    pat_h, pat_w = patches[0][0].size
+    heatmap = np.zeros((img_h, img_w))
+    for _, (x, y), score in score_patches:
+        heatmap[y:y + pat_h, x:x + pat_w] += score
+    print("Generating image with bounding box...")
+    box_img, found = bounding_box(img, heatmap)
+    if not found:
+        msg = "No object detected matching the prompt."
+    else:
+        msg = "Object detected and highlighted."
+    return msg, Image.fromarray(box_img)

app.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import gradio as gr
+from Clip import run_detection_pipeline
+print("Loading the application...")
+iface = gr.Interface(
+    fn=run_detection_pipeline,
+    inputs = [
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(label="Text Prompt", placeholder="e.g., a photo of a dog's face")
+    ],
+    outputs=[
+        gr.Textbox(label="Result"),
+        gr.Image(type="pil", label="Detection Result")
+        ],
+    title="GenMatch: Open-Vocabulary Object Detector",
+    description="Upload an image and type what you want to find. The model will draw a box around it.",
+    # Adding examples makes your app much easier to test and demonstrate!
+    # Create a folder named 'examples' and put some images inside it.
+    # examples=[
+    #     [r"C:\Users\sahas\OneDrive\Desktop\GenMatch\Bounding Box.png", "Object detected and highlighted."],
+    #     [r"C:\Users\sahas\OneDrive\Desktop\GenMatch\Photo of a dog.jpg", "Could not find '{prompt}' with enough confidence."],
+    # ]
+)
+# Launch the app
+if __name__ == "__main__":
+    iface.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+pandas
+numpy
+matplotlib
+scikit-learn
+seaborn
+tensorflow
+torch
+torchvision
+transformers
+pillow
+opencv-python