Add BiRefNet v2
Browse files- app.py +11 -4
- image_processing_pipeline.py +10 -5
- utils/upload_to_dataset.py +55 -31
app.py
CHANGED
|
@@ -22,6 +22,7 @@ from db import (
|
|
| 22 |
fill_database_once,
|
| 23 |
compute_votes_per_model
|
| 24 |
)
|
|
|
|
| 25 |
|
| 26 |
# Load environment variables
|
| 27 |
load_dotenv()
|
|
@@ -32,7 +33,7 @@ google_analytics_tracking_id = os.getenv("GOOGLE_ANALYTICS_TRACKING_ID")
|
|
| 32 |
logging.basicConfig(level=logging.INFO)
|
| 33 |
|
| 34 |
# Load datasets and initialize database
|
| 35 |
-
dataset = load_dataset("bgsys/background-removal-
|
| 36 |
fill_database_once()
|
| 37 |
|
| 38 |
# Directory setup for JSON dataset
|
|
@@ -64,7 +65,7 @@ def update_rankings_table():
|
|
| 64 |
model_vote_counts = compute_votes_per_model()
|
| 65 |
try:
|
| 66 |
# Create a list of models to iterate over
|
| 67 |
-
models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0"]
|
| 68 |
rankings = []
|
| 69 |
|
| 70 |
for model in models:
|
|
@@ -104,8 +105,14 @@ def select_new_image(last_used_indices):
|
|
| 104 |
sample = dataset[random_index]
|
| 105 |
input_image = sample['original_image']
|
| 106 |
|
| 107 |
-
segmented_images = [sample.get(key) for key in [
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
if segmented_images.count(None) > 2:
|
| 111 |
logging.error("Not enough segmented images found for: %s. Resampling another image.", sample['original_filename'])
|
|
|
|
| 22 |
fill_database_once,
|
| 23 |
compute_votes_per_model
|
| 24 |
)
|
| 25 |
+
from utils.birefnet import iterate_over_directory as birefnet_iterate
|
| 26 |
|
| 27 |
# Load environment variables
|
| 28 |
load_dotenv()
|
|
|
|
| 33 |
logging.basicConfig(level=logging.INFO)
|
| 34 |
|
| 35 |
# Load datasets and initialize database
|
| 36 |
+
dataset = load_dataset("bgsys/background-removal-arena_v0_clothing_checkered", split='train')
|
| 37 |
fill_database_once()
|
| 38 |
|
| 39 |
# Directory setup for JSON dataset
|
|
|
|
| 65 |
model_vote_counts = compute_votes_per_model()
|
| 66 |
try:
|
| 67 |
# Create a list of models to iterate over
|
| 68 |
+
models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0", "BiRefNet v2"]
|
| 69 |
rankings = []
|
| 70 |
|
| 71 |
for model in models:
|
|
|
|
| 105 |
sample = dataset[random_index]
|
| 106 |
input_image = sample['original_image']
|
| 107 |
|
| 108 |
+
segmented_images = [sample.get(key) for key in [
|
| 109 |
+
'clipdrop_image', 'bria_image', 'photoroom_image',
|
| 110 |
+
'removebg_image', 'birefnet_image'
|
| 111 |
+
]]
|
| 112 |
+
segmented_sources = [
|
| 113 |
+
'Clipdrop', 'BRIA RMBG 2.0', 'Photoroom',
|
| 114 |
+
'RemoveBG', 'BiRefNet v2'
|
| 115 |
+
]
|
| 116 |
|
| 117 |
if segmented_images.count(None) > 2:
|
| 118 |
logging.error("Not enough segmented images found for: %s. Resampling another image.", sample['original_filename'])
|
image_processing_pipeline.py
CHANGED
|
@@ -14,6 +14,7 @@ from utils.clipdrop import iterate_over_directory as clipdrop_iterate
|
|
| 14 |
from utils.upload_to_dataset import upload_to_dataset
|
| 15 |
from utils.resize_processed_images import process_images as downsize_processed_images
|
| 16 |
from utils.add_checkered_background import process_directory as add_checkered_background_process
|
|
|
|
| 17 |
|
| 18 |
def check_env_variables():
|
| 19 |
"""Check if the necessary environment variables are loaded."""
|
|
@@ -22,7 +23,11 @@ def check_env_variables():
|
|
| 22 |
|
| 23 |
load_dotenv()
|
| 24 |
|
| 25 |
-
required_keys = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
missing_keys = [key for key in required_keys if not os.getenv(key)]
|
| 27 |
|
| 28 |
if missing_keys:
|
|
@@ -86,24 +91,24 @@ def main():
|
|
| 86 |
"removebg": os.path.join(bg_removed_dir, "removebg"),
|
| 87 |
"photoroom": os.path.join(bg_removed_dir, "photoroom"),
|
| 88 |
"bria": os.path.join(bg_removed_dir, "bria"),
|
| 89 |
-
"clipdrop": os.path.join(bg_removed_dir, "clipdrop")
|
|
|
|
| 90 |
}
|
| 91 |
|
| 92 |
for dir_path in bg_removal_dirs.values():
|
| 93 |
os.makedirs(dir_path, exist_ok=True)
|
| 94 |
|
| 95 |
# Use ThreadPoolExecutor to parallelize API calls
|
| 96 |
-
with ThreadPoolExecutor(max_workers=4) as executor:
|
| 97 |
executor.submit(removebg_iterate, input_resized_dir, bg_removal_dirs["removebg"])
|
| 98 |
executor.submit(photoroom_iterate, input_resized_dir, bg_removal_dirs["photoroom"])
|
| 99 |
executor.submit(bria_iterate, input_resized_dir, bg_removal_dirs["bria"])
|
| 100 |
executor.submit(clipdrop_iterate, input_resized_dir, bg_removal_dirs["clipdrop"])
|
| 101 |
-
|
| 102 |
|
| 103 |
print("Adding checkered background...")
|
| 104 |
add_checkered_background_process(bg_removed_dir, checkered_bg_dir)
|
| 105 |
|
| 106 |
-
|
| 107 |
if args.dataset_name:
|
| 108 |
upload_to_dataset(input_resized_dir, checkered_bg_dir, args.dataset_name, dry_run=not args.push_dataset)
|
| 109 |
else:
|
|
|
|
| 14 |
from utils.upload_to_dataset import upload_to_dataset
|
| 15 |
from utils.resize_processed_images import process_images as downsize_processed_images
|
| 16 |
from utils.add_checkered_background import process_directory as add_checkered_background_process
|
| 17 |
+
from utils.birefnet import process_directory as birefnet_iterate
|
| 18 |
|
| 19 |
def check_env_variables():
|
| 20 |
"""Check if the necessary environment variables are loaded."""
|
|
|
|
| 23 |
|
| 24 |
load_dotenv()
|
| 25 |
|
| 26 |
+
required_keys = [
|
| 27 |
+
'REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY',
|
| 28 |
+
'BRIA_API_TOKEN', 'CLIPDROP_API_KEY',
|
| 29 |
+
'FAL_KEY'
|
| 30 |
+
]
|
| 31 |
missing_keys = [key for key in required_keys if not os.getenv(key)]
|
| 32 |
|
| 33 |
if missing_keys:
|
|
|
|
| 91 |
"removebg": os.path.join(bg_removed_dir, "removebg"),
|
| 92 |
"photoroom": os.path.join(bg_removed_dir, "photoroom"),
|
| 93 |
"bria": os.path.join(bg_removed_dir, "bria"),
|
| 94 |
+
"clipdrop": os.path.join(bg_removed_dir, "clipdrop"),
|
| 95 |
+
"birefnet": os.path.join(bg_removed_dir, "birefnet")
|
| 96 |
}
|
| 97 |
|
| 98 |
for dir_path in bg_removal_dirs.values():
|
| 99 |
os.makedirs(dir_path, exist_ok=True)
|
| 100 |
|
| 101 |
# Use ThreadPoolExecutor to parallelize API calls
|
| 102 |
+
with ThreadPoolExecutor(max_workers=5) as executor:
|
| 103 |
executor.submit(removebg_iterate, input_resized_dir, bg_removal_dirs["removebg"])
|
| 104 |
executor.submit(photoroom_iterate, input_resized_dir, bg_removal_dirs["photoroom"])
|
| 105 |
executor.submit(bria_iterate, input_resized_dir, bg_removal_dirs["bria"])
|
| 106 |
executor.submit(clipdrop_iterate, input_resized_dir, bg_removal_dirs["clipdrop"])
|
| 107 |
+
executor.submit(birefnet_iterate, input_resized_dir, bg_removal_dirs["birefnet"])
|
| 108 |
|
| 109 |
print("Adding checkered background...")
|
| 110 |
add_checkered_background_process(bg_removed_dir, checkered_bg_dir)
|
| 111 |
|
|
|
|
| 112 |
if args.dataset_name:
|
| 113 |
upload_to_dataset(input_resized_dir, checkered_bg_dir, args.dataset_name, dry_run=not args.push_dataset)
|
| 114 |
else:
|
utils/upload_to_dataset.py
CHANGED
|
@@ -6,16 +6,22 @@ import pandas as pd
|
|
| 6 |
import argparse
|
| 7 |
from PIL import Image as PILImage
|
| 8 |
import sys
|
|
|
|
| 9 |
|
| 10 |
def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
# Define the dataset features with dedicated columns for each model
|
| 12 |
features = Features({
|
| 13 |
-
"original_image": Image(),
|
| 14 |
-
"clipdrop_image": Image(),
|
| 15 |
-
"bria_image": Image(),
|
| 16 |
-
"photoroom_image": Image(),
|
| 17 |
-
"removebg_image": Image(),
|
| 18 |
-
"original_filename": Value("string")
|
|
|
|
| 19 |
})
|
| 20 |
|
| 21 |
# Load image paths and metadata
|
|
@@ -23,7 +29,8 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
|
|
| 23 |
"clipdrop_image": None,
|
| 24 |
"bria_image": None,
|
| 25 |
"photoroom_image": None,
|
| 26 |
-
"removebg_image": None
|
|
|
|
| 27 |
})
|
| 28 |
|
| 29 |
# Walk into the original images folder
|
|
@@ -35,16 +42,15 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
|
|
| 35 |
data[f]["original_filename"] = f
|
| 36 |
|
| 37 |
# Check for corresponding images in processed directories
|
| 38 |
-
for source in ["clipdrop", "bria", "photoroom", "removebg"]:
|
| 39 |
-
|
| 40 |
-
for ext in ['.png', '.jpg']:
|
| 41 |
processed_image_filename = os.path.splitext(f)[0] + ext
|
| 42 |
source_image_path = os.path.join(processed_images_dir, source, processed_image_filename)
|
| 43 |
|
| 44 |
if os.path.exists(source_image_path):
|
| 45 |
data[f][f"{source}_image"] = source_image_path
|
| 46 |
-
break
|
| 47 |
-
|
| 48 |
# Convert the data to a dictionary of lists
|
| 49 |
dataset_dict = {
|
| 50 |
"original_image": [],
|
|
@@ -52,35 +58,47 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
|
|
| 52 |
"bria_image": [],
|
| 53 |
"photoroom_image": [],
|
| 54 |
"removebg_image": [],
|
|
|
|
| 55 |
"original_filename": []
|
| 56 |
}
|
| 57 |
|
| 58 |
errors = []
|
|
|
|
|
|
|
| 59 |
|
| 60 |
for filename, entry in data.items():
|
| 61 |
if "original_image" in entry:
|
| 62 |
-
# Check if all images have the same size
|
| 63 |
try:
|
| 64 |
original_size = PILImage.open(entry["original_image"]).size
|
| 65 |
-
|
|
|
|
|
|
|
| 66 |
if entry[source] is not None:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
if errors:
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
|
| 85 |
# Save the data dictionary to a CSV file for inspection
|
| 86 |
df = pd.DataFrame.from_dict(dataset_dict)
|
|
@@ -90,14 +108,20 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, d
|
|
| 90 |
dataset = Dataset.from_dict(dataset_dict, features=features)
|
| 91 |
|
| 92 |
if dry_run:
|
| 93 |
-
|
| 94 |
-
print(df.head()) # Display the first few rows for inspection
|
| 95 |
else:
|
| 96 |
-
|
| 97 |
api = HfApi()
|
| 98 |
dataset.push_to_hub(dataset_name, token=api.token, private=True)
|
|
|
|
| 99 |
|
| 100 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
parser = argparse.ArgumentParser(description="Upload images to a Hugging Face dataset.")
|
| 102 |
parser.add_argument("original_images_dir", type=str, help="Directory containing the original images.")
|
| 103 |
parser.add_argument("processed_images_dir", type=str, help="Directory containing the processed images with subfolders for each model.")
|
|
|
|
| 6 |
import argparse
|
| 7 |
from PIL import Image as PILImage
|
| 8 |
import sys
|
| 9 |
+
import logging
|
| 10 |
|
| 11 |
def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
|
| 12 |
+
"""Upload images to a Hugging Face dataset including BiRefNet results."""
|
| 13 |
+
|
| 14 |
+
logging.info(f"Starting dataset upload from {original_images_dir}")
|
| 15 |
+
|
| 16 |
# Define the dataset features with dedicated columns for each model
|
| 17 |
features = Features({
|
| 18 |
+
"original_image": Image(),
|
| 19 |
+
"clipdrop_image": Image(),
|
| 20 |
+
"bria_image": Image(),
|
| 21 |
+
"photoroom_image": Image(),
|
| 22 |
+
"removebg_image": Image(),
|
| 23 |
+
"birefnet_image": Image(),
|
| 24 |
+
"original_filename": Value("string")
|
| 25 |
})
|
| 26 |
|
| 27 |
# Load image paths and metadata
|
|
|
|
| 29 |
"clipdrop_image": None,
|
| 30 |
"bria_image": None,
|
| 31 |
"photoroom_image": None,
|
| 32 |
+
"removebg_image": None,
|
| 33 |
+
"birefnet_image": None
|
| 34 |
})
|
| 35 |
|
| 36 |
# Walk into the original images folder
|
|
|
|
| 42 |
data[f]["original_filename"] = f
|
| 43 |
|
| 44 |
# Check for corresponding images in processed directories
|
| 45 |
+
for source in ["clipdrop", "bria", "photoroom", "removebg", "birefnet"]:
|
| 46 |
+
for ext in ['.png', '.jpg', '.jpeg', '.webp']:
|
|
|
|
| 47 |
processed_image_filename = os.path.splitext(f)[0] + ext
|
| 48 |
source_image_path = os.path.join(processed_images_dir, source, processed_image_filename)
|
| 49 |
|
| 50 |
if os.path.exists(source_image_path):
|
| 51 |
data[f][f"{source}_image"] = source_image_path
|
| 52 |
+
break
|
| 53 |
+
|
| 54 |
# Convert the data to a dictionary of lists
|
| 55 |
dataset_dict = {
|
| 56 |
"original_image": [],
|
|
|
|
| 58 |
"bria_image": [],
|
| 59 |
"photoroom_image": [],
|
| 60 |
"removebg_image": [],
|
| 61 |
+
"birefnet_image": [],
|
| 62 |
"original_filename": []
|
| 63 |
}
|
| 64 |
|
| 65 |
errors = []
|
| 66 |
+
processed_count = 0
|
| 67 |
+
skipped_count = 0
|
| 68 |
|
| 69 |
for filename, entry in data.items():
|
| 70 |
if "original_image" in entry:
|
|
|
|
| 71 |
try:
|
| 72 |
original_size = PILImage.open(entry["original_image"]).size
|
| 73 |
+
valid_entry = True
|
| 74 |
+
|
| 75 |
+
for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image", "birefnet_image"]:
|
| 76 |
if entry[source] is not None:
|
| 77 |
+
try:
|
| 78 |
+
processed_size = PILImage.open(entry[source]).size
|
| 79 |
+
if processed_size != original_size:
|
| 80 |
+
errors.append(f"Size mismatch for {filename}: {source}")
|
| 81 |
+
valid_entry = False
|
| 82 |
+
except Exception as e:
|
| 83 |
+
errors.append(f"Error with {filename}: {source}")
|
| 84 |
+
valid_entry = False
|
| 85 |
|
| 86 |
+
if valid_entry:
|
| 87 |
+
for key in dataset_dict.keys():
|
| 88 |
+
if key in entry:
|
| 89 |
+
dataset_dict[key].append(entry[key])
|
| 90 |
+
processed_count += 1
|
| 91 |
+
else:
|
| 92 |
+
skipped_count += 1
|
| 93 |
+
|
| 94 |
+
except Exception as e:
|
| 95 |
+
errors.append(f"Error processing {filename}")
|
| 96 |
+
skipped_count += 1
|
| 97 |
|
| 98 |
if errors:
|
| 99 |
+
logging.warning(f"Encountered {len(errors)} errors during processing")
|
| 100 |
+
|
| 101 |
+
logging.info(f"Processed: {processed_count}, Skipped: {skipped_count}, Total: {processed_count + skipped_count}")
|
| 102 |
|
| 103 |
# Save the data dictionary to a CSV file for inspection
|
| 104 |
df = pd.DataFrame.from_dict(dataset_dict)
|
|
|
|
| 108 |
dataset = Dataset.from_dict(dataset_dict, features=features)
|
| 109 |
|
| 110 |
if dry_run:
|
| 111 |
+
logging.info("Dry run completed - dataset not pushed")
|
|
|
|
| 112 |
else:
|
| 113 |
+
logging.info(f"Pushing dataset to {dataset_name}")
|
| 114 |
api = HfApi()
|
| 115 |
dataset.push_to_hub(dataset_name, token=api.token, private=True)
|
| 116 |
+
logging.info("Upload completed successfully")
|
| 117 |
|
| 118 |
if __name__ == "__main__":
|
| 119 |
+
logging.basicConfig(
|
| 120 |
+
level=logging.INFO,
|
| 121 |
+
format='%(asctime)s - %(message)s',
|
| 122 |
+
datefmt='%Y-%m-%d %H:%M:%S'
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
parser = argparse.ArgumentParser(description="Upload images to a Hugging Face dataset.")
|
| 126 |
parser.add_argument("original_images_dir", type=str, help="Directory containing the original images.")
|
| 127 |
parser.add_argument("processed_images_dir", type=str, help="Directory containing the processed images with subfolders for each model.")
|