justinkay committed on
Commit 3c56581 · 1 Parent(s): 991278f

Per-user subsampling

app.py CHANGED
@@ -74,61 +74,35 @@ print(f"Loaded {len(images_data)} images for the quiz")
 
 # Load image filenames list
 with open('images.txt', 'r') as f:
-    image_filenames = [line.strip() for line in f.readlines() if line.strip()]
+    full_image_filenames = [line.strip() for line in f.readlines() if line.strip()]
 
-# Initialize CODA with subsampled dataset
+# Initialize full dataset (will be subsampled per-user)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Load full dataset
 full_preds = torch.load("iwildcam_demo.pt").to(device)
 full_labels = torch.load("iwildcam_demo_labels.pt").to(device)
 
-# Subsample to balance classes
+# Pre-compute class indices for subsampling
 from collections import defaultdict
-class_to_indices = defaultdict(list)
+full_class_to_indices = defaultdict(list)
 for idx, label in enumerate(full_labels):
     class_idx = label.item()
-    class_to_indices[class_idx].append(idx)
+    full_class_to_indices[class_idx].append(idx)
 
 # Find minimum class size
-min_class_size = min(len(indices) for indices in class_to_indices.values())
-print(f"Subsampling to {min_class_size} images per class (total: {min_class_size * len(class_to_indices)} images)")
-
-# Randomly subsample each class
-subsampled_indices = []
-for class_idx in sorted(class_to_indices.keys()):
-    indices = class_to_indices[class_idx]
-    sampled = np.random.choice(indices, size=min_class_size, replace=False)
-    subsampled_indices.extend(sampled.tolist())
-
-# Sort indices to maintain order
-subsampled_indices.sort()
-
-# Create subsampled dataset
-subsampled_preds = full_preds[:, subsampled_indices, :]
-subsampled_labels = full_labels[subsampled_indices]
-image_filenames = [image_filenames[idx] for idx in subsampled_indices]
-
-# Create Dataset object with subsampled data
-dataset = Dataset.__new__(Dataset)
-dataset.preds = subsampled_preds
-dataset.labels = subsampled_labels
-dataset.device = device
+min_class_size = min(len(indices) for indices in full_class_to_indices.values())
+print(f"Each user will get {min_class_size} images per class (total: {min_class_size * len(full_class_to_indices)} images per user)")
 
+# Loss function for oracle
 loss_fn = LOSS_FNS['acc']
-oracle = Oracle(dataset, loss_fn=loss_fn)
 
-# Create CODA selector with default parameters
-coda_selector = CODA(dataset)
-
-print(f"Initialized CODA with {dataset.preds.shape[1]} samples and {dataset.preds.shape[0]} models")
-
-# Global state
+# Global state (will be set per-user in start_demo)
 current_image_info = None
-# coda_selector already initialized above
-# oracle already initialized above
-# dataset already initialized above
-# image_filenames already initialized above
+coda_selector = None
+oracle = None
+dataset = None
+image_filenames = None
 iteration_count = 0
 
 def get_model_predictions(chosen_idx):
@@ -804,9 +778,34 @@ with gr.Blocks(title="CODA: Wildlife Photo Classification Challenge",
 
     # Set up button interactions
    def start_demo():
-        global iteration_count, coda_selector
+        global iteration_count, coda_selector, dataset, oracle, image_filenames
+
         # Reset the demo state
         iteration_count = 0
+
+        # Subsample dataset for this user
+        subsampled_indices = []
+        for class_idx in sorted(full_class_to_indices.keys()):
+            indices = full_class_to_indices[class_idx]
+            sampled = np.random.choice(indices, size=min_class_size, replace=False)
+            subsampled_indices.extend(sampled.tolist())
+
+        # Sort indices to maintain order
+        subsampled_indices.sort()
+
+        # Create subsampled dataset for this user
+        subsampled_preds = full_preds[:, subsampled_indices, :]
+        subsampled_labels = full_labels[subsampled_indices]
+        image_filenames = [full_image_filenames[idx] for idx in subsampled_indices]
+
+        # Create Dataset object with subsampled data
+        dataset = Dataset.__new__(Dataset)
+        dataset.preds = subsampled_preds
+        dataset.labels = subsampled_labels
+        dataset.device = device
+
+        # Create oracle and CODA selector for this user
+        oracle = Oracle(dataset, loss_fn=loss_fn)
         coda_selector = CODA(dataset)
 
         image, status, predictions = get_next_coda_image()
@@ -817,9 +816,34 @@ with gr.Blocks(title="CODA: Wildlife Photo Classification Challenge",
         return image, status_html, predictions, prob_plot, acc_plot, gr.update(visible=False), "", gr.update(visible=True)
 
    def start_over():
-        global iteration_count, coda_selector
+        global iteration_count, coda_selector, dataset, oracle, image_filenames
+
         # Reset the demo state
         iteration_count = 0
+
+        # Subsample dataset for this user (new random subsample)
+        subsampled_indices = []
+        for class_idx in sorted(full_class_to_indices.keys()):
+            indices = full_class_to_indices[class_idx]
+            sampled = np.random.choice(indices, size=min_class_size, replace=False)
+            subsampled_indices.extend(sampled.tolist())
+
+        # Sort indices to maintain order
+        subsampled_indices.sort()
+
+        # Create subsampled dataset for this user
+        subsampled_preds = full_preds[:, subsampled_indices, :]
+        subsampled_labels = full_labels[subsampled_indices]
+        image_filenames = [full_image_filenames[idx] for idx in subsampled_indices]
+
+        # Create Dataset object with subsampled data
+        dataset = Dataset.__new__(Dataset)
+        dataset.preds = subsampled_preds
+        dataset.labels = subsampled_labels
+        dataset.device = device
+
+        # Create oracle and CODA selector for this user
+        oracle = Oracle(dataset, loss_fn=loss_fn)
         coda_selector = CODA(dataset)
 
         # Reset all displays
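
Note that start_demo() and start_over() now repeat the same subsampling block. A minimal sketch of how that logic could be factored into one helper is shown below; it is illustrative only, assumes the module-level names introduced in this commit (full_preds, full_labels, full_image_filenames, full_class_to_indices, min_class_size, device, loss_fn) plus the Dataset, Oracle, and CODA classes from app.py, and the helper name make_user_state is hypothetical.

    import numpy as np

    def make_user_state():
        # Hypothetical helper mirroring the per-user subsampling added in this
        # commit; full_preds, full_labels, full_image_filenames,
        # full_class_to_indices, min_class_size, device, loss_fn, Dataset,
        # Oracle, and CODA are assumed to exist at module level as in app.py.
        subsampled_indices = []
        for class_idx in sorted(full_class_to_indices.keys()):
            indices = full_class_to_indices[class_idx]
            # Draw a class-balanced sample without replacement
            sampled = np.random.choice(indices, size=min_class_size, replace=False)
            subsampled_indices.extend(sampled.tolist())
        subsampled_indices.sort()  # keep original dataset order

        # Slice predictions (models x samples x classes), labels, and filenames
        user_dataset = Dataset.__new__(Dataset)
        user_dataset.preds = full_preds[:, subsampled_indices, :]
        user_dataset.labels = full_labels[subsampled_indices]
        user_dataset.device = device
        user_filenames = [full_image_filenames[idx] for idx in subsampled_indices]

        oracle = Oracle(user_dataset, loss_fn=loss_fn)
        selector = CODA(user_dataset)
        return user_dataset, oracle, selector, user_filenames

With such a helper, start_demo() and start_over() would each reduce to a single call, e.g. dataset, oracle, coda_selector, image_filenames = make_user_state().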
images.txt CHANGED
@@ -33,7 +33,6 @@
 8eb30b2a-21bc-11ea-a13a-137349068a90.jpg
 99005e3e-21bc-11ea-a13a-137349068a90.jpg
 86e3b2fa-21bc-11ea-a13a-137349068a90.jpg
-97c99b7a-21bc-11ea-a13a-137349068a90.jpg
 8f988d76-21bc-11ea-a13a-137349068a90.jpg
 9593f3a0-21bc-11ea-a13a-137349068a90.jpg
 988d1cbc-21bc-11ea-a13a-137349068a90.jpg
@@ -188,7 +187,6 @@
 98536d82-21bc-11ea-a13a-137349068a90.jpg
 8f4dd7f4-21bc-11ea-a13a-137349068a90.jpg
 8f88c6e8-21bc-11ea-a13a-137349068a90.jpg
-95ddfafe-21bc-11ea-a13a-137349068a90.jpg
 8aaabb04-21bc-11ea-a13a-137349068a90.jpg
 8768fa28-21bc-11ea-a13a-137349068a90.jpg
 9505e7fe-21bc-11ea-a13a-137349068a90.jpg
iwildcam_demo.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1880c2a8d2dc0297fe1f87ce3eb1875c228e1d35b6c81008d81db7ee990f4c2f
-size 77843
+oid sha256:b44bb58d31c2f26a17a754861ed39d30f38afd0a6a263c358b5dcfafbe287b21
+size 77715
iwildcam_demo_annotations.json CHANGED
@@ -404,17 +404,6 @@
       "file_name": "86e3b2fa-21bc-11ea-a13a-137349068a90.jpg",
       "seq_frame_num": 4
     },
-    {
-      "seq_num_frames": 10,
-      "location": 218,
-      "datetime": "2013-05-11 20:29:09.000",
-      "id": "97c99b7a-21bc-11ea-a13a-137349068a90",
-      "seq_id": "3019d1ce-7d42-11eb-8fb5-0242ac1c0002",
-      "width": 1920,
-      "height": 1080,
-      "file_name": "97c99b7a-21bc-11ea-a13a-137349068a90.jpg",
-      "seq_frame_num": 8
-    },
     {
       "seq_num_frames": 10,
       "location": 408,
@@ -2166,17 +2155,6 @@
       "file_name": "8f88c6e8-21bc-11ea-a13a-137349068a90.jpg",
       "seq_frame_num": 2
     },
-    {
-      "seq_num_frames": 10,
-      "location": 151,
-      "datetime": "2013-02-03 01:47:54.000",
-      "id": "95ddfafe-21bc-11ea-a13a-137349068a90",
-      "seq_id": "302607f0-7d42-11eb-8fb5-0242ac1c0002",
-      "width": 1920,
-      "height": 1080,
-      "file_name": "95ddfafe-21bc-11ea-a13a-137349068a90.jpg",
-      "seq_frame_num": 3
-    },
     {
       "seq_num_frames": 10,
       "location": 151,
@@ -16359,11 +16337,6 @@
       "image_id": "98c3c686-21bc-11ea-a13a-137349068a90",
       "category_id": 10
     },
-    {
-      "id": "9a6547bc-21bc-11ea-a13a-137349068a90",
-      "image_id": "97c99b7a-21bc-11ea-a13a-137349068a90",
-      "category_id": 10
-    },
     {
       "id": "a1d760ac-21bc-11ea-a13a-137349068a90",
       "image_id": "873ffbfa-21bc-11ea-a13a-137349068a90",
@@ -17894,11 +17867,6 @@
       "image_id": "8f88c6e8-21bc-11ea-a13a-137349068a90",
       "category_id": 101
     },
-    {
-      "id": "9cafbc0a-21bc-11ea-a13a-137349068a90",
-      "image_id": "95ddfafe-21bc-11ea-a13a-137349068a90",
-      "category_id": 101
-    },
     {
       "id": "a36c4fd6-21bc-11ea-a13a-137349068a90",
       "image_id": "8aaabb04-21bc-11ea-a13a-137349068a90",
iwildcam_demo_images/95ddfafe-21bc-11ea-a13a-137349068a90.jpg DELETED

Git LFS Details

  • SHA256: 85327e363c6e7422675143412fb8b988e0e22c5bf4c21c941b6441b0a9e3f0b9
  • Pointer size: 131 Bytes
  • Size of remote file: 253 kB
iwildcam_demo_images/97c99b7a-21bc-11ea-a13a-137349068a90.jpg DELETED

Git LFS Details

  • SHA256: 9729db5cda5979069e507b7b4d9133bcb934beef36976d88a031330b2f166b63
  • Pointer size: 131 Bytes
  • Size of remote file: 225 kB
iwildcam_demo_labels.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2a97f8df143685109ade0335ae8c8afc0b74787a12d237a5647fdc4f75842ed
-size 11844
+oid sha256:c6c3f2f49fe5b1dce7f4c9e45380fa68fe4040f004d9bcf7ff73bb3323d096f7
+size 11780