import os
import glob
import json
import pandas as pd
import gradio as gr
from typing import Optional, Dict, List
import time
from functools import lru_cache

ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
EVAL_RESULTS_DIR = os.path.join(ROOT_DIR, "eval-results")


@lru_cache(maxsize=1)
def _get_dataset_keys(results_dir: str) -> tuple:
    """Cache dataset keys to avoid repeated file scanning."""
    all_dataset_keys = set()
    if not os.path.isdir(results_dir):
        return tuple()
    for path in glob.glob(os.path.join(results_dir, "*.json")):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            res = data.get("results", {})
            all_dataset_keys.update(res.keys())
        except Exception:
            continue
    return tuple(sorted(all_dataset_keys))


def load_results(results_dir: str) -> pd.DataFrame:
    """
    Load and process evaluation results from JSON files.

    Dynamically handles any number of datasets with caching for performance.
    """
    rows = []
    all_dataset_keys = set(_get_dataset_keys(results_dir))
    if not all_dataset_keys:
        return pd.DataFrame(columns=["Model", "Avg PER", "Avg Duration (s)"])

    # Use dataset keys directly as display names
    dataset_display_names = {key: key for key in all_dataset_keys}

    # Single pass: extract data with optimized processing
    for path in glob.glob(os.path.join(results_dir, "*.json")):
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            cfg = data.get("config", {})
            res = data.get("results", {})
            model_name = cfg.get("model_name", "unknown")

            # Extract PER for each dataset dynamically; iterate in sorted
            # order so the column layout is deterministic across runs
            per_values = {}
            dur_values = []
            for dataset_key in sorted(all_dataset_keys):
                dataset_data = res.get(dataset_key, {})
                per_value = dataset_data.get("per") if dataset_data else None
                dur_value = dataset_data.get("avg_duration") if dataset_data else None
                display_name = dataset_display_names[dataset_key]
                per_values[f"PER {display_name}"] = per_value
                if dur_value is not None:
                    dur_values.append(dur_value)

            # Calculate average PER across all datasets
            per_vals = [v for v in per_values.values() if v is not None]
            avg_per = sum(per_vals) / len(per_vals) if per_vals else None

            # Calculate average duration
            avg_dur = sum(dur_values) / len(dur_values) if dur_values else None

            row = {
                "Model": model_name,
                "Avg PER": avg_per,
                "Avg Duration (s)": avg_dur,
                "_file": os.path.basename(path),
            }
            row.update(per_values)
            rows.append(row)
        except Exception:
            continue

    df = pd.DataFrame(rows)
    if df.empty:
        # Create default columns based on discovered datasets
        default_cols = ["Model", "Avg PER", "Avg Duration (s)"]
        for key in sorted(all_dataset_keys):
            display_name = dataset_display_names[key]
            default_cols.insert(-2, f"PER {display_name}")
        return pd.DataFrame(columns=default_cols)

    df = df.sort_values(by=["Avg PER"], ascending=True, na_position="last")
    return df.reset_index(drop=True)
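
# For reference: a minimal sketch of the JSON layout that load_results expects,
# inferred from the fields read above ("config.model_name", "results.<key>.per",
# "results.<key>.avg_duration"). The dataset keys and numbers here are purely
# illustrative, not taken from a real results file:
#
#   {
#       "config":  {"model_name": "wav2vec2-base"},
#       "results": {
#           "timit":        {"per": 0.112, "avg_duration": 0.42},
#           "common_voice": {"per": 0.154, "avg_duration": 0.57}
#       }
#   }
#
# Any dataset missing "per" simply contributes None to its PER column and is
# skipped when computing the averages.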

def build_interface():
    """Build the optimized Gradio interface for the phoneme leaderboard."""
    # Custom CSS for better styling
    custom_css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: 0 auto !important;
    }
    .leaderboard-header {
        text-align: center;
        margin-bottom: 2rem;
    }
    .stats-container {
        display: flex;
        gap: 1rem;
        margin-bottom: 1rem;
        flex-wrap: wrap;
    }
    .stat-card {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 1rem;
        border-radius: 10px;
        text-align: center;
        min-width: 150px;
        flex: 1;
    }
    .stat-value {
        font-size: 1.5rem;
        font-weight: bold;
        margin-bottom: 0.5rem;
    }
    .stat-label {
        font-size: 0.9rem;
        opacity: 0.9;
    }
    .table-container {
        margin-top: 1rem;
    }
    .refresh-btn {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        padding: 0.5rem 1rem;
        border-radius: 5px;
        cursor: pointer;
    }
    """

    with gr.Blocks(
        title="Phoneme Detection Leaderboard",
        css=custom_css,
        theme=gr.themes.Soft(),
    ) as demo:
        # Header section
        with gr.Column(elem_classes="leaderboard-header"):
            gr.Markdown("# 🎯 Phoneme Detection Leaderboard")
            gr.Markdown("Compare phoneme recognition models across different datasets")

        # Stats section
        with gr.Row(elem_classes="stats-container"):
            total_models = gr.HTML(
                '