File size: 3,124 Bytes
9230ae1
 
 
 
359afe5
9230ae1
 
359afe5
 
 
 
9230ae1
 
 
 
 
 
 
 
 
359afe5
 
 
9230ae1
359afe5
 
 
9230ae1
359afe5
 
 
 
9230ae1
359afe5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9230ae1
 
 
 
 
 
 
 
 
 
359afe5
9230ae1
 
359afe5
 
 
9230ae1
 
359afe5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from dataclasses import dataclass, make_dataclass
from enum import Enum
import pandas as pd

from src.about import Tasks  # assume Tasks = [Task1, Task2, ...]

def fields(raw_class):
    """Return the class-level attribute values of *raw_class*, skipping dunders.

    For a dataclass produced by ``make_dataclass`` with per-field defaults
    (as done below for ``AutoEvalColumn``), the defaults live in the class
    ``__dict__``, so this returns the default ``ColumnContent`` instances in
    declaration order.
    """
    values = []
    for attr_name, attr_value in vars(raw_class).items():
        # Skip dunder entries (__init__, __dataclass_fields__, ...).
        if attr_name.startswith("__") and attr_name.endswith("__"):
            continue
        values.append(attr_value)
    return values

@dataclass
class ColumnContent:
    # Metadata for one leaderboard column: its label, render hint, and
    # visibility behavior in the table UI.
    name: str                    # column header displayed to the user
    type: str                    # render hint, e.g. "number", "markdown", "str", "bool"
    displayed_by_default: bool   # True if shown without the user opting in
    hidden: bool = False         # True to exclude from the visible column list (COLS)
    never_hidden: bool = False   # True if the user may not hide it (e.g. Rank, Model)

# -------------------------------------------------------------------
# Build leaderboard columns
# -------------------------------------------------------------------
# Each entry is a [field_name, field_type, default] triple, consumed later by
# make_dataclass to assemble AutoEvalColumn.
auto_eval_column_dict = []

# Identity columns that lead the table: Rank/Model/Badge.
auto_eval_column_dict.extend(
    [
        ["rank", ColumnContent, ColumnContent("Rank", "number", True, never_hidden=True)],
        ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
        ["badge", ColumnContent, ColumnContent("Badge", "str", True)],
    ]
)

# Per-dataset metrics
# Example: "PER ⬇️ (TIMIT)", "Avg Duration (s) (TIMIT)"
for task in Tasks:
    # NOTE(review): assumes Tasks is an Enum whose member names are short
    # dataset tags and whose values carry a `col_name` label — confirm in src.about.
    short_name = task.name
    metric_label = task.value.col_name  # e.g. "PER ⬇️"
    # Two columns per dataset; extend here if more metrics are added later.
    auto_eval_column_dict += [
        [
            f"{short_name}_per",
            ColumnContent,
            ColumnContent(f"{metric_label} ({short_name})", "number", True),
        ],
        [
            f"{short_name}_avg_duration",
            ColumnContent,
            ColumnContent(f"Avg Duration (s) ({short_name})", "number", True),
        ],
    ]

# Global average across datasets
auto_eval_column_dict.append(
    ["average", ColumnContent, ColumnContent("Avg PER ⬇️ (All)", "number", True)]
)

# Extra model info, driven by a (field_key, label, type, shown, hidden) table.
# None are displayed by default; "weight_type" is additionally hidden.
for field_key, label, col_type, shown, hide in [
    ("model_type", "Type", "str", False, False),
    ("architecture", "Architecture", "str", False, False),
    ("weight_type", "Weight type", "str", False, True),
    ("precision", "Precision", "str", False, False),
    ("license", "Hub License", "str", False, False),
    ("params", "#Params (B)", "number", False, False),
    ("likes", "Hub ❤️", "number", False, False),
    ("still_on_hub", "Available on the hub", "bool", False, False),
    ("revision", "Model sha", "str", False, False),
]:
    auto_eval_column_dict.append(
        [field_key, ColumnContent, ColumnContent(label, col_type, shown, hide)]
    )

# Final dataclass
# make_dataclass consumes the [name, type, default] triples accumulated above;
# frozen=True makes the resulting column-registry class immutable.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

# -------------------------------------------------------------------
# Example: Create dataframe header
# -------------------------------------------------------------------
# Collect the default ColumnContent instances once; reused for both lists.
_all_columns = fields(AutoEvalColumn)

# Names of the columns actually shown (hidden ones filtered out).
COLS = [column.name for column in _all_columns if not column.hidden]

# Empty dataframe whose header covers every column, hidden ones included.
df = pd.DataFrame(columns=[column.name for column in _all_columns])