add models dropdown
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
app.py CHANGED
@@ -428,10 +428,25 @@ from svgpathtools import parse_path
 # ======================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+# # ======================
+# # Base multi-dialect model (B2BERT)
+# # ======================
+# base_model_name = "Mohamedelzeftawy/b2bert_baseline"
+# base_model = AutoModelForSequenceClassification.from_pretrained(base_model_name).to(DEVICE)
+# base_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
 # ======================
-# Base multi-dialect model (B2BERT)
+# Multi-dialect model registry
 # ======================
-base_model_name = "Mohamedelzeftawy/b2bert_baseline"
+MODEL_CHOICES = {
+    "LahjatBERT": "Mohamedelzeftawy/b2bert_baseline",  # default (current)
+    "LahjatBERT-CL-ALDI": "Mohamedelzeftawy/b2bert_cl_aldi",
+    "LahjatBERT-CL-Cardinality": "Mohamedelzeftawy/b2bert_cl_cardinalty",
+}
+
+# Load default model at startup (LahjatBERT)
+_current_model_key = "LahjatBERT"
+base_model_name = MODEL_CHOICES[_current_model_key]
 base_model = AutoModelForSequenceClassification.from_pretrained(base_model_name).to(DEVICE)
 base_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
@@ -488,6 +503,24 @@ SVG_PATH = Path("assets/world-map.svg")
 SVG_NS = "http://www.w3.org/2000/svg"
 ET.register_namespace("", SVG_NS)
 
+def load_multidialect_model(model_key: str):
+    """
+    Load the selected multi-dialect model + tokenizer.
+    Uses global variables so the rest of your pipeline stays unchanged.
+    """
+    global base_model, base_tokenizer, base_model_name, _current_model_key
+
+    if model_key == _current_model_key:
+        return  # already loaded
+
+    repo = MODEL_CHOICES[model_key]
+    base_model_name = repo
+
+    base_model = AutoModelForSequenceClassification.from_pretrained(repo).to(DEVICE)
+    base_tokenizer = AutoTokenizer.from_pretrained(repo)
+
+    _current_model_key = model_key
+
 def _merge_style(old_style: str, updates: dict) -> str:
     """
     Merge CSS style strings (e.g., "fill:#000;stroke:#fff") with updates dict.
@@ -693,13 +726,15 @@ def predict_dialects_with_confidence(text, threshold=0.3):
     return df
 
 
-def predict_wrapper(text, threshold):
+
+def predict_wrapper(model_key, text, threshold):
     """
     Returns:
       df (table),
      summary (markdown),
       map_html (HTML)
     """
+    load_multidialect_model(model_key)
     df = predict_dialects_with_confidence(text, threshold)
 
     predicted_dialects = df[df["Prediction"] == "✓ Valid"]["Dialect"].tolist()
@@ -775,6 +810,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     with gr.Row():
         with gr.Column(scale=1):
+            model_dropdown = gr.Dropdown(
+                choices=list(MODEL_CHOICES.keys()),
+                value="LahjatBERT",
+                label="Model",
+                info="Select which LahjatBERT variant to use for prediction."
+            )
+
             text_input = gr.Textbox(
                 label="Arabic Text Input",
                 placeholder="أدخل نصًا عربيًا هنا... مثال: شلونك؟ / إزيك يا عم؟ / شو أخبارك؟",
@@ -870,7 +912,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     predict_button.click(
         fn=predict_wrapper,
-        inputs=[text_input, threshold_slider],
+        inputs=[model_dropdown, text_input, threshold_slider],
         outputs=[results_output, summary_output, map_output],
     )
 
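For anyone who wants to sanity-check the registry entries outside the Gradio UI, below is a minimal standalone sketch; it is not part of app.py. It assumes the Mohamedelzeftawy/* checkpoints listed in MODEL_CHOICES are public sequence-classification models and that per-dialect scores come from a sigmoid over the logits, mirroring the per-dialect threshold used by predict_dialects_with_confidence; the helper name smoke_test and the sigmoid/threshold choice are illustrative assumptions, not confirmed behavior of these checkpoints.

# Hypothetical smoke test for the MODEL_CHOICES registry (not part of app.py).
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_CHOICES = {
    "LahjatBERT": "Mohamedelzeftawy/b2bert_baseline",
    "LahjatBERT-CL-ALDI": "Mohamedelzeftawy/b2bert_cl_aldi",
    "LahjatBERT-CL-Cardinality": "Mohamedelzeftawy/b2bert_cl_cardinalty",
}

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def smoke_test(model_key: str, text: str, threshold: float = 0.3):
    """Load one registered checkpoint and return dialects scoring above the threshold."""
    repo = MODEL_CHOICES[model_key]
    tokenizer = AutoTokenizer.from_pretrained(repo)
    model = AutoModelForSequenceClassification.from_pretrained(repo).to(DEVICE).eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(DEVICE)
    with torch.no_grad():
        logits = model(**inputs).logits[0]

    # Sigmoid + cutoff mirrors the app's per-dialect threshold (assumption);
    # switch to softmax/argmax if a checkpoint turns out to be single-label.
    probs = torch.sigmoid(logits)
    labels = model.config.id2label
    return {labels[i]: float(p) for i, p in enumerate(probs) if p >= threshold}

if __name__ == "__main__":
    print(smoke_test("LahjatBERT", "شلونك؟"))

Running this once per key also confirms that every repo id in MODEL_CHOICES resolves before the dropdown ships.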