| import gradio as gr |
| from transformers import pipeline |
| import logging |
|
|
| |
# Send INFO-and-above records to the root handler and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Hugging Face model-card URLs, keyed by publisher name.
# They are interpolated into the Markdown description of the Gradio interface.
MODEL_LINKS = {
    "OpenAlex": (
        "https://huggingface.co/OpenAlex/"
        "bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract"
    ),
    "albertmartinez": (
        "https://huggingface.co/albertmartinez/"
        "openalex-topic-classification-title-abstract"
    ),
}
|
|
| |
# Build both text-classification pipelines once, at import time, so every
# request reuses the already-loaded models. A load failure is logged and then
# re-raised: the app cannot serve requests without its models.
try:
    model = pipeline(
        task="text-classification",
        model="OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract",
    )
    model2 = pipeline(
        task="text-classification",
        model="albertmartinez/openalex-topic-classification-title-abstract",
    )
except Exception as load_error:
    logger.error(f"Error loading models: {str(load_error)}")
    raise
else:
    logger.info("Models loaded successfully")
|
|
def _label_scores(classifier, text, top_k):
    """Run one text-classification pipeline and map each returned label to its score."""
    # Inputs are truncated to 512 tokens, as in the original call.
    predictions = classifier(text, top_k=top_k, truncation=True, max_length=512)
    return {p["label"]: p["score"] for p in predictions}


def classify_text(text, top_k):
    """
    Classify the given text with both loaded models.

    Args:
        text (str): Text to classify in format "<TITLE> {title}\\n<ABSTRACT> {abstract}".
            Must contain at least one non-whitespace character.
        top_k (int): Number of classifications to return per model (>= 1).

    Returns:
        list: Two ``{label: score}`` dictionaries, one per model, in the
        order (OpenAlex model, albertmartinez model).

    Raises:
        gr.Error: If the input is invalid or either model fails. The original
            exception is chained as the cause.
    """
    try:
        # Whitespace-only text is treated as empty: there is nothing to classify.
        if not isinstance(text, str) or not text.strip():
            raise ValueError("Input text must be a non-empty string")

        # bool is a subclass of int, so True/False must be rejected explicitly.
        if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
            raise ValueError("top_k must be a positive integer")

        return [_label_scores(clf, text, top_k) for clf in (model, model2)]
    except Exception as e:
        # Log with traceback for operators; surface a concise message in the UI.
        logger.exception("Classification error: %s", e)
        raise gr.Error(f"Classification error: {e}") from e
|
|
| |
# Sample input in the "<TITLE> ...\n<ABSTRACT> ..." format the models expect.
# Used as the textbox's initial value and as the first example row.
EXAMPLE_TEXT = (
    "<TITLE> Machine Learning Applications in Healthcare\n"
    "<ABSTRACT> This paper explores the use of machine learning algorithms "
    "in healthcare systems for disease prediction and diagnosis."
)
|
|
# Gradio UI: one text input plus a top_k selector, and one Label output per model.
# Input components are constructed before output components, as in the original.
demo = gr.Interface(
    fn=classify_text,
    title="OpenAlex Topic Classification",
    description=f"""
Enter a text with title and abstract to get its topic classification.

Input format:
```
<TITLE> Your title here
<ABSTRACT> Your abstract here
```

The system uses two different models to provide a more robust classification:

1. [OpenAlex Model]({MODEL_LINKS['OpenAlex']}): Based on BERT multilingual model, fine-tuned on OpenAlex data
2. [AlbertMartinez Model]({MODEL_LINKS['albertmartinez']}): Based on BERT multilingual model, fine-tuned on [OpenAlex data](https://huggingface.co/datasets/albertmartinez/openalex-topic-title-abstract)

For more information about the models and their performance, visit their Hugging Face pages.
""",
    inputs=[
        gr.Textbox(
            label="Text",
            lines=5,
            value=EXAMPLE_TEXT,
            placeholder="<TITLE> {title}\n<ABSTRACT> {abstract}",
        ),
        # precision=0 makes the component deliver whole numbers to classify_text.
        gr.Number(
            label="Number of classifications (top_k)",
            minimum=1,
            maximum=20,
            value=10,
            precision=0,
        ),
    ],
    outputs=[
        gr.Label(label="Model 1: OpenAlex"),
        gr.Label(label="Model 2: albertmartinez"),
    ],
    examples=[
        [EXAMPLE_TEXT, 5],
        [
            "<TITLE> Climate Change Impact\n"
            "<ABSTRACT> Study of global warming effects on biodiversity",
            3,
        ],
    ],
    flagging_mode="never",
    api_name="classify",
)
|
|
if __name__ == "__main__":
    # Log the installed Gradio version before starting the server.
    gradio_version = gr.__version__
    logger.info(f"Gradio version: {gradio_version}")
    demo.launch()
|
|