Spaces:

albertmartinez
/

openalex-topic-classification

Running

App Files Files Community

albertmartinez committed on Jun 10

Commit

e36317e

1 Parent(s): 836e6dc

Upgrade gradio

Browse files

Files changed (3) hide show

README.md +63 -1
app.py +91 -16
requirements.txt +11 -2

README.md CHANGED Viewed

@@ -11,4 +11,66 @@ license: mit
 short_description: OpenAlex/bert-base-multilingual-cased-finetuned-openalex-top
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 short_description: OpenAlex/bert-base-multilingual-cased-finetuned-openalex-top
 ---
+# OpenAlex Topic Classification
+This application allows you to classify academic texts into different topics using machine learning models trained with OpenAlex data.
+## Features
+- Classification of academic texts into multiple topics
+- Uses two different models for more robust classification
+- Easy-to-use web interface
+- Support for structured title and abstract format
+## Requirements
+- Python 3.10+
+- Gradio 5.33.1
+- Transformers (Hugging Face)
+## Installation
+```bash
+pip install -r requirements.txt
+```
+## Usage
+1. Run the application:
+```bash
+python app.py
+```
+2. Open your browser at the address shown in the console (usually http://localhost:7860)
+3. Enter your text in the format:
+```
+<TITLE> Your title here
+<ABSTRACT> Your abstract here
+```
+4. Select the number of classifications you want to see (top_k)
+5. Click "Submit" to get the results
+## Models
+The application uses two different models:
+1. [OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract](https://huggingface.co/OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract)
+   - Based on BERT multilingual model
+   - Fine-tuned on OpenAlex data
+   - Supports multiple languages
+2. [albertmartinez/openalex-topic-classification-title-abstract](https://huggingface.co/albertmartinez/openalex-topic-classification-title-abstract)
+   - Based on BERT multilingual model
+   - Fine-tuned on OpenAlex data (https://huggingface.co/datasets/albertmartinez/openalex-topic-title-abstract)
+   - Supports multiple languages
+## License
+MIT
+## References
+- [OpenAlex](https://openalex.org/)
+- [Hugging Face Spaces](https://huggingface.co/docs/hub/spaces-config-reference)

app.py CHANGED Viewed

@@ -1,33 +1,108 @@
 import gradio as gr
 from transformers import pipeline
-# Define the models
-model = pipeline("text-classification",
-                 model="OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract")
-model2 = pipeline("text-classification",
-                 model="albertmartinez/openalex-topic-classification-title-abstract")
 def classify_text(text, top_k):
-    return [
-        {p["label"]: p["score"] for p in model(text, top_k=top_k, truncation=True, max_length=512)},
-        {p["label"]: p["score"] for p in model2(text, top_k=top_k, truncation=True, max_length=512)}
-    ]
 demo = gr.Interface(
     fn=classify_text,
-    inputs=[gr.Textbox(lines=5, label="Text", placeholder="<TITLE> {title}\n<ABSTRACT> {abstract}",
-                       value="<TITLE> {title}\n<ABSTRACT> {abstract}"),
-            gr.Number(label="top_k", value=10, precision=0)],
-    outputs=[gr.Label(label="Model 1: OpenAlex"),
-             gr.Label(label="Model 2: AlbertMartinez")],
     title="OpenAlex Topic Classification",
-    description="Enter a text and see the topic classification result!",
     flagging_mode="never",
     api_name="classify"
 )
 if __name__ == "__main__":
-    print(gr.__version__)
     demo.launch()

 import gradio as gr
 from transformers import pipeline
+import logging
+# Logging configuration: INFO-level root handler plus a module-scoped logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Hugging Face model pages, keyed by publisher
+MODEL_LINKS = {
+    "OpenAlex": "https://huggingface.co/OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract",
+    "albertmartinez": "https://huggingface.co/albertmartinez/openalex-topic-classification-title-abstract"
+}
+def _load_classifier(model_id):
+    """Return a text-classification pipeline for the given Hugging Face model id."""
+    return pipeline("text-classification", model=model_id)
+# Load both models once at import time so every request reuses them;
+# a failure is logged and re-raised so the app does not start half-initialised
+try:
+    model = _load_classifier("OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract")
+    model2 = _load_classifier("albertmartinez/openalex-topic-classification-title-abstract")
+except Exception as e:
+    logger.error(f"Error loading models: {str(e)}")
+    raise
+else:
+    logger.info("Models loaded successfully")
 def classify_text(text, top_k):
+    """
+    Classify the given text using two different models.
+
+    Args:
+        text (str): Text to classify in format "<TITLE> {title}\n<ABSTRACT> {abstract}"
+        top_k (int): Number of classifications to return per model
+
+    Returns:
+        list: Two dictionaries (first `model`, then `model2`), each mapping a
+            predicted label to its score
+
+    Raises:
+        gr.Error: If the input is invalid or either model fails; the original
+            exception is attached as the cause
+    """
+    try:
+        # Whitespace-only input carries nothing to classify, so treat it as empty
+        if not isinstance(text, str) or not text.strip():
+            raise ValueError("Input text must be a non-empty string")
+        if not isinstance(top_k, int) or top_k < 1:
+            raise ValueError("top_k must be a positive integer")
+        # Same call for both models; inputs longer than 512 tokens are truncated
+        return [
+            {p["label"]: p["score"] for p in classifier(text, top_k=top_k, truncation=True, max_length=512)}
+            for classifier in (model, model2)
+        ]
+    except Exception as e:
+        # logger.exception keeps the traceback in the log; the UI only gets the message
+        logger.exception("Classification error: %s", e)
+        raise gr.Error(f"Classification error: {str(e)}") from e
+# Example text (pre-filled in the textbox and reused as the first example row)
+EXAMPLE_TEXT = """<TITLE> Machine Learning Applications in Healthcare
+<ABSTRACT> This paper explores the use of machine learning algorithms in healthcare systems for disease prediction and diagnosis."""
+# Input widgets, created in the same order the interface lists them
+text_input = gr.Textbox(
+    lines=5,
+    label="Text",
+    placeholder="<TITLE> {title}\n<ABSTRACT> {abstract}",
+    value=EXAMPLE_TEXT
+)
+top_k_input = gr.Number(
+    label="Number of classifications (top_k)",
+    value=10,
+    precision=0,
+    minimum=1,
+    maximum=20
+)
+# One label widget per model
+result_labels = [
+    gr.Label(label="Model 1: OpenAlex"),
+    gr.Label(label="Model 2: albertmartinez")
+]
+# Description text with the model links substituted in
+DESCRIPTION = """
+    Enter a text with title and abstract to get its topic classification.
+    Input format:
+    ```
+    <TITLE> Your title here
+    <ABSTRACT> Your abstract here
+    ```
+    The system uses two different models to provide a more robust classification:
+    1. [OpenAlex Model]({openalex_link}): Based on BERT multilingual model, fine-tuned on OpenAlex data
+    2. [AlbertMartinez Model]({albert_link}): Based on BERT multilingual model, fine-tuned on [OpenAlex data](https://huggingface.co/datasets/albertmartinez/openalex-topic-title-abstract)
+    For more information about the models and their performance, visit their Hugging Face pages.
+    """.format(
+    openalex_link=MODEL_LINKS["OpenAlex"],
+    albert_link=MODEL_LINKS["albertmartinez"]
+)
+# Each example row is [text, top_k]
+EXAMPLES = [
+    [EXAMPLE_TEXT, 5],
+    ["<TITLE> Climate Change Impact\n<ABSTRACT> Study of global warming effects on biodiversity", 3]
+]
 demo = gr.Interface(
     fn=classify_text,
+    inputs=[text_input, top_k_input],
+    outputs=result_labels,
     title="OpenAlex Topic Classification",
+    description=DESCRIPTION,
+    examples=EXAMPLES,
     flagging_mode="never",
     api_name="classify"
 )
 if __name__ == "__main__":
+    # Record the installed Gradio version in the log before launching the app
+    logger.info(f"Gradio version: {gr.__version__}")
     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,2 +1,11 @@
-transformers
-torch

+gradio==5.33.1
+transformers>=4.41.0,<5.0.0
+torch==2.3.1
+torchvision==0.18.1
+torchaudio==2.3.1
+numpy==1.26.4
+sentencepiece>=0.1.99
+protobuf>=4.25.2
+accelerate>=0.27.2
+huggingface-hub>=0.20.3
+sentence-transformers>=3.3.1