albertmartinez committed on
Commit
e36317e
·
1 Parent(s): 836e6dc

Upgrade gradio

Browse files
Files changed (3) hide show
  1. README.md +63 -1
  2. app.py +91 -16
  3. requirements.txt +11 -2
README.md CHANGED
@@ -11,4 +11,66 @@ license: mit
11
  short_description: OpenAlex/bert-base-multilingual-cased-finetuned-openalex-top
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  short_description: OpenAlex/bert-base-multilingual-cased-finetuned-openalex-top
12
  ---
13
 
14
+ # OpenAlex Topic Classification
15
+
16
+ This application allows you to classify academic texts into different topics using machine learning models trained with OpenAlex data.
17
+
18
+ ## Features
19
+
20
+ - Classification of academic texts into multiple topics
21
+ - Uses two different models for more robust classification
22
+ - Easy-to-use web interface
23
+ - Support for structured title and abstract format
24
+
25
+ ## Requirements
26
+
27
+ - Python 3.10+
28
+ - Gradio 5.33.1
29
+ - Transformers (Hugging Face)
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install -r requirements.txt
35
+ ```
36
+
37
+ ## Usage
38
+
39
+ 1. Run the application:
40
+ ```bash
41
+ python app.py
42
+ ```
43
+
44
+ 2. Open your browser at the address shown in the console (usually http://localhost:7860)
45
+
46
+ 3. Enter your text in the format:
47
+ ```
48
+ <TITLE> Your title here
49
+ <ABSTRACT> Your abstract here
50
+ ```
51
+
52
+ 4. Select the number of classifications you want to see (top_k)
53
+
54
+ 5. Click "Submit" to get the results
55
+
56
+ ## Models
57
+
58
+ The application uses two different models:
59
+ 1. [OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract](https://huggingface.co/OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract)
60
+ - Based on BERT multilingual model
61
+ - Fine-tuned on OpenAlex data
62
+ - Supports multiple languages
63
+
64
+ 2. [albertmartinez/openalex-topic-classification-title-abstract](https://huggingface.co/albertmartinez/openalex-topic-classification-title-abstract)
65
+ - Based on BERT multilingual model
66
+ - Fine-tuned on OpenAlex data (https://huggingface.co/datasets/albertmartinez/openalex-topic-title-abstract)
67
+ - Supports multiple languages
68
+
69
+ ## License
70
+
71
+ MIT
72
+
73
+ ## References
74
+
75
+ - [OpenAlex](https://openalex.org/)
76
+ - [Hugging Face Spaces](https://huggingface.co/docs/hub/spaces-config-reference)
app.py CHANGED
@@ -1,33 +1,108 @@
1
  import gradio as gr
2
  from transformers import pipeline
 
3
 
4
- # Define the models
5
- model = pipeline("text-classification",
6
- model="OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract")
7
 
8
- model2 = pipeline("text-classification",
9
- model="albertmartinez/openalex-topic-classification-title-abstract")
 
 
 
10
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def classify_text(text, top_k):
13
- return [
14
- {p["label"]: p["score"] for p in model(text, top_k=top_k, truncation=True, max_length=512)},
15
- {p["label"]: p["score"] for p in model2(text, top_k=top_k, truncation=True, max_length=512)}
16
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  demo = gr.Interface(
19
  fn=classify_text,
20
- inputs=[gr.Textbox(lines=5, label="Text", placeholder="<TITLE> {title}\n<ABSTRACT> {abstract}",
21
- value="<TITLE> {title}\n<ABSTRACT> {abstract}"),
22
- gr.Number(label="top_k", value=10, precision=0)],
23
- outputs=[gr.Label(label="Model 1: OpenAlex"),
24
- gr.Label(label="Model 2: AlbertMartinez")],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  title="OpenAlex Topic Classification",
26
- description="Enter a text and see the topic classification result!",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  flagging_mode="never",
28
  api_name="classify"
29
  )
30
 
31
  if __name__ == "__main__":
32
- print(gr.__version__)
33
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import logging
4
 
5
+ # Logging configuration
6
+ logging.basicConfig(level=logging.INFO)
7
+ logger = logging.getLogger(__name__)
8
 
9
+ # Model information
10
+ MODEL_LINKS = {
11
+ "OpenAlex": "https://huggingface.co/OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract",
12
+ "albertmartinez": "https://huggingface.co/albertmartinez/openalex-topic-classification-title-abstract"
13
+ }
14
 
15
+ # Load models only once
16
+ try:
17
+ model = pipeline("text-classification",
18
+ model="OpenAlex/bert-base-multilingual-cased-finetuned-openalex-topic-classification-title-abstract")
19
+ model2 = pipeline("text-classification",
20
+ model="albertmartinez/openalex-topic-classification-title-abstract")
21
+ logger.info("Models loaded successfully")
22
+ except Exception as e:
23
+ logger.error(f"Error loading models: {str(e)}")
24
+ raise
25
 
26
  def classify_text(text, top_k):
27
+ """
28
+ Classify the given text using two different models.
29
+
30
+ Args:
31
+ text (str): Text to classify in format "<TITLE> {title}\n<ABSTRACT> {abstract}"
32
+ top_k (int): Number of classifications to return
33
+
34
+ Returns:
35
+ tuple: Two dictionaries with classifications from each model
36
+ """
37
+ try:
38
+ if not text or not isinstance(text, str):
39
+ raise ValueError("Input text must be a non-empty string")
40
+
41
+ if not isinstance(top_k, int) or top_k < 1:
42
+ raise ValueError("top_k must be a positive integer")
43
+
44
+ results = [
45
+ {p["label"]: p["score"] for p in model(text, top_k=top_k, truncation=True, max_length=512)},
46
+ {p["label"]: p["score"] for p in model2(text, top_k=top_k, truncation=True, max_length=512)}
47
+ ]
48
+ return results
49
+ except Exception as e:
50
+ logger.error(f"Classification error: {str(e)}")
51
+ raise gr.Error(f"Classification error: {str(e)}")
52
+
53
+ # Example text
54
+ EXAMPLE_TEXT = """<TITLE> Machine Learning Applications in Healthcare
55
+ <ABSTRACT> This paper explores the use of machine learning algorithms in healthcare systems for disease prediction and diagnosis."""
56
 
57
  demo = gr.Interface(
58
  fn=classify_text,
59
+ inputs=[
60
+ gr.Textbox(
61
+ lines=5,
62
+ label="Text",
63
+ placeholder="<TITLE> {title}\n<ABSTRACT> {abstract}",
64
+ value=EXAMPLE_TEXT
65
+ ),
66
+ gr.Number(
67
+ label="Number of classifications (top_k)",
68
+ value=10,
69
+ precision=0,
70
+ minimum=1,
71
+ maximum=20
72
+ )
73
+ ],
74
+ outputs=[
75
+ gr.Label(label="Model 1: OpenAlex"),
76
+ gr.Label(label="Model 2: albertmartinez")
77
+ ],
78
  title="OpenAlex Topic Classification",
79
+ description="""
80
+ Enter a text with title and abstract to get its topic classification.
81
+
82
+ Input format:
83
+ ```
84
+ <TITLE> Your title here
85
+ <ABSTRACT> Your abstract here
86
+ ```
87
+
88
+ The system uses two different models to provide a more robust classification:
89
+
90
+ 1. [OpenAlex Model]({openalex_link}): Based on BERT multilingual model, fine-tuned on OpenAlex data
91
+ 2. [AlbertMartinez Model]({albert_link}): Based on BERT multilingual model, fine-tuned on [OpenAlex data](https://huggingface.co/datasets/albertmartinez/openalex-topic-title-abstract)
92
+
93
+ For more information about the models and their performance, visit their Hugging Face pages.
94
+ """.format(
95
+ openalex_link=MODEL_LINKS["OpenAlex"],
96
+ albert_link=MODEL_LINKS["albertmartinez"]
97
+ ),
98
+ examples=[
99
+ [EXAMPLE_TEXT, 5],
100
+ ["<TITLE> Climate Change Impact\n<ABSTRACT> Study of global warming effects on biodiversity", 3]
101
+ ],
102
  flagging_mode="never",
103
  api_name="classify"
104
  )
105
 
106
  if __name__ == "__main__":
107
+ logger.info(f"Gradio version: {gr.__version__}")
108
  demo.launch()
requirements.txt CHANGED
@@ -1,2 +1,11 @@
1
- transformers
2
- torch
 
 
 
 
 
 
 
 
 
 
1
+ gradio==5.33.1
2
+ transformers>=4.41.0,<5.0.0
3
+ torch==2.3.1
4
+ torchvision==0.18.1
5
+ torchaudio==2.3.1
6
+ numpy==1.26.4
7
+ sentencepiece>=0.1.99
8
+ protobuf>=4.25.2
9
+ accelerate>=0.27.2
10
+ huggingface-hub>=0.20.3
11
+ sentence-transformers>=3.3.1