Curative committed on
Commit 0e8551f · verified · 1 parent: e8f73fc

Update app.py

Files changed (1)
app.py +96 -35
app.py CHANGED
@@ -1,72 +1,133 @@
  import gradio as gr
- from transformers import pipeline
+ from transformers import pipeline, AutoTokenizer
+ import torch
  
- # Lazy‑load pipelines
- sentiment = classifier = ner = summarizer = None
+ # —— Lazy‑loaded pipelines & tokenizers —— #
+ summarizer = sentiment = ner = classifier = None
+ ner_tokenizer = None
+
+ def get_summarizer():
+     global summarizer
+     if summarizer is None:
+         summarizer = pipeline(
+             "summarization",
+             model="Curative/t5-summarizer-cnn",
+             framework="pt"
+         )
+     return summarizer
  
  def get_sentiment():
      global sentiment
-     if not sentiment:
-         sentiment = pipeline("sentiment-analysis",
-             model="distilbert-base-uncased-finetuned-sst-2-english")
+     if sentiment is None:
+         sentiment = pipeline(
+             "sentiment-analysis",
+             model="distilbert-base-uncased-finetuned-sst-2-english",
+             framework="pt"
+         )
      return sentiment
  
  def get_classifier():
      global classifier
-     if not classifier:
+     if classifier is None:
          classifier = pipeline(
              "zero-shot-classification",
-             model="facebook/bart-large-mnli")
+             model="facebook/bart-large-mnli",
+             framework="pt"
+         )
      return classifier
  
  def get_ner():
-     global ner
-     if not ner:
-         ner = pipeline("ner",
+     global ner, ner_tokenizer
+     if ner is None:
+         # Load the fast tokenizer explicitly for proper aggregation
+         ner_tokenizer = AutoTokenizer.from_pretrained(
+             "elastic/distilbert-base-uncased-finetuned-conll03-english",
+             use_fast=True
+         )
+         ner = pipeline(
+             "ner",
              model="elastic/distilbert-base-uncased-finetuned-conll03-english",
-             aggregation_strategy="simple")
+             tokenizer=ner_tokenizer,
+             aggregation_strategy="simple",
+             framework="pt"
+         )
      return ner
  
- def get_summarizer():
-     global summarizer
-     if not summarizer:
-         summarizer = pipeline("summarization",
-             model="Curative/t5-summarizer-cnn")
-     return summarizer
+ # —— Helper functions —— #
+ def chunk_and_summarize(text: str) -> str:
+     """Split on sentences into ≤1,000 char chunks, summarize each, then join."""
+     summarizer = get_summarizer()
+     max_chunk = 1000
+     sentences = text.split(". ")
+     chunks, current = [], ""
+     for sent in sentences:
+         # +2 accounts for the period and space
+         if len(current) + len(sent) + 2 <= max_chunk:
+             current += sent + ". "
+         else:
+             chunks.append(current.strip())
+             current = sent + ". "
+     if current:
+         chunks.append(current.strip())
+
+     summaries = []
+     for chunk in chunks:
+         part = summarizer(
+             chunk,
+             max_length=150,
+             min_length=40,
+             do_sample=False
+         )[0]["summary_text"]
+         summaries.append(part)
+     return " ".join(summaries)
+
+ def merge_entities(ents):
+     """Merge sub‑word tokens (##…) into full words."""
+     merged = []
+     for e in ents:
+         w, t = e["word"], e["entity_group"]
+         if w.startswith("##") and merged:
+             merged[-1]["word"] += w.replace("##", "")
+         else:
+             merged.append({"word": w, "type": t})
+     return merged
  
  def process(text, features):
-     result = {}
+     out = {}
      if "Summarization" in features:
-         result["summary"] = get_summarizer()(
-             text, max_length=150, min_length=40, do_sample=False
-         )[0]["summary_text"]
+         out["summary"] = chunk_and_summarize(text)
      if "Sentiment" in features:
-         sent = get_sentiment()(text)[0]
-         result["sentiment"] = {"label": sent["label"], "score": sent["score"]}
+         s = get_sentiment()(text)[0]
+         out["sentiment"] = {"label": s["label"], "score": s["score"]}
      if "Classification" in features:
-         candidate_labels = [
-             "technology", "sports", "business", "politics",
-             "health", "science", "travel", "entertainment"
+         labels = ["technology","sports","business","politics",
+                   "health","science","travel","entertainment"]
+         cls = get_classifier()(text, candidate_labels=labels)
+         # Zip & sort
+         pairs = sorted(
+             zip(cls["labels"], cls["scores"]),
+             key=lambda x: x[1],
+             reverse=True
+         )
+         out["classification"] = [
+             {"label": lbl, "score": scr} for lbl, scr in pairs
          ]
-         cls = get_classifier()(text, candidate_labels=candidate_labels)
-         # Map labels → scores
-         result["classification"] = dict(zip(cls["labels"], cls["scores"]))
      if "Entities" in features:
          ents = get_ner()(text)
-         result["entities"] = [
-             {"word": e["word"], "type": e["entity_group"]} for e in ents
-         ]
-     return result
+         out["entities"] = merge_entities(ents)
+     return out
  
+ # —— Gradio UI —— #
  with gr.Blocks() as demo:
      gr.Markdown("## 🛠️ Multi‑Feature NLP Service")
-     inp = gr.Textbox(lines=6, placeholder="Enter your text here…")
+     inp = gr.Textbox(lines=8, placeholder="Enter your text here…")
      feats = gr.CheckboxGroup(
          ["Summarization","Sentiment","Classification","Entities"],
          label="Select features to run"
      )
      btn = gr.Button("Run")
      out = gr.JSON(label="Results")
+
      btn.click(process, [inp, feats], out)
  
  demo.queue(api_open=True).launch()
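
Because the app launches with demo.queue(api_open=True), the same handler can also be called programmatically once the Space (or a local python app.py) is running. Below is a minimal client-side sketch, not part of this commit: the local URL and the "/process" endpoint name are assumptions (Gradio normally names the endpoint after the handler function), and Client.view_api() lists what the running app actually exposes.

from gradio_client import Client

# Assumed local URL; replace with the Space id, e.g. "owner/space-name", once deployed.
client = Client("http://127.0.0.1:7860/")
client.view_api()  # prints the available endpoints and their parameters

result = client.predict(
    "Hugging Face is a company based in New York City.",  # text for the Textbox input
    ["Sentiment", "Entities"],                            # features selected in the CheckboxGroup
    api_name="/process",                                  # assumed default endpoint name
)
print(result)  # should be the dict shown in the gr.JSON output, e.g. "sentiment" and "entities" keys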