Spaces:

Curative
/

t5-summarizer-gradio

Sleeping

App Files Files Community

Curative commited on May 4

Commit

3241a33

verified ·

1 Parent(s): 0e8551f

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -114

app.py CHANGED Viewed

@@ -1,133 +1,44 @@
 import gradio as gr
-from transformers import pipeline, AutoTokenizer
-import torch
-# —— Lazy‑loaded pipelines & tokenizers —— #
-summarizer = sentiment = ner = classifier = None
-ner_tokenizer = None
-def get_summarizer():
-    global summarizer
-    if summarizer is None:
-        summarizer = pipeline(
-            "summarization",
-            model="Curative/t5-summarizer-cnn",
-            framework="pt"
-        )
-    return summarizer
-def get_sentiment():
-    global sentiment
-    if sentiment is None:
-        sentiment = pipeline(
-            "sentiment-analysis",
-            model="distilbert-base-uncased-finetuned-sst-2-english",
-            framework="pt"
-        )
-    return sentiment
-def get_classifier():
-    global classifier
-    if classifier is None:
-        classifier = pipeline(
-            "zero-shot-classification",
-            model="facebook/bart-large-mnli",
-            framework="pt"
-        )
-    return classifier
-def get_ner():
-    global ner, ner_tokenizer
-    if ner is None:
-        # Load Fast tokenizer explicitly for proper aggregation
-        ner_tokenizer = AutoTokenizer.from_pretrained(
-            "elastic/distilbert-base-uncased-finetuned-conll03-english",
-            use_fast=True
-        )
-        ner = pipeline(
-            "ner",
-            model="elastic/distilbert-base-uncased-finetuned-conll03-english",
-            tokenizer=ner_tokenizer,
-            aggregation_strategy="simple",
-            framework="pt"
-        )
-    return ner
-# —— Helper functions —— #
-def chunk_and_summarize(text: str) -> str:
-    """Split on sentences into ≤1,000 char chunks, summarize each, then join."""
-    summarizer = get_summarizer()
-    max_chunk = 1000
-    sentences = text.split(". ")
-    chunks, current = [], ""
-    for sent in sentences:
-        # +2 accounts for the period and space
-        if len(current) + len(sent) + 2 <= max_chunk:
-            current += sent + ". "
-        else:
-            chunks.append(current.strip())
-            current = sent + ". "
-    if current:
-        chunks.append(current.strip())
-    summaries = []
-    for chunk in chunks:
-        part = summarizer(
-            chunk,
-            max_length=150,
-            min_length=40,
-            do_sample=False
-        )[0]["summary_text"]
-        summaries.append(part)
-    return " ".join(summaries)
-def merge_entities(ents):
-    """Merge sub‑word tokens (##…) into full words."""
-    merged = []
-    for e in ents:
-        w, t = e["word"], e["entity_group"]
-        if w.startswith("##") and merged:
-            merged[-1]["word"] += w.replace("##", "")
-        else:
-            merged.append({"word": w, "type": t})
-    return merged
 def process(text, features):
-    out = {}
     if "Summarization" in features:
-        out["summary"] = chunk_and_summarize(text)  # :contentReference[oaicite:7]{index=7}
     if "Sentiment" in features:
-        s = get_sentiment()(text)[0]
-        out["sentiment"] = {"label": s["label"], "score": s["score"]}
     if "Classification" in features:
-        labels = ["technology","sports","business","politics",
-                  "health","science","travel","entertainment"]
-        cls = get_classifier()(text, candidate_labels=labels)
-        # Zip & sort
-        pairs = sorted(
-            zip(cls["labels"], cls["scores"]),
-            key=lambda x: x[1],
-            reverse=True
-        )
-        out["classification"] = [
-            {"label": lbl, "score": scr} for lbl, scr in pairs
-        ]
     if "Entities" in features:
-        ents = get_ner()(text)
-        out["entities"] = merge_entities(ents)        # :contentReference[oaicite:8]{index=8}
-    return out
-# —— Gradio UI ���— #
 with gr.Blocks() as demo:
-    gr.Markdown("## 🛠️ Multi‑Feature NLP Service")
-    inp = gr.Textbox(lines=8, placeholder="Enter your text here…")
     feats = gr.CheckboxGroup(
-        ["Summarization","Sentiment","Classification","Entities"],
         label="Select features to run"
     )
     btn = gr.Button("Run")
     out = gr.JSON(label="Results")
     btn.click(process, [inp, feats], out)
 demo.queue(api_open=True).launch()

 import gradio as gr
+from transformers import pipeline
+# Initialize pipelines
+sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+classification_pipeline = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
+summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
+# Define candidate labels for classification
+candidate_labels = [
+    "technology", "sports", "business", "politics",
+    "health", "science", "travel", "entertainment"
+]
 def process(text, features):
+    result = {}
     if "Summarization" in features:
+        summary = summarization_pipeline(text, max_length=150, min_length=40, do_sample=False)
+        result["summary"] = summary[0]["summary_text"]
     if "Sentiment" in features:
+        sentiment = sentiment_pipeline(text)[0]
+        result["sentiment"] = {"label": sentiment["label"], "score": sentiment["score"]}
     if "Classification" in features:
+        classification = classification_pipeline(text, candidate_labels=candidate_labels)
+        result["classification"] = dict(zip(classification["labels"], classification["scores"]))
     if "Entities" in features:
+        entities = ner_pipeline(text)
+        result["entities"] = [{"word": entity["word"], "type": entity["entity_group"]} for entity in entities]
+    return result
+# Build Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("## 🛠️ Multi-Feature NLP Service")
+    inp = gr.Textbox(lines=6, placeholder="Enter your text here…")
     feats = gr.CheckboxGroup(
+        ["Summarization", "Sentiment", "Classification", "Entities"],
         label="Select features to run"
     )
     btn = gr.Button("Run")
     out = gr.JSON(label="Results")
     btn.click(process, [inp, feats], out)
 demo.queue(api_open=True).launch()