File size: 6,085 Bytes
99cae8f
 
 
8ae67d7
 
99cae8f
8ae67d7
 
99cae8f
 
 
 
 
 
 
8ae67d7
99cae8f
8ae67d7
99cae8f
8ae67d7
 
 
 
 
 
99cae8f
8ae67d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99cae8f
8ae67d7
 
99cae8f
 
 
8ae67d7
99cae8f
8ae67d7
 
99cae8f
8ae67d7
99cae8f
 
8ae67d7
 
 
 
 
 
 
99cae8f
8ae67d7
99cae8f
8ae67d7
99cae8f
8ae67d7
 
 
 
99cae8f
8ae67d7
 
 
99cae8f
8ae67d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import streamlit as st
import requests
import time
import re
import pandas as pd

# ==== CONFIG ====
# Secret API token; Streamlit reads it from .streamlit/secrets.toml locally
# or from the host's secret store (e.g. Hugging Face Spaces) in deployment.
API_KEY = st.secrets["API_KEY"]  # Reads from Hugging Face / .streamlit/secrets.toml
HEADERS = {"Authorization": f"Bearer {API_KEY}"}  # bearer auth sent with every inference call

# Hosted Inference API endpoints, keyed by the feature names used in the tabs below.
API_URLS = {
    "Summarizer": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn",
    "Sentiment": "https://api-inference.huggingface.co/models/finiteautomata/bertweet-base-sentiment-analysis"
}

# ==== HELPERS ====
def query(api_url, payload):
    """Call the Hugging Face inference API and return the parsed response.

    Parameters
    ----------
    api_url : str
        Full model endpoint URL (one of the API_URLS values).
    payload : dict
        JSON-serializable request body, e.g. ``{"inputs": text}``.

    Returns
    -------
    dict | list
        The decoded JSON response on success, or ``{"error": "..."}`` on any
        HTTP, network, or decoding failure so callers can branch on "error"
        without a try/except.
    """
    try:
        resp = requests.post(api_url, headers=HEADERS, json=payload, timeout=60)
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}: {resp.text}"}
        # A 200 with a non-JSON body (e.g. an HTML error page from a proxy)
        # would make resp.json() raise and crash the app; surface it as an
        # error dict like every other failure mode instead.
        try:
            return resp.json()
        except ValueError:
            return {"error": f"Non-JSON response: {resp.text[:200]}"}
    except requests.exceptions.RequestException as e:
        return {"error": f"Request failed: {e}"}

def split_sentences(text: str):
    """Break *text* into sentence-like fragments.

    Splits after end punctuation (., ?, !, ;, :) followed by whitespace,
    on runs of newlines, and after a comma followed by whitespace. This
    keeps things simple and avoids pulling in an external tokenizer.
    Returns a list of stripped, non-empty fragments.
    """
    pieces = re.split(r'(?<=[.!?;:])\s+|\n+|(?<=,)\s+', text.strip())
    fragments = []
    for piece in pieces:
        cleaned = piece.strip() if piece else ""
        if cleaned:
            fragments.append(cleaned)
    return fragments

def extract_scores_from_api_response(res):
    """Normalize a HF sentiment response into numeric neg/neu/pos scores.

    Accepts either of the response shapes the inference API produces:
      - a flat list:   [{'label': 'NEG', 'score': 0.86}, ...]
      - a nested list: [[{'label': 'NEG', 'score': ...}, ...]]

    Returns ``{"neg": float, "neu": float, "pos": float}`` (each 0..1,
    defaulting to 0.0 for missing labels), or ``None`` when the shape
    is not recognized.
    """
    if not isinstance(res, list) or not res:
        return None

    head = res[0]
    if isinstance(head, list):
        entries = head    # nested-list shape (common with some HF endpoints)
    elif isinstance(head, dict):
        entries = res     # flat-list shape
    else:
        return None

    scores = {"neg": 0.0, "neu": 0.0, "pos": 0.0}
    for entry in entries:
        label = entry.get("label", "").upper()
        value = float(entry.get("score", 0) or 0)
        # Match substrings so "NEG", "NEGATIVE", etc. all map correctly;
        # first match wins, mirroring an if/elif chain.
        for key, tag in (("neg", "NEG"), ("neu", "NEU"), ("pos", "POS")):
            if tag in label:
                scores[key] = value
                break
    return scores

# ==== STREAMLIT UI ====
# Page chrome: browser-tab title/icon and a centered single-column layout.
st.set_page_config(page_title="NLP Toolkit", page_icon="🧠", layout="centered")
st.markdown("<h1 style='text-align: center; color: cyan;'>🧠 AI NLP Toolkit</h1>", unsafe_allow_html=True)
st.write("Summarization & sentence-wise Sentiment Analysis.")

# Two independent tools, one per tab.
tab1, tab2 = st.tabs(["📄 Summarizer", "📝 Sentiment Analysis"])

# --- Summarizer ---
with tab1:
    text = st.text_area("Enter text to summarize:", height=220)
    if st.button("Summarize📝"):
        if not text.strip():
            st.warning("Please enter text.")
        else:
            with st.spinner("Generating summary..."):
                time.sleep(0.8)  # brief pause so the spinner is visible
                out = query(API_URLS["Summarizer"], {"inputs": text})
            # query() reports failures as {"error": ...}; check the type first
            # so a list response is never mistaken for an error mapping.
            if isinstance(out, dict) and "error" in out:
                st.error(out["error"])
            # Guard the list before indexing: `out[0]` on an empty or
            # oddly-shaped response would raise IndexError/TypeError.
            elif isinstance(out, list) and out and isinstance(out[0], dict) and "summary_text" in out[0]:
                st.success("Summary ready")
                st.write(out[0]["summary_text"])
            else:
                st.error("Unexpected response from summarizer.")

# --- Sentiment Analysis (sentence-wise, table + average) ---
with tab2:
    text_sent = st.text_area("Enter text for sentiment analysis:", height=220, key="sent_text2")
    if st.button("Analyze Sentiment🧠"):
        if not text_sent.strip():
            st.warning("Please enter text.")
        else:
            # One API call per fragment produced by split_sentences().
            sentences = split_sentences(text_sent)
            if len(sentences) == 0:
                st.warning("No sentences found after splitting.")
            else:
                rows = []  # table rows: one per sentence, plus a final "Avg" row
                total_neg = total_neu = total_pos = 0.0  # running sums for the averages
                error_happened = False  # set on first failure; suppresses the results table

                with st.spinner("Analyzing sentences..."):
                    time.sleep(0.5)  # brief pause so the spinner is visible
                    for i, s in enumerate(sentences, start=1):
                        res = query(API_URLS["Sentiment"], {"inputs": s})
                        # query() reports failures as {"error": ...}; stop at the
                        # first failed sentence rather than showing partial data.
                        if isinstance(res, dict) and "error" in res:
                            st.error(f"API error for sentence {i}: {res['error']}")
                            error_happened = True
                            break

                        scores = extract_scores_from_api_response(res)
                        if scores is None:
                            st.error(f"Unexpected response format for sentence {i}.")
                            error_happened = True
                            break

                        # Convert 0..1 floats to whole-number percentages for display.
                        neg_pct = round(scores["neg"] * 100)
                        neu_pct = round(scores["neu"] * 100)
                        pos_pct = round(scores["pos"] * 100)

                        rows.append({
                            "#": i,
                            "Sentence": f'"{s}"',
                            "Negative": f"{neg_pct}%",
                            "Neutral": f"{neu_pct}%",
                            "Positive": f"{pos_pct}%"
                        })

                        # Accumulate the raw (unrounded) scores for the average row.
                        total_neg += scores["neg"]
                        total_neu += scores["neu"]
                        total_pos += scores["pos"]

                if not error_happened:
                    # No break occurred, so every sentence was scored and dividing
                    # by len(sentences) yields a true per-sentence average.
                    n = len(sentences)
                    avg_neg = round((total_neg / n) * 100)
                    avg_neu = round((total_neu / n) * 100)
                    avg_pos = round((total_pos / n) * 100)

                    # Append average row
                    rows.append({
                        "#": "Avg",
                        "Sentence": "—",
                        "Negative": f"{avg_neg}%",
                        "Neutral": f"{avg_neu}%",
                        "Positive": f"{avg_pos}%"
                    })

                    df = pd.DataFrame(rows, columns=["#", "Sentence", "Negative", "Neutral", "Positive"])
                    st.table(df)