Spaces:
Sleeping
Sleeping
File size: 6,085 Bytes
99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 99cae8f 8ae67d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import streamlit as st
import requests
import time
import re
import pandas as pd
# ==== CONFIG ====
# The API key comes from Streamlit secrets (Hugging Face Space settings or a
# local .streamlit/secrets.toml) — never hard-coded in source.
API_KEY = st.secrets["API_KEY"] # Reads from Hugging Face / .streamlit/secrets.toml
# Bearer-token auth header sent with every Inference API request.
HEADERS = {"Authorization": f"Bearer {API_KEY}"}
# Hosted Inference API endpoints, keyed by the feature that uses them.
API_URLS = {
    "Summarizer": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn",
    "Sentiment": "https://api-inference.huggingface.co/models/finiteautomata/bertweet-base-sentiment-analysis"
}
# ==== HELPERS ====
def query(api_url, payload):
    """POST *payload* to a Hugging Face Inference API endpoint.

    Args:
        api_url: Full model endpoint URL (a value from API_URLS).
        payload: JSON-serializable request body, e.g. {"inputs": text}.

    Returns:
        The decoded JSON response on success, otherwise a dict of the
        form {"error": "<description>"} — callers test for that key.
    """
    try:
        resp = requests.post(api_url, headers=HEADERS, json=payload, timeout=60)
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}: {resp.text}"}
        return resp.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"Request failed: {e}"}
    except ValueError as e:
        # Fix: resp.json() raises ValueError when a 200 response carries a
        # non-JSON body (e.g. an HTML error page); on older `requests`
        # versions that is NOT a RequestException, so without this clause
        # the app crashed instead of returning the error-dict contract.
        return {"error": f"Invalid JSON in response: {e}"}
def split_sentences(text: str):
    """Break *text* into sentence-like fragments.

    Delimiters are end punctuation (. ! ? ; :) followed by whitespace,
    runs of newlines, and a comma followed by whitespace.  Deliberately
    regex-only so no external tokenizer dependency is needed.
    """
    # Lookbehinds keep the delimiter punctuation attached to its fragment.
    delimiter = r'(?<=[.!?;:])\s+|\n+|(?<=,)\s+'
    fragments = re.split(delimiter, text.strip())
    cleaned = []
    for fragment in fragments:
        stripped = fragment.strip()
        if stripped:
            cleaned.append(stripped)
    return cleaned
def extract_scores_from_api_response(res):
    """Normalize a Hugging Face sentiment response into score fractions.

    Accepts either shape the inference API returns:
      - [{'label': 'NEG', 'score': 0.86}, ...]       (flat list)
      - [[{'label': 'NEG', 'score': ...}, ...]]      (nested list)

    Returns:
        dict: {'neg': float, 'neu': float, 'pos': float} with values in
        0..1, or None when the response shape is unrecognized (including
        an empty list).
    """
    sentiments = None
    if isinstance(res, list) and res:
        if isinstance(res[0], list):
            # Nested shape: the scores live in the inner list.
            sentiments = res[0]
        elif isinstance(res[0], dict):
            sentiments = res
    if sentiments is None:
        return None
    scores = {"neg": 0.0, "neu": 0.0, "pos": 0.0}
    for item in sentiments:
        # Fix: the original called item.get(...) unconditionally, so any
        # non-dict entry raised AttributeError; skip malformed entries.
        if not isinstance(item, dict):
            continue
        # str() guards against non-string labels; substring match covers
        # both "NEG"/"NEGATIVE"-style label variants.
        lab = str(item.get("label", "")).upper()
        sc = float(item.get("score", 0) or 0)
        if "NEG" in lab:
            scores["neg"] = sc
        elif "NEU" in lab:
            scores["neu"] = sc
        elif "POS" in lab:
            scores["pos"] = sc
    return scores
# ==== STREAMLIT UI ====
# Page chrome — set_page_config must be the first Streamlit UI call.
st.set_page_config(page_title="NLP Toolkit", page_icon="🧠", layout="centered")
# unsafe_allow_html is required for the inline-styled heading markup.
st.markdown("<h1 style='text-align: center; color: cyan;'>🧠 AI NLP Toolkit</h1>", unsafe_allow_html=True)
st.write("Summarization & sentence-wise Sentiment Analysis.")
# One tab per tool; each tab's widgets live in its own `with` block below.
tab1, tab2 = st.tabs(["📄 Summarizer", "📝 Sentiment Analysis"])
# --- Summarizer tab: single API call, prints the summary text ---
with tab1:
    text = st.text_area("Enter text to summarize:", height=220)
    if st.button("Summarize📝"):
        if not text.strip():
            st.warning("Please enter text.")
        else:
            with st.spinner("Generating summary..."):
                time.sleep(0.8)  # brief pause so the spinner is visible
                out = query(API_URLS["Summarizer"], {"inputs": text})
                # query() signals failure with a dict holding "error".
                # Fix: check isinstance first — the original `"error" in out`
                # did a substring/membership test on list or str responses.
                if isinstance(out, dict) and "error" in out:
                    st.error(out["error"])
                # Fix: the original indexed out[0] without checking the list
                # was non-empty (IndexError) or that out[0] was a dict.
                elif (isinstance(out, list) and out
                        and isinstance(out[0], dict) and "summary_text" in out[0]):
                    st.success("Summary ready")
                    st.write(out[0]["summary_text"])
                else:
                    st.error("Unexpected response from summarizer.")
# --- Sentiment Analysis (sentence-wise, table + average) ---
# Splits the input into sentence fragments, scores each one with a separate
# API call, then renders a table of per-sentence percentages plus an "Avg" row.
with tab2:
    # Separate widget key so this text area doesn't collide with tab1's.
    text_sent = st.text_area("Enter text for sentiment analysis:", height=220, key="sent_text2")
    if st.button("Analyze Sentiment🧠"):
        if not text_sent.strip():
            st.warning("Please enter text.")
        else:
            sentences = split_sentences(text_sent)
            if len(sentences) == 0:
                st.warning("No sentences found after splitting.")
            else:
                rows = []  # one table row per sentence (plus a final Avg row)
                # Running sums of raw 0..1 scores, used for the average row.
                total_neg = total_neu = total_pos = 0.0
                error_happened = False  # set on first failure; suppresses the table
                with st.spinner("Analyzing sentences..."):
                    time.sleep(0.5)  # brief pause so the spinner is visible
                    # NOTE: one API request per sentence — can be slow and may
                    # hit rate limits for long inputs.
                    for i, s in enumerate(sentences, start=1):
                        res = query(API_URLS["Sentiment"], {"inputs": s})
                        # query() signals failure with a dict holding "error";
                        # abort the whole analysis on the first failed sentence.
                        if isinstance(res, dict) and "error" in res:
                            st.error(f"API error for sentence {i}: {res['error']}")
                            error_happened = True
                            break
                        scores = extract_scores_from_api_response(res)
                        if scores is None:
                            st.error(f"Unexpected response format for sentence {i}.")
                            error_happened = True
                            break
                        # Convert 0..1 fractions to whole-number percentages
                        # for display; raw fractions still feed the totals.
                        neg_pct = round(scores["neg"] * 100)
                        neu_pct = round(scores["neu"] * 100)
                        pos_pct = round(scores["pos"] * 100)
                        rows.append({
                            "#": i,
                            "Sentence": f'"{s}"',
                            "Negative": f"{neg_pct}%",
                            "Neutral": f"{neu_pct}%",
                            "Positive": f"{pos_pct}%"
                        })
                        total_neg += scores["neg"]
                        total_neu += scores["neu"]
                        total_pos += scores["pos"]
                if not error_happened:
                    # Average over all sentences (the loop completed, so every
                    # sentence contributed to the totals).
                    n = len(sentences)
                    avg_neg = round((total_neg / n) * 100)
                    avg_neu = round((total_neu / n) * 100)
                    avg_pos = round((total_pos / n) * 100)
                    # Append average row
                    rows.append({
                        "#": "Avg",
                        "Sentence": "—",
                        "Negative": f"{avg_neg}%",
                        "Neutral": f"{avg_neu}%",
                        "Positive": f"{avg_pos}%"
                    })
                    df = pd.DataFrame(rows, columns=["#", "Sentence", "Negative", "Neutral", "Positive"])
                    st.table(df)