"""Streamlit NLP toolkit: text summarization and sentence-wise sentiment
analysis backed by the Hugging Face Inference API."""

import re
import time

import pandas as pd
import requests
import streamlit as st

# ==== CONFIG ====
# Reads from Hugging Face Spaces / .streamlit/secrets.toml.
API_KEY = st.secrets["API_KEY"]
HEADERS = {"Authorization": f"Bearer {API_KEY}"}
API_URLS = {
    "Summarizer": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn",
    "Sentiment": "https://api-inference.huggingface.co/models/finiteautomata/bertweet-base-sentiment-analysis",
}


# ==== HELPERS ====
def query(api_url, payload):
    """Call the Hugging Face inference API and return JSON or {'error': ...}.

    Never raises: network failures and non-200 responses are folded into an
    ``{'error': str}`` dict so callers have a single error channel to check.
    """
    try:
        resp = requests.post(api_url, headers=HEADERS, json=payload, timeout=60)
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}: {resp.text}"}
        return resp.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"Request failed: {e}"}


def split_sentences(text: str):
    """Split text into sentences/phrases by ., ?, !, ;, :, comma and newlines.

    Keeps things simple and avoids external tokenizers. Returns a list of
    non-empty, stripped fragments.
    """
    # Split on end punctuation + whitespace, OR on newline(s), OR on a comma
    # followed by whitespace. Lookbehinds keep the punctuation attached to
    # the preceding fragment instead of consuming it.
    parts = re.split(r'(?<=[.!?;:])\s+|\n+|(?<=,)\s+', text.strip())
    # Filter empties and strip residual whitespace.
    return [p.strip() for p in parts if p and p.strip()]


def extract_scores_from_api_response(res):
    """Normalize a HF sentiment response into {'neg', 'neu', 'pos'} floats.

    Accepts HF response shapes like:
      - [{'label': 'NEG', 'score': 0.86}, ...]        (flat list)
      - [[{'label': 'NEG', 'score': ...}, ...]]       (nested list, common
        with some HF endpoints)

    Returns a dict with numeric neg/neu/pos in 0..1, or None when the shape
    is unexpected.
    """
    sentiments = None
    if isinstance(res, list) and res:
        if isinstance(res[0], list):
            # Nested list shape: the scores are in the first inner list.
            sentiments = res[0]
        elif isinstance(res[0], dict):
            sentiments = res
    if sentiments is None:
        return None

    neg = neu = pos = 0.0
    for item in sentiments:
        # str() guards against a non-string label in a malformed payload.
        lab = str(item.get("label", "")).upper()
        # "or 0" maps a None score to 0 before the float() conversion.
        sc = float(item.get("score", 0) or 0)
        if "NEG" in lab:
            neg = sc
        elif "NEU" in lab:
            neu = sc
        elif "POS" in lab:
            pos = sc
    return {"neg": neg, "neu": neu, "pos": pos}


# ==== STREAMLIT UI ====
st.set_page_config(page_title="NLP Toolkit", page_icon="🧠", layout="centered")
# NOTE(review): the original HTML markup here was garbled in this copy of the
# file; only the title text survived. Reconstructed as a centered heading —
# confirm against the deployed app.
st.markdown(
    "<h1 style='text-align: center;'>🧠 AI NLP Toolkit</h1>",
    unsafe_allow_html=True,
)
st.write("Summarization & sentence-wise Sentiment Analysis.")

tab1, tab2 = st.tabs(["📄 Summarizer", "📝 Sentiment Analysis"])

# --- Summarizer ---
with tab1:
    text = st.text_area("Enter text to summarize:", height=220)
    if st.button("Summarize📝"):
        if not text.strip():
            st.warning("Please enter text.")
        else:
            with st.spinner("Generating summary..."):
                # Brief pause so the spinner is visible even on fast responses.
                time.sleep(0.8)
                out = query(API_URLS["Summarizer"], {"inputs": text})
                # Explicit dict check: a successful response is a list, and
                # `"error" in <list>` would scan elements rather than keys.
                if isinstance(out, dict) and "error" in out:
                    st.error(out["error"])
                elif (
                    isinstance(out, list)
                    and out  # guard: out[0] on an empty list raises IndexError
                    and isinstance(out[0], dict)
                    and "summary_text" in out[0]
                ):
                    st.success("Summary ready")
                    st.write(out[0]["summary_text"])
                else:
                    st.error("Unexpected response from summarizer.")

# --- Sentiment Analysis (sentence-wise, table + average) ---
with tab2:
    text_sent = st.text_area(
        "Enter text for sentiment analysis:", height=220, key="sent_text2"
    )
    if st.button("Analyze Sentiment🧠"):
        if not text_sent.strip():
            st.warning("Please enter text.")
        else:
            sentences = split_sentences(text_sent)
            if len(sentences) == 0:
                st.warning("No sentences found after splitting.")
            else:
                rows = []
                total_neg = total_neu = total_pos = 0.0
                error_happened = False
                with st.spinner("Analyzing sentences..."):
                    time.sleep(0.5)
                    # One API call per sentence; abort the whole run on the
                    # first error so the averages are never partial garbage.
                    for i, s in enumerate(sentences, start=1):
                        res = query(API_URLS["Sentiment"], {"inputs": s})
                        if isinstance(res, dict) and "error" in res:
                            st.error(f"API error for sentence {i}: {res['error']}")
                            error_happened = True
                            break
                        scores = extract_scores_from_api_response(res)
                        if scores is None:
                            st.error(f"Unexpected response format for sentence {i}.")
                            error_happened = True
                            break
                        neg_pct = round(scores["neg"] * 100)
                        neu_pct = round(scores["neu"] * 100)
                        pos_pct = round(scores["pos"] * 100)
                        rows.append({
                            "#": i,
                            "Sentence": f'"{s}"',
                            "Negative": f"{neg_pct}%",
                            "Neutral": f"{neu_pct}%",
                            "Positive": f"{pos_pct}%",
                        })
                        total_neg += scores["neg"]
                        total_neu += scores["neu"]
                        total_pos += scores["pos"]

                if not error_happened:
                    # Averages over all sentences (loop completed, so every
                    # sentence contributed to the totals).
                    n = len(sentences)
                    avg_neg = round((total_neg / n) * 100)
                    avg_neu = round((total_neu / n) * 100)
                    avg_pos = round((total_pos / n) * 100)
                    # Append the average as a summary row.
                    rows.append({
                        "#": "Avg",
                        "Sentence": "—",
                        "Negative": f"{avg_neg}%",
                        "Neutral": f"{avg_neu}%",
                        "Positive": f"{avg_pos}%",
                    })
                    df = pd.DataFrame(
                        rows,
                        columns=["#", "Sentence", "Negative", "Neutral", "Positive"],
                    )
                    st.table(df)