File size: 18,677 Bytes
aae8a37
 
 
 
 
 
 
 
 
 
 
b791a9a
 
 
 
 
 
aae8a37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fde4e5
aae8a37
 
 
 
 
 
b258333
 
 
 
 
 
e6a9160
b258333
 
 
 
 
 
 
 
aae8a37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
import html
import json
import os
import re
from collections import defaultdict
from itertools import cycle

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import streamlit as st
from dotenv import load_dotenv
from groq import Groq

# Load .env FIRST so every lookup below (persona path, cache dirs, API key,
# model) can be configured from it. Previously load_dotenv() ran after the
# os.getenv() calls, so .env values for those settings were never seen.
load_dotenv()  # load .env file

# Where the generated personas are written as JSON.
PERSONA_PATH = os.getenv("PERSONA_PATH", "/tmp/personas.json")

# Set HuggingFace cache directories to /tmp for cloud hosting (permission safe)
os.environ["TRANSFORMERS_CACHE"] = os.getenv("TRANSFORMERS_CACHE", "/tmp/hf_cache")
os.environ["HF_HOME"] = os.getenv("HF_HOME", "/tmp/huggingface")
os.environ["HF_DATASETS_CACHE"] = os.getenv("HF_DATASETS_CACHE", "/tmp/huggingface")

# --- THEME COLORS ---
neon_blue = "#00fff7"
neon_green = "#7CFC00"
neon_pink = "#F72585"
neon_yellow = "#FFF600"
neon_bg = "#181830"
neon_orange = "#FFB347"
neon_dark = "#202037"

GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# --- CONFIG ---
# The model id can be overridden through the environment without a code change.
# NOTE(review): confirm the default model id is still served by Groq.
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama3-70b-8192")
groq_client = Groq(api_key=GROQ_API_KEY)
# System prompt shared by every chat-completion request in this script.
PRODUCT_CONTEXT = (
    "You are an AI market research expert analyzing customer reviews for a chocolate-flavoured whey protein powder. "
    "Generate user personas based on patterns and diversity in the reviews."
)
CSV_PATH = "src/data_with_text.csv"

# --- PAGE SETUP ---
# The page config call comes before any other Streamlit output.
st.set_page_config(page_title="Persona Lab", layout="wide", initial_sidebar_state="collapsed")
st.markdown(
    "<h1 style='color:#00fff7;font-size:2.6rem;font-weight:900;letter-spacing:0.01em;margin-bottom:5px;'>🎭 Persona Lab</h1>",
    unsafe_allow_html=True
)

# Set dark theme programmatically
st.markdown(
    """
    <style>
    body, .main, .stApp {
        background: #14151A !important;
        color: #fff !important;
    }
    </style>
    """,
    unsafe_allow_html=True
)


# Intro blurb. A plain string: it has no placeholders, so no f-prefix is needed.
st.markdown(
    """
    <div style="font-size:1.21rem; color:#AC7CFF; font-weight:600; margin-top:-13px; margin-bottom:14px; line-height:1.5;">
        Ready to peek inside the minds of your customers?  
        This is your sandbox for uncovering who buys, why they rave, and what they crave—powered by real reviews and sharp AI.  
        Dive in, explore the personas that drive your market, and see your brand through their eyes (and taste buds)!
    </div>
    """,
    unsafe_allow_html=True
)

# --- NAVIGATION BUTTONS ---
st.markdown("""
    <style>
    .neon-btn {
        display:inline-block;
        font-weight:bold;
        padding:14px 32px;
        border:none;
        border-radius:12px;
        font-size:1.1em;
        margin-right:18px;
        cursor:pointer;
        box-shadow:0 0 14px #00fff777;
        color:#222 !important;
        background:linear-gradient(90deg,#7CFC00,#00fff7);
        text-decoration:none !important;
        transition: transform 0.08s;
    }
    .neon-btn-pink {
        background:linear-gradient(90deg,#F72585,#00fff7);
        color:#fff !important;
        box-shadow:0 0 14px #F7258577;
    }
    .neon-btn:hover {
        transform:scale(1.04);
        box-shadow:0 0 24px #00fff799;
    }
    .neon-btn-pink:hover {
        box-shadow:0 0 24px #F7258599;
    }
    </style>
""", unsafe_allow_html=True)

# Attributes are separated by whitespace (class="..." target="...") so the
# anchors are well-formed HTML.
st.markdown("""
<div style="display:flex;gap:2em;justify-content:flex-start;">
    <a href="/prt111" class="neon-btn" target="_self">🏠 Home</a>
    <a href="/newprod" class="neon-btn neon-btn-pink" target="_self">🚀 New Product Launch</a>
</div>
<br>
""", unsafe_allow_html=True)


def block_markdown(text, color):
    """Wrap *text* in a gradient-styled HTML card for ``st.markdown``.

    The text is HTML-escaped before being embedded, because callers pass
    LLM-generated content and the result is rendered with
    ``unsafe_allow_html=True``. Newlines are then turned into ``<br>`` tags.

    Args:
        text: Plain text to display; may contain newlines.
        color: CSS hex colour (e.g. ``"#7CFC00"``) used for the gradient and
            the shadow. Two hex digits are appended to it as an alpha channel.

    Returns:
        An HTML ``<div>`` string.
    """
    # Escape first, then insert <br>, so the line-break tags themselves survive.
    safe_text = html.escape(text, quote=False).replace('\n', '<br>')
    return (
        f'<div style="background:linear-gradient(90deg,{color}22,#181830 90%);'
        f'padding:16px 22px;border-radius:16px;margin:10px 0 24px 0;'
        f'font-weight:600;color:#fff;font-size:1.04em;line-height:1.6;box-shadow:0 2px 24px {color}19;">'
        f'{safe_text}</div>'
    )

@st.cache_data(show_spinner=True)
def load_reviews(csv_path):
    """Load the review CSV and make sure the derived columns exist.

    The result is cached by Streamlit through ``st.cache_data``.

    Args:
        csv_path: Path of the CSV file; it is expected to contain a
            ``review_text`` column.

    Returns:
        A DataFrame with ``polarity`` and ``review_length`` columns added when
        the file did not already provide them, or an empty DataFrame (after
        showing an error) when the file does not exist.
    """
    if not os.path.exists(csv_path):
        st.error(f"CSV file not found: {csv_path}")
        return pd.DataFrame()
    df = pd.read_csv(csv_path)
    if "polarity" not in df.columns:
        try:
            from transformers import pipeline
            sa = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

            def _polarity(text):
                # Missing or blank entries cannot be classified. Keep them
                # neutral so one bad row does not fail the whole column.
                if not isinstance(text, str) or not text.strip():
                    return 0
                # truncation=True keeps long reviews within the model's
                # maximum input length.
                label = sa(text, truncation=True)[0]["label"]
                return 1 if label == "POSITIVE" else -1

            df["polarity"] = df["review_text"].apply(_polarity)
        except Exception:
            # Best effort: the page still works without sentiment scores.
            st.warning("Could not compute sentiment scores. All reviews set to neutral (0).")
            df["polarity"] = 0

    if "review_length" not in df.columns:
        # Word count of each review (whitespace split).
        df["review_length"] = df["review_text"].apply(lambda x: len(str(x).split()))
    return df

def generate_personas(review_texts, n_personas=4):
    """Ask the Groq chat model to segment the reviews into personas.

    Args:
        review_texts: List of review strings. Only the first 120 are used, and
            their newline-joined text is capped at 3600 characters.
        n_personas: Number of personas requested in the prompt.

    Returns:
        The stripped model reply, or a string starting with
        ``"Error generating personas:"`` when the request fails.
    """
    review_excerpt = "\n".join(review_texts[:120])[:3600]
    instruction_parts = [
        "Read the following customer reviews for a chocolate-flavored whey protein powder. ",
        f"Based on the language, interests, and context, segment these users into {n_personas} distinct personas. ",
        "For each persona, provide:\n",
        "1. Persona Name starting with emoji\n",
        "2. A one-line summary\n",
        "3. Five detailed bullet points describing their characteristics, needs, goals, or behaviors (each bullet should be specific and insightful, not generic).\n",
        "Give the answer as a numbered list, one for each persona. Format:\n",
        "1. [Emoji] Persona Name\nSummary: ...\n- ...\n- ...\n- ...\n- ...\n- ...\n",
        "\nREVIEWS:\n",
    ]
    prompt = "".join(instruction_parts) + review_excerpt

    try:
        response = groq_client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": PRODUCT_CONTEXT},
                {"role": "user", "content": prompt},
            ],
            max_tokens=900,
            temperature=0.6,
        )
        reply = response.choices[0].message.content
        return reply.strip()
    except Exception as e:
        # Any failure is reported to the caller as text, not raised.
        return f"Error generating personas: {e}"

def parse_personas_bulletproof(llm_output, n=4):
    """Parse the LLM's persona list into structured dictionaries.

    A persona starts on a line that begins with an emoji, optionally preceded
    by a one- or two-digit list number. Within a persona block, the first line
    that mentions "summary" or is not a bullet becomes the summary. Later
    bulleted or numbered lines become the bullets.

    Only the list marker is removed from a bullet, so content that starts with
    a digit (e.g. ``"- 2 shakes a day"``) is kept intact. Markdown bold markers
    (``**``) are dropped from the name, summary and bullets.

    Args:
        llm_output: Raw text returned by the model.
        n: Maximum number of personas to return.

    Returns:
        A list of ``{"name", "summary", "bullets"}`` dicts. ``bullets`` holds
        at most five strings. The list is empty when no persona header is
        found.
    """
    header_re = re.compile(r"^([0-9]{1,2}[.)-]?\s*)?[\U0001F300-\U0001FAFF]")
    numbering_re = re.compile(r"^([0-9]{1,2}[.)-]?\s*)?")
    # A bullet marker is a run of bullet characters or a "1." / "1)" / "1-" number.
    bullet_marker_re = re.compile(r"^(?:[-•*+]+|[0-9]{1,2}[.)-])\s*")
    summary_prefix_re = re.compile(r"^summary[:\- ]*", flags=re.I)
    bullet_chars = ("-", "•", "*", "+")

    lines = llm_output.splitlines()
    header_idx = [i for i, line in enumerate(lines) if header_re.match(line.strip())]

    personas = []
    for pos, start in enumerate(header_idx[:n]):
        end = header_idx[pos + 1] if pos + 1 < len(header_idx) else len(lines)
        block = lines[start:end]

        name = numbering_re.sub("", block[0].strip(), count=1).strip().replace("**", "")
        summary = ""
        bullets = []
        for raw in block[1:]:
            line = raw.strip()
            if not line:
                continue
            if not summary and ("summary" in line.lower() or not line.startswith(bullet_chars)):
                summary = summary_prefix_re.sub("", line.replace("**", "").strip(), count=1).strip()
            elif bullet_marker_re.match(line):
                text = bullet_marker_re.sub("", line, count=1).replace("**", "").strip()
                if text:
                    bullets.append(text)
        personas.append({
            "name": name,
            "summary": summary,
            "bullets": bullets[:5],
        })
    return personas

def assign_review_to_persona_tfidf(df, persona_defs):
    """Assign every review in *df* to its most similar persona.

    Reviews and persona descriptions (summary plus bullets) are vectorised
    together with TF-IDF. Each review gets the persona with the highest
    dot-product similarity; ties go to the earlier persona.

    Args:
        df: DataFrame with a ``review_text`` column. Missing values are
            treated as empty documents instead of crashing the vectoriser.
        persona_defs: List of persona dicts with ``name``, ``summary`` and
            ``bullets`` keys.

    Returns:
        A list of persona names, one per row of *df*, in row order. Empty when
        there are no personas or no reviews.
    """
    if not persona_defs:
        return []
    review_texts = df["review_text"].fillna("").astype(str).tolist()
    if not review_texts:
        return []

    # Use TF-IDF cosine similarity for assignment (faster than LLM for large data)
    from sklearn.feature_extraction.text import TfidfVectorizer

    persona_texts = [p["summary"] + " " + " ".join(p["bullets"]) for p in persona_defs]
    tfidf = TfidfVectorizer(stop_words='english')
    X = tfidf.fit_transform(review_texts + persona_texts)

    # Split by an explicit review count; negative slicing breaks when the
    # persona list is empty.
    n_reviews = len(review_texts)
    review_vecs = X[:n_reviews]
    persona_vecs = X[n_reviews:]

    # One sparse product gives the full (n_reviews, n_personas) similarity table.
    sims = review_vecs.dot(persona_vecs.T).toarray()
    return [persona_defs[i]["name"] for i in sims.argmax(axis=1)]

def groq_bullets_persona(chart_desc, chart_data_text):
    """Ask the Groq chat model for a two-bullet summary of a chart.

    Args:
        chart_desc: Short description of the chart, inserted into the prompt.
        chart_data_text: The chart's data; it is formatted into the prompt.

    Returns:
        The first two reply lines that start with ``-`` or ``•``. If the reply
        has fewer than two such lines, the whole reply prefixed with ``"- "``.
        On any failure, a fixed two-line placeholder.
    """
    user_prompt = "".join([
        f"Summarize as exactly two bullet points the main insights for this chart: {chart_desc}. ",
        f"Here is the data: {chart_data_text}. ",
        "Provide a percentage if applicable. Just facts.",
    ])
    try:
        response = groq_client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": PRODUCT_CONTEXT},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=80,
            temperature=0.5,
        )
        reply = response.choices[0].message.content.strip()

        bullet_lines = []
        for candidate in reply.splitlines():
            if candidate.strip().startswith(("-", "•")):
                bullet_lines.append(candidate)

        if len(bullet_lines) >= 2:
            return "\n".join(bullet_lines[:2])
        # Not enough recognisable bullets: present the whole reply as one bullet.
        return "- " + reply
    except Exception:
        return "- Summary not available.\n- (LLM error)"

# --- EMOTION PIPELINE (optional) ---
def emotion_pipeline(df):
    """Add a ``main_emotion`` column holding each review's top emotion label.

    If the classifier cannot be loaded, a warning is shown and every row is
    labelled ``"neutral"``. A review whose classification fails is also
    labelled ``"neutral"``. *df* is modified in place and returned.
    """
    try:
        from transformers import pipeline
        classifier = pipeline(
            "text-classification",
            model="finiteautomata/bertweet-base-emotion-analysis",  # much smaller than roberta-base!
            top_k=None,
            device=-1  # always use CPU, avoid meta-tensor bug
        )
    except Exception as e:
        st.warning(f"Could not load emotion model, skipping emotion analysis: {e}")
        df["main_emotion"] = "neutral"
        return df

    def _dominant_label(text):
        # Classify the first 512 characters and return the best-scoring label.
        try:
            scores = classifier(text[:512])
            nested = isinstance(scores, list) and len(scores) and isinstance(scores[0], list)
            if nested:
                # Sometimes returns list of lists
                scores = scores[0]
            return max(scores, key=lambda item: item["score"])["label"]
        except Exception:
            return "neutral"

    df["main_emotion"] = [_dominant_label(review) for review in df["review_text"]]
    return df


# ========== MAIN PIPELINE ========== #

with st.spinner("🔎 Analyzing your data... Please wait a few moments."):
    df = load_reviews(CSV_PATH)
    raw_reviews = df["review_text"].dropna().tolist() if not df.empty else []
    # Keep only real text: skip non-strings, placeholder rows and blank entries.
    reviews = [
        t for t in raw_reviews
        if isinstance(t, str) and "unreadable" not in t and "missing" not in t and t.strip()
    ]

    personas = []
    if reviews:
        personas_raw = generate_personas(reviews, 4)
        personas = parse_personas_bulletproof(personas_raw, 4)
        if personas:
            st.session_state['personas'] = personas
            # Saving is best effort: an unwritable path must not take the page down.
            try:
                with open(PERSONA_PATH, "w", encoding="utf-8") as f:
                    json.dump(personas, f, ensure_ascii=False, indent=2)
            except OSError as e:
                st.warning(f"Personas could not be saved to {PERSONA_PATH}: {e}")
            else:
                st.success(f"{len(personas)} personas saved for next use.")
        elif personas_raw.startswith("Error generating personas:"):
            # generate_personas reports failures as text; show it, don't leave an empty page.
            st.error(personas_raw)

    persona_colors = [neon_green, neon_blue, neon_pink, neon_orange]
    persona_cycler = cycle(persona_colors)
    persona_names = []

    # Persona grid (left-right)
    if personas:
        st.markdown("<br>", unsafe_allow_html=True)
        grid_cols = st.columns(2)
        for i, p in enumerate(personas):
            c = next(persona_cycler)
            # Persona fields come from the LLM; escape them before embedding in HTML.
            name_html = html.escape(p["name"], quote=False)
            summary_html = html.escape(p["summary"], quote=False)
            bullets_html = "".join(f"<li>{html.escape(b, quote=False)}</li>" for b in p["bullets"])
            with grid_cols[i % 2]:
                st.markdown(
                    f"<div style='background:linear-gradient(90deg,{c}18,#181830 95%);"
                    "padding:24px 26px 16px 26px;border-radius:18px;margin-bottom:24px;"
                    f"box-shadow:0 2px 22px {c}22;'>"
                    f"<h2 style='color:{c};margin-bottom:0.18em'>{name_html}</h2>"
                    f"<div style='color:#fff;font-size:1.15em;font-weight:500;margin-bottom:10px'>Summary: {summary_html}</div>"
                    f"<div style='color:{neon_pink};font-weight:700;font-size:1.08em;margin-bottom:2px'>Characteristics</div>"
                    f"<ul style='font-size:1.02em;margin-top:3px'>{bullets_html}</ul>"
                    "</div>", unsafe_allow_html=True
                )
            persona_names.append(p["name"])
        st.markdown("<hr>", unsafe_allow_html=True)

    if personas and len(reviews) > 0:
        # Rows without text cannot be vectorised or classified, so drop them
        # and make every remaining entry a string.
        df_reviews = df.dropna(subset=["review_text"]).copy()
        df_reviews["review_text"] = df_reviews["review_text"].astype(str)

        # Assign reviews to persona via TF-IDF (fast)
        persona_for_review = assign_review_to_persona_tfidf(df_reviews, personas)
        df_reviews["persona"] = persona_for_review

        # --- Generate all summary stats for new graphs
        # 1. Persona Review Share
        persona_counts = df_reviews["persona"].value_counts()
        # 2. Persona Sentiment
        avg_sentiment = df_reviews.groupby("persona")["polarity"].mean()
        # 3. Persona Review Length
        avg_length = df_reviews.groupby("persona")["review_length"].mean()
        # 4. Persona Emotion (optional)
        if "main_emotion" not in df_reviews.columns:
            df_reviews = emotion_pipeline(df_reviews)
        emo_dist = df_reviews.groupby("persona")["main_emotion"].value_counts().unstack().fillna(0)

        # --- Row 1: Pie and Sentiment Bar
        c1, c2 = st.columns(2)
        with c1:
            st.markdown("<h3 style='color:#fff;font-size:2rem;font-weight:700;'>Sales/Review Share by Persona</h3>", unsafe_allow_html=True)
            fig = go.Figure(data=[go.Pie(labels=persona_counts.index, values=persona_counts.values, hole=0.45)])
            fig.update_traces(textinfo='percent+label')
            st.plotly_chart(fig, use_container_width=True)
            st.markdown(block_markdown(
                groq_bullets_persona("Sales/Review Share by Persona", persona_counts.to_dict()), neon_green
            ), unsafe_allow_html=True)

        with c2:
            st.markdown("<h3 style='color:#fff;font-size:2rem;font-weight:700;'>Average Sentiment by Persona</h3>", unsafe_allow_html=True)
            fig2 = go.Figure(data=[go.Bar(x=avg_sentiment.index, y=avg_sentiment.values, marker=dict(color=persona_colors))])
            fig2.update_layout(xaxis_title="Persona", yaxis_title="Avg Sentiment", font=dict(size=15))
            st.plotly_chart(fig2, use_container_width=True)
            st.markdown(block_markdown(
                groq_bullets_persona("Average Sentiment by Persona", avg_sentiment.to_dict()), neon_blue
            ), unsafe_allow_html=True)

        # --- Row 2: Review Length and Emotion Distribution
        c3, c4 = st.columns(2)
        with c3:
            st.markdown("<h3 style='color:#fff;font-size:2rem;font-weight:700;'>Persona vs. Review Length Distribution</h3>", unsafe_allow_html=True)
            fig3 = go.Figure(data=[go.Bar(x=avg_length.index, y=avg_length.values, marker=dict(color=persona_colors))])
            fig3.update_layout(xaxis_title="Persona", yaxis_title="Avg Review Length", font=dict(size=15))
            st.plotly_chart(fig3, use_container_width=True)
            st.markdown(block_markdown(
                groq_bullets_persona("Average review length (words) by persona", avg_length.to_dict()), neon_orange
            ), unsafe_allow_html=True)

        with c4:
            st.markdown("<h3 style='color:#fff;font-size:2rem;font-weight:700;'>Persona vs. Emotion Distribution</h3>", unsafe_allow_html=True)
            fig4 = go.Figure()
            # One stacked bar trace per emotion label.
            for em in emo_dist.columns:
                fig4.add_trace(go.Bar(name=em, x=emo_dist.index, y=emo_dist[em].values))
            fig4.update_layout(barmode='stack', xaxis_title="Persona", yaxis_title="Emotion Count", font=dict(size=15))
            st.plotly_chart(fig4, use_container_width=True)
            st.markdown(block_markdown(
                groq_bullets_persona("Distribution of primary emotions per persona", emo_dist.to_dict()), neon_pink
            ), unsafe_allow_html=True)

# One card per persona: summarised positive/negative sentiment plus a
# recommendation, laid out in two columns.
st.markdown("<hr><h2 style='color:#fff'>Persona-wise Sentiment Highlights & Recommendations</h2>", unsafe_allow_html=True)
persona_grid = st.columns(2)

for idx, p in enumerate(personas):
    persona_df = df_reviews[df_reviews["persona"] == p["name"]]
    # The first two positive and first two negative reviews feed the summaries.
    top_pos = persona_df[persona_df["polarity"] > 0]["review_text"].head(2).tolist()
    top_neg = persona_df[persona_df["polarity"] < 0]["review_text"].head(2).tolist()
    pos_summary = groq_bullets_persona(
        f"Summarize two main positive sentiment points, with percentage, for persona '{p['name']}'.",
        " ".join(map(str, top_pos))
    ) if top_pos else "No positive reviews."
    neg_summary = groq_bullets_persona(
        f"Summarize two main negative sentiment points, with percentage, for persona '{p['name']}'.",
        " ".join(map(str, top_neg))
    ) if top_neg else "No negative reviews."

    # The instructions refer to "review highlights and persona details", so
    # that material is appended; otherwise the model only sees the name.
    persona_traits = "; ".join(p["bullets"])
    rec_prompt = (
        f"You are a product marketing strategist. "
        f"Based on the review highlights and persona details for '{p['name']}' "
        f"(do not repeat the characteristics), write one concise or mention name of user, actionable product or marketing recommendation. Dont put * anywhere "
        f"for the company to better engage this persona. "
        f"Focus on practical actions the business can take (such as messaging, offers, features, or campaigns). "
        f"Reply with 1-2 sentences, avoid restating the persona’s traits."
        f"\n\nPersona summary: {p['summary']}"
        f"\nPersona characteristics: {persona_traits}"
        f"\nPositive review highlights:\n{pos_summary}"
        f"\nNegative review highlights:\n{neg_summary}"
    )

    try:
        rec_out = groq_client.chat.completions.create(
            model=GROQ_MODEL,
            messages=[
                {"role": "system", "content": PRODUCT_CONTEXT},
                {"role": "user", "content": rec_prompt}
            ],
            max_tokens=80, temperature=0.5
        ).choices[0].message.content.strip()
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate.
        rec_out = "No recommendation available."

    card_color = persona_colors[idx % len(persona_colors)]
    # Persona fields come from the LLM; escape them before embedding in HTML.
    name_html = html.escape(p["name"], quote=False)
    summary_html = html.escape(p["summary"], quote=False)

    with persona_grid[idx % 2]:
        st.markdown(
            f"<div style='margin-bottom:38px;padding:18px 20px 8px 20px;border-radius:18px;"
            f"background:linear-gradient(90deg,{card_color}22,#181830 100%);box-shadow:0 2px 22px {card_color}18;'>"
            f"<h2 style='color:{card_color};font-size:1.35em;margin-bottom:0.3em'>{name_html}</h2>"
            f"<div style='color:#fff;font-size:1.13em;font-weight:400;margin-bottom:14px;'>{summary_html}</div>"
            "<div style='margin-bottom:16px'>"
            f"<b style='color:{neon_green};font-size:1.1em;'>Top Positive Sentiments:</b><br>{block_markdown(pos_summary, neon_green)}"
            "</div>"
            "<div style='margin-bottom:16px'>"
            f"<b style='color:{neon_pink};font-size:1.1em;'>Top Negative Sentiments:</b><br>{block_markdown(neg_summary, neon_pink)}"
            "</div>"
            "<div>"
            f"<b style='color:{neon_yellow};font-size:1.1em;'>Recommendation:</b><br>{block_markdown(rec_out, neon_yellow)}"
            "</div>"
            "</div>", unsafe_allow_html=True
        )


st.markdown("---")