# === Environment Setup ===
# Redirect Hugging Face caches to a writable location (e.g., on Spaces).
import os
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"  # legacy variable, kept for older transformers
os.environ["HF_HOME"] = "/tmp/hf-home"

import nltk
nltk.download("punkt", download_dir="/tmp/nltk_data", quiet=True)
nltk.download("punkt_tab", download_dir="/tmp/nltk_data", quiet=True)  # needed on NLTK >= 3.9
nltk.data.path.append("/tmp/nltk_data")  # otherwise sent_tokenize cannot see the custom dir

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize
from transformers import pipeline
import numpy as np

# === Load Hugging Face Pipelines ===
# All three models are fetched from the Hub on first run and cached under the
# directories configured above.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)

# === Summarization Functions ===
def summarize_review(text, max_len=60, min_len=10):
    """Transformer-based summarization (brief)"""
    return summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)[0]["summary_text"]

def smart_summarize(text, n_clusters=1):
    """Extractive summarization: k-means over TF-IDF sentence vectors,
    keeping the sentence closest to each cluster centroid."""
    sentences = sent_tokenize(text)
    if len(sentences) <= 1:
        return text

    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(sentences)

    if len(sentences) <= n_clusters:
        return " ".join(sentences)

    # n_init pinned explicitly so results are stable across scikit-learn versions
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42).fit(tfidf_matrix)
    summary_sentences = []

    for i in range(n_clusters):
        idx = np.where(kmeans.labels_ == i)[0]
        if len(idx) == 0:
            continue
        # Keep the sentence whose TF-IDF vector is closest (cosine) to the cluster mean.
        avg_vector = np.asarray(tfidf_matrix[idx].mean(axis=0))
        sim = cosine_similarity(avg_vector, tfidf_matrix[idx].toarray())
        most_representative = sentences[idx[np.argmax(sim)]]
        summary_sentences.append(most_representative)

    # Emit the picked sentences in their original order in the text.
    return " ".join(sorted(summary_sentences, key=sentences.index))

# === Emotion Detection ===
def detect_emotion(text):
    try:
        result = emotion_pipeline(text)[0]  # top-scoring label only (top_k=1)
        return result["label"]
    except Exception:
        return "unknown"

# === Follow-up Q&A (single or multi) ===
def answer_followup(text, question, verbosity="brief"):
    """Answer a single question (str) or several (list of str) against the review text."""
    try:
        if isinstance(question, list):
            answers = []
            for q in question:
                response = qa_pipeline({"question": q, "context": text})
                answer = response.get("answer", "")
                if verbosity.lower() == "detailed":
                    answers.append(f"**{q}** → {answer}")
                else:
                    answers.append(answer)
            return answers
        else:
            response = qa_pipeline({"question": question, "context": text})
            answer = response.get("answer", "")
            if verbosity.lower() == "detailed":
                return f"Based on the review, the answer is: **{answer}**"
            return answer
    except Exception:
        return "Sorry, I couldn't generate a follow-up answer."

# === Optional Explanation Generator ===
def generate_explanation(text):
    try:
        explanation = summarizer(text, max_length=60, min_length=20, do_sample=False)[0]["summary_text"]
        return f"🧠 This review can be explained as: {explanation}"
    except Exception:
        return "⚠️ Explanation could not be generated."

# === Industry Detector ===
def detect_industry(text):
    # Naive keyword routing: buckets are checked in order and the first match
    # wins; substring tests can over-match (e.g. "med" also matches "media").
    text = text.lower()
    if any(k in text for k in ["doctor", "hospital", "health", "pill", "med"]):
        return "Healthcare"
    if any(k in text for k in ["flight", "hotel", "trip", "booking"]):
        return "Travel"
    if any(k in text for k in ["bank", "loan", "credit", "payment"]):
        return "Banking"
    if any(k in text for k in ["gym", "trainer", "fitness", "workout"]):
        return "Fitness"
    if any(k in text for k in ["movie", "series", "stream", "video"]):
        return "Entertainment"
    if any(k in text for k in ["game", "gaming", "console"]):
        return "Gaming"
    if any(k in text for k in ["food", "delivery", "restaurant", "order"]):
        return "Food Delivery"
    if any(k in text for k in ["school", "university", "teacher", "course"]):
        return "Education"
    if any(k in text for k in ["insurance", "policy", "claim"]):
        return "Insurance"
    if any(k in text for k in ["property", "rent", "apartment", "house"]):
        return "Real Estate"
    if any(k in text for k in ["shop", "buy", "product", "phone", "amazon", "flipkart"]):
        return "E-commerce"
    return "Generic"

# === Product Category Detector ===
def detect_product_category(text):
    # Same first-match-wins substring routing as detect_industry
    # (e.g. "band" also matches inside "brand").
    text = text.lower()
    if any(k in text for k in ["mobile", "smartphone", "iphone", "samsung", "phone"]):
        return "Mobile Devices"
    if any(k in text for k in ["laptop", "macbook", "notebook", "chromebook"]):
        return "Laptops"
    if any(k in text for k in ["tv", "refrigerator", "microwave", "washer"]):
        return "Home Appliances"
    if any(k in text for k in ["watch", "band", "fitbit", "wearable"]):
        return "Wearables"
    if any(k in text for k in ["app", "portal", "site", "website"]):
        return "Web App"
    return "General"