Spaces:
Running
Running
File size: 5,157 Bytes
0017ff9 c54c85f 0017ff9 c54c85f 0017ff9 c54c85f 7c75556 c54c85f 0017ff9 c54c85f 0017ff9 c54c85f 0017ff9 c54c85f 0017ff9 c54c85f 0017ff9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os

# Redirect the Hugging Face caches to /tmp. They are assigned ahead of the
# `transformers` import below, presumably so the library picks them up when it
# is first imported -- keep this ordering.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
os.environ["HF_HOME"] = "/tmp/hf-home"

import nltk

# Directory that receives the NLTK tokenizer data downloaded below.
NLTK_DATA_DIR = "/tmp/nltk_data"

# Downloading into a custom directory is not enough on its own: NLTK only
# searches the locations listed in `nltk.data.path`, so the directory has to be
# registered or `sent_tokenize` fails with LookupError.
if NLTK_DATA_DIR not in nltk.data.path:
    nltk.data.path.append(NLTK_DATA_DIR)
nltk.download("punkt", download_dir=NLTK_DATA_DIR)
# Newer NLTK releases load the "punkt_tab" resource instead of the pickled
# "punkt" models; fetching both keeps `sent_tokenize` working on either.
nltk.download("punkt_tab", download_dir=NLTK_DATA_DIR)

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize
from transformers import pipeline
import numpy as np

# === Load Hugging Face Pipelines ===
# Built once at import time and shared by every helper in this module.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
emotion_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
# === Summarization Functions ===
def summarize_review(text, max_len=60, min_len=10):
    """Return a brief transformer-generated summary of ``text``.

    Calls the module-level ``summarizer`` pipeline with sampling disabled.

    Args:
        text: Review text to summarize.
        max_len: Forwarded to the pipeline as ``max_length``.
        min_len: Forwarded to the pipeline as ``min_length``.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    outputs = summarizer(
        text,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        # Clip reviews that exceed the model's maximum input length; without
        # this, over-long inputs can make the call fail instead of summarizing.
        truncation=True,
    )
    return outputs[0]["summary_text"]
def smart_summarize(text, n_clusters=1):
    """Extractive summary: one representative sentence per TF-IDF cluster.

    The text is split into sentences, the sentences are vectorized with TF-IDF
    (English stop words removed) and grouped with KMeans. For each non-empty
    cluster, the sentence most similar (cosine) to the cluster's mean vector is
    kept. Kept sentences are returned in their original order, joined by a
    single space.

    Args:
        text: Text to summarize.
        n_clusters: Number of KMeans clusters, i.e. the maximum number of
            sentences in the summary.

    Returns:
        ``text`` unchanged when it has at most one sentence or when no TF-IDF
        vocabulary can be built; all sentences joined when there are no more
        sentences than clusters; otherwise the selected sentences.
    """
    sentences = sent_tokenize(text)
    if len(sentences) <= 1:
        return text
    # Decide this before vectorizing: there is nothing to cluster, so fitting
    # TF-IDF first would be wasted work (and an avoidable failure point).
    if len(sentences) <= n_clusters:
        return " ".join(sentences)

    try:
        tfidf_matrix = TfidfVectorizer(stop_words="english").fit_transform(sentences)
    except ValueError:
        # fit_transform raises ValueError when the vocabulary ends up empty
        # (e.g. every sentence consists solely of stop words); no ranking is
        # possible, so fall back to the untouched input.
        return text

    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(tfidf_matrix)

    chosen = []
    for cluster_id in range(n_clusters):
        members = np.where(kmeans.labels_ == cluster_id)[0]
        if not len(members):
            continue
        cluster_rows = tfidf_matrix[members]
        # The mean of a sparse matrix is an np.matrix; convert to a plain array.
        centroid = np.asarray(cluster_rows.mean(axis=0))
        similarity = cosine_similarity(centroid, cluster_rows.toarray())
        chosen.append(int(members[np.argmax(similarity)]))

    # Sorting the indices restores document order without rescanning the
    # sentence list for every selected sentence.
    return " ".join(sentences[i] for i in sorted(chosen))
# === Emotion Detection ===
def detect_emotion(text):
    """Return the top emotion label for ``text``, or ``"unknown"`` on failure.

    The module-level ``emotion_pipeline`` is created with ``top_k=1``.
    Depending on the transformers version, a single-string call can return
    either ``[{"label": ...}]`` or the nested ``[[{"label": ...}]]``. Indexing
    the nested form with ``[0]["label"]`` raises ``TypeError``, which the broad
    ``except`` would silently turn into ``"unknown"`` for every input, so both
    shapes are unwrapped here.

    Args:
        text: Text to classify.

    Returns:
        The ``label`` of the first prediction, or ``"unknown"`` if the
        pipeline call or the result lookup raises.
    """
    try:
        prediction = emotion_pipeline(text)
        # Peel list wrappers until the first prediction dict is reached.
        while isinstance(prediction, (list, tuple)):
            prediction = prediction[0]
        return prediction["label"]
    except Exception:
        # Deliberate best-effort: emotion detection must never break the caller.
        return "unknown"
# === Follow-up Q&A (single or multi) ===
def answer_followup(text, question, verbosity="brief"):
    """Answer one follow-up question, or a list of them, about ``text``.

    Each question is sent to the module-level ``qa_pipeline`` with ``text`` as
    the context, and the ``answer`` field of the response is used (empty
    string when the field is missing).

    Args:
        text: Context the answers are extracted from.
        question: A single question, or a ``list`` of questions.
        verbosity: ``"detailed"`` (case-insensitive) wraps each answer in a
            formatted sentence; any other value returns the bare answer.

    Returns:
        A list of answers when ``question`` is a list, otherwise one answer
        string. If anything raises, a fixed apology string is returned
        instead (also in list mode).
    """

    def ask(single_question):
        reply = qa_pipeline({"question": single_question, "context": text})
        return reply.get("answer", "")

    def render_listed(single_question):
        found = ask(single_question)
        return f"**{single_question}** → {found}" if verbosity.lower() == "detailed" else found

    try:
        if not isinstance(question, list):
            found = ask(question)
            if verbosity.lower() == "detailed":
                return f"Based on the review, the answer is: **{found}**"
            return found
        return [render_listed(item) for item in question]
    except Exception:
        return "Sorry, I couldn't generate a follow-up answer."
# === Optional Explanation Generator ===
def generate_explanation(text):
    """Return a short, prefixed explanation of ``text``.

    Summarizes ``text`` with the module-level ``summarizer`` pipeline
    (``max_length=60``, ``min_length=20``, sampling disabled) and wraps the
    result in a fixed sentence.

    Args:
        text: Review text to explain.

    Returns:
        The formatted explanation, or a fixed warning string if the
        summarizer call raises.
    """
    try:
        outputs = summarizer(
            text,
            max_length=60,
            min_length=20,
            do_sample=False,
            # Clip over-long reviews to the model's input limit so they get an
            # explanation rather than dropping into the fallback below.
            truncation=True,
        )
        explanation = outputs[0]["summary_text"]
        return f"🧠 This review can be explained as: {explanation}"
    except Exception:
        # Deliberate best-effort: the explanation is optional.
        return "⚠️ Explanation could not be generated."
# === Industry Detector ===
# Ordered (industry, keyword prefixes) rules; the first rule with a hit wins,
# so the order below is part of the behaviour.
_INDUSTRY_RULES = (
    ("Healthcare", ("doctor", "hospital", "health", "pill", "med")),
    ("Travel", ("flight", "hotel", "trip", "booking")),
    ("Banking", ("bank", "loan", "credit", "payment")),
    ("Fitness", ("gym", "trainer", "fitness", "workout")),
    ("Entertainment", ("movie", "series", "stream", "video")),
    ("Gaming", ("game", "gaming", "console")),
    ("Food Delivery", ("food", "delivery", "restaurant", "order")),
    ("Education", ("school", "university", "teacher", "course")),
    ("Insurance", ("insurance", "policy", "claim")),
    ("Real Estate", ("property", "rent", "apartment", "house")),
    # "smartphone" / "iphone" are listed explicitly because "phone" is no
    # longer matched in the middle of a word.
    ("E-commerce", ("shop", "buy", "product", "phone", "smartphone", "iphone", "amazon", "flipkart")),
)


def detect_industry(text):
    """Guess the industry a review is about from keyword hits.

    The text is lower-cased and split into alphanumeric words. A rule matches
    when any word *starts with* one of its keywords, so stems such as "med"
    still cover "medical" or "medication". Matching at word starts (rather
    than anywhere in the string) avoids accidental hits such as "rent" inside
    "different" or "med" inside "confirmed". This remains a heuristic.

    Args:
        text: Review text.

    Returns:
        The label of the first matching rule, or ``"Generic"`` when no rule
        matches.
    """
    # Replace every non-alphanumeric character by a space, then split on
    # whitespace to obtain the words.
    words = "".join(ch if ch.isalnum() else " " for ch in text.lower()).split()
    for industry, prefixes in _INDUSTRY_RULES:
        # str.startswith accepts a tuple of alternatives.
        if any(word.startswith(prefixes) for word in words):
            return industry
    return "Generic"
# === Product Category Detector ===
# Ordered (category, keyword prefixes) rules; the first rule with a hit wins,
# so the order below is part of the behaviour.
_PRODUCT_CATEGORY_RULES = (
    ("Mobile Devices", ("mobile", "smartphone", "iphone", "samsung", "phone")),
    ("Laptops", ("laptop", "macbook", "notebook", "chromebook")),
    # "dishwasher" is listed explicitly because "washer" is no longer matched
    # in the middle of a word.
    ("Home Appliances", ("tv", "refrigerator", "microwave", "washer", "dishwasher")),
    # Likewise "smartwatch" for "watch".
    ("Wearables", ("watch", "smartwatch", "band", "fitbit", "wearable")),
    ("Web App", ("app", "portal", "site", "website")),
)


def detect_product_category(text):
    """Guess the product category a review is about from keyword hits.

    The text is lower-cased and split into alphanumeric words. A rule matches
    when any word *starts with* one of its keywords. Matching at word starts
    (rather than anywhere in the string) avoids accidental hits such as "app"
    inside "happy" / "disappointed" or "band" inside "husband". This remains
    a heuristic.

    Args:
        text: Review text.

    Returns:
        The label of the first matching rule, or ``"General"`` when no rule
        matches.
    """
    # Replace every non-alphanumeric character by a space, then split on
    # whitespace to obtain the words.
    words = "".join(ch if ch.isalnum() else " " for ch in text.lower()).split()
    for category, prefixes in _PRODUCT_CATEGORY_RULES:
        # str.startswith accepts a tuple of alternatives.
        if any(word.startswith(prefixes) for word in words):
            return category
    return "General"
|