Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,8 @@
|
|
2 |
Hotel Review Analysis System for The Kimberley Hotel Hong Kong
|
3 |
ISOM5240 Group Project
|
4 |
|
5 |
-
|
6 |
analysis and aspect detection, then generates professional responses.
|
7 |
-
|
8 |
"""
|
9 |
|
10 |
import streamlit as st
|
@@ -15,14 +14,12 @@ from transformers import (
|
|
15 |
)
|
16 |
import torch
|
17 |
import re
|
18 |
-
import pyperclip
|
19 |
from langdetect import detect
|
20 |
|
21 |
# ===== CONSTANTS =====
|
22 |
-
MAX_CHARS = 500 #
|
23 |
|
24 |
# Supported languages with their display names
|
25 |
-
# Note: Chinese model handles both Mandarin and Cantonese text
|
26 |
SUPPORTED_LANGUAGES = {
|
27 |
'en': 'English',
|
28 |
'zh': 'Chinese',
|
@@ -33,70 +30,50 @@ SUPPORTED_LANGUAGES = {
|
|
33 |
}
|
34 |
|
35 |
# ===== ASPECT CONFIGURATION =====
|
36 |
-
# Dictionary mapping aspect categories to their keywords
|
37 |
-
# Used for both keyword matching and zero-shot classification
|
38 |
aspect_map = {
|
39 |
-
# Location
|
40 |
"location": ["location", "near", "close", "access", "transport", "distance", "area", "tsim sha tsui", "kowloon"],
|
41 |
"view": ["view", "scenery", "vista", "panorama", "outlook", "skyline"],
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
"room
|
46 |
-
|
47 |
-
|
48 |
-
"
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
"
|
53 |
-
"
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
"dining": ["breakfast", "dinner", "restaurant", "meal", "food", "buffet", "lunch"],
|
59 |
-
"bar": ["bar", "drinks", "cocktail", "wine", "lounge", "happy hour"],
|
60 |
-
"pool": ["pool", "swimming", "jacuzzi", "sun lounger", "deck", "towels"],
|
61 |
-
"spa": ["spa", "massage", "treatment", "relax", "wellness", "sauna"],
|
62 |
-
"fitness": ["gym", "fitness", "exercise", "workout", "training", "weights"],
|
63 |
-
|
64 |
-
# Technical aspects
|
65 |
-
"Wi-Fi": ["wifi", "internet", "connection", "online", "network", "speed"],
|
66 |
-
"AC": ["air conditioning", "AC", "temperature", "heating", "cooling", "ventilation"],
|
67 |
-
"elevator": ["elevator", "lift", "escalator", "vertical transport", "wait"],
|
68 |
-
|
69 |
-
# Value aspects
|
70 |
-
"pricing": ["price", "expensive", "cheap", "value", "rate", "cost", "worth"],
|
71 |
-
"extra charges": ["charge", "fee", "bill", "surcharge", "additional", "hidden"]
|
72 |
}
|
73 |
|
74 |
-
# Pre-defined professional responses for positive aspects
|
75 |
aspect_responses = {
|
76 |
-
"location": "We're delighted you enjoyed our prime location in
|
77 |
"view": "It's wonderful to hear you appreciated the views from your room.",
|
78 |
-
"room comfort": "Our team takes special care to ensure room comfort
|
79 |
-
|
|
|
|
|
80 |
}
|
81 |
|
82 |
-
# Improvement actions for negative aspects
|
83 |
improvement_actions = {
|
84 |
"AC": "have addressed the air conditioning issues",
|
85 |
"housekeeping": "have reviewed our cleaning procedures",
|
86 |
-
|
87 |
}
|
88 |
|
89 |
# ===== MODEL CONFIGURATION =====
|
90 |
-
# Helsinki-NLP translation models for supported language pairs
|
91 |
TRANSLATION_MODELS = {
|
92 |
-
|
93 |
-
'
|
94 |
-
'
|
95 |
-
'
|
96 |
-
'
|
97 |
-
'de-en': 'Helsinki-NLP/opus-mt-de-en', # German
|
98 |
-
|
99 |
-
# Translations from English (for responses)
|
100 |
'en-zh': 'Helsinki-NLP/opus-mt-en-zh',
|
101 |
'en-ja': 'Helsinki-NLP/opus-mt-en-ja',
|
102 |
'en-ko': 'Helsinki-NLP/opus-mt-en-ko',
|
@@ -104,82 +81,43 @@ TRANSLATION_MODELS = {
|
|
104 |
'en-de': 'Helsinki-NLP/opus-mt-en-de'
|
105 |
}
|
106 |
|
107 |
-
# ===== MODEL LOADING
|
108 |
@st.cache_resource
|
109 |
def load_sentiment_model():
|
110 |
-
"""
|
111 |
-
Load and cache the fine-tuned sentiment analysis model.
|
112 |
-
Uses a BERTweet model fine-tuned on hotel reviews.
|
113 |
-
Returns:
|
114 |
-
tuple: (model, tokenizer)
|
115 |
-
"""
|
116 |
model = AutoModelForSequenceClassification.from_pretrained("smtsead/fine_tuned_bertweet_hotel")
|
117 |
tokenizer = AutoTokenizer.from_pretrained('finiteautomata/bertweet-base-sentiment-analysis')
|
118 |
return model, tokenizer
|
119 |
|
120 |
@st.cache_resource
|
121 |
def load_aspect_classifier():
|
122 |
-
"""
|
123 |
-
Load and cache the zero-shot aspect classifier.
|
124 |
-
Uses DeBERTa model for multi-label aspect classification.
|
125 |
-
Returns:
|
126 |
-
pipeline: Zero-shot classification pipeline
|
127 |
-
"""
|
128 |
return pipeline("zero-shot-classification", model="MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33")
|
129 |
|
130 |
@st.cache_resource
|
131 |
def load_translation_model(src_lang, target_lang='en'):
|
132 |
-
"""
|
133 |
-
Load and cache the appropriate Helsinki-NLP translation model.
|
134 |
-
Args:
|
135 |
-
src_lang (str): Source language code
|
136 |
-
target_lang (str): Target language code (default 'en')
|
137 |
-
Returns:
|
138 |
-
pipeline: Translation pipeline
|
139 |
-
Raises:
|
140 |
-
ValueError: If language pair is not supported
|
141 |
-
"""
|
142 |
model_key = f"{src_lang}-{target_lang}"
|
143 |
if model_key not in TRANSLATION_MODELS:
|
144 |
raise ValueError(f"Unsupported translation: {src_lang}→{target_lang}")
|
145 |
return pipeline("translation", model=TRANSLATION_MODELS[model_key])
|
146 |
|
147 |
# ===== CORE FUNCTIONS =====
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
def translate_text(text, src_lang, target_lang='en'):
|
149 |
-
"""
|
150 |
-
Translate text between supported languages using Helsinki-NLP models.
|
151 |
-
Args:
|
152 |
-
text (str): Text to translate
|
153 |
-
src_lang (str): Source language code
|
154 |
-
target_lang (str): Target language code (default 'en')
|
155 |
-
Returns:
|
156 |
-
dict: Translation results or error message
|
157 |
-
"""
|
158 |
try:
|
159 |
if src_lang == target_lang:
|
160 |
return {'translation': text, 'source_lang': src_lang}
|
161 |
-
|
162 |
translator = load_translation_model(src_lang, target_lang)
|
163 |
result = translator(text)[0]['translation_text']
|
164 |
-
return {
|
165 |
-
'original': text,
|
166 |
-
'translation': result,
|
167 |
-
'source_lang': src_lang,
|
168 |
-
'target_lang': target_lang
|
169 |
-
}
|
170 |
except Exception as e:
|
171 |
return {'error': str(e)}
|
172 |
|
173 |
def analyze_sentiment(text, model, tokenizer):
|
174 |
-
"""
|
175 |
-
Perform sentiment analysis on text.
|
176 |
-
Args:
|
177 |
-
text (str): Text to analyze
|
178 |
-
model: Pretrained sentiment model
|
179 |
-
tokenizer: Corresponding tokenizer
|
180 |
-
Returns:
|
181 |
-
dict: Sentiment analysis results (label, confidence, sentiment)
|
182 |
-
"""
|
183 |
inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')
|
184 |
with torch.no_grad():
|
185 |
outputs = model(**inputs)
|
@@ -193,16 +131,6 @@ def analyze_sentiment(text, model, tokenizer):
|
|
193 |
}
|
194 |
|
195 |
def detect_aspects(text, aspect_classifier):
|
196 |
-
"""
|
197 |
-
Detect hotel aspects mentioned in text using two-stage approach:
|
198 |
-
1. Keyword matching to identify potential aspects
|
199 |
-
2. Zero-shot classification to confirm and score aspects
|
200 |
-
Args:
|
201 |
-
text (str): Text to analyze
|
202 |
-
aspect_classifier: Zero-shot classification pipeline
|
203 |
-
Returns:
|
204 |
-
list: Detected aspects with confidence scores
|
205 |
-
"""
|
206 |
relevant_aspects = []
|
207 |
text_lower = text.lower()
|
208 |
for aspect, keywords in aspect_map.items():
|
@@ -216,21 +144,10 @@ def detect_aspects(text, aspect_classifier):
|
|
216 |
multi_label=True,
|
217 |
hypothesis_template="This review discusses the hotel's {}."
|
218 |
)
|
219 |
-
return [(aspect, f"{score:.0%}") for aspect, score in
|
220 |
-
zip(result['labels'], result['scores']) if score > 0.6]
|
221 |
return []
|
222 |
|
223 |
def generate_response(sentiment, aspects, original_text):
|
224 |
-
"""
|
225 |
-
Generate professional response based on sentiment and aspects.
|
226 |
-
Args:
|
227 |
-
sentiment (dict): Sentiment analysis results
|
228 |
-
aspects (list): Detected aspects with scores
|
229 |
-
original_text (str): Original review text
|
230 |
-
Returns:
|
231 |
-
str: Generated response
|
232 |
-
"""
|
233 |
-
# Personalization - extract guest name if mentioned
|
234 |
guest_name = ""
|
235 |
name_match = re.search(r"(Mr\.|Ms\.|Mrs\.)\s(\w+)", original_text, re.IGNORECASE)
|
236 |
if name_match:
|
@@ -239,12 +156,11 @@ def generate_response(sentiment, aspects, original_text):
|
|
239 |
if sentiment['label'] == 1:
|
240 |
response = f"""Dear{guest_name if guest_name else ' Valued Guest'},
|
241 |
|
242 |
-
Thank you for choosing The Kimberley Hotel Hong Kong
|
243 |
|
244 |
-
# Add relevant aspect responses (limit to 2 most relevant)
|
245 |
added_aspects = set()
|
246 |
for aspect, _ in sorted(aspects, key=lambda x: float(x[1][:-1]), reverse=True):
|
247 |
-
if aspect in aspect_responses
|
248 |
response += "\n\n" + aspect_responses[aspect]
|
249 |
added_aspects.add(aspect)
|
250 |
if len(added_aspects) >= 2:
|
@@ -254,209 +170,120 @@ Thank you for choosing The Kimberley Hotel Hong Kong and for sharing your feedba
|
|
254 |
else:
|
255 |
response = f"""Dear{guest_name if guest_name else ' Guest'},
|
256 |
|
257 |
-
Thank you for your feedback.
|
258 |
|
259 |
-
# Add improvement actions (limit to 2 most relevant)
|
260 |
added_improvements = set()
|
261 |
for aspect, _ in sorted(aspects, key=lambda x: float(x[1][:-1]), reverse=True):
|
262 |
-
if aspect in improvement_actions
|
263 |
response += f"\n\nRegarding your comments about the {aspect}, we {improvement_actions[aspect]}."
|
264 |
added_improvements.add(aspect)
|
265 |
if len(added_improvements) >= 2:
|
266 |
break
|
267 |
|
268 |
-
response += "\n\nPlease
|
269 |
|
270 |
return response + "\nSam Tse\nGuest Relations Manager\nThe Kimberley Hotel Hong Kong"
|
271 |
|
272 |
# ===== STREAMLIT UI =====
|
273 |
def main():
|
274 |
-
"""Main application function for Streamlit interface"""
|
275 |
-
# Page configuration
|
276 |
st.set_page_config(
|
277 |
page_title="Kimberley Review Assistant",
|
278 |
page_icon="🏨",
|
279 |
layout="centered"
|
280 |
)
|
281 |
|
282 |
-
# Custom CSS styling
|
283 |
st.markdown("""
|
284 |
<style>
|
285 |
-
|
286 |
-
.
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
margin-bottom: 10px;
|
291 |
-
}
|
292 |
-
/* Subheader styling */
|
293 |
-
.subheader {
|
294 |
-
color: #666666;
|
295 |
-
font-size: 16px;
|
296 |
-
margin-bottom: 30px;
|
297 |
-
}
|
298 |
-
/* Language badge styling */
|
299 |
-
.badge {
|
300 |
-
background-color: #e6f2ff;
|
301 |
-
color: #003366;
|
302 |
-
padding: 3px 10px;
|
303 |
-
border-radius: 15px;
|
304 |
-
font-size: 14px;
|
305 |
-
display: inline-block;
|
306 |
-
margin: 0 5px 5px 0;
|
307 |
-
}
|
308 |
-
/* Character counter styling */
|
309 |
-
.char-counter {
|
310 |
-
font-size: 12px;
|
311 |
-
color: #666;
|
312 |
-
text-align: right;
|
313 |
-
margin-top: -15px;
|
314 |
-
margin-bottom: 15px;
|
315 |
-
}
|
316 |
-
/* Warning style for character limit */
|
317 |
-
.char-counter.warning {
|
318 |
-
color: #ff6b6b;
|
319 |
-
}
|
320 |
-
/* Result box styling */
|
321 |
-
.result-box {
|
322 |
-
border-left: 4px solid #003366;
|
323 |
-
padding: 15px;
|
324 |
-
background-color: #f9f9f9;
|
325 |
-
margin: 20px 0;
|
326 |
-
border-radius: 0 8px 8px 0;
|
327 |
-
white-space: pre-wrap;
|
328 |
-
}
|
329 |
-
/* Aspect badge styling */
|
330 |
-
.aspect-badge {
|
331 |
-
background-color: #e6f2ff;
|
332 |
-
color: #003366;
|
333 |
-
padding: 2px 8px;
|
334 |
-
border-radius: 4px;
|
335 |
-
font-size: 14px;
|
336 |
-
display: inline-block;
|
337 |
-
margin: 2px;
|
338 |
-
}
|
339 |
</style>
|
340 |
""", unsafe_allow_html=True)
|
341 |
|
342 |
-
# Application header
|
343 |
st.markdown('<div class="header">The Kimberley Hotel Hong Kong</div>', unsafe_allow_html=True)
|
344 |
st.markdown('<div class="subheader">Guest Review Analysis System</div>', unsafe_allow_html=True)
|
345 |
|
346 |
-
# Supported languages display
|
347 |
-
st.markdown("**Supported Review Languages:**")
|
348 |
-
lang_cols = st.columns(6)
|
349 |
-
for i, (code, name) in enumerate(SUPPORTED_LANGUAGES.items()):
|
350 |
-
lang_cols[i%6].markdown(f'<div class="badge">{name}</div>', unsafe_allow_html=True)
|
351 |
-
|
352 |
-
# Language selection dropdown
|
353 |
-
review_lang = st.selectbox(
|
354 |
-
"Select review language:",
|
355 |
-
options=list(SUPPORTED_LANGUAGES.keys()),
|
356 |
-
format_func=lambda x: SUPPORTED_LANGUAGES[x],
|
357 |
-
index=0
|
358 |
-
)
|
359 |
-
|
360 |
-
# Review input with character counter
|
361 |
review = st.text_area("**Paste Guest Review:**",
|
362 |
height=200,
|
363 |
max_chars=MAX_CHARS,
|
364 |
-
placeholder=f"Enter review
|
365 |
key="review_input")
|
366 |
|
367 |
-
# Character counter logic
|
368 |
char_count = len(st.session_state.review_input) if 'review_input' in st.session_state else 0
|
369 |
-
|
370 |
-
st.markdown(f'<div class="char-counter {char_class}">{char_count}/{MAX_CHARS} characters</div>',
|
371 |
unsafe_allow_html=True)
|
372 |
|
373 |
-
# Main analysis button
|
374 |
if st.button("Analyze & Generate Response", type="primary"):
|
375 |
if not review.strip():
|
376 |
st.error("Please enter a review")
|
377 |
return
|
378 |
|
379 |
-
# Enforce character limit
|
380 |
if char_count > MAX_CHARS:
|
381 |
-
st.warning(f"Review truncated to {MAX_CHARS} characters
|
382 |
review = review[:MAX_CHARS]
|
383 |
|
384 |
with st.spinner("Analyzing feedback..."):
|
385 |
try:
|
386 |
-
#
|
|
|
|
|
|
|
|
|
387 |
if review_lang != 'en':
|
388 |
translation = translate_text(review, review_lang, 'en')
|
389 |
if 'error' in translation:
|
390 |
st.error(f"Translation error: {translation['error']}")
|
391 |
return
|
392 |
analysis_text = translation['translation']
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
else:
|
394 |
analysis_text = review
|
395 |
|
396 |
-
#
|
397 |
sentiment_model, tokenizer = load_sentiment_model()
|
398 |
aspect_classifier = load_aspect_classifier()
|
399 |
|
400 |
-
# Perform analysis
|
401 |
sentiment = analyze_sentiment(analysis_text, sentiment_model, tokenizer)
|
402 |
aspects = detect_aspects(analysis_text, aspect_classifier)
|
403 |
response = generate_response(sentiment, aspects, analysis_text)
|
404 |
|
405 |
-
# Translate response back
|
406 |
if review_lang != 'en':
|
407 |
translation_back = translate_text(response, 'en', review_lang)
|
408 |
-
if 'error' not in translation_back
|
409 |
-
final_response = translation_back['translation']
|
410 |
-
else:
|
411 |
-
st.warning(f"Couldn't translate response back: {translation_back['error']}")
|
412 |
-
final_response = response
|
413 |
else:
|
414 |
final_response = response
|
415 |
|
416 |
-
# Store results in session state
|
417 |
-
st.session_state.analysis_results = {
|
418 |
-
'sentiment': sentiment,
|
419 |
-
'aspects': aspects,
|
420 |
-
'response': final_response,
|
421 |
-
'original_lang': review_lang
|
422 |
-
}
|
423 |
-
|
424 |
# Display results
|
425 |
st.divider()
|
426 |
|
427 |
-
# Sentiment analysis results
|
428 |
col1, col2 = st.columns(2)
|
429 |
with col1:
|
430 |
st.markdown("### Sentiment Analysis")
|
431 |
-
|
432 |
-
st.
|
433 |
-
st.caption(f"Confidence level: {sentiment['confidence']}")
|
434 |
|
435 |
-
# Detected aspects
|
436 |
with col2:
|
437 |
-
st.markdown("### Key Aspects
|
438 |
if aspects:
|
439 |
for aspect, score in sorted(aspects, key=lambda x: float(x[1][:-1]), reverse=True):
|
440 |
-
st.markdown(f
|
441 |
else:
|
442 |
st.markdown("_No specific aspects detected_")
|
443 |
|
444 |
-
# Generated response
|
445 |
st.divider()
|
446 |
st.markdown("### Draft Response")
|
447 |
st.markdown(f'<div class="result-box">{final_response}</div>', unsafe_allow_html=True)
|
448 |
|
449 |
-
# Clipboard copy functionality
|
450 |
-
if st.button("Copy Response to Clipboard"):
|
451 |
-
try:
|
452 |
-
pyperclip.copy(final_response)
|
453 |
-
st.success("Response copied to clipboard!")
|
454 |
-
except Exception as e:
|
455 |
-
st.error(f"Could not copy to clipboard: {e}")
|
456 |
-
|
457 |
except Exception as e:
|
458 |
-
st.error(f"An error occurred
|
459 |
|
460 |
-
# Entry point
|
461 |
if __name__ == "__main__":
|
462 |
main()
|
|
|
2 |
Hotel Review Analysis System for The Kimberley Hotel Hong Kong
|
3 |
ISOM5240 Group Project
|
4 |
|
5 |
+
Automatically analyzes guest reviews in multiple languages, performs sentiment
|
6 |
analysis and aspect detection, then generates professional responses.
|
|
|
7 |
"""
|
8 |
|
9 |
import streamlit as st
|
|
|
14 |
)
|
15 |
import torch
|
16 |
import re
|
|
|
17 |
from langdetect import detect
|
18 |
|
19 |
# ===== CONSTANTS =====
|
20 |
+
MAX_CHARS = 500 # Character limit for reviews
|
21 |
|
22 |
# Supported languages with their display names
|
|
|
23 |
SUPPORTED_LANGUAGES = {
|
24 |
'en': 'English',
|
25 |
'zh': 'Chinese',
|
|
|
30 |
}
|
31 |
|
32 |
# ===== ASPECT CONFIGURATION =====
|
|
|
|
|
33 |
# Maps each aspect label to the keywords that hint a review mentions it.
# Insertion order is preserved: location, room, service, facilities, technical.
aspect_map = {
    # --- Location ---
    "location": [
        "location", "near", "close", "access", "transport",
        "distance", "area", "tsim sha tsui", "kowloon",
    ],
    "view": [
        "view", "scenery", "vista", "panorama", "outlook", "skyline",
    ],

    # --- Room ---
    "room comfort": [
        "comfortable", "bed", "pillows", "mattress", "linens", "cozy",
    ],
    "room cleanliness": [
        "clean", "dirty", "spotless", "stains", "hygiene",
    ],

    # --- Service ---
    "staff service": [
        "staff", "friendly", "helpful", "rude", "welcoming",
    ],
    "reception": [
        "reception", "check-in", "check-out", "front desk",
    ],

    # --- Facilities ---
    "dining": [
        "breakfast", "dinner", "restaurant", "meal", "food",
    ],
    "spa": [
        "spa", "massage", "treatment", "relax",
    ],

    # --- Technical ---
    "Wi-Fi": [
        "wifi", "internet", "connection",
    ],
    # NOTE(review): detect_aspects lower-cases the review text first, so the
    # upper-case "AC" keyword may never match - confirm against the matching logic.
    "AC": [
        "air conditioning", "AC", "temperature",
    ],
}
|
54 |
|
|
|
55 |
# Canned sentence appended to a positive reply for each detected aspect.
# Aspects without an entry here simply add nothing to the reply.
aspect_responses = {
    "location":
        "We're delighted you enjoyed our prime location in Tsim Sha Tsui.",
    "view":
        "It's wonderful to hear you appreciated the views from your room.",
    "room comfort":
        "Our team takes special care to ensure room comfort.",
    "room cleanliness":
        "Your comments about cleanliness have been noted.",
    "staff service":
        "Your feedback about our staff has been shared with the team.",
    "dining":
        "We appreciate your comments about our dining options.",
}
|
63 |
|
|
|
64 |
# Clause inserted into a negative reply as
# "Regarding your comments about the <aspect>, we <clause>."
# Keys must match the aspect labels defined in aspect_map, otherwise the
# clause is never selected.
improvement_actions = {
    "AC": "have addressed the air conditioning issues",
    # aspect_map labels cleanliness complaints "room cleanliness"; without this
    # entry the cleaning-procedures clause could never be reached.
    "room cleanliness": "have reviewed our cleaning procedures",
    # Legacy label, kept so any caller still passing "housekeeping" keeps working.
    "housekeeping": "have reviewed our cleaning procedures",
    "Wi-Fi": "are upgrading our network infrastructure",
}
|
69 |
|
70 |
# ===== MODEL CONFIGURATION =====
|
|
|
71 |
TRANSLATION_MODELS = {
|
72 |
+
'zh-en': 'Helsinki-NLP/opus-mt-zh-en',
|
73 |
+
'ja-en': 'Helsinki-NLP/opus-mt-ja-en',
|
74 |
+
'ko-en': 'Helsinki-NLP/opus-mt-ko-en',
|
75 |
+
'fr-en': 'Helsinki-NLP/opus-mt-fr-en',
|
76 |
+
'de-en': 'Helsinki-NLP/opus-mt-de-en',
|
|
|
|
|
|
|
77 |
'en-zh': 'Helsinki-NLP/opus-mt-en-zh',
|
78 |
'en-ja': 'Helsinki-NLP/opus-mt-en-ja',
|
79 |
'en-ko': 'Helsinki-NLP/opus-mt-en-ko',
|
|
|
81 |
'en-de': 'Helsinki-NLP/opus-mt-en-de'
|
82 |
}
|
83 |
|
84 |
+
# ===== MODEL LOADING =====
|
85 |
@st.cache_resource
def load_sentiment_model():
    """Load the sentiment classifier and its tokenizer.

    Wrapped in ``st.cache_resource``.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(
        "smtsead/fine_tuned_bertweet_hotel"
    )
    # NOTE(review): the tokenizer comes from a different repository than the
    # weights - presumably the checkpoint the model was fine-tuned from; confirm.
    bertweet_tokenizer = AutoTokenizer.from_pretrained(
        'finiteautomata/bertweet-base-sentiment-analysis'
    )
    return classifier, bertweet_tokenizer
|
90 |
|
91 |
@st.cache_resource
def load_aspect_classifier():
    """Build the zero-shot classification pipeline used for aspect detection.

    Wrapped in ``st.cache_resource``.

    Returns:
        The ``zero-shot-classification`` pipeline object.
    """
    zero_shot = pipeline(
        "zero-shot-classification",
        model="MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33",
    )
    return zero_shot
|
94 |
|
95 |
@st.cache_resource
def load_translation_model(src_lang, target_lang='en'):
    """Build the translation pipeline for one language pair.

    Wrapped in ``st.cache_resource``.

    Args:
        src_lang (str): Source language code.
        target_lang (str): Target language code (default ``'en'``).

    Returns:
        The ``translation`` pipeline for the ``"<src>-<target>"`` pair.

    Raises:
        ValueError: If the pair has no entry in ``TRANSLATION_MODELS``.
    """
    pair = f"{src_lang}-{target_lang}"
    checkpoint = TRANSLATION_MODELS.get(pair)
    if checkpoint is None:
        raise ValueError(f"Unsupported translation: {src_lang}→{target_lang}")
    return pipeline("translation", model=checkpoint)
|
101 |
|
102 |
# ===== CORE FUNCTIONS =====
|
103 |
+
def detect_language(text):
    """Guess the language of *text* and map it onto a supported language code.

    Args:
        text (str): Review text to inspect.

    Returns:
        str: ``'zh'`` for any Chinese variant, otherwise the detected code when
        it is a key of ``SUPPORTED_LANGUAGES``; ``'en'`` when detection fails
        or the detected language is not supported.
    """
    try:
        detected = detect(text)
    except Exception:
        # Best-effort fallback: langdetect raises when the text has no usable
        # features (empty string, digits/punctuation only, ...). Catching
        # Exception rather than everything lets Ctrl-C / SystemExit propagate.
        return 'en'

    # langdetect reports Chinese with a region tag ('zh-cn', 'zh-tw'), so
    # compare on the primary subtag; otherwise Chinese never matches 'zh'.
    primary = detected.lower().split('-')[0]
    if primary in ('zh', 'yue'):
        return 'zh'
    return primary if primary in SUPPORTED_LANGUAGES else 'en'
|
109 |
+
|
110 |
def translate_text(text, src_lang, target_lang='en'):
    """Translate *text* from ``src_lang`` to ``target_lang``.

    Args:
        text (str): Text to translate.
        src_lang (str): Source language code.
        target_lang (str): Target language code (default ``'en'``).

    Returns:
        dict: ``{'translation': ..., 'source_lang': src_lang}`` on success
        (the text is passed through untouched when both codes are equal), or
        ``{'error': message}`` when loading or running the model fails.
    """
    try:
        translated = text
        if src_lang != target_lang:
            model_output = load_translation_model(src_lang, target_lang)(text)
            translated = model_output[0]['translation_text']
    except Exception as exc:
        # Failures are reported to the caller as data instead of being raised.
        return {'error': str(exc)}
    return {'translation': translated, 'source_lang': src_lang}
|
119 |
|
120 |
def analyze_sentiment(text, model, tokenizer):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')
|
122 |
with torch.no_grad():
|
123 |
outputs = model(**inputs)
|
|
|
131 |
}
|
132 |
|
133 |
def detect_aspects(text, aspect_classifier):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
relevant_aspects = []
|
135 |
text_lower = text.lower()
|
136 |
for aspect, keywords in aspect_map.items():
|
|
|
144 |
multi_label=True,
|
145 |
hypothesis_template="This review discusses the hotel's {}."
|
146 |
)
|
147 |
+
return [(aspect, f"{score:.0%}") for aspect, score in zip(result['labels'], result['scores']) if score > 0.6]
|
|
|
148 |
return []
|
149 |
|
150 |
def generate_response(sentiment, aspects, original_text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
guest_name = ""
|
152 |
name_match = re.search(r"(Mr\.|Ms\.|Mrs\.)\s(\w+)", original_text, re.IGNORECASE)
|
153 |
if name_match:
|
|
|
156 |
if sentiment['label'] == 1:
|
157 |
response = f"""Dear{guest_name if guest_name else ' Valued Guest'},
|
158 |
|
159 |
+
Thank you for choosing The Kimberley Hotel Hong Kong."""
|
160 |
|
|
|
161 |
added_aspects = set()
|
162 |
for aspect, _ in sorted(aspects, key=lambda x: float(x[1][:-1]), reverse=True):
|
163 |
+
if aspect in aspect_responses:
|
164 |
response += "\n\n" + aspect_responses[aspect]
|
165 |
added_aspects.add(aspect)
|
166 |
if len(added_aspects) >= 2:
|
|
|
170 |
else:
|
171 |
response = f"""Dear{guest_name if guest_name else ' Guest'},
|
172 |
|
173 |
+
Thank you for your feedback."""
|
174 |
|
|
|
175 |
added_improvements = set()
|
176 |
for aspect, _ in sorted(aspects, key=lambda x: float(x[1][:-1]), reverse=True):
|
177 |
+
if aspect in improvement_actions:
|
178 |
response += f"\n\nRegarding your comments about the {aspect}, we {improvement_actions[aspect]}."
|
179 |
added_improvements.add(aspect)
|
180 |
if len(added_improvements) >= 2:
|
181 |
break
|
182 |
|
183 |
+
response += "\n\nPlease contact us if we can assist further.\n\nSincerely,"
|
184 |
|
185 |
return response + "\nSam Tse\nGuest Relations Manager\nThe Kimberley Hotel Hong Kong"
|
186 |
|
187 |
# ===== STREAMLIT UI =====
|
188 |
def main():
    """Render the Streamlit page and run the review-analysis workflow.

    On button click: detect the review language, translate the review to
    English when needed, run sentiment analysis and aspect detection, build a
    draft reply, translate the reply back to the review language, and display
    the results. Errors raised during analysis are shown with ``st.error``.
    """
    import html  # local import: used only to escape the draft before raw-HTML rendering

    st.set_page_config(
        page_title="Kimberley Review Assistant",
        page_icon="🏨",
        layout="centered"
    )

    # The stylesheet is kept flush-left so Markdown cannot treat it as an
    # indented code block.
    st.markdown("""
<style>
.header { color: #003366; font-size: 28px; font-weight: bold; margin-bottom: 10px; }
.subheader { color: #666666; font-size: 16px; margin-bottom: 30px; }
.char-counter { font-size: 12px; color: #666; text-align: right; margin-top: -15px; }
.char-counter.warning { color: #ff6b6b; }
.result-box { border-left: 4px solid #003366; padding: 15px; background-color: #f9f9f9; margin: 20px 0; }
</style>
""", unsafe_allow_html=True)

    st.markdown('<div class="header">The Kimberley Hotel Hong Kong</div>', unsafe_allow_html=True)
    st.markdown('<div class="subheader">Guest Review Analysis System</div>', unsafe_allow_html=True)

    review = st.text_area("**Paste Guest Review:**",
                          height=200,
                          max_chars=MAX_CHARS,
                          placeholder=f"Enter review (max {MAX_CHARS} characters)...",
                          key="review_input")

    # The widget's return value is the current text, so count it directly.
    char_count = len(review)
    counter_class = "char-counter warning" if char_count > MAX_CHARS else "char-counter"
    st.markdown(f'<div class="{counter_class}">{char_count}/{MAX_CHARS} characters</div>',
                unsafe_allow_html=True)

    if st.button("Analyze & Generate Response", type="primary"):
        if not review.strip():
            st.error("Please enter a review")
            return

        # Defensive only: the text area is already created with max_chars=MAX_CHARS.
        if char_count > MAX_CHARS:
            st.warning(f"Review truncated to {MAX_CHARS} characters")
            review = review[:MAX_CHARS]

        with st.spinner("Analyzing feedback..."):
            try:
                # Auto-detect language
                review_lang = detect_language(review)
                st.info(f"Detected language: {SUPPORTED_LANGUAGES.get(review_lang, 'English')}")

                # Translate if not English
                if review_lang != 'en':
                    translation = translate_text(review, review_lang, 'en')
                    if 'error' in translation:
                        st.error(f"Translation error: {translation['error']}")
                        return
                    analysis_text = translation['translation']

                    with st.expander("View Translation"):
                        st.write("**Original Review:**")
                        st.write(review)
                        st.write("**English Translation:**")
                        st.write(translation['translation'])
                else:
                    analysis_text = review

                # Analyze text
                sentiment_model, tokenizer = load_sentiment_model()
                aspect_classifier = load_aspect_classifier()

                sentiment = analyze_sentiment(analysis_text, sentiment_model, tokenizer)
                aspects = detect_aspects(analysis_text, aspect_classifier)
                response = generate_response(sentiment, aspects, analysis_text)

                # Translate response back if needed
                final_response = response
                if review_lang != 'en':
                    translation_back = translate_text(response, 'en', review_lang)
                    if 'error' in translation_back:
                        # Tell the user why the draft is in English instead of
                        # silently swapping languages.
                        st.warning(f"Couldn't translate response back: {translation_back['error']}")
                    else:
                        final_response = translation_back['translation']

                # Display results
                st.divider()

                col1, col2 = st.columns(2)
                with col1:
                    st.markdown("### Sentiment Analysis")
                    st.markdown(f"{'✅' if sentiment['label'] == 1 else '⚠️'} **{sentiment['sentiment']}**")
                    st.caption(f"Confidence: {sentiment['confidence']}")

                with col2:
                    st.markdown("### Key Aspects")
                    if aspects:
                        # Scores are strings such as "87%"; strip the sign to sort numerically.
                        for aspect, score in sorted(aspects, key=lambda x: float(x[1][:-1]), reverse=True):
                            st.markdown(f"- {aspect} ({score})")
                    else:
                        st.markdown("_No specific aspects detected_")

                st.divider()
                st.markdown("### Draft Response")
                # The draft contains text derived from the guest review and from
                # model output, and it is rendered as raw HTML: escape it first.
                # Newlines become <br> so the letter keeps its line breaks and
                # the <div> stays a single HTML block (a blank line would end it).
                safe_response = html.escape(final_response).replace("\n", "<br>")
                st.markdown(f'<div class="result-box">{safe_response}</div>', unsafe_allow_html=True)

            except Exception as e:
                # Top-level UI boundary: surface any failure to the user.
                st.error(f"An error occurred: {str(e)}")
|
287 |
|
|
|
288 |
if __name__ == "__main__":
|
289 |
main()
|