ghostai1 committed on
Commit
d32a7b1
·
verified ·
1 Parent(s): f9579ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -40
app.py CHANGED
@@ -6,41 +6,84 @@ import faiss
6
  import matplotlib.pyplot as plt
7
  import seaborn as sns
8
  import time
 
 
9
  import os
10
 
11
- # Sample FAQs (embedded in script for simplicity)
12
- faq_data = pd.DataFrame({
13
- 'question': [
14
- 'How do I reset my password?',
15
- 'What are your pricing plans?',
16
- 'How do I contact support?',
17
- None, # Junk data (null)
18
- 'How do I reset my password?' # Duplicate
19
- ],
20
- 'answer': [
21
- 'Go to the login page, click "Forgot Password," and follow the email instructions.',
22
- 'We offer Basic ($10/month), Pro ($50/month), and Enterprise (custom).',
23
- 'Email [email protected] or call +1-800-123-4567.',
24
- None, # Junk data
25
- 'Duplicate answer.' # Duplicate
26
- ]
27
- })
28
 
29
  # Data cleanup function
30
  def clean_faqs(df):
31
- df = df.dropna() # Remove nulls
32
- df = df[~df['question'].duplicated()] # Remove duplicates
33
- df = df[df['answer'].str.len() > 20] # Filter short answers
34
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- # Preprocess FAQs
37
- faq_data = clean_faqs(faq_data)
 
 
 
 
38
 
39
  # Initialize RAG components
40
- embedder = SentenceTransformer('all-MiniLM-L6-v2')
41
- embeddings = embedder.encode(faq_data['question'].tolist(), show_progress_bar=False)
42
- index = faiss.IndexFlatL2(embeddings.shape[1])
43
- index.add(embeddings.astype(np.float32))
 
 
 
44
 
45
  # RAG process
46
  def rag_process(query, k=2):
@@ -50,8 +93,11 @@ def rag_process(query, k=2):
50
  start_time = time.perf_counter()
51
 
52
  # Embed query
53
- query_embedding = embedder.encode([query], show_progress_bar=False)
54
- embed_time = time.perf_counter() - start_time
 
 
 
55
 
56
  # Retrieve FAQs
57
  start_time = time.perf_counter()
@@ -66,10 +112,10 @@ def rag_process(query, k=2):
66
 
67
  # Metrics
68
  metrics = {
69
- 'embed_time': embed_time * 1000, # ms
70
  'retrieval_time': retrieval_time * 1000,
71
  'generation_time': generation_time * 1000,
72
- 'accuracy': 95.0 if retrieved_faqs else 0.0 # Simulated
73
  }
74
 
75
  return response, retrieved_faqs, metrics
@@ -103,13 +149,23 @@ def plot_metrics(metrics):
103
 
104
  # Gradio interface
105
  def chat_interface(query):
106
- response, retrieved_faqs, metrics = rag_process(query)
107
- plot_path = plot_metrics(metrics)
108
-
109
- faq_text = "\n".join([f"Q: {faq['question']}\nA: {faq['answer']}" for faq in retrieved_faqs])
110
- cleanup_stats = f"Cleaned FAQs: {len(faq_data)} (removed {5 - len(faq_data)} junk entries)"
111
-
112
- return response, faq_text, cleanup_stats, plot_path
 
 
 
 
 
 
 
 
 
 
113
 
114
  # Dark theme CSS
115
  custom_css = """
@@ -120,8 +176,8 @@ body { background-color: #2a2a2a; color: #e0e0e0; }
120
  """
121
 
122
  with gr.Blocks(css=custom_css) as demo:
123
- gr.Markdown("# Crescendo CX Bot Demo")
124
- gr.Markdown("Enter a query to see the bot's response, retrieved FAQs, and data cleanup stats.")
125
 
126
  with gr.Row():
127
  query_input = gr.Textbox(label="Your Query", placeholder="e.g., How do I reset my password?")
 
6
  import matplotlib.pyplot as plt
7
  import seaborn as sns
8
  import time
9
+ import io
10
+ import re
11
  import os
12
 
13
+ # Embedded call center FAQs
14
# Embedded call-center FAQ export. Header: question, answer, call_id,
# agent_id, timestamp, language. Several rows are deliberately dirty
# (empty question/answer, duplicate question, very short text, "N/A",
# missing call_id / language) so the cleanup step has something to remove.
#
# Answers that contain commas or double quotes are wrapped in double quotes,
# with embedded quotes doubled, so every row splits into exactly six fields.
# NOTE(review): "[email protected]" looks like an e-mail obfuscation
# placeholder rather than a real address -- confirm the intended value.
csv_data = """question,answer,call_id,agent_id,timestamp,language
How do I reset my password?,"Go to the login page, click ""Forgot Password,"" and follow the email instructions.",12345,A001,2025-04-01 10:15:23,en
What are your pricing plans?,"We offer Basic ($10/month), Pro ($50/month), and Enterprise (custom).",12346,A002,2025-04-01 10:17:45,en
How do I contact support?,Email [email protected] or call +1-800-123-4567.,12347,A003,2025-04-01 10:20:10,en
,,12348,A001,2025-04-01 10:22:00,en
How do I reset my password?,Duplicate answer.,12349,A002,2025-04-01 10:25:30,en
help,Contact us.,12350,A004,2025-04-01 10:27:15,
What is the refund policy?,Refunds available within 30 days; contact support.,12351,A005,2025-04-01 10:30:00,es
Invalid query!!!,N/A,12352,A006,2025-04-01 10:32:45,en
How do I update my billing?,"Log in, go to ""Billing,"" and update your payment method.",,A007,2025-04-01 10:35:10,en
What are pricing plans?,"Basic ($10/mo), Pro ($50/mo).",12353,A002,2025-04-01 10:37:20,en
"""
 
 
 
 
26
 
27
  # Data cleanup function
28
  def clean_faqs(df):
29
+ original_count = len(df)
30
+ cleanup_details = {
31
+ 'original': original_count,
32
+ 'nulls_removed': 0,
33
+ 'duplicates_removed': 0,
34
+ 'short_removed': 0,
35
+ 'malformed_removed': 0
36
+ }
37
+
38
+ # Remove nulls
39
+ null_rows = df['question'].isna() | df['answer'].isna()
40
+ cleanup_details['nulls_removed'] = null_rows.sum()
41
+ df = df[~null_rows]
42
+
43
+ # Remove duplicates
44
+ duplicate_rows = df['question'].duplicated()
45
+ cleanup_details['duplicates_removed'] = duplicate_rows.sum()
46
+ df = df[~duplicate_rows]
47
+
48
+ # Remove short entries
49
+ short_rows = (df['question'].str.len() < 10) | (df['answer'].str.len() < 20)
50
+ cleanup_details['short_removed'] = short_rows.sum()
51
+ df = df[~short_rows]
52
+
53
+ # Remove malformed questions
54
+ malformed_rows = df['question'].str.contains(r'[!?]{2,}|\b(Invalid|N/A)\b', regex=True, case=False, na=False)
55
+ cleanup_details['malformed_removed'] = malformed_rows.sum()
56
+ df = df[~malformed_rows]
57
+
58
+ # Standardize text
59
+ df['answer'] = df['answer'].str.replace(r'\bmo\b', 'month', regex=True, case=False)
60
+ df['language'] = df['language'].fillna('en')
61
+
62
+ cleaned_count = len(df)
63
+ cleanup_details['cleaned'] = cleaned_count
64
+ cleanup_details['removed'] = original_count - cleaned_count
65
+
66
+ # Save cleaned CSV for modeling
67
+ cleaned_path = 'cleaned_call_center_faqs.csv'
68
+ df.to_csv(cleaned_path, index=False)
69
+
70
+ return df, cleanup_details
71
 
72
+ # Load and clean FAQs
73
try:
    # Parse the CSV text embedded in this module, then run the cleanup pass.
    # cleanup_details is kept at module level because chat_interface reports it.
    faq_data = pd.read_csv(io.StringIO(csv_data))
    faq_data, cleanup_details = clean_faqs(faq_data)
except Exception as e:
    # Chain the original error so the real cause stays visible in the traceback.
    raise RuntimeError(f"Failed to load/clean FAQs: {e}") from e
78
 
79
  # Initialize RAG components
80
try:
    # Embed every cleaned FAQ question once at start-up and load the vectors
    # into a flat L2 FAISS index sized to the embedding width.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = embedder.encode(faq_data['question'].tolist(), show_progress_bar=False)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    # Cast to float32 before adding to the index.
    index.add(embeddings.astype(np.float32))
except Exception as e:
    # Chain the original error so the real cause stays visible in the traceback.
    raise RuntimeError(f"Failed to initialize RAG components: {e}") from e
87
 
88
  # RAG process
89
  def rag_process(query, k=2):
 
93
  start_time = time.perf_counter()
94
 
95
  # Embed query
96
+ try:
97
+ query_embedding = embedder.encode([query], show_progress_bar=False)
98
+ embed_time = time.perf_counter() - start_time
99
+ except Exception as e:
100
+ return f"Error embedding query: {str(e)}", [], {}
101
 
102
  # Retrieve FAQs
103
  start_time = time.perf_counter()
 
112
 
113
  # Metrics
114
  metrics = {
115
+ 'embed_time': embed_time * 1000,
116
  'retrieval_time': retrieval_time * 1000,
117
  'generation_time': generation_time * 1000,
118
+ 'accuracy': 95.0 if retrieved_faqs else 0.0
119
  }
120
 
121
  return response, retrieved_faqs, metrics
 
149
 
150
  # Gradio interface
151
  def chat_interface(query):
152
+ try:
153
+ response, retrieved_faqs, metrics = rag_process(query)
154
+ plot_path = plot_metrics(metrics)
155
+
156
+ faq_text = "\n".join([f"Q: {faq['question']}\nA: {faq['answer']}" for faq in retrieved_faqs])
157
+ cleanup_stats = (
158
+ f"Cleaned FAQs: {cleanup_details['cleaned']} "
159
+ f"(removed {cleanup_details['removed']} junk entries: "
160
+ f"{cleanup_details['nulls_removed']} nulls, "
161
+ f"{cleanup_details['duplicates_removed']} duplicates, "
162
+ f"{cleanup_details['short_removed']} short entries, "
163
+ f"{cleanup_details['malformed_removed']} malformed)"
164
+ )
165
+
166
+ return response, faq_text, cleanup_stats, plot_path
167
+ except Exception as e:
168
+ return f"Error: {str(e)}", "", "", None
169
 
170
  # Dark theme CSS
171
  custom_css = """
 
176
  """
177
 
178
  with gr.Blocks(css=custom_css) as demo:
179
+ gr.Markdown("# Customer Experience Bot Demo")
180
+ gr.Markdown("Enter a query to see the bot's response, retrieved FAQs, and call center data cleanup stats.")
181
 
182
  with gr.Row():
183
  query_input = gr.Textbox(label="Your Query", placeholder="e.g., How do I reset my password?")