Mohammed Foud committed on
Commit
1c87021
·
1 Parent(s): dc961fb

first commit

Browse files
Files changed (1) hide show
  1. app.py +42 -17
app.py CHANGED
@@ -53,11 +53,31 @@ def get_initial_summary():
53
  return "Error: Could not load dataset.csv"
54
 
55
  try:
 
 
 
 
56
  # Generate summaries for all categories
57
  summaries = generate_category_summaries(df)
58
 
59
  # Convert summaries to HTML format for Gradio
60
  html_output = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  for category, tables in summaries.items():
62
  html_output.append(f"<h2>CATEGORY: {category}</h2>")
63
 
@@ -78,18 +98,23 @@ def get_initial_summary():
78
  border-collapse: collapse;
79
  margin: 15px 0;
80
  width: 100%;
 
81
  }}
82
  th, td {{
83
- padding: 8px;
84
  border: 1px solid #ddd;
85
  text-align: left;
86
  }}
87
  th {{
88
  background-color: #f5f5f5;
 
89
  }}
90
  tr:nth-child(even) {{
91
  background-color: #f9f9f9;
92
  }}
 
 
 
93
  </style>
94
  {table_html}
95
  """
@@ -99,6 +124,8 @@ def get_initial_summary():
99
 
100
  return "\n".join(html_output)
101
  except Exception as e:
 
 
102
  return f"Error generating initial summary: {str(e)}"
103
 
104
  def predict_sentiment(text):
@@ -318,31 +345,29 @@ def add_clusters_to_df(df):
318
 
319
  def generate_category_summaries(df):
320
  """Generate product summaries in table format"""
321
- # First, ensure we have clusters
322
- if 'cluster_name' not in df.columns:
323
- df = create_clusters(df)
324
-
325
  summaries = {}
326
 
327
- for cluster_name in df['cluster_name'].unique():
328
- cluster_df = df[df['cluster_name'] == cluster_name]
329
 
330
- # Get top products by rating
331
- top_products = cluster_df.groupby('name').agg({
 
 
 
332
  'reviews.rating': ['mean', 'count'],
333
  'reviews.text': list
334
  }).reset_index()
335
 
336
- top_products.columns = ['name', 'avg_rating', 'review_count', 'reviews']
337
- top_products = top_products[top_products['review_count'] >= 5] # Min reviews threshold
338
- top_products = top_products.sort_values('avg_rating', ascending=False)
339
 
340
- if len(top_products) < 3:
341
  continue
342
 
343
  # Get top 3 and worst products
344
- top_3 = top_products.head(3)
345
- worst_product = top_products.tail(1)
346
 
347
  # Analyze reviews for each product
348
  product_details = []
@@ -371,7 +396,7 @@ def generate_category_summaries(df):
371
  ])
372
 
373
  tables.append({
374
- 'section': f"TOP PRODUCTS IN {cluster_name.upper()}",
375
  'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
376
  'data': top_table
377
  })
@@ -390,7 +415,7 @@ def generate_category_summaries(df):
390
  ]]
391
  })
392
 
393
- summaries[cluster_name] = tables
394
 
395
  return summaries
396
 
 
53
  return "Error: Could not load dataset.csv"
54
 
55
  try:
56
+ # First, create clusters if they don't exist
57
+ if 'cluster_name' not in df.columns:
58
+ df = create_clusters(df)
59
+
60
  # Generate summaries for all categories
61
  summaries = generate_category_summaries(df)
62
 
63
  # Convert summaries to HTML format for Gradio
64
  html_output = []
65
+
66
+ # Add dataset statistics
67
+ unique_count = df['name'].nunique()
68
+ total_count = len(df)
69
+ avg_rating = df['reviews.rating'].mean()
70
+
71
+ html_output.append(f"""
72
+ <h2>Dataset Statistics</h2>
73
+ <ul>
74
+ <li>Total Reviews: {total_count}</li>
75
+ <li>Unique Products: {unique_count}</li>
76
+ <li>Average Rating: {avg_rating:.2f}⭐</li>
77
+ </ul>
78
+ """)
79
+
80
+ # Add category summaries
81
  for category, tables in summaries.items():
82
  html_output.append(f"<h2>CATEGORY: {category}</h2>")
83
 
 
98
  border-collapse: collapse;
99
  margin: 15px 0;
100
  width: 100%;
101
+ box-shadow: 0 1px 3px rgba(0,0,0,0.2);
102
  }}
103
  th, td {{
104
+ padding: 12px;
105
  border: 1px solid #ddd;
106
  text-align: left;
107
  }}
108
  th {{
109
  background-color: #f5f5f5;
110
+ font-weight: bold;
111
  }}
112
  tr:nth-child(even) {{
113
  background-color: #f9f9f9;
114
  }}
115
+ tr:hover {{
116
+ background-color: #f5f5f5;
117
+ }}
118
  </style>
119
  {table_html}
120
  """
 
124
 
125
  return "\n".join(html_output)
126
  except Exception as e:
127
+ import traceback
128
+ print(traceback.format_exc()) # Print full error trace for debugging
129
  return f"Error generating initial summary: {str(e)}"
130
 
131
  def predict_sentiment(text):
 
345
 
346
  def generate_category_summaries(df):
347
  """Generate product summaries in table format"""
 
 
 
 
348
  summaries = {}
349
 
350
+ for category in df['cluster_name'].unique():
351
+ category_df = df[df['cluster_name'] == category]
352
 
353
+ if len(category_df) < 10:
354
+ continue
355
+
356
+ # Get product statistics
357
+ product_stats = category_df.groupby('name').agg({
358
  'reviews.rating': ['mean', 'count'],
359
  'reviews.text': list
360
  }).reset_index()
361
 
362
+ product_stats.columns = ['name', 'avg_rating', 'review_count', 'reviews']
363
+ product_stats = product_stats[product_stats['review_count'] >= 5]
 
364
 
365
+ if len(product_stats) < 3:
366
  continue
367
 
368
  # Get top 3 and worst products
369
+ top_3 = product_stats.nlargest(3, 'avg_rating')
370
+ worst_product = product_stats.nsmallest(1, 'avg_rating')
371
 
372
  # Analyze reviews for each product
373
  product_details = []
 
396
  ])
397
 
398
  tables.append({
399
+ 'section': f"TOP PRODUCTS IN {category.upper()}",
400
  'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
401
  'data': top_table
402
  })
 
415
  ]]
416
  })
417
 
418
+ summaries[category] = tables
419
 
420
  return summaries
421