Spaces:

mfoud444
/

oop

Running

App Files Files Community

Mohammed Foud committed on Apr 14

Commit

1c87021

1 Parent(s): dc961fb

first commit

Browse files

Files changed (1) hide show

app.py +42 -17

app.py CHANGED Viewed

@@ -53,11 +53,31 @@ def get_initial_summary():
         return "Error: Could not load dataset.csv"
     try:
         # Generate summaries for all categories
         summaries = generate_category_summaries(df)
         # Convert summaries to HTML format for Gradio
         html_output = []
         for category, tables in summaries.items():
             html_output.append(f"<h2>CATEGORY: {category}</h2>")
@@ -78,18 +98,23 @@ def get_initial_summary():
                         border-collapse: collapse;
                         margin: 15px 0;
                         width: 100%;
                     }}
                     th, td {{
-                        padding: 8px;
                         border: 1px solid #ddd;
                         text-align: left;
                     }}
                     th {{
                         background-color: #f5f5f5;
                     }}
                     tr:nth-child(even) {{
                         background-color: #f9f9f9;
                     }}
                 </style>
                 {table_html}
                 """
@@ -99,6 +124,8 @@ def get_initial_summary():
         return "\n".join(html_output)
     except Exception as e:
         return f"Error generating initial summary: {str(e)}"
 def predict_sentiment(text):
@@ -318,31 +345,29 @@ def add_clusters_to_df(df):
 def generate_category_summaries(df):
     """Generate product summaries in table format"""
-    # First, ensure we have clusters
-    if 'cluster_name' not in df.columns:
-        df = create_clusters(df)
     summaries = {}
-    for cluster_name in df['cluster_name'].unique():
-        cluster_df = df[df['cluster_name'] == cluster_name]
-        # Get top products by rating
-        top_products = cluster_df.groupby('name').agg({
             'reviews.rating': ['mean', 'count'],
             'reviews.text': list
         }).reset_index()
-        top_products.columns = ['name', 'avg_rating', 'review_count', 'reviews']
-        top_products = top_products[top_products['review_count'] >= 5]  # Min reviews threshold
-        top_products = top_products.sort_values('avg_rating', ascending=False)
-        if len(top_products) < 3:
             continue
         # Get top 3 and worst products
-        top_3 = top_products.head(3)
-        worst_product = top_products.tail(1)
         # Analyze reviews for each product
         product_details = []
@@ -371,7 +396,7 @@ def generate_category_summaries(df):
             ])
         tables.append({
-            'section': f"TOP PRODUCTS IN {cluster_name.upper()}",
             'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
             'data': top_table
         })
@@ -390,7 +415,7 @@ def generate_category_summaries(df):
                 ]]
             })
-        summaries[cluster_name] = tables
     return summaries

         return "Error: Could not load dataset.csv"
     try:
+        # First, create clusters if they don't exist
+        if 'cluster_name' not in df.columns:
+            df = create_clusters(df)
         # Generate summaries for all categories
         summaries = generate_category_summaries(df)
         # Convert summaries to HTML format for Gradio
         html_output = []
+        # Add dataset statistics
+        unique_count = df['name'].nunique()
+        total_count = len(df)
+        avg_rating = df['reviews.rating'].mean()
+        html_output.append(f"""
+        <h2>Dataset Statistics</h2>
+        <ul>
+            <li>Total Reviews: {total_count}</li>
+            <li>Unique Products: {unique_count}</li>
+            <li>Average Rating: {avg_rating:.2f}⭐</li>
+        </ul>
+        """)
+        # Add category summaries
         for category, tables in summaries.items():
             html_output.append(f"<h2>CATEGORY: {category}</h2>")
                         border-collapse: collapse;
                         margin: 15px 0;
                         width: 100%;
+                        box-shadow: 0 1px 3px rgba(0,0,0,0.2);
                     }}
                     th, td {{
+                        padding: 12px;
                         border: 1px solid #ddd;
                         text-align: left;
                     }}
                     th {{
                         background-color: #f5f5f5;
+                        font-weight: bold;
                     }}
                     tr:nth-child(even) {{
                         background-color: #f9f9f9;
                     }}
+                    tr:hover {{
+                        background-color: #f5f5f5;
+                    }}
                 </style>
                 {table_html}
                 """
         return "\n".join(html_output)
     except Exception as e:
+        import traceback
+        print(traceback.format_exc())  # Print full error trace for debugging
         return f"Error generating initial summary: {str(e)}"
 def predict_sentiment(text):
 def generate_category_summaries(df):
     """Generate product summaries in table format"""
     summaries = {}
+    for category in df['cluster_name'].unique():
+        category_df = df[df['cluster_name'] == category]
+        if len(category_df) < 10:
+            continue
+        # Get product statistics
+        product_stats = category_df.groupby('name').agg({
             'reviews.rating': ['mean', 'count'],
             'reviews.text': list
         }).reset_index()
+        product_stats.columns = ['name', 'avg_rating', 'review_count', 'reviews']
+        product_stats = product_stats[product_stats['review_count'] >= 5]
+        if len(product_stats) < 3:
             continue
         # Get top 3 and worst products
+        top_3 = product_stats.nlargest(3, 'avg_rating')
+        worst_product = product_stats.nsmallest(1, 'avg_rating')
         # Analyze reviews for each product
         product_details = []
             ])
         tables.append({
+            'section': f"TOP PRODUCTS IN {category.upper()}",
             'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
             'data': top_table
         })
                 ]]
             })
+        summaries[category] = tables
     return summaries