dolphinium committed
Commit · 26466c8
1 Parent(s): 1514258
adding direct solr links on accordions for quantitative and qualitative results.

Browse files:
- data_processing.py +7 -7
- ui.py +24 -18
data_processing.py
CHANGED

@@ -72,14 +72,14 @@ def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, c
 def execute_quantitative_query(solr_client, plan):
     """Executes the facet query to get aggregate data."""
     if not plan or 'quantitative_request' not in plan or 'json.facet' not in plan.get('quantitative_request', {}):
-        return None
+        return None, None
     try:
         params = {
             "q": plan.get('query_filter', '*_*'),
             "rows": 0,
             "json.facet": json.dumps(plan['quantitative_request']['json.facet'])
         }
-
+
         # Build the full Solr URL manually (for logging)
         base_url = "http://69.167.186.48:8983/solr/news/select"
         query_string = urllib.parse.urlencode(params)
@@ -87,15 +87,15 @@ def execute_quantitative_query(solr_client, plan):
 
         print(f"[DEBUG] Solr QUANTITIVE query URL: {full_url}")
         results = solr_client.search(**params)
-        return results.raw_response.get("facets", {})
+        return results.raw_response.get("facets", {}), full_url
     except Exception as e:
         print(f"Error in quantitative query: {e}")
-        return None
+        return None, None
 
 def execute_qualitative_query(solr_client, plan):
     """Executes the grouping query to get the best example docs."""
     if not plan or 'qualitative_request' not in plan:
-        return None
+        return None, None
     try:
         qual_request = copy.deepcopy(plan['qualitative_request'])
         params = {
@@ -110,10 +110,10 @@ def execute_qualitative_query(solr_client, plan):
 
         print(f"[DEBUG] Solr QUALITATIVE query URL: {full_url}")
         results = solr_client.search(**params)
-        return results.grouped
+        return results.grouped, full_url
     except Exception as e:
         print(f"Error in qualitative query: {e}")
-        return None
+        return None, None
 
 def llm_synthesize_enriched_report_stream(llm_model, query, quantitative_data, qualitative_data, plan):
     """
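For context, both query helpers now return a (data, full_url) pair instead of a bare result, so the caller can surface exactly which Solr request produced the data. The snippet below is a minimal sketch of calling the updated helper directly; it assumes a pysolr-style client (consistent with the .raw_response / .grouped usage above) and an illustrative analysis plan, neither of which is defined by this commit.

import pysolr  # assumed client library; the repo may construct solr_client differently

from data_processing import execute_quantitative_query

# Hypothetical plan in the shape the helper expects: a query filter plus a JSON facet request.
plan = {
    "query_filter": "content:earthquake",  # illustrative field and value
    "quantitative_request": {
        "json.facet": {"per_year": {"type": "terms", "field": "year"}}  # illustrative facet
    },
}

solr_client = pysolr.Solr("http://69.167.186.48:8983/solr/news", timeout=30)

facets, solr_url = execute_quantitative_query(solr_client, plan)
if facets is None:
    print("Plan was incomplete or the query failed.")
else:
    print("Facet buckets:", facets)
    print("Reproduce in a browser:", solr_url)  # the direct link the UI now renders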
ui.py
CHANGED

@@ -51,8 +51,10 @@ def create_ui(llm_model, solr_client):
         with gr.Accordion("Generated Analysis Plan", open=False):
             plan_display = gr.Markdown("Plan will appear here...", visible=True)
         with gr.Accordion("Retrieved Quantitative Data", open=False):
+            quantitative_url_display = gr.Markdown("Quantitative URL will appear here...", visible=False)
             quantitative_data_display = gr.Markdown("Aggregate data will appear here...", visible=False)
         with gr.Accordion("Retrieved Qualitative Data (Examples)", open=False):
+            qualitative_url_display = gr.Markdown("Qualitative URL will appear here...", visible=False)
             qualitative_data_display = gr.Markdown("Example data will appear here...", visible=False)
         plot_display = gr.Image(label="Visualization", type="filepath", visible=False)
         report_display = gr.Markdown("Report will be streamed here...", visible=False)
@@ -67,16 +69,16 @@ def create_ui(llm_model, solr_client):
         history = []
 
         # Reset all displays at the beginning of a new flow
-        yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Suggestions from the external API will appear here...", visible=False))
+        yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Suggestions from the external API will appear here...", visible=False))
 
         query_context = user_input.strip()
         if not query_context:
             history.append((user_input, "Please enter a question to analyze."))
-            yield (history, state, None, None, None, None, None, None)
+            yield (history, state, None, None, None, None, None, None, None, None)
             return
 
         history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating analysis plan...*"))
-        yield (history, state, None, None, None, None, None, None)
+        yield (history, state, None, None, None, None, None, None, None, None)
 
         # Generate plan and get search field suggestions
         analysis_plan, search_fields = llm_generate_analysis_plan_with_history(llm_model, query_context, history)
@@ -90,7 +92,7 @@ def create_ui(llm_model, solr_client):
 
         if not analysis_plan:
             history.append((None, "I'm sorry, I couldn't generate a valid analysis plan. Please try rephrasing."))
-            yield (history, state, None, None, None, None, None, suggestions_display_update)
+            yield (history, state, None, None, None, None, None, None, None, suggestions_display_update)
             return
 
         history.append((None, "✅ Analysis plan generated!"))
@@ -101,33 +103,35 @@ def create_ui(llm_model, solr_client):
         """
         history.append((None, plan_summary))
         formatted_plan = f"**Full Analysis Plan:**\n```json\n{json.dumps(analysis_plan, indent=2)}\n```"
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
 
         history.append((None, "*Executing queries for aggregates and examples...*"))
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
 
         # Execute queries in parallel
-        aggregate_data = None
-        example_data = None
+        aggregate_data, quantitative_url = None, None
+        example_data, qualitative_url = None, None
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_agg = executor.submit(execute_quantitative_query, solr_client, analysis_plan)
             future_ex = executor.submit(execute_qualitative_query, solr_client, analysis_plan)
-            aggregate_data = future_agg.result()
-            example_data = future_ex.result()
+            aggregate_data, quantitative_url = future_agg.result()
+            example_data, qualitative_url = future_ex.result()
 
         if not aggregate_data or aggregate_data.get('count', 0) == 0:
             history.append((None, "No data was found for your query. Please try a different question."))
-            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
+            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
             return
 
         # Display retrieved data
+        quantitative_url_update = gr.update(value=f"**Solr URL:** [{quantitative_url}]({quantitative_url})", visible=True)
+        qualitative_url_update = gr.update(value=f"**Solr URL:** [{qualitative_url}]({qualitative_url})", visible=True)
         formatted_agg_data = f"**Quantitative (Aggregate) Data:**\n```json\n{json.dumps(aggregate_data, indent=2)}\n```"
         formatted_qual_data = f"**Qualitative (Example) Data:**\n```json\n{json.dumps(example_data, indent=2)}\n```"
         qual_data_display_update = gr.update(value=formatted_qual_data, visible=True)
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         history.append((None, "✅ Data retrieved. Generating visualization and final report..."))
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         # Generate viz and report
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -137,7 +141,7 @@ def create_ui(llm_model, solr_client):
         stream_history = history[:]
         for chunk in llm_synthesize_enriched_report_stream(llm_model, query_context, aggregate_data, example_data, analysis_plan):
             report_text += chunk
-            yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+            yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         history.append((None, report_text))
 
@@ -147,13 +151,13 @@ def create_ui(llm_model, solr_client):
         if not plot_path:
             history.append((None, "*I was unable to generate a plot for this data.*\n"))
 
-        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         state['query_count'] += 1
         state['last_suggestions'] = parse_suggestions_from_report(report_text)
         next_prompt = "Analysis complete. What would you like to explore next?"
         history.append((None, next_prompt))
-        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
     def reset_all():
         """Resets the entire UI for a new analysis session."""
@@ -166,13 +170,15 @@ def create_ui(llm_model, solr_client):
             gr.update(value=None, visible=False),
             gr.update(value=None, visible=False),
             gr.update(value=None, visible=False),
+            gr.update(value=None, visible=False),
+            gr.update(value=None, visible=False),
             gr.update(value=None, visible=False)
         )
 
     msg_textbox.submit(
         fn=process_analysis_flow,
         inputs=[msg_textbox, chatbot, state],
-        outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display, suggestions_display],
+        outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_url_display, quantitative_data_display, qualitative_url_display, qualitative_data_display, suggestions_display],
     ).then(
         lambda: gr.update(value=""),
         None,
@@ -183,7 +189,7 @@ def create_ui(llm_model, solr_client):
     clear_button.click(
         fn=reset_all,
         inputs=None,
-        outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display, suggestions_display],
+        outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_url_display, quantitative_data_display, qualitative_url_display, qualitative_data_display, suggestions_display],
         queue=False
     )
 
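The UI side of the change follows one pattern throughout: each data accordion gains a hidden gr.Markdown that is revealed via gr.update with a clickable Solr link once the query URL is known. Below is a self-contained sketch of that pattern with a hard-coded placeholder URL standing in for the value returned by the Solr helpers; the button and demo scaffolding are illustrative and not part of ui.py.

import gradio as gr

EXAMPLE_URL = "http://69.167.186.48:8983/solr/news/select?q=*:*&rows=0"  # placeholder URL

def run_query():
    # In ui.py this value comes back from execute_quantitative_query(); here it is hard-coded.
    link_md = f"**Solr URL:** [{EXAMPLE_URL}]({EXAMPLE_URL})"
    return gr.update(value=link_md, visible=True)

with gr.Blocks() as demo:
    with gr.Accordion("Retrieved Quantitative Data", open=False):
        quantitative_url_display = gr.Markdown("Quantitative URL will appear here...", visible=False)
    run_button = gr.Button("Run analysis")
    run_button.click(fn=run_query, inputs=None, outputs=[quantitative_url_display])

if __name__ == "__main__":
    demo.launch()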