dolphinium committed
Commit · 26466c8
1 Parent(s): 1514258
adding direct solr links on accordions for quantitative and qualitative results.

Browse files:
- data_processing.py +7 -7
- ui.py +24 -18
data_processing.py
CHANGED

@@ -72,14 +72,14 @@ def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, c
 def execute_quantitative_query(solr_client, plan):
     """Executes the facet query to get aggregate data."""
     if not plan or 'quantitative_request' not in plan or 'json.facet' not in plan.get('quantitative_request', {}):
-        return None
+        return None, None
     try:
         params = {
             "q": plan.get('query_filter', '*_*'),
             "rows": 0,
             "json.facet": json.dumps(plan['quantitative_request']['json.facet'])
         }
-
+
         # Build the full Solr URL manually (for logging)
         base_url = "http://69.167.186.48:8983/solr/news/select"
         query_string = urllib.parse.urlencode(params)
@@ -87,15 +87,15 @@ def execute_quantitative_query(solr_client, plan):
 
         print(f"[DEBUG] Solr QUANTITIVE query URL: {full_url}")
         results = solr_client.search(**params)
-        return results.raw_response.get("facets", {})
+        return results.raw_response.get("facets", {}), full_url
     except Exception as e:
         print(f"Error in quantitative query: {e}")
-        return None
+        return None, None
 
 def execute_qualitative_query(solr_client, plan):
     """Executes the grouping query to get the best example docs."""
     if not plan or 'qualitative_request' not in plan:
-        return None
+        return None, None
     try:
         qual_request = copy.deepcopy(plan['qualitative_request'])
         params = {
@@ -110,10 +110,10 @@ def execute_qualitative_query(solr_client, plan):
 
         print(f"[DEBUG] Solr QUALITATIVE query URL: {full_url}")
         results = solr_client.search(**params)
-        return results.grouped
+        return results.grouped, full_url
     except Exception as e:
         print(f"Error in qualitative query: {e}")
-        return None
+        return None, None
 
 def llm_synthesize_enriched_report_stream(llm_model, query, quantitative_data, qualitative_data, plan):
     """
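For context, both query helpers now return a (data, full_url) pair instead of a bare result, so the caller can surface exactly which Solr request produced the data. The snippet below is a minimal sketch of calling the updated helper directly; it assumes a pysolr-style client (consistent with the .raw_response / .grouped usage above) and an illustrative analysis plan, neither of which is defined by this commit.

import pysolr  # assumed client library; the repo may construct solr_client differently

from data_processing import execute_quantitative_query

# Hypothetical plan in the shape the helper expects: a query filter plus a JSON facet request.
plan = {
    "query_filter": "content:earthquake",  # illustrative field and value
    "quantitative_request": {
        "json.facet": {"per_year": {"type": "terms", "field": "year"}}  # illustrative facet
    },
}

solr_client = pysolr.Solr("http://69.167.186.48:8983/solr/news", timeout=30)

facets, solr_url = execute_quantitative_query(solr_client, plan)
if facets is None:
    print("Plan was incomplete or the query failed.")
else:
    print("Facet buckets:", facets)
    print("Reproduce in a browser:", solr_url)  # the direct link the UI now renders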
ui.py
CHANGED

@@ -51,8 +51,10 @@ def create_ui(llm_model, solr_client):
         with gr.Accordion("Generated Analysis Plan", open=False):
             plan_display = gr.Markdown("Plan will appear here...", visible=True)
         with gr.Accordion("Retrieved Quantitative Data", open=False):
+            quantitative_url_display = gr.Markdown("Quantitative URL will appear here...", visible=False)
             quantitative_data_display = gr.Markdown("Aggregate data will appear here...", visible=False)
         with gr.Accordion("Retrieved Qualitative Data (Examples)", open=False):
+            qualitative_url_display = gr.Markdown("Qualitative URL will appear here...", visible=False)
             qualitative_data_display = gr.Markdown("Example data will appear here...", visible=False)
         plot_display = gr.Image(label="Visualization", type="filepath", visible=False)
         report_display = gr.Markdown("Report will be streamed here...", visible=False)
@@ -67,16 +69,16 @@ def create_ui(llm_model, solr_client):
         history = []
 
         # Reset all displays at the beginning of a new flow
-        yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Suggestions from the external API will appear here...", visible=False))
+        yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Suggestions from the external API will appear here...", visible=False))
 
         query_context = user_input.strip()
         if not query_context:
             history.append((user_input, "Please enter a question to analyze."))
-            yield (history, state, None, None, None, None, None, None)
+            yield (history, state, None, None, None, None, None, None, None, None)
             return
 
         history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating analysis plan...*"))
-        yield (history, state, None, None, None, None, None, None)
+        yield (history, state, None, None, None, None, None, None, None, None)
 
         # Generate plan and get search field suggestions
         analysis_plan, search_fields = llm_generate_analysis_plan_with_history(llm_model, query_context, history)
@@ -90,7 +92,7 @@ def create_ui(llm_model, solr_client):
 
         if not analysis_plan:
             history.append((None, "I'm sorry, I couldn't generate a valid analysis plan. Please try rephrasing."))
-            yield (history, state, None, None, None, None, None, suggestions_display_update)
+            yield (history, state, None, None, None, None, None, None, None, suggestions_display_update)
             return
 
         history.append((None, "✅ Analysis plan generated!"))
@@ -101,33 +103,35 @@ def create_ui(llm_model, solr_client):
         """
         history.append((None, plan_summary))
         formatted_plan = f"**Full Analysis Plan:**\n```json\n{json.dumps(analysis_plan, indent=2)}\n```"
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
 
         history.append((None, "*Executing queries for aggregates and examples...*"))
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
 
         # Execute queries in parallel
-        aggregate_data = None
-        example_data = None
+        aggregate_data, quantitative_url = None, None
+        example_data, qualitative_url = None, None
         with concurrent.futures.ThreadPoolExecutor() as executor:
             future_agg = executor.submit(execute_quantitative_query, solr_client, analysis_plan)
             future_ex = executor.submit(execute_qualitative_query, solr_client, analysis_plan)
-            aggregate_data = future_agg.result()
-            example_data = future_ex.result()
+            aggregate_data, quantitative_url = future_agg.result()
+            example_data, qualitative_url = future_ex.result()
 
         if not aggregate_data or aggregate_data.get('count', 0) == 0:
             history.append((None, "No data was found for your query. Please try a different question."))
-            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
+            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, None, None, suggestions_display_update)
             return
 
         # Display retrieved data
+        quantitative_url_update = gr.update(value=f"**Solr URL:** [{quantitative_url}]({quantitative_url})", visible=True)
+        qualitative_url_update = gr.update(value=f"**Solr URL:** [{qualitative_url}]({qualitative_url})", visible=True)
         formatted_agg_data = f"**Quantitative (Aggregate) Data:**\n```json\n{json.dumps(aggregate_data, indent=2)}\n```"
         formatted_qual_data = f"**Qualitative (Example) Data:**\n```json\n{json.dumps(example_data, indent=2)}\n```"
         qual_data_display_update = gr.update(value=formatted_qual_data, visible=True)
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         history.append((None, "✅ Data retrieved. Generating visualization and final report..."))
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         # Generate viz and report
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -137,7 +141,7 @@ def create_ui(llm_model, solr_client):
         stream_history = history[:]
         for chunk in llm_synthesize_enriched_report_stream(llm_model, query_context, aggregate_data, example_data, analysis_plan):
             report_text += chunk
-            yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+            yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         history.append((None, report_text))
 
@@ -147,13 +151,13 @@ def create_ui(llm_model, solr_client):
         if not plot_path:
             history.append((None, "*I was unable to generate a plot for this data.*\n"))
 
-        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
         state['query_count'] += 1
         state['last_suggestions'] = parse_suggestions_from_report(report_text)
         next_prompt = "Analysis complete. What would you like to explore next?"
         history.append((None, next_prompt))
-        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
+        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), quantitative_url_update, gr.update(value=formatted_agg_data, visible=True), qualitative_url_update, qual_data_display_update, suggestions_display_update)
 
     def reset_all():
         """Resets the entire UI for a new analysis session."""
@@ -166,13 +170,15 @@ def create_ui(llm_model, solr_client):
             gr.update(value=None, visible=False),
             gr.update(value=None, visible=False),
             gr.update(value=None, visible=False),
+            gr.update(value=None, visible=False),
+            gr.update(value=None, visible=False),
             gr.update(value=None, visible=False)
         )
 
     msg_textbox.submit(
         fn=process_analysis_flow,
         inputs=[msg_textbox, chatbot, state],
-        outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display, suggestions_display],
+        outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_url_display, quantitative_data_display, qualitative_url_display, qualitative_data_display, suggestions_display],
     ).then(
         lambda: gr.update(value=""),
         None,
@@ -183,7 +189,7 @@ def create_ui(llm_model, solr_client):
     clear_button.click(
         fn=reset_all,
         inputs=None,
-        outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display, suggestions_display],
+        outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_url_display, quantitative_data_display, qualitative_url_display, qualitative_data_display, suggestions_display],
         queue=False
     )
 
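The UI side of the change follows one pattern throughout: each data accordion gains a hidden gr.Markdown that is revealed via gr.update with a clickable Solr link once the query URL is known. Below is a self-contained sketch of that pattern with a hard-coded placeholder URL standing in for the value returned by the Solr helpers; the button and demo scaffolding are illustrative and not part of ui.py.

import gradio as gr

EXAMPLE_URL = "http://69.167.186.48:8983/solr/news/select?q=*:*&rows=0"  # placeholder URL

def run_query():
    # In ui.py this value comes back from execute_quantitative_query(); here it is hard-coded.
    link_md = f"**Solr URL:** [{EXAMPLE_URL}]({EXAMPLE_URL})"
    return gr.update(value=link_md, visible=True)

with gr.Blocks() as demo:
    with gr.Accordion("Retrieved Quantitative Data", open=False):
        quantitative_url_display = gr.Markdown("Quantitative URL will appear here...", visible=False)
    run_button = gr.Button("Run analysis")
    run_button.click(fn=run_query, inputs=None, outputs=[quantitative_url_display])

if __name__ == "__main__":
    demo.launch()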