dolphinium committed
Commit f74c067 · 1 Parent(s): a4b0896

feat: Integrate dynamic field suggestions from external API into analysis plan generation and UI

Files changed (4)
  1. data_processing.py +23 -6
  2. extract_results.py +28 -0
  3. llm_prompts.py +25 -3
  4. ui.py +32 -17
data_processing.py CHANGED
@@ -26,6 +26,8 @@ from llm_prompts import (
     get_synthesis_report_prompt,
     get_visualization_code_prompt
 )
+from extract_results import get_search_list_params
+
 
 def parse_suggestions_from_report(report_text):
     """Extracts numbered suggestions from the report's markdown text."""
@@ -35,20 +37,36 @@ def parse_suggestions_from_report(report_text):
     suggestions = re.findall(r"^\s*\d+\.\s*(.*)", suggestions_text, re.MULTILINE)
     return [s.strip() for s in suggestions]
 
+
 def llm_generate_analysis_plan_with_history(llm_model, natural_language_query, chat_history):
     """
-    Generates a complete analysis plan from a user query, considering chat history.
+    Generates a complete analysis plan from a user query, considering chat history
+    and dynamic field suggestions from an external API.
     """
-    prompt = get_analysis_plan_prompt(natural_language_query, chat_history)
+    search_fields, search_name = [], ""
+    try:
+        # Call the external API to get dynamic field suggestions
+        search_fields, search_name = get_search_list_params(natural_language_query)
+        print(f"Successfully retrieved {len(search_fields)} dynamic fields.")
+    except Exception as e:
+        print(f"Warning: Could not retrieve dynamic search fields. Proceeding without them. Error: {e}")
+
+    # Generate the prompt, including the (potentially empty) search_fields
+    prompt = get_analysis_plan_prompt(natural_language_query, chat_history, search_fields)
+
     try:
         response = llm_model.generate_content(prompt)
         cleaned_text = re.sub(r'```json\s*|\s*```', '', response.text, flags=re.MULTILINE | re.DOTALL).strip()
         plan = json.loads(cleaned_text)
-        return plan
+        # Return the plan and the retrieved fields for UI display
+        return plan, search_fields
     except Exception as e:
        raw_response_text = response.text if 'response' in locals() else 'N/A'
        print(f"Error in llm_generate_analysis_plan_with_history: {e}\nRaw Response:\n{raw_response_text}")
-        return None
+        # Return None for the plan but still return search_fields for debugging in the UI
+        return None, search_fields
+
+
 
 def execute_quantitative_query(solr_client, plan):
     """Executes the facet query to get aggregate data."""
@@ -126,5 +144,4 @@ def execute_viz_code_and_get_path(viz_code, facet_data):
         return None
     except Exception as e:
         print(f"ERROR executing visualization code: {e}\n---Code---\n{viz_code}")
-        return None
-
+        return None
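Note that the return type changes from a bare plan to a (plan, search_fields) tuple, so any caller outside ui.py would also need to unpack two values. A minimal sketch of the new calling convention, assuming an already-configured llm_model as used elsewhere in the app (the wrapper name and question string below are illustrative only):

from data_processing import llm_generate_analysis_plan_with_history

def build_plan(llm_model, question):
    # The function now returns a (plan, search_fields) tuple instead of a bare plan.
    plan, search_fields = llm_generate_analysis_plan_with_history(llm_model, question, chat_history=[])
    if plan is None:
        # search_fields is still returned so callers can surface what the external API suggested.
        print("Plan generation failed; suggestions were:", search_fields)
    return plan, search_fields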
extract_results.py ADDED
@@ -0,0 +1,28 @@
+import requests
+import json
+import yaml
+
+def get_search_list_params(query, k=20):
+    """
+    Returns tuple: (search_fields, search_name)
+    """
+    url = "https://aitest.ebalina.com/stream"
+
+    response = requests.post(url,
+                             headers={'Content-Type': 'application/json'},
+                             json={"query": query, "k": k},
+                             stream=True)
+
+    for line in response.iter_lines():
+        if line and line.startswith(b'data: '):
+            try:
+                data = json.loads(line[6:])
+                if data.get('log_title') == 'Search List Result':
+                    yaml_data = yaml.safe_load(data['content'])
+                    # Return both the suggested fields and the search name
+                    return yaml_data.get('search_fields', []), yaml_data.get('search_name', '')
+            except (json.JSONDecodeError, yaml.YAMLError, KeyError):
+                continue
+
+    # Return an empty field list and name if no valid event is found
+    return [], ""
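A minimal usage sketch of the new helper, assuming the streaming endpoint is reachable and emits a 'Search List Result' event with 'field_name'/'field_value' pairs as in the code above; the query string is illustrative:

from extract_results import get_search_list_params

query = "Show me the top 5 companies by total deal value in 2023"  # illustrative query
try:
    search_fields, search_name = get_search_list_params(query, k=20)
except Exception as exc:
    # Network or parsing failures should not break the analysis flow.
    search_fields, search_name = [], ""
    print(f"Could not fetch suggestions: {exc}")

for field in search_fields:
    print(f"{field['field_name']} = {field['field_value']}")

This mirrors how data_processing.py wraps the call in a try/except so a failed suggestion lookup degrades to an empty list instead of aborting plan generation.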
llm_prompts.py CHANGED
@@ -11,9 +11,13 @@ import datetime
 import json
 from solr_metadata import format_metadata_for_prompt
 
-def get_analysis_plan_prompt(natural_language_query, chat_history):
+def get_analysis_plan_prompt(natural_language_query, chat_history, search_fields=None):
     """
     Generates the prompt for creating a Solr analysis plan from a user query.
+    Args:
+        natural_language_query (str): The user's query.
+        chat_history (list): A list of previous user and bot messages.
+        search_fields (list, optional): A list of dictionaries with 'field_name' and 'field_value'.
     """
     formatted_field_info = format_metadata_for_prompt()
     formatted_history = ""
@@ -21,6 +25,22 @@ def get_analysis_plan_prompt(natural_language_query, chat_history):
         if user_msg:
             formatted_history += f"- User: \"{user_msg}\"\n"
 
+    dynamic_fields_prompt_section = ""
+    if search_fields:
+        formatted_fields = "\n".join([f" - {field['field_name']}: {field['field_value']}" for field in search_fields])
+        dynamic_fields_prompt_section = f"""
+---
+### DYNAMIC FIELD SUGGESTIONS (Use Critically)
+
+An external API has suggested the following field-value pairs based on your query.
+**These are only HINTS.** Do NOT use them blindly.
+Critically evaluate if they make sense. For example, a `molecule_name` associated with a `company_name` might be irrelevant or illogical.
+Use only what is logical for the query. Do not construct filters from fields/values that do not make sense.
+
+**Suggested Fields:**
+{formatted_fields}
+"""
+
     return f"""
 You are an expert data analyst and Solr query engineer. Your task is to convert a natural language question into a structured JSON "Analysis Plan". This plan will be used to run two separate, efficient queries: one for aggregate data (facets) and one for finding illustrative examples (grouping).
 
@@ -43,6 +63,8 @@ You are an expert data analyst and Solr query engineer. Your task is to convert
 ### FIELD DEFINITIONS (Your Source of Truth)
 
 {formatted_field_info}
+{dynamic_fields_prompt_section}
+
 ---
 ### CHAT HISTORY
 {formatted_history}
@@ -113,7 +135,7 @@ Convert the following user query into a single, raw JSON "Analysis Plan" object,
 
 **Current User Query:** `{natural_language_query}`
 """
-
+# The other prompt functions remain unchanged.
 def get_synthesis_report_prompt(query, quantitative_data, qualitative_data, plan):
     """
     Generates the prompt for synthesizing a final report from the query results.
@@ -343,4 +365,4 @@ plt.tight_layout()
 
 **Your Task:**
 Now, generate the Python code.
-"""
+"""
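A short sketch of how the new search_fields argument feeds the prompt, assuming the repo's solr_metadata module is importable; the suggestion list below is hypothetical but follows the documented 'field_name'/'field_value' shape:

from llm_prompts import get_analysis_plan_prompt

# Hypothetical suggestions purely for illustration.
search_fields = [
    {"field_name": "company_name", "field_value": "Acme Pharma"},
    {"field_name": "molecule_name", "field_value": "examplimab"},
]

prompt = get_analysis_plan_prompt(
    "Top licensing deals for examplimab in 2023",  # illustrative query
    chat_history=[],
    search_fields=search_fields,
)
# The DYNAMIC FIELD SUGGESTIONS block is included only when search_fields is non-empty.
print("DYNAMIC FIELD SUGGESTIONS" in prompt)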
ui.py CHANGED
@@ -46,6 +46,8 @@ def create_ui(llm_model, solr_client):
             msg_textbox = gr.Textbox(placeholder="Ask a question, e.g., 'Show me the top 5 companies by total deal value in 2023'", label="Your Question", interactive=True)
 
         with gr.Column(scale=2):
+            with gr.Accordion("Dynamic Field Suggestions", open=False):
+                suggestions_display = gr.Markdown("Suggestions from the external API will appear here...", visible=True)
             with gr.Accordion("Generated Analysis Plan", open=False):
                 plan_display = gr.Markdown("Plan will appear here...", visible=True)
             with gr.Accordion("Retrieved Quantitative Data", open=False):
@@ -64,21 +66,31 @@
         if history is None:
             history = []
 
-        yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False))
+        # Reset all displays at the beginning of a new flow
+        yield (history, state, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Suggestions from the external API will appear here...", visible=False))
 
         query_context = user_input.strip()
         if not query_context:
             history.append((user_input, "Please enter a question to analyze."))
-            yield (history, state, None, None, None, None, None)
+            yield (history, state, None, None, None, None, None, None)
             return
 
         history.append((user_input, f"Analyzing: '{query_context}'\n\n*Generating analysis plan...*"))
-        yield (history, state, None, None, None, None, None)
-
-        analysis_plan = llm_generate_analysis_plan_with_history(llm_model, query_context, history)
+        yield (history, state, None, None, None, None, None, None)
+
+        # Generate plan and get search field suggestions
+        analysis_plan, search_fields = llm_generate_analysis_plan_with_history(llm_model, query_context, history)
+
+        # Update and display search field suggestions in its own accordion
+        if search_fields:
+            suggestions_md = "**External API Suggestions:**\n" + "\n".join([f"- `{field['field_name']}`: `{field['field_value']}`" for field in search_fields])
+            suggestions_display_update = gr.update(value=suggestions_md, visible=True)
+        else:
+            suggestions_display_update = gr.update(value="No suggestions were returned from the external API.", visible=True)
 
         if not analysis_plan:
             history.append((None, "I'm sorry, I couldn't generate a valid analysis plan. Please try rephrasing."))
-            yield (history, state, None, None, None, None, None)
+            yield (history, state, None, None, None, None, None, suggestions_display_update)
             return
 
         history.append((None, "✅ Analysis plan generated!"))
@@ -89,11 +101,12 @@
         """
         history.append((None, plan_summary))
         formatted_plan = f"**Full Analysis Plan:**\n```json\n{json.dumps(analysis_plan, indent=2)}\n```"
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
 
         history.append((None, "*Executing queries for aggregates and examples...*"))
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
 
+        # Execute queries in parallel
         aggregate_data = None
         example_data = None
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -104,17 +117,19 @@
 
         if not aggregate_data or aggregate_data.get('count', 0) == 0:
             history.append((None, "No data was found for your query. Please try a different question."))
-            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None)
+            yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), None, None, suggestions_display_update)
             return
 
+        # Display retrieved data
         formatted_agg_data = f"**Quantitative (Aggregate) Data:**\n```json\n{json.dumps(aggregate_data, indent=2)}\n```"
         formatted_qual_data = f"**Qualitative (Example) Data:**\n```json\n{json.dumps(example_data, indent=2)}\n```"
         qual_data_display_update = gr.update(value=formatted_qual_data, visible=True)
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
 
         history.append((None, "✅ Data retrieved. Generating visualization and final report..."))
-        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update)
+        yield (history, state, None, None, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
 
+        # Generate viz and report
         with concurrent.futures.ThreadPoolExecutor() as executor:
             viz_future = executor.submit(llm_generate_visualization_code, llm_model, query_context, aggregate_data)
 
@@ -122,7 +137,7 @@
         stream_history = history[:]
         for chunk in llm_synthesize_enriched_report_stream(llm_model, query_context, aggregate_data, example_data, analysis_plan):
             report_text += chunk
-            yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update)
+            yield (stream_history, state, None, gr.update(value=report_text, visible=True), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
 
         history.append((None, report_text))
 
@@ -132,13 +147,13 @@
         if not plot_path:
             history.append((None, "*I was unable to generate a plot for this data.*\n"))
 
-        yield (history, state, output_plot, report_text, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update)
+        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
 
         state['query_count'] += 1
         state['last_suggestions'] = parse_suggestions_from_report(report_text)
         next_prompt = "Analysis complete. What would you like to explore next?"
         history.append((None, next_prompt))
-        yield (history, state, output_plot, report_text, gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update)
+        yield (history, state, output_plot, gr.update(value=report_text), gr.update(value=formatted_plan, visible=True), gr.update(value=formatted_agg_data, visible=True), qual_data_display_update, suggestions_display_update)
 
     def reset_all():
         """Resets the entire UI for a new analysis session."""
@@ -150,13 +165,14 @@
             gr.update(value=None, visible=False),
             gr.update(value=None, visible=False),
             gr.update(value=None, visible=False),
+            gr.update(value=None, visible=False),
             gr.update(value=None, visible=False)
         )
 
     msg_textbox.submit(
         fn=process_analysis_flow,
         inputs=[msg_textbox, chatbot, state],
-        outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display],
+        outputs=[chatbot, state, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display, suggestions_display],
     ).then(
         lambda: gr.update(value=""),
         None,
@@ -167,9 +183,8 @@
     clear_button.click(
         fn=reset_all,
         inputs=None,
-        outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display],
+        outputs=[chatbot, state, msg_textbox, plot_display, report_display, plan_display, quantitative_data_display, qualitative_data_display, suggestions_display],
         queue=False
    )
 
-    return demo
-
+    return demo
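With this change every yield from process_analysis_flow must line up position-for-position with the eight components in the outputs list, with suggestions_display occupying the new last slot. A self-contained sketch of the suggestion-formatting logic added above (the sample field values are illustrative only):

def format_suggestions(search_fields):
    # Mirrors the accordion-content logic added to process_analysis_flow.
    if search_fields:
        return "**External API Suggestions:**\n" + "\n".join(
            f"- `{field['field_name']}`: `{field['field_value']}`" for field in search_fields
        )
    return "No suggestions were returned from the external API."

print(format_suggestions([{"field_name": "company_name", "field_value": "Acme Pharma"}]))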