Muhammad Mustehson committed on
Commit
44de337
·
1 Parent(s): 59bfecd

Initial Draft

Browse files
Files changed (5) hide show
  1. app.py +136 -0
  2. logo.png +0 -0
  3. notebook.py +114 -0
  4. output.json +17 -0
  5. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from notebook import Notebook
7
+
8
# Number of visible text lines in the "Generated SQL" and "Query Description"
# text boxes of every tab.
TAB_LINES = 8


# Custom CSS styling.
# Every rule is closed before the next one starts: the `.logo` rule previously
# lacked its closing brace, which nested the button rules inside it.
custom_css = """
.gradio-container {
    background-color: #f0f4f8;
}
.logo {
    max-width: 200px;
    margin: 20px auto;
    display: block;
}
.gr-button {
    background-color: #4a90e2 !important;
}
.gr-button:hover {
    background-color: #3a7bc8 !important;
}
"""
29
+
30
def read_data(path):
    """Load and return the JSON document stored at ``path``.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding, which differs between operating systems.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
34
# Load the query results once at import time; the tab-building loop and
# export_notebook() further down both read this module-level value.
data = read_data(path="output.json")
35
+
36
+
37
def build_chart(fig_data, chart_config):
    """Create the Plotly figure described by ``chart_config``.

    Args:
        fig_data: DataFrame with the chart rows. ``bar``, ``line`` and ``pie``
            read the ``x`` and ``y`` columns; ``hist`` reads ``bin_start``,
            ``bin_end`` and ``frequency``.
        chart_config: Mapping with ``type``, ``title`` and ``y_axis_label``;
            every type except ``pie`` also reads ``x_axis_label``.

    Returns:
        The Plotly Express figure with its legend hidden.

    Raises:
        ValueError: If ``chart_config["type"]`` is not ``bar``, ``line``,
            ``pie`` or ``hist``.
    """
    chart_type = chart_config.get("type")

    # Fail early with a clear message; previously an unknown type fell through
    # every branch and crashed on the unbound ``fig`` variable.
    if chart_type not in ("bar", "line", "pie", "hist"):
        raise ValueError(f"Unsupported chart type: {chart_type!r}")

    title = chart_config["title"]
    y_label = chart_config["y_axis_label"]

    if chart_type == "bar":
        fig = px.bar(
            fig_data, x="x", y="y", title=title,
            labels={"y": y_label, "x": chart_config["x_axis_label"]},
            color="x", template="plotly_white",
        )
    elif chart_type == "line":
        fig = px.line(
            fig_data, x="x", y="y", title=title,
            labels={"y": y_label, "x": chart_config["x_axis_label"]},
            template="plotly_white",
        )
    elif chart_type == "pie":
        fig = px.pie(
            fig_data, values="y", names="x", title=title,
            labels={"y": y_label}, template="plotly_white", hole=0.3,
        )
        fig.update_traces(textposition="inside", textfont_size=12)
    else:  # "hist": pre-binned data drawn as bars at each bin's midpoint
        # assign() returns a new frame, so the caller's DataFrame is not
        # modified as a side effect of plotting.
        binned = fig_data.assign(
            bin_center=(fig_data["bin_start"] + fig_data["bin_end"]) / 2
        )
        fig = px.bar(
            binned, x="bin_center", y="frequency", title=title,
            labels={"frequency": y_label, "bin_center": chart_config["x_axis_label"]},
            template="plotly_white",
        )

    fig.update_layout(showlegend=False)
    return fig
70
+
71
def display_data(data_item):
    """Unpack one result item into the values shown in a single tab.

    ``data_item`` is a mapping whose ``"body"`` entry is a JSON string; the
    decoded body is read for ``sql_config``, ``table_data``, ``chart_data``
    and ``chart_config``.

    Returns:
        A 4-tuple ``(sql_query, explanation, table DataFrame, Plotly figure)``.
    """
    payload = json.loads(data_item.get("body"))

    # SQL text and its human-readable explanation.
    sql_section = payload.get("sql_config")
    sql_text = sql_section.get("sql_query")
    explanation = sql_section.get("explanation")

    # Tabular result.
    table_frame = pd.DataFrame(payload.get("table_data"))

    # Chart rows are stored under the key named by the configured chart type.
    chart_section = payload.get("chart_data")
    chart_settings = payload.get("chart_config")
    kind = chart_settings.get("type")
    chart_rows = chart_section.get(kind).get("data")
    figure = build_chart(fig_data=pd.DataFrame(chart_rows), chart_config=chart_settings)

    return sql_text, explanation, table_frame, figure
87
+
88
+
89
def export_notebook():
    """Build a notebook from the module-level ``data`` and return the path it was written to."""
    # ``data`` is only read here, so no ``global`` declaration is required.
    return Notebook(data=data).export_notebook()
94
+
95
+
96
# Page layout: logo and heading, one tab per entry in ``data`` (SQL and
# description on the first row, table and chart on the second), then the
# export button with a download slot that stays hidden until a file exists.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="indigo"),
    css=custom_css,  # the stylesheet was defined above but never applied
) as demo:
    gr.Image("logo.png", label=None, show_label=False, container=False, height=100)
    gr.Markdown("""
    <div style='text-align: center;'>
    <strong style='font-size: 36px;'>Export As Notebook</strong>
    <br>
    <span style='font-size: 20px;'>Automate Export Multiple Tabs to Jupyter Notebook.</span>
    </div>
    """)
    with gr.Row():
        with gr.Tabs():
            for i, data_item in enumerate(data):
                sql_query, query_description, table_data, fig = display_data(data_item=data_item)
                with gr.Tab(f"Tab {i+1}"):
                    with gr.Row():
                        with gr.Column():
                            row1 = gr.Textbox(lines=TAB_LINES, label="Generated SQL", value=sql_query,
                                              interactive=False, autoscroll=False)
                        with gr.Column():
                            row2 = gr.Textbox(lines=TAB_LINES, label="Query Description",
                                              value=query_description, interactive=False,
                                              autoscroll=False)

                    with gr.Row():
                        with gr.Column():
                            row3 = gr.DataFrame(label="Table Data", value=table_data, interactive=False,
                                                max_height=400, min_width=800)
                        with gr.Column():
                            row4 = gr.Plot(value=fig)

    exprt_btn = gr.Button("Export as Notebook", variant="primary")
    download_component = gr.File(
        label="Click on the File Size Below 👇 to Download Notebook",
        visible=False,
        interactive=False,
    )
    # Clicking the button writes the notebook and hands its path to the file slot.
    exprt_btn.click(export_notebook, inputs=None, outputs=download_component)
    # Reveal the file slot only once it actually holds a file.
    download_component.change(
        lambda x: gr.update(visible=bool(x)),
        inputs=download_component,
        outputs=download_component,
    )

if __name__ == "__main__":
    demo.launch(debug=True)
136
+
logo.png ADDED
notebook.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import textwrap
3
+ import nbformat
4
+ from nbformat import v4 as nbf
5
+ import logging
6
+ import json
7
+ from datetime import datetime, timezone
8
+ import pandas as pd
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
class Notebook:
    """Assemble query results into a Jupyter notebook and write it to disk.

    ``data`` is iterated as a sequence of mappings whose ``"body"`` entry is a
    JSON string. Each decoded body is read for ``sql_config`` and
    ``table_data`` and, optionally, ``chart_data`` / ``chart_config``.
    """

    # Chart types for which plotting code can be generated.
    _CHART_TYPES = ("bar", "line", "pie", "hist")

    def __init__(self, data):
        """Create an empty notebook that will be filled from ``data``."""
        self.nb = nbf.new_notebook()
        self.nb.cells = []
        self.data = data
        self.output_path = "tmp/notebook.ipynb"
        # Guards the shared imports cell so it is emitted once per notebook.
        self.imports_added = False

    def build_notebook(self):
        """Fill ``self.nb`` with one group of cells per entry in ``self.data``.

        Cells left over from an earlier build are discarded first, so building
        (or exporting) the same instance twice does not duplicate content.
        """
        self.nb.cells = []
        self.imports_added = False

        metrics = self.data
        logger.info("Start building notebook with %d metrics", len(metrics))
        for i, metric in enumerate(metrics):
            logger.info("Processing metric %d of %d", i + 1, len(metrics))
            metric_result = json.loads(metric.get("body"))
            table_data = metric_result.get("table_data")
            query_config = metric_result.get("sql_config")

            # ``or {}`` also covers an explicit JSON ``null`` for either key.
            chart_data = metric_result.get("chart_data") or {}
            chart_config = metric_result.get("chart_config") or {}
            chart_type = chart_config.get("type")

            if not self.nb.cells:
                self._add_title()

            self._add_metric_header(config=chart_config, index=i)

            self._add_query_details(query_config=query_config)
            if not self.imports_added:
                self._import_libs()
                self.imports_added = True

            self._add_table(raw_table=table_data)

            if chart_data and chart_config:
                self._chart_code(chart_type=chart_type, chart_data=chart_data, chart_config=chart_config)

    def export_notebook(self):
        """Build the notebook, write it to ``self.output_path`` and return that path."""
        self.build_notebook()
        logger.info("Exporting notebook to %s", self.output_path)
        directory = os.path.dirname(self.output_path)
        if directory:  # makedirs("") raises when the path has no directory part
            os.makedirs(directory, exist_ok=True)
        # The cells contain emoji, so the file must be written as UTF-8 rather
        # than in the platform's default encoding.
        with open(self.output_path, "w", encoding="utf-8") as f:
            nbformat.write(self.nb, f)
        logger.info("Notebook exported successfully to %s", self.output_path)
        return self.output_path

    def _import_libs(self):
        """Append the cells importing the libraries used by generated code."""
        self.nb.cells.append(nbf.new_markdown_cell("### 📦 Imports"))
        self.nb.cells.append(nbf.new_code_cell("import pandas as pd\nimport plotly.express as px"))

    def _add_title(self):
        """Append the notebook's top-level heading."""
        self.nb.cells.append(nbf.new_markdown_cell("# Metrics Dashboard"))

    def _add_metric_header(self, config, index):
        """Append the heading for metric ``index`` (0-based) with a UTC timestamp."""
        # Values are pulled into locals so the f-string never reuses its own
        # quote character inside a replacement field (only legal on 3.12+).
        title = config.get("title", "")
        timestamp = datetime.now(timezone.utc).isoformat()
        self.nb.cells.append(
            nbf.new_markdown_cell(f"## 📈 Metric {index + 1} {title}\n**Timestamp:** {timestamp}")
        )

    @staticmethod
    def _build_chart_source(chart_type, chart_data, chart_config):
        """Return the Python source of a cell that draws the described chart.

        Args:
            chart_type: One of ``bar``, ``line``, ``pie`` or ``hist``.
            chart_data: Mapping from chart type to ``{"data": rows}``.
            chart_config: Mapping with ``title`` and ``y_axis_label``; every
                type except ``pie`` also reads ``x_axis_label``.

        Raises:
            ValueError: If the chart type is unsupported or ``chart_data`` has
                no entry for it.
        """
        if chart_type not in Notebook._CHART_TYPES:
            raise ValueError(f"Unsupported chart type: {chart_type!r}")
        entry = chart_data.get(chart_type)
        if entry is None:
            raise ValueError(f"chart_data has no entry for chart type {chart_type!r}")

        rows = entry.get("data")
        title = chart_config["title"]
        y_label = chart_config["y_axis_label"]

        # Every interpolated value goes through repr() so that quotes or
        # backslashes in titles/labels yield valid Python string literals.
        lines = [f"df = pd.DataFrame({rows!r})"]
        if chart_type == "bar":
            labels = {"y": y_label, "x": chart_config["x_axis_label"]}
            lines.append(f"fig = px.bar(df, x='x', y='y', title={title!r},")
            lines.append(f"             labels={labels!r}, color='x', template='plotly_white')")
        elif chart_type == "line":
            labels = {"y": y_label, "x": chart_config["x_axis_label"]}
            lines.append(f"fig = px.line(df, x='x', y='y', title={title!r},")
            lines.append(f"              labels={labels!r}, template='plotly_white')")
        elif chart_type == "pie":
            labels = {"y": y_label}
            lines.append(f"fig = px.pie(df, values='y', names='x', title={title!r},")
            lines.append(f"             labels={labels!r}, template='plotly_white', hole=0.3)")
            lines.append("fig.update_traces(textposition='inside', textfont_size=12)")
        else:  # "hist": pre-binned data drawn as bars at each bin's midpoint
            labels = {"frequency": y_label, "bin_center": chart_config["x_axis_label"]}
            lines.append("df['bin_center'] = (df['bin_start'] + df['bin_end']) / 2")
            lines.append(f"fig = px.bar(df, x='bin_center', y='frequency', title={title!r},")
            lines.append(f"             labels={labels!r}, template='plotly_white')")

        lines.append("fig.update_layout(showlegend=False)")
        lines.append("fig.show()")
        return "\n".join(lines)

    def _chart_code(self, chart_type, chart_data, chart_config):
        """Append a heading and a code cell that recreates the chart."""
        chart_code = self._build_chart_source(chart_type, chart_data, chart_config)

        self.nb.cells.append(nbf.new_markdown_cell("### 📊 Chart"))
        self.nb.cells.append(nbf.new_code_cell(chart_code))

    def _add_query_details(self, query_config):
        """Append the user's question, the SQL that was run and its explanation."""
        # The stored SQL may carry literal backslash-n sequences; turn them
        # into real line breaks for display.
        sql_clean = query_config["sql_query"].replace("\\n", "\n").strip()
        user_query = query_config["text_query"]
        explanation = query_config["explanation"]
        self.nb.cells.append(nbf.new_markdown_cell(f"**User Query:** {user_query}\n"))
        self.nb.cells.append(nbf.new_markdown_cell(
            f"### 🔍 Query Details\n**SQL:**\n```sql\n{sql_clean}\n```\n**Explanation:** {explanation}"
        ))

    def _add_table(self, raw_table):
        """Append a code cell that rebuilds the result table as a DataFrame."""
        # Missing values are replaced with the string "NAN" before the records
        # are embedded: a bare ``nan`` would be an undefined name in the cell.
        table_data = pd.DataFrame(raw_table).fillna("NAN").to_dict(orient="records")
        raw_code = f"raw_table = pd.DataFrame({table_data!r})\nraw_table.head()"
        self.nb.cells.append(nbf.new_markdown_cell("### Table"))
        self.nb.cells.append(nbf.new_code_cell(raw_code))
output.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "body": "{\"chart_data\": {\"bar\": {\"data\": [{\"x\": \"1\", \"y\": 2483550}, {\"x\": \"2\", \"y\": 598081}, {\"x\": \"0\", \"y\": 121958}, {\"x\": \"4\", \"y\": 31334}, {\"x\": \"3\", \"y\": 17793}, {\"x\": \"5\", \"y\": 1}]}}, \"table_data\": {\"payment_type\": [1, 2, 0, 4, 3, 5], \"trip_count\": [2483550, 598081, 121958, 31334, 17793, 1]}, \"chart_config\": {\"type\": \"bar\", \"title\": \"Total Number of Trips by Payment Type\", \"x_axis_label\": \"Payment Type\", \"y_axis_label\": \"Number of Trips\", \"legend\": false, \"colors\": []}, \"sql_config\": {\"sql_query\": \"SELECT payment_type, COUNT(*) AS trip_count FROM sample_data. nyc.taxi GROUP BY payment_type ORDER BY trip_count DESC LIMIT 20000;\", \"explanation\": \"Counts the number of trips for each payment type.\", \"text_query\": \"What is the total number of trips by each payment_type?\"}, \"error\": {\"success\": true}, \"hash_key\": \"da469599b102c658f3ef269c9ba5770b\"}"
4
+ },
5
+ {
6
+ "body": "{\"chart_data\": {\"line\": {\"data\": [{\"x\": \"2022-10-31\", \"y\": 34}, {\"x\": \"2022-11-01\", \"y\": 120506}, {\"x\": \"2022-11-02\", \"y\": 126271}, {\"x\": \"2022-11-03\", \"y\": 131484}, {\"x\": \"2022-11-04\", \"y\": 130682}, {\"x\": \"2022-11-05\", \"y\": 130401}, {\"x\": \"2022-11-06\", \"y\": 104570}, {\"x\": \"2022-11-07\", \"y\": 112135}, {\"x\": \"2022-11-08\", \"y\": 121121}, {\"x\": \"2022-11-09\", \"y\": 128700}, {\"x\": \"2022-11-10\", \"y\": 132727}, {\"x\": \"2022-11-11\", \"y\": 117728}, {\"x\": \"2022-11-12\", \"y\": 130944}, {\"x\": \"2022-11-13\", \"y\": 114269}, {\"x\": \"2022-11-14\", \"y\": 82278}, {\"x\": \"2022-11-15\", \"y\": 90728}, {\"x\": \"2022-11-16\", \"y\": 92660}, {\"x\": \"2022-11-17\", \"y\": 95344}, {\"x\": \"2022-11-18\", \"y\": 95454}, {\"x\": \"2022-11-19\", \"y\": 94932}, {\"x\": \"2022-11-20\", \"y\": 81125}, {\"x\": \"2022-11-21\", \"y\": 108526}, {\"x\": \"2022-11-22\", \"y\": 114546}, {\"x\": \"2022-11-23\", \"y\": 105640}, {\"x\": \"2022-11-24\", \"y\": 69663}, {\"x\": \"2022-11-25\", \"y\": 86607}, {\"x\": \"2022-11-26\", \"y\": 99513}, {\"x\": \"2022-11-27\", \"y\": 90529}, {\"x\": \"2022-11-28\", \"y\": 106488}, {\"x\": \"2022-11-29\", \"y\": 119372}, {\"x\": \"2022-11-30\", \"y\": 117677}, {\"x\": \"2022-12-01\", \"y\": 57}]}}, \"table_data\": {\"x\": [\"2022-10-31\", \"2022-11-01\", \"2022-11-02\", \"2022-11-03\", \"2022-11-04\", \"2022-11-05\", \"2022-11-06\", \"2022-11-07\", \"2022-11-08\", \"2022-11-09\", \"2022-11-10\", \"2022-11-11\", \"2022-11-12\", \"2022-11-13\", \"2022-11-14\", \"2022-11-15\", \"2022-11-16\", \"2022-11-17\", \"2022-11-18\", \"2022-11-19\", \"2022-11-20\", \"2022-11-21\", \"2022-11-22\", \"2022-11-23\", \"2022-11-24\", \"2022-11-25\", \"2022-11-26\", \"2022-11-27\", \"2022-11-28\", \"2022-11-29\", \"2022-11-30\", \"2022-12-01\"], \"y\": [34, 120506, 126271, 131484, 130682, 130401, 104570, 112135, 121121, 128700, 132727, 117728, 130944, 114269, 82278, 90728, 92660, 95344, 
95454, 94932, 81125, 108526, 114546, 105640, 69663, 86607, 99513, 90529, 106488, 119372, 117677, 57]}, \"chart_config\": {\"type\": \"line\", \"title\": \"Total Number of Trips per Day Over Time\", \"x_axis_label\": \"Date\", \"y_axis_label\": \"Number of Trips\", \"legend\": false, \"colors\": []}, \"sql_config\": {\"sql_query\": \"SELECT CAST(tpep_pickup_datetime AS DATE) AS x, COUNT(*) AS y FROM sample_data.nyc.taxi GROUP BY x ORDER BY x LIMIT 20000;\", \"explanation\": \"Counts the number of trips per day based on the pickup datetime and formats the data for line plot.\", \"text_query\": \"What is the total number of trips per day over time?\"}, \"error\": {\"success\": true}, \"hash_key\": \"fc224d097e70ed97e3c95914afb2c0d5\"}"
7
+ },
8
+ {
9
+ "body": "{\"chart_data\": {\"hist\": {\"data\": [{\"bin_start\": 0, \"bin_end\": 1, \"frequency\": 667647.0}, {\"bin_start\": 1, \"bin_end\": 2, \"frequency\": 1043642.0}, {\"bin_start\": 2, \"bin_end\": 3, \"frequency\": 547827.0}, {\"bin_start\": 3, \"bin_end\": 4, \"frequency\": 268437.0}, {\"bin_start\": 4, \"bin_end\": 5, \"frequency\": 140938.0}, {\"bin_start\": 5, \"bin_end\": 10, \"frequency\": 296746.0}, {\"bin_start\": 10, \"bin_end\": 20, \"frequency\": 251072.0}, {\"bin_start\": 20, \"bin_end\": 30, \"frequency\": 34069.0}, {\"bin_start\": 30, \"bin_end\": 40, \"frequency\": 1547.0}, {\"bin_start\": 40, \"bin_end\": 50, \"frequency\": 355.0}, {\"bin_start\": 50, \"bin_end\": 100, \"frequency\": 303.0}]}}, \"table_data\": {\"bin_id\": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], \"frequency\": [667647, 1043642, 547827, 268437, 140938, 296746, 251072, 34069, 1547, 355, 303]}, \"chart_config\": {\"type\": \"hist\", \"title\": \"Distribution of Trip Distance\", \"x_axis_label\": \"Trip Distance (miles)\", \"y_axis_label\": \"Frequency\", \"legend\": false, \"colors\": []}, \"sql_config\": {\"sql_query\": \"\\nSELECT \\n CASE \\n WHEN trip_distance < 1 THEN 1\\n WHEN trip_distance < 2 THEN 2\\n WHEN trip_distance < 3 THEN 3\\n WHEN trip_distance < 4 THEN 4\\n WHEN trip_distance < 5 THEN 5\\n WHEN trip_distance < 10 THEN 6\\n WHEN trip_distance < 20 THEN 7\\n WHEN trip_distance < 30 THEN 8\\n WHEN trip_distance < 40 THEN 9\\n WHEN trip_distance < 50 THEN 10\\n ELSE 11\\n END AS bin_id,\\n COUNT(*) AS frequency\\nFROM \\n sample_data.nyc.taxi\\nWHERE \\n trip_distance < 100\\nGROUP BY \\n bin_id\\nORDER BY \\n bin_id;\\n\", \"explanation\": \"Retrieves the frequency of trips within specified distance bins for the first 1000 rows using CASE statements.\", \"text_query\": \"What is the distribution of trip_distance? only use first 1000 rows\"}, \"error\": {\"success\": true}, \"hash_key\": \"3ba0e8041cabdeee7f140c9232eb16f7\"}"
10
+ },
11
+ {
12
+ "body": "{\"chart_data\": {\"pie\": {\"data\": [{\"x\": \"1.0\", \"y\": 90.54768674926224}, {\"x\": \"2.0\", \"y\": 4.076100072646959}, {\"x\": \"nan\", \"y\": 3.7494193315926347}, {\"x\": \"5.0\", \"y\": 0.9550169904114006}, {\"x\": \"3.0\", \"y\": 0.27749109436818514}, {\"x\": \"99.0\", \"y\": 0.2704508261862314}, {\"x\": \"4.0\", \"y\": 0.1231893214196009}, {\"x\": \"6.0\", \"y\": 0.0006456141127555825}]}}, \"table_data\": {\"RatecodeID\": [1.0, 2.0, NaN, 5.0, 3.0, 99.0, 4.0, 6.0], \"trip_count\": [2945260, 132584, 121958, 31064, 9026, 8797, 4007, 21], \"percentage\": [90.54768674926224, 4.076100072646959, 3.7494193315926347, 0.9550169904114006, 0.27749109436818514, 0.2704508261862314, 0.1231893214196009, 0.0006456141127555825]}, \"chart_config\": {\"type\": \"pie\", \"title\": \"Percentage Share of Trips by RatecodeID\", \"x_axis_label\": \"RatecodeID\", \"y_axis_label\": \"Percentage\", \"legend\": true, \"colors\": []}, \"sql_config\": {\"sql_query\": \"SELECT RatecodeID, COUNT(*) AS trip_count FROM sample_data.nyc.taxi GROUP BY RatecodeID ORDER BY trip_count DESC LIMIT 20000;\", \"explanation\": \"Counts the number of trips for each RatecodeID.\", \"text_query\": \"What is the percentage share of trips by RatecodeID? show as pie chart\"}, \"error\": {\"success\": true}, \"hash_key\": \"0efdf6fa1df25fd3116353db8630552d\"}"
13
+ },
14
+ {
15
+ "body": "{\"chart_data\": {\"bar\": {\"data\": [{\"x\": \"2\", \"y\": 2.929572542537025}, {\"x\": \"1\", \"y\": 2.579547053346901}, {\"x\": \"6\", \"y\": 0.0}]}}, \"table_data\": {\"x\": [2, 1, 6], \"y\": [2.929572542537025, 2.579547053346901, 0.0]}, \"chart_config\": {\"type\": \"bar\", \"title\": \"Average Tip Amount per VendorID\", \"x_axis_label\": \"VendorID\", \"y_axis_label\": \"Average Tip Amount\", \"legend\": false, \"colors\": []}, \"sql_config\": {\"sql_query\": \"SELECT VendorID AS x, AVG(tip_amount) AS y FROM sample_data. nyc.taxi GROUP BY VendorID ORDER BY y DESC LIMIT 20;\", \"explanation\": \"Calculates the average tip_amount for each VendorID and orders the results by the average tip amount in descending order, limiting to the top 20 VendorIDs.\", \"text_query\": \"What is the average tip_amount per VendorID?\"}, \"error\": {\"success\": true}, \"hash_key\": \"9db02b369fb12b3c3b1974d32e2cb47c\"}"
16
+ }
17
+ ]
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ plotly
4
+ nbformat