Shad0ws committed on
Commit
0f2a836
·
1 Parent(s): 044b652

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -69
app.py CHANGED
@@ -12,91 +12,108 @@ from IPython.display import HTML, display
12
  import uuid
13
  import base64
14
  import json
 
 
 
 
15
 
16
- st.title("Ask Questions to Data")
17
- st.markdown("##### Demo Application powered by sketch package")
18
- st.sidebar.image("https://avatars.githubusercontent.com/u/106505054?s=200&v=4", width=100)
19
- st.sidebar.title("About the Package used")
20
- st.sidebar.markdown("##### Sketch is an AI code-writing assistant for pandas users that understands the context of the data, greatly improving the relevance of suggestions. Sketch is usable in seconds and doesn't require adding a plugin to IDE.")
 
21
 
22
- st.sidebar.title("How it works:")
23
- st.sidebar.markdown("##### Sketch uses efficient approximation algorithms (data sketches) to quickly summarize the data, and feed that information into language models. Right now, it does this by summarizing the columns and writing these summary statistics as additional context to be used by the code-writing prompt. In the future, the dev team hopes to feed these sketches directly into custom made data + language foundation models to get more accurate results.")
24
 
25
- st.sidebar.title("Usecases:")
26
- st.sidebar.markdown("##### Data Catalogging: General tagging (eg. PII identification), Metadata generation (names and descriptions)")
27
- st.sidebar.markdown("##### Data Engineering: Data cleaning and masking (compliance), Derived feature creation and extraction")
28
- st.sidebar.markdown("##### Data Analysis: Data questions, Data Visualizations")
29
 
30
- st.sidebar.caption("Github Repository: https://github.com/approximatelabs/sketch")
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
 
 
 
33
 
34
 
 
 
35
 
36
- def upload_data_file():
37
- st.session_state.file = None
38
- st.session_state.df = None
39
- file = st.file_uploader(
40
- label='Upload Data File',
41
- type=["csv","xlsx","xls"]
42
- )
43
- if file is not None:
44
- load_data(file)
45
-
46
-
47
- def load_data(file):
48
- st.session_state.file = file
49
- df = pd.read_csv(file)
50
- st.session_state.df = df
51
-
52
-
53
- # Configure session state
54
- if 'file' not in st.session_state:
55
- st.session_state.file = None
56
- if 'df' not in st.session_state:
57
- st.session_state.df = None
58
 
59
-
60
- if st.session_state.file is None:
61
- upload_data_file()
62
 
 
 
63
 
64
- def to_b64(data):
65
- return base64.b64encode(json.dumps(data).encode("utf-8")).decode("utf-8")
 
 
 
 
66
 
67
- if st.session_state.file is not None:
68
- st.session_state.file.seek(0)
 
 
 
 
69
 
70
- df = pd.read_csv(st.session_state.file)
71
 
72
- st.header("Uploaded Data:")
73
- st.dataframe(df)
 
 
 
 
 
 
 
74
 
75
- with st.form("my_form"):
76
- request_type = st.radio(
77
- label="Selection Panel",
78
- options=['Ask question about the data', 'Generate codes for new analysis'],
79
- index=0
80
- )
81
 
82
- request = st.text_area(
83
- label="Input your request",
84
- value="",
85
- height=50,
86
- max_chars=500
87
- )
88
 
89
- submitted = st.form_submit_button("Submit")
90
-
91
- if submitted:
92
- if request_type== 'Ask question about the data':
93
- if request != "":
94
- answer = df.sketch.ask(request, call_display=False)
95
- st.code(answer)
96
- else:
97
- if request != "":
98
- answer1 = df.sketch.howto(request, call_display=False)
99
- st.code(answer1)
100
 
101
- else:
102
- st.write('Please upload data file in order to ask questions to it.')
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  import uuid
13
  import base64
14
  import json
15
+ import tkinter
16
+ import matplotlib
17
+ matplotlib.use('TkAgg')
18
+ import matplotlib.pyplot as plt
19
 
20
+ col1, mid, col2= st.columns([1,1,20])
21
+ with col1:
22
+ st.image("https://avatars.githubusercontent.com/u/106505054?s=200&v=4", width=70)
23
+ with col2:
24
+ st.markdown("# Ask Questions to Data")
25
+ st.markdown("###### Streamlit implementation of sketch package in pandas ")
26
 
 
 
27
 
28
+ tab1, tab2 = st.tabs(["Ask Questions to Data", "About the Application"])
 
 
 
29
 
30
+ with tab1:
31
 
32
+ def upload_data_file():
33
+ st.session_state.file = None
34
+ st.session_state.df = None
35
+ file = st.file_uploader(
36
+ label='Upload Data File',
37
+ type=["csv","xlsx","xls"]
38
+ )
39
+ if file is not None:
40
+ load_data(file)
41
+
42
+
43
+ def load_data(file):
44
+ st.session_state.file = file
45
+ df = pd.read_csv(file)
46
+ st.session_state.df = df
47
+
48
+
49
+ # Configure session state
50
+ if 'file' not in st.session_state:
51
+ st.session_state.file = None
52
+ if 'df' not in st.session_state:
53
+ st.session_state.df = None
54
 
55
+
56
+ if st.session_state.file is None:
57
+ upload_data_file()
58
 
59
 
60
+ def to_b64(data):
61
+ return base64.b64encode(json.dumps(data).encode("utf-8")).decode("utf-8")
62
 
63
+ if st.session_state.file is not None:
64
+ st.session_state.file.seek(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ df = pd.read_csv(st.session_state.file)
 
 
67
 
68
+ st.header("Uploaded Data:")
69
+ st.dataframe(df)
70
 
71
+ with st.form("my_form"):
72
+ request_type = st.radio(
73
+ label="Selection Panel",
74
+ options=['Ask question about the data', 'Generate codes for new analysis'],
75
+ index=0
76
+ )
77
 
78
+ request = st.text_area(
79
+ label="Input your request",
80
+ value="",
81
+ height=50,
82
+ max_chars=500
83
+ )
84
 
85
+ submitted = st.form_submit_button("Submit")
86
 
87
+ if submitted:
88
+ if request_type== 'Ask question about the data':
89
+ if request != "":
90
+ answer = df.sketch.ask(request, call_display=False)
91
+ st.code(answer)
92
+ else:
93
+ if request != "":
94
+ answer1 = df.sketch.howto(request, call_display=False)
95
+ st.code(answer1)
96
 
97
+ # Plot the bar chart
 
 
 
 
 
98
 
99
+ exec(answer1)
100
+
101
+ else:
102
+ st.write('Please upload data file in order to ask questions to it.')
 
 
103
 
104
+ with tab2:
 
 
 
 
 
 
 
 
 
 
105
 
106
+ st.title("Demo video")
107
+ st.video('https://youtu.be/hDbTjWqmrJY')
108
+ st.title("About the Package used")
109
+ st.markdown("###### Sketch is an AI code-writing assistant for pandas users that understands the context of the data, greatly improving the relevance of suggestions. Sketch is usable in seconds and doesn't require adding a plugin to IDE.")
110
+
111
+ st.title("How it works:")
112
+ st.markdown("###### Sketch uses efficient approximation algorithms (data sketches) to quickly summarize the data, and feed that information into language models. Right now, it does this by summarizing the columns and writing these summary statistics as additional context to be used by the code-writing prompt. In the future, the dev team hopes to feed these sketches directly into custom made data + language foundation models to get more accurate results.")
113
+
114
+ st.title("Usecases:")
115
+ st.markdown("###### --- Data Catalogging: General tagging (eg. PII identification), Metadata generation (names and descriptions)")
116
+ st.markdown("###### --- Data Engineering: Data cleaning and masking (compliance), Derived feature creation and extraction")
117
+ st.markdown("###### --- Data Analysis: Data questions, Data Visualizations")
118
+
119
+ st.caption("More details available in Github Repository: https://github.com/approximatelabs/sketch")