Update app.py
app.py CHANGED
@@ -1,9 +1,14 @@
 import os
+import zipfile
+import streamlit as st
 import nltk
 from nltk.corpus import stopwords
 from sklearn.feature_extraction.text import CountVectorizer
 import plotly.express as px
 
+nltk.download('punkt')
+nltk.download('stopwords')
+
 def preprocess_text(text):
     # Tokenize the text and remove stopwords
     tokens = nltk.word_tokenize(text.lower())
@@ -11,10 +16,7 @@ def preprocess_text(text):
     filtered_tokens = [token for token in tokens if token not in stop_words]
     return ' '.join(filtered_tokens)
 
-def get_context_files(prompt):
-    # Get all .md files in the current directory
-    md_files = [file for file in os.listdir() if file.endswith('.md')]
-
+def get_context_files(prompt, md_files):
     # Preprocess the prompt and context files
     processed_prompt = preprocess_text(prompt)
     processed_files = {}
@@ -46,11 +48,39 @@ def get_context_files(prompt):
     # Create a plotly graph showing the counts of matching words for the top ten files
     fig = px.bar(x=[file for file, count in sorted_files[:10]], y=[count for file, count in sorted_files[:10]])
     fig.update_layout(xaxis_title='File', yaxis_title='Number of Matching Words')
-
+    st.plotly_chart(fig)
 
     return context_prompt
 
-#
-
-
-
+# Streamlit app
+def main():
+    st.title("Context-Aware Prompt Evaluation")
+
+    # File upload
+    uploaded_file = st.file_uploader("Upload a zip file with .md files", type="zip")
+
+    if uploaded_file is not None:
+        # Unzip the uploaded file
+        with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+            zip_ref.extractall('uploaded_files')
+
+        # Get the list of .md files from the uploaded directory
+        md_files = [os.path.join('uploaded_files', file) for file in os.listdir('uploaded_files') if file.endswith('.md')]
+
+        # Show the list of files
+        st.subheader("Uploaded Files")
+        for file in md_files:
+            st.write(file)
+
+        # Prompt input
+        prompt = st.session_state.get('prompt', 'What are the main use cases of generative AI in healthcare that are currently unsolved?')
+        prompt = st.text_area("Enter your prompt", value=prompt, key='prompt')
+
+        # Evaluate the files for the prompt
+        if st.button("Evaluate"):
+            context_prompt = get_context_files(prompt, md_files)
+            st.subheader("Context Prompt")
+            st.write(context_prompt)
+
+if __name__ == '__main__':
+    main()
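Note: with this change, nltk.download('punkt') and nltk.download('stopwords') run at module import on every app start. A minimal sketch of guarding the downloads so they only run when the data is missing; the helper name is an illustration and not part of this commit, though nltk.data.find, the resource paths, and quiet=True are standard NLTK:

import nltk

def ensure_nltk_data():
    # Hypothetical helper: download the tokenizer and stopword data only if absent.
    for resource, path in [('punkt', 'tokenizers/punkt'), ('stopwords', 'corpora/stopwords')]:
        try:
            nltk.data.find(path)
        except LookupError:
            nltk.download(resource, quiet=True)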
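The context between the second and third hunks (new lines 23-47) is collapsed, so the scoring logic inside get_context_files is not visible in this diff; only its inputs (processed_prompt, processed_files) and outputs (sorted_files, context_prompt) appear. As a purely illustrative sketch of how a matching-word count could be computed with CountVectorizer; the helper name and its details are assumptions, not the committed code:

from sklearn.feature_extraction.text import CountVectorizer

def count_matching_words(processed_prompt, processed_text):
    # Hypothetical helper: build a shared vocabulary over the prompt and one file,
    # then count the words that occur in both.
    counts = CountVectorizer().fit_transform([processed_prompt, processed_text]).toarray()
    return int(((counts[0] > 0) & (counts[1] > 0)).sum())

The committed code presumably fills processed_files with per-file counts along these lines, sorts them into sorted_files, and assembles context_prompt from the top matches before plotting.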