awacke1 committed
Commit b9db024 · verified · 1 Parent(s): b30a340

Update app.py

Files changed (1)
  1. app.py  +39 -9
app.py CHANGED
@@ -1,9 +1,14 @@
 import os
+import zipfile
+import streamlit as st
 import nltk
 from nltk.corpus import stopwords
 from sklearn.feature_extraction.text import CountVectorizer
 import plotly.express as px
 
+nltk.download('punkt')
+nltk.download('stopwords')
+
 def preprocess_text(text):
     # Tokenize the text and remove stopwords
     tokens = nltk.word_tokenize(text.lower())
@@ -11,10 +16,7 @@ def preprocess_text(text):
     filtered_tokens = [token for token in tokens if token not in stop_words]
     return ' '.join(filtered_tokens)
 
-def get_context_files(prompt):
-    # Get all .md files in the current directory
-    md_files = [file for file in os.listdir() if file.endswith('.md')]
-
+def get_context_files(prompt, md_files):
     # Preprocess the prompt and context files
     processed_prompt = preprocess_text(prompt)
     processed_files = {}
@@ -46,11 +48,39 @@ def get_context_files(prompt):
     # Create a plotly graph showing the counts of matching words for the top ten files
     fig = px.bar(x=[file for file, count in sorted_files[:10]], y=[count for file, count in sorted_files[:10]])
     fig.update_layout(xaxis_title='File', yaxis_title='Number of Matching Words')
-    fig.show()
+    st.plotly_chart(fig)
 
     return context_prompt
 
-# Example usage
-prompt = "What is the importance of machine learning in healthcare?"
-context_prompt = get_context_files(prompt)
-print(context_prompt)
+# Streamlit app
+def main():
+    st.title("Context-Aware Prompt Evaluation")
+
+    # File upload
+    uploaded_file = st.file_uploader("Upload a zip file with .md files", type="zip")
+
+    if uploaded_file is not None:
+        # Unzip the uploaded file
+        with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+            zip_ref.extractall('uploaded_files')
+
+        # Get the list of .md files from the uploaded directory
+        md_files = [os.path.join('uploaded_files', file) for file in os.listdir('uploaded_files') if file.endswith('.md')]
+
+        # Show the list of files
+        st.subheader("Uploaded Files")
+        for file in md_files:
+            st.write(file)
+
+        # Prompt input
+        prompt = st.session_state.get('prompt', 'What are the main use cases of generative AI in healthcare that are currently unsolved?')
+        prompt = st.text_area("Enter your prompt", value=prompt, key='prompt')
+
+        # Evaluate the files for the prompt
+        if st.button("Evaluate"):
+            context_prompt = get_context_files(prompt, md_files)
+            st.subheader("Context Prompt")
+            st.write(context_prompt)
+
+if __name__ == '__main__':
+    main()
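
Note: the hunks above only show the edges of get_context_files. The unchanged middle of the function (old lines 21-45), which fills processed_files, computes the per-file match counts behind sorted_files, and assembles context_prompt, is not part of this diff and is not reproduced here. Purely for orientation, the following is a minimal sketch of that kind of ranking step, assuming a simple shared-token count between the preprocessed prompt and each preprocessed file; the actual code in app.py may differ (it imports CountVectorizer, for instance, which suggests a vectorized count).

# Hypothetical sketch, not the code from app.py; it only mirrors the names
# referenced by the visible hunks (processed_files, sorted_files, context_prompt).
def rank_files_by_overlap(processed_prompt, processed_files):
    # processed_files maps a file path to its preprocessed text
    prompt_tokens = set(processed_prompt.split())
    counts = {
        path: len(prompt_tokens & set(text.split()))
        for path, text in processed_files.items()
    }
    # Highest number of matching words first, as the bar chart expects
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)

def build_context_prompt(prompt, sorted_files, top_n=3):
    # Prepend the contents of the best-matching files to the user's prompt
    parts = []
    for path, count in sorted_files[:top_n]:
        if count > 0:
            with open(path, 'r', encoding='utf-8') as f:
                parts.append(f.read())
    parts.append(prompt)
    return '\n\n'.join(parts)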
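
With this commit the script-style example usage at the bottom of app.py is gone and the module is driven by Streamlit, so it is launched with streamlit run app.py rather than plain python. A minimal, hypothetical way to still exercise get_context_files outside the UI, assuming app.py is importable from the working directory and some .md files are present there (importing it also triggers the two nltk.download calls, and the st.plotly_chart call inside the function will typically only emit a warning outside a running Streamlit session):

# Hypothetical standalone check; this snippet is not part of the commit.
import os

from app import get_context_files  # assumes the updated file is saved as app.py

md_files = [f for f in os.listdir('.') if f.endswith('.md')]
prompt = "What is the importance of machine learning in healthcare?"
print(get_context_files(prompt, md_files))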