awacke1 committed
Commit b9db024 · verified · 1 Parent(s): b30a340

Update app.py

Files changed (1)
  1. app.py  +39 -9
app.py CHANGED
@@ -1,9 +1,14 @@
 import os
+import zipfile
+import streamlit as st
 import nltk
 from nltk.corpus import stopwords
 from sklearn.feature_extraction.text import CountVectorizer
 import plotly.express as px
 
+nltk.download('punkt')
+nltk.download('stopwords')
+
 def preprocess_text(text):
     # Tokenize the text and remove stopwords
     tokens = nltk.word_tokenize(text.lower())
@@ -11,10 +16,7 @@ def preprocess_text(text):
     filtered_tokens = [token for token in tokens if token not in stop_words]
     return ' '.join(filtered_tokens)
 
-def get_context_files(prompt):
-    # Get all .md files in the current directory
-    md_files = [file for file in os.listdir() if file.endswith('.md')]
-
+def get_context_files(prompt, md_files):
     # Preprocess the prompt and context files
     processed_prompt = preprocess_text(prompt)
     processed_files = {}
@@ -46,11 +48,39 @@ def get_context_files(prompt):
     # Create a plotly graph showing the counts of matching words for the top ten files
     fig = px.bar(x=[file for file, count in sorted_files[:10]], y=[count for file, count in sorted_files[:10]])
     fig.update_layout(xaxis_title='File', yaxis_title='Number of Matching Words')
-    fig.show()
+    st.plotly_chart(fig)
 
     return context_prompt
 
-# Example usage
-prompt = "What is the importance of machine learning in healthcare?"
-context_prompt = get_context_files(prompt)
-print(context_prompt)
+# Streamlit app
+def main():
+    st.title("Context-Aware Prompt Evaluation")
+
+    # File upload
+    uploaded_file = st.file_uploader("Upload a zip file with .md files", type="zip")
+
+    if uploaded_file is not None:
+        # Unzip the uploaded file
+        with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+            zip_ref.extractall('uploaded_files')
+
+        # Get the list of .md files from the uploaded directory
+        md_files = [os.path.join('uploaded_files', file) for file in os.listdir('uploaded_files') if file.endswith('.md')]
+
+        # Show the list of files
+        st.subheader("Uploaded Files")
+        for file in md_files:
+            st.write(file)
+
+        # Prompt input
+        prompt = st.session_state.get('prompt', 'What are the main use cases of generative AI in healthcare that are currently unsolved?')
+        prompt = st.text_area("Enter your prompt", value=prompt, key='prompt')
+
+        # Evaluate the files for the prompt
+        if st.button("Evaluate"):
+            context_prompt = get_context_files(prompt, md_files)
+            st.subheader("Context Prompt")
+            st.write(context_prompt)
+
+if __name__ == '__main__':
+    main()
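
Note: the hunks above only show the edges of get_context_files. The unchanged middle of the function (old lines 21-45), which fills processed_files, computes the per-file match counts behind sorted_files, and assembles context_prompt, is not part of this diff and is not reproduced here. Purely for orientation, the following is a minimal sketch of that kind of ranking step, assuming a simple shared-token count between the preprocessed prompt and each preprocessed file; the actual code in app.py may differ (it imports CountVectorizer, for instance, which suggests a vectorized count).

# Hypothetical sketch, not the code from app.py; it only mirrors the names
# referenced by the visible hunks (processed_files, sorted_files, context_prompt).
def rank_files_by_overlap(processed_prompt, processed_files):
    # processed_files maps a file path to its preprocessed text
    prompt_tokens = set(processed_prompt.split())
    counts = {
        path: len(prompt_tokens & set(text.split()))
        for path, text in processed_files.items()
    }
    # Highest number of matching words first, as the bar chart expects
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)

def build_context_prompt(prompt, sorted_files, top_n=3):
    # Prepend the contents of the best-matching files to the user's prompt
    parts = []
    for path, count in sorted_files[:top_n]:
        if count > 0:
            with open(path, 'r', encoding='utf-8') as f:
                parts.append(f.read())
    parts.append(prompt)
    return '\n\n'.join(parts)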
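
With this commit the script-style example usage at the bottom of app.py is gone and the module is driven by Streamlit, so it is launched with streamlit run app.py rather than plain python. A minimal, hypothetical way to still exercise get_context_files outside the UI, assuming app.py is importable from the working directory and some .md files are present there (importing it also triggers the two nltk.download calls, and the st.plotly_chart call inside the function will typically only emit a warning outside a running Streamlit session):

# Hypothetical standalone check; this snippet is not part of the commit.
import os

from app import get_context_files  # assumes the updated file is saved as app.py

md_files = [f for f in os.listdir('.') if f.endswith('.md')]
prompt = "What is the importance of machine learning in healthcare?"
print(get_context_files(prompt, md_files))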