Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -105,15 +105,30 @@ def init_vector_search() -> MongoDBAtlasVectorSearch:
|
|
105 |
st.error("β Failed to connect to MongoDB Atlas manually")
|
106 |
st.error(str(e))
|
107 |
raise e
|
108 |
-
# ===================
|
109 |
-
def
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
# =================== Format Retrieved Chunks ===================
|
118 |
def format_docs(docs: List[Document]) -> str:
    """Join the text of the retrieved documents into one context string.

    Each document contributes its ``page_content``; when that value is falsy
    the ``"content"`` entry of the document's metadata is used instead (an
    empty string if the key is absent). Pieces are separated by a blank line.
    """
    chunks = []
    for doc in docs:
        body = doc.page_content
        if not body:
            # Fall back to the copy of the text stored in the metadata.
            body = doc.metadata.get("content", "")
        chunks.append(body)
    return "\n\n".join(chunks)
|
@@ -151,20 +166,41 @@ def main():
|
|
151 |
st.set_page_config(page_title="Grant Buddy RAG", page_icon="π€")
|
152 |
st.title("π€ Grant Buddy: Grant-Writing Assistant")
|
153 |
|
154 |
-
uploaded_file = st.file_uploader("Upload PDF or TXT for extra context (optional)", type=["pdf", "txt"])
|
155 |
uploaded_text = ""
|
|
|
|
|
|
|
|
|
156 |
if uploaded_file:
|
157 |
if uploaded_file.name.endswith(".pdf"):
|
158 |
reader = PdfReader(uploaded_file)
|
159 |
uploaded_text = "\n".join([page.extract_text() for page in reader.pages])
|
160 |
-
questions = extract_questions_from_text(uploaded_text)
|
161 |
elif uploaded_file.name.endswith(".txt"):
|
162 |
uploaded_text = uploaded_file.read().decode("utf-8")
|
163 |
-
questions = extract_questions_from_text(uploaded_text)
|
164 |
|
|
|
|
|
|
|
|
|
|
|
165 |
retriever = init_vector_search().as_retriever(search_kwargs={"k": 10, "score_threshold": 0.75})
|
166 |
rag_chain = get_rag_chain(retriever)
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
query = st.text_input("Ask a grant-related question")
|
169 |
if st.button("Submit"):
|
170 |
if not query:
|
@@ -184,6 +220,7 @@ def main():
|
|
184 |
st.markdown("---")
|
185 |
|
186 |
|
|
|
187 |
if __name__ == "__main__":
|
188 |
main()
|
189 |
|
|
|
105 |
st.error("β Failed to connect to MongoDB Atlas manually")
|
106 |
st.error(str(e))
|
107 |
raise e
|
108 |
+
# =================== Question/Headers Extraction ===================
|
109 |
+
def extract_questions_and_headers(text: str) -> List[str]:
    """Extract section headers and question-like prompts from ``text``.

    Headers are recognised as numbered or plain ``**bold**`` spans, ALL-CAPS
    lines, lines made only of letters and spaces that start with a capital,
    and ``Label:`` lines. Questions are lines ending in ``?`` or starting
    with ``Please``, ``How``, ``What`` or ``Describe``.

    Args:
        text: Raw document text; may be empty and may use any newline style.

    Returns:
        All headers followed by all questions, each stripped of surrounding
        whitespace, with duplicates removed (first occurrence wins).
    """
    if not text:
        return []

    # "$" only matches before "\n", so CRLF / CR input would otherwise hide
    # every line-anchored header and question.
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")

    # Every alternative carries exactly one capture group so that findall()
    # reports a value whichever alternative matched. Character classes
    # exclude "\n" so a header can never swallow the following lines.
    header_patterns = [
        r'\d+\.\s+\*\*([^*\n]+)\*\*',   # 1. **Title**
        r'\*\*([^*\n]+)\*\*',           # **Title**
        r'^([A-Z][^a-z\n]*[A-Z])$',     # ALL-CAPS LINE
        r'^([A-Z][A-Za-z \t]{3,})$',    # Line of letters and spaces
        r'^([A-Z][A-Za-z \t]+:)$',      # Label:
    ]
    question_patterns = [
        r'^.+\?$',
        r'^\*?Please .+',
        r'^How .+',
        r'^What .+',
        r'^Describe .+',
    ]
    header_re = re.compile("|".join(header_patterns), re.MULTILINE)
    question_re = re.compile("|".join(question_patterns), re.MULTILINE)

    # findall() yields one tuple per match with a slot per group; only the
    # slot belonging to the alternative that matched is non-empty.
    headers = [
        captured
        for groups in header_re.findall(normalized)
        for captured in groups
        if captured
    ]
    questions = question_re.findall(normalized)

    # A line such as "Describe your project" is both a header and a question;
    # drop repeats (order-preserving) so callers do not process it twice.
    cleaned = (item.strip() for item in headers + questions)
    return list(dict.fromkeys(item for item in cleaned if item))
|
131 |
+
|
132 |
# =================== Format Retrieved Chunks ===================
|
133 |
def format_docs(docs: List[Document]) -> str:
    """Join the text of the retrieved documents into one context string.

    Each document contributes its ``page_content``; when that value is falsy
    the ``"content"`` entry of the document's metadata is used instead (an
    empty string if the key is absent). Pieces are separated by a blank line.
    """
    chunks = []
    for doc in docs:
        body = doc.page_content
        if not body:
            # Fall back to the copy of the text stored in the metadata.
            body = doc.metadata.get("content", "")
        chunks.append(body)
    return "\n\n".join(chunks)
|
|
|
166 |
st.set_page_config(page_title="Grant Buddy RAG", page_icon="π€")
|
167 |
st.title("π€ Grant Buddy: Grant-Writing Assistant")
|
168 |
|
|
|
169 |
uploaded_text = ""
|
170 |
+
questions = []
|
171 |
+
|
172 |
+
# Upload Section
|
173 |
+
uploaded_file = st.file_uploader("Upload PDF or TXT for extra context (optional)", type=["pdf", "txt"])
|
174 |
if uploaded_file:
|
175 |
if uploaded_file.name.endswith(".pdf"):
|
176 |
reader = PdfReader(uploaded_file)
|
177 |
uploaded_text = "\n".join([page.extract_text() for page in reader.pages])
|
|
|
178 |
elif uploaded_file.name.endswith(".txt"):
|
179 |
uploaded_text = uploaded_file.read().decode("utf-8")
|
|
|
180 |
|
181 |
+
# ✅ Extract questions or headers from uploaded text
|
182 |
+
questions = extract_questions_and_headers(uploaded_text)
|
183 |
+
st.markdown("### π Questions Extracted from Uploaded Document")
|
184 |
+
|
185 |
+
# Initialize RAG
|
186 |
retriever = init_vector_search().as_retriever(search_kwargs={"k": 10, "score_threshold": 0.75})
|
187 |
rag_chain = get_rag_chain(retriever)
|
188 |
|
189 |
+
# ✅ Batch Q&A from Uploaded File
|
190 |
+
if uploaded_text and questions:
|
191 |
+
answers = []
|
192 |
+
for q in questions:
|
193 |
+
full_query = f"{q}\n\nAdditional context:\n{uploaded_text}"
|
194 |
+
response = rag_chain.invoke(full_query)
|
195 |
+
answers.append({"question": q, "answer": response})
|
196 |
+
|
197 |
+
for item in answers:
|
198 |
+
st.markdown(f"### β {item['question']}")
|
199 |
+
st.markdown(f"π¬ {item['answer']}")
|
200 |
+
|
201 |
+
st.markdown("---")
|
202 |
+
|
203 |
+
# ✅ Manual Input
|
204 |
query = st.text_input("Ask a grant-related question")
|
205 |
if st.button("Submit"):
|
206 |
if not query:
|
|
|
220 |
st.markdown("---")
|
221 |
|
222 |
|
223 |
+
|
224 |
if __name__ == "__main__":
|
225 |
main()
|
226 |
|