Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,11 +4,9 @@ import fitz # PyMuPDF
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
-from langchain.chains import RetrievalQA
-from langchain_community.llms import HuggingFaceEndpoint
 import requests
 import os
-import
+import time
 
 # Page configuration
 st.set_page_config(
@@ -68,17 +66,22 @@ st.markdown("""
         animation: fadeIn 0.5s ease-in-out;
     }
 
+    .error {
+        background-color: #ffebee;
+        border-left: 4px solid #f44336;
+        padding: 10px;
+    }
+
+    .info {
+        background-color: #e3f2fd;
+        border-left: 4px solid #2196f3;
+        padding: 10px;
+    }
+
     @keyframes fadeIn {
         from { opacity: 0; }
         to { opacity: 1; }
     }
-
-    .spinner {
-        display: flex;
-        justify-content: center;
-        align-items: center;
-        height: 100px;
-    }
 </style>
 """, unsafe_allow_html=True)
 
@@ -97,10 +100,11 @@ if 'history' not in st.session_state:
 def load_embedding_model():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
-def query_hf_inference_api(prompt, model="google/flan-t5-xxl", max_tokens=200):
-    """Query Hugging Face Inference API
-
-
+def query_hf_inference_api(prompt, max_tokens=200):
+    """Query Hugging Face Inference API with error handling and retry"""
+    MODEL = "google/flan-t5-large"  # Smaller, freely accessible model
+    API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"
+    headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"} if os.getenv('HF_API_KEY') else {}
     payload = {
         "inputs": prompt,
         "parameters": {
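For reference, the request pattern this function builds can be exercised outside Streamlit. A minimal sketch follows; the parameter names (max_new_tokens, temperature) are typical for the HF Inference API but are assumptions here, since the commit's actual "parameters" dict is cut off at the hunk boundary:

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-large"
headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"} if os.getenv("HF_API_KEY") else {}
payload = {
    "inputs": "Summarize: mitochondria are the powerhouse of the cell.",
    "parameters": {"max_new_tokens": 200, "temperature": 0.7},  # assumed parameter names
}
response = requests.post(API_URL, headers=headers, json=payload)
print(response.status_code, response.json() if response.ok else response.text[:200])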
@@ -112,11 +116,36 @@ def query_hf_inference_api(prompt, model="google/flan-t5-xxl", max_tokens=200):
 
     try:
         response = requests.post(API_URL, headers=headers, json=payload)
-
-
-
+
+        if response.status_code == 200:
+            result = response.json()
+            return result[0]['generated_text'] if result else ""
+
+        elif response.status_code == 403:
+            st.error("403 Forbidden: Please check your Hugging Face API token and model access")
+            st.markdown("""
+            <div class="info">
+                <h4>How to fix this:</h4>
+                <ol>
+                    <li>Get your free Hugging Face token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
+                    <li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
+                    <li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">https://huggingface.co/google/flan-t5-large</a></li>
+                </ol>
+            </div>
+            """, unsafe_allow_html=True)
+            return ""
+
+        elif response.status_code == 429:
+            st.warning("Rate limit exceeded. Waiting and retrying...")
+            time.sleep(5)  # Wait 5 seconds before retrying
+            return query_hf_inference_api(prompt, max_tokens)
+
+        else:
+            st.error(f"API Error {response.status_code}: {response.text[:200]}")
+            return ""
+
     except Exception as e:
-        st.error(f"
+        st.error(f"Connection error: {str(e)}")
         return ""
 
 def process_pdf(pdf_file):
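Note that the 429 branch above retries by calling query_hf_inference_api recursively with no attempt cap, so sustained rate-limiting could recurse indefinitely. A bounded alternative, as a sketch (not part of this commit; the helper name query_with_retries is hypothetical):

import time
import requests

def query_with_retries(api_url, headers, payload, max_attempts=3):
    """Hypothetical bounded-retry variant of the 429 handling above."""
    for attempt in range(max_attempts):
        response = requests.post(api_url, headers=headers, json=payload)
        if response.status_code == 200:
            result = response.json()
            return result[0]["generated_text"] if result else ""
        if response.status_code == 429:
            time.sleep(5 * (attempt + 1))  # back off a little longer each time
            continue
        break  # 403 and other errors are not retryable here
    return ""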
@@ -201,6 +230,8 @@ def generate_qa_for_chapter(start_page, end_page):
             question = query_hf_inference_api(prompt, max_tokens=100)
             if question and not question.endswith("?"):
                 question += "?"
+            if question:  # Only add if we got a valid question
+                qa_pairs.append((question, ""))
         else:  # Generate answer
             if qa_pairs:  # Ensure we have a question to answer
                 prompt = f"Answer this question: {qa_pairs[-1][0]} using this context: {chunk[:500]}"
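From the visible context, generate_qa_for_chapter alternates between producing a question and answering the most recent one, accumulating (question, answer) tuples. A self-contained sketch of that pattern; the even/odd alternation over chunks and the generate_text callable are assumptions, since the loop header is outside this hunk:

def build_qa_pairs(chunks, generate_text):
    """Sketch of the alternating question/answer accumulation."""
    qa_pairs = []
    for i, chunk in enumerate(chunks):
        if i % 2 == 0:  # assumed alternation: even chunks yield questions
            question = generate_text(f"Generate a study question about: {chunk[:500]}")
            if question and not question.endswith("?"):
                question += "?"
            if question:  # only keep valid questions
                qa_pairs.append((question, ""))
        elif qa_pairs:  # answer the most recent question
            prompt = f"Answer this question: {qa_pairs[-1][0]} using this context: {chunk[:500]}"
            qa_pairs[-1] = (qa_pairs[-1][0], generate_text(prompt))
    return qa_pairs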
@@ -212,6 +243,20 @@
 # App header
 st.markdown("<h1 class='header'>📚 PDF Study Assistant</h1>", unsafe_allow_html=True)
 
+# API Token Instructions
+if not os.getenv("HF_API_KEY"):
+    st.markdown("""
+    <div class="info">
+        <h4>Setup Required:</h4>
+        <p>This app requires a free Hugging Face API token to work:</p>
+        <ol>
+            <li>Get your token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
+            <li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
+            <li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">google/flan-t5-large</a></li>
+        </ol>
+    </div>
+    """, unsafe_allow_html=True)
+
 # PDF Upload Section
 with st.container():
     st.subheader("📤 Upload Your Textbook/Notes")
@@ -246,11 +291,12 @@ if pdf_file:
     if user_question:
         with st.spinner("🤔 Thinking..."):
            answer, docs = ask_question(user_question)
-
-
-
-
-
+            if answer:
+                st.markdown(f"<div class='card'><b>Answer:</b> {answer}</div>", unsafe_allow_html=True)
+
+                with st.expander("🔍 See source passages"):
+                    for i, doc in enumerate(docs):
+                        st.markdown(f"**Passage {i+1}:** {doc.page_content[:500]}...")
 
 # Chapter Q&A Generation Tab
 elif selected_tab == "Generate Chapter Q&A":
@@ -293,6 +339,6 @@ if pdf_file:
 st.markdown("---")
 st.markdown("""
 <div style="text-align: center; padding: 20px;">
-    Built with ❤️ for students | PDF Study Assistant
+    Built with ❤️ for students | PDF Study Assistant v3.0
 </div>
 """, unsafe_allow_html=True)
|