sunbal7 committed on
Commit 2f15a0b · verified · 1 Parent(s): 351a569

Update app.py

Files changed (1)
app.py +89 -171
app.py CHANGED
@@ -1,179 +1,97 @@
 import streamlit as st
-import os
-import tempfile
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.chat_models import ChatOllama
-from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.output_parsers import StrOutputParser
-import base64
-
-# Set page config
-st.set_page_config(
-    page_title="EduQuery - Smart PDF Assistant",
-    page_icon="📚",
-    layout="wide",
-    initial_sidebar_state="collapsed"
)

-# Custom CSS for colorful UI
-def local_css(file_name):
-    with open(file_name) as f:
-        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
-
-local_css("style.css")
-
-# Header with gradient
-st.markdown("""
-<div class="header">
-    <h1>📚 EduQuery</h1>
-    <p>Smart PDF Assistant for Students</p>
-</div>
-""", unsafe_allow_html=True)
-
-# Initialize session state
-if "vector_store" not in st.session_state:
-    st.session_state.vector_store = None
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-# Model selection
-MODEL_NAME = "nous-hermes2"  # Best open-source model for instruction following
-
-# PDF Processing
-def process_pdf(pdf_file):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
-        tmp_file.write(pdf_file.getvalue())
-        tmp_path = tmp_file.name
-
-    loader = PyPDFLoader(tmp_path)
-    docs = loader.load()
-
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,
-        chunk_overlap=200,
-        length_function=len
-    )
-    chunks = text_splitter.split_documents(docs)
-
-    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-    vector_store = FAISS.from_documents(chunks, embeddings)
-
-    os.unlink(tmp_path)
-    return vector_store
-
-# RAG Setup
-def setup_qa_chain(vector_store):
-    llm = ChatOllama(model=MODEL_NAME, temperature=0.3)
-
-    custom_prompt = """
-You are an expert academic assistant. Answer the question based only on the following context:
-{context}
-
-Question: {question}
-
-Provide a clear, concise answer with page number references. If unsure, say "I couldn't find this information in the document".
-    """
-
-    prompt = PromptTemplate(
-        template=custom_prompt,
-        input_variables=["context", "question"]
-    )
-
-    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
-
-    qa_chain = (
-        {"context": retriever, "question": RunnablePassthrough()}
-        | prompt
-        | llm
-        | StrOutputParser()
-    )
-
-    return qa_chain
-
-# Generate questions from chapter
-def generate_chapter_questions(vector_store, chapter_title):
-    llm = ChatOllama(model=MODEL_NAME, temperature=0.7)
-
-    prompt = PromptTemplate(
-        input_variables=["chapter_title"],
-        template="""
-You are an expert educator. Generate 5 important questions and answers about '{chapter_title}'
-that would help students understand key concepts. Format as:
-
-Q1: [Question]
-A1: [Answer with page reference]
-
-Q2: [Question]
-A2: [Answer with page reference]
-..."""
)
-
-    chain = prompt | llm | StrOutputParser()
-    return chain.invoke({"chapter_title": chapter_title})

-# File upload section
-st.subheader("📤 Upload Your Textbook/Notes")
-uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False)

-if uploaded_file:
-    with st.spinner("Processing PDF..."):
-        st.session_state.vector_store = process_pdf(uploaded_file)
-    st.success("PDF processed successfully! You can now ask questions.")
-
-# Main content columns
-col1, col2 = st.columns([1, 2])
-
-# Chapter-based Q&A Generator
-with col1:
-    st.subheader("🔍 Generate Chapter Questions")
-    chapter_title = st.text_input("Enter chapter title/section name:")
-
-    if st.button("Generate Q&A") and chapter_title and st.session_state.vector_store:
-        with st.spinner(f"Generating questions about {chapter_title}..."):
-            questions = generate_chapter_questions(
-                st.session_state.vector_store,
-                chapter_title
-            )
-        st.markdown(f"<div class='qa-box'>{questions}</div>", unsafe_allow_html=True)
-    elif chapter_title and not st.session_state.vector_store:
-        st.warning("Please upload a PDF first")
-
-# Chat interface
-with col2:
-    st.subheader("💬 Ask Anything About the Document")
-
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-
-    if prompt := st.chat_input("Your question..."):
-        if not st.session_state.vector_store:
-            st.warning("Please upload a PDF first")
-            st.stop()
-
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.markdown(prompt)
-
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking..."):
-                qa_chain = setup_qa_chain(st.session_state.vector_store)
-                response = qa_chain.invoke(prompt)
-                st.markdown(response)
-        st.session_state.messages.append({"role": "assistant", "content": response})
-
-# Footer
-st.markdown("---")
-st.markdown(
-    """
-    <div class="footer">
-        <p>EduQuery - Helping students learn smarter • Powered by Nous-Hermes2 and LangChain</p>
-    </div>
-    """,
-    unsafe_allow_html=True
-)
 
 import streamlit as st
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.chains import ConversationalRetrievalChain
+from langchain_community.llms import HuggingFaceHub
+from langchain.memory import ConversationBufferMemory
+import os
+
+# App title and color theme
+st.set_page_config(page_title="📘 PDF Q&A Agent", layout="centered", page_icon="📘")
+
+st.markdown(
+    """
+<div style="background-color:#E3E8FF;padding:10px;border-radius:10px">
+<h2 style="color:#3C3C88;text-align:center">📘 Student PDF Assistant</h2>
+<p style="color:#444;text-align:center">Ask questions from your uploaded PDF and generate Q&A for chapters!</p>
+</div>
+    """, unsafe_allow_html=True
 )

+# Upload PDF
+uploaded_file = st.file_uploader("📎 Upload your PDF file", type=["pdf"])
+
+if uploaded_file:
+    # Save PDF temporarily
+    with open("uploaded.pdf", "wb") as f:
+        f.write(uploaded_file.read())
+
+    st.success("✅ PDF uploaded successfully!")
+
+    # Load and split PDF
+    loader = PyPDFLoader("uploaded.pdf")
+    pages = loader.load_and_split()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
+    chunks = text_splitter.split_documents(pages)
+
+    # Embedding
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    vectordb = FAISS.from_documents(chunks, embeddings)
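+
+    # The FAISS index is rebuilt from scratch on every upload; a minimal sketch of
+    # persisting it between runs, assuming LangChain's FAISS save/load API:
+    #   vectordb.save_local("faiss_index")
+    #   vectordb = FAISS.load_local("faiss_index", embeddings)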
+
+    # Load Open Source LLM from Hugging Face (Mistral or any lightweight LLM)
+    repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
+    llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_new_tokens": 500})
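+    # NOTE: HuggingFaceHub calls the hosted Hugging Face Inference API and expects
+    # an access token, read from the HUGGINGFACEHUB_API_TOKEN environment variable
+    # (set it in the host's secrets before running the app).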
+
+    # Memory and Chain
+    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    qa_chain = ConversationalRetrievalChain.from_llm(
+        llm, retriever=vectordb.as_retriever(), memory=memory
 )
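+
+    # NOTE: Streamlit reruns this whole script on every interaction, so the chain
+    # and its ConversationBufferMemory are rebuilt each run; caching them in
+    # st.session_state would preserve conversational context across turns.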
 
 
 

+    # Chat Interface
+    st.markdown("---")
+    st.markdown("💬 **Ask a question from the PDF:**")
+
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+
+    question = st.text_input("Type your question here...", key="user_input")
+
+    if question:
+        result = qa_chain.run(question)
+        st.session_state.chat_history.append(("You", question))
+        st.session_state.chat_history.append(("Bot", result))
+
+    # Show chat history
+    for sender, msg in st.session_state.chat_history[::-1]:
+        st.markdown(f"**{sender}:** {msg}")
+
+    # Question Generation Button
+    st.markdown("---")
+    if st.button("📚 Generate Q&A from all chapters"):
+        st.info("Generating questions and answers from the content...")
+        questions = [
+            "What is the main idea of this chapter?",
+            "What are the key points discussed?",
+            "Can you summarize this section?",
+            "Are there any definitions or terms introduced?"
+        ]
+        for i, chunk in enumerate(chunks[:3]):  # Limit to first 3 chunks for demo
+            st.markdown(f"**Chapter Section {i+1}:**")
+            for q in questions:
+                answer = llm.invoke(q + "\n" + chunk.page_content[:1000])
+                st.markdown(f"**Q:** {q}")
+                st.markdown(f"**A:** {answer}")
+                st.markdown("---")
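+
+        # NOTE: the Q&A above is generated from the raw text of the first three
+        # chunks (first 1000 characters each), not via the retrieval chain.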
97