sathvikk committed on
Commit
f4b5840
·
verified ·
1 Parent(s): f3bf523

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +32 -52
src/streamlit_app.py CHANGED
@@ -6,69 +6,56 @@ import streamlit as st
6
  import fitz # PyMuPDF
7
  from transformers import pipeline
8
 
9
- # Security configuration
10
  st.set_page_config(
11
  page_title="PrepPal",
12
  page_icon="πŸ“˜",
13
- layout="wide",
14
- menu_items={'About': "PrepPal - AI-powered PDF summarizer"}
15
  )
16
 
17
- # Fix for 403 errors
18
  st.markdown("""
19
- <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:;">
20
  """, unsafe_allow_html=True)
21
 
22
  @st.cache_resource
23
def load_summarizer():
    """Build the summarization pipeline used by the app.

    The pipeline is created for the "summarization" task with the
    ``facebook/bart-large-cnn`` model and ``device=-1`` (CPU).

    Returns:
        The pipeline object, or ``None`` when construction fails; in that
        case the error is shown to the user through ``st.error``.
    """
    try:
        summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            device=-1,  # Force CPU for Hugging Face Spaces
        )
    except Exception as e:
        # Report the failure in the UI instead of crashing the app.
        st.error(f"Model loading failed: {str(e)}")
        return None
    return summarizer
33
 
34
def safe_extract_text(uploaded_file):
    """Extract the text of an uploaded PDF via a temporary file.

    The upload's bytes are written to a temporary ``.pdf`` file, the file is
    opened with PyMuPDF (``fitz``), and the text of every page is joined
    with newlines.  The temporary file is removed whether or not extraction
    succeeds.

    Args:
        uploaded_file: Object exposing ``getbuffer()`` that returns the PDF
            bytes (presumably a Streamlit upload -- confirm against caller).

    Returns:
        The extracted text with surrounding whitespace stripped, or ``""``
        when any step fails (the error is reported through ``st.error``).
    """
    tmp_path = None
    try:
        # delete=False so the file can be reopened by path after closing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            # Record the path before writing so a failed write is cleaned up too.
            tmp_path = tmp.name
            tmp.write(uploaded_file.getbuffer())

        with fitz.open(tmp_path) as doc:
            text = "\n".join(page.get_text() for page in doc)
        return text.strip()
    except Exception as e:
        st.error(f"PDF processing error: {str(e)}")
        return ""
    finally:
        # Always remove the temp file; previously it leaked whenever
        # opening or reading the PDF raised.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not discard
                # text that was extracted successfully.
                pass
52
 
53
def _split_on_whitespace(text, max_chunk):
    """Split *text* into pieces of at most *max_chunk* characters.

    Pieces end at whitespace when the window contains any, so words are not
    cut in half; a single run longer than *max_chunk* is hard-cut.  Blank
    pieces are dropped.
    """
    pieces = []
    start = 0
    total = len(text)
    while start < total:
        end = min(start + max_chunk, total)
        # Only look for a better cut point when the window ends mid-word.
        if end < total and not text[end].isspace():
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            if cut > start:
                end = cut
        piece = text[start:end].strip()
        if piece:
            pieces.append(piece)
        start = end
    return pieces


def summarize_text(text, model, max_chunk=1500):
    """Summarize *text* chunk by chunk and join the partial summaries.

    Args:
        text: The text to summarize.
        model: Callable invoked as ``model(chunk, max_length=150,
            min_length=50, do_sample=False)`` and expected to return a
            sequence whose first item has a ``'summary_text'`` key.
        max_chunk: Maximum number of characters per chunk.

    Returns:
        The chunk summaries joined with newlines, or ``""`` when *text* or
        *model* is falsy.

    Raises:
        ValueError: If *max_chunk* is smaller than 1.
    """
    if not text or not model:
        return ""
    if max_chunk < 1:
        raise ValueError("max_chunk must be a positive integer")

    summary = []
    # Split at whitespace rather than at fixed offsets so the model never
    # receives words that were cut in half at a chunk boundary.
    for chunk in _split_on_whitespace(text, max_chunk):
        result = model(
            chunk,
            max_length=150,
            min_length=50,
            do_sample=False,
        )
        summary.append(result[0]['summary_text'])

    return "\n".join(summary)
70
 
71
- # Main App with all tabs
72
  def main():
73
  st.title("πŸ“˜ PrepPal - Study Assistant")
74
 
@@ -76,52 +63,45 @@ def main():
76
 
77
  with tab1:
78
  st.header("PDF Summarizer")
79
- st.write("Upload your PDF (max 10MB) for an AI-generated summary")
80
-
81
  uploaded_file = st.file_uploader(
82
- "Choose PDF file",
83
  type=["pdf"],
84
- accept_multiple_files=False,
85
- key="pdf_uploader"
86
  )
87
 
88
  if uploaded_file:
89
- if uploaded_file.size > 10_000_000:
90
- st.error("File too large (max 10MB)")
91
  else:
92
- with st.spinner("Extracting text..."):
93
- text = safe_extract_text(uploaded_file)
94
 
95
  if text:
96
  with st.expander("View extracted text"):
97
- st.text(text[:1000] + "...")
98
 
99
  if st.button("Generate Summary"):
100
- with st.spinner("Summarizing..."):
101
- model = load_summarizer()
102
  if model:
103
- summary = summarize_text(text, model)
104
-
105
- st.subheader("AI Summary")
106
  st.write(summary)
107
-
108
  st.download_button(
109
  "Download Summary",
110
  data=summary,
111
- file_name="summary.txt",
112
- mime="text/plain"
113
  )
114
 
115
  with tab2:
116
  st.header("Ask a Question")
117
- st.info("This feature will allow you to ask questions about your documents")
118
- st.write("Coming in the next update!")
119
 
120
  with tab3:
121
- st.header("Your Feedback")
122
- feedback = st.text_area("How can we improve PrepPal?")
123
- if st.button("Submit Feedback"):
124
- st.success("Thank you! Your feedback has been recorded.")
125
 
126
  if __name__ == "__main__":
127
  main()
 
6
  import fitz # PyMuPDF
7
  from transformers import pipeline
8
 
9
+ # App configuration
10
  st.set_page_config(
11
  page_title="PrepPal",
12
  page_icon="πŸ“˜",
13
+ layout="wide"
 
14
  )
15
 
16
+ # Security headers
17
  st.markdown("""
18
+ <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' 'unsafe-inline'">
19
  """, unsafe_allow_html=True)
20
 
21
  @st.cache_resource
22
def load_model():
    """Build the summarization pipeline used by the app.

    The pipeline is created for the "summarization" task with the
    ``Falconsai/text_summarization`` model and ``device=-1`` (CPU).

    Returns:
        The pipeline object, or ``None`` when construction fails; in that
        case the error is shown to the user through ``st.error``.
    """
    try:
        summarizer = pipeline(
            "summarization",
            model="Falconsai/text_summarization",  # Smaller model for Spaces
            device=-1,  # Use CPU
        )
    except Exception as e:
        # Report the failure in the UI instead of crashing the app.
        st.error(f"Failed to load model: {str(e)}")
        return None
    return summarizer
32
 
33
def process_pdf(uploaded_file):
    """Extract the text of an uploaded PDF via a temporary file.

    The upload's bytes are written to a temporary ``.pdf`` file, the file is
    opened with PyMuPDF (``fitz``), and the text of every page is joined
    with newlines.  The temporary file is removed whether or not extraction
    succeeds.

    Args:
        uploaded_file: Object exposing ``getbuffer()`` that returns the PDF
            bytes (presumably a Streamlit upload -- confirm against caller).

    Returns:
        The extracted text with surrounding whitespace stripped, or ``""``
        when any step fails (the error is reported through ``st.error``).
    """
    tmp_path = None
    try:
        # delete=False so the file can be reopened by path after closing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            # Record the path before writing so a failed write is cleaned up too.
            tmp_path = tmp.name
            tmp.write(uploaded_file.getbuffer())

        with fitz.open(tmp_path) as doc:
            text = "\n".join(page.get_text() for page in doc)
        return text.strip()
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return ""
    finally:
        # Always remove the temp file; previously it leaked whenever
        # opening or reading the PDF raised.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not discard
                # text that was extracted successfully.
                pass
47
 
48
def _split_into_chunks(text, max_chunk):
    """Split *text* into pieces of at most *max_chunk* characters.

    Pieces end at whitespace when the window contains any, so words are not
    cut in half; a single run longer than *max_chunk* is hard-cut.  Blank
    pieces are dropped.
    """
    chunks = []
    start = 0
    total = len(text)
    while start < total:
        end = min(start + max_chunk, total)
        # Only look for a better cut point when the window ends mid-word.
        if end < total and not text[end].isspace():
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            if cut > start:
                end = cut
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        start = end
    return chunks


def generate_summary(text, model, max_chunk=1000):
    """Summarize *text* chunk by chunk and join the partial summaries.

    Args:
        text: The text to summarize.
        model: Callable invoked as ``model(chunk, max_length=150,
            min_length=30, do_sample=False)`` and expected to return a
            sequence whose first item has a ``'summary_text'`` key.
        max_chunk: Maximum number of characters per chunk (defaults to the
            previously hard-coded 1000).

    Returns:
        The chunk summaries joined with single spaces, or ``""`` when
        *text* or *model* is falsy.

    Raises:
        ValueError: If *max_chunk* is smaller than 1.
    """
    if not text or not model:
        return ""
    if max_chunk < 1:
        raise ValueError("max_chunk must be a positive integer")

    summary = []
    # Split at whitespace rather than at fixed offsets so the model never
    # receives words that were cut in half at a chunk boundary.
    for chunk in _split_into_chunks(text, max_chunk):
        result = model(chunk, max_length=150, min_length=30, do_sample=False)
        summary.append(result[0]['summary_text'])
    return " ".join(summary)
 
58
 
 
59
  def main():
60
  st.title("πŸ“˜ PrepPal - Study Assistant")
61
 
 
63
 
64
  with tab1:
65
  st.header("PDF Summarizer")
 
 
66
  uploaded_file = st.file_uploader(
67
+ "Upload PDF (max 5MB)",
68
  type=["pdf"],
69
+ accept_multiple_files=False
 
70
  )
71
 
72
  if uploaded_file:
73
+ if uploaded_file.size > 5_000_000:
74
+ st.error("File too large (max 5MB)")
75
  else:
76
+ with st.spinner("Processing PDF..."):
77
+ text = process_pdf(uploaded_file)
78
 
79
  if text:
80
  with st.expander("View extracted text"):
81
+ st.text(text[:500] + "...")
82
 
83
  if st.button("Generate Summary"):
84
+ with st.spinner("Creating summary..."):
85
+ model = load_model()
86
  if model:
87
+ summary = generate_summary(text, model)
88
+ st.subheader("Summary")
 
89
  st.write(summary)
 
90
  st.download_button(
91
  "Download Summary",
92
  data=summary,
93
+ file_name="summary.txt"
 
94
  )
95
 
96
  with tab2:
97
  st.header("Ask a Question")
98
+ st.info("This feature is coming soon!")
 
99
 
100
  with tab3:
101
+ st.header("Feedback")
102
+ feedback = st.text_area("Your suggestions")
103
+ if st.button("Submit"):
104
+ st.success("Thank you for your feedback!")
105
 
106
  if __name__ == "__main__":
107
  main()