"""Streamlit front end for the PDF Tools Suite.

A sidebar radio selects one tool per script run: text summarization, PDF
summarization, PDF password removal, arXiv paper search, PDF merging,
PDF splitting, or PDF-to-text extraction. Only the branch matching the
selected tool renders its widgets.
"""

from datetime import datetime
from io import BytesIO

import streamlit as st
from pypdf import PdfReader, PdfWriter

from textsumm import summarizer
from pdfsum import extract_text_from_pdf, summarize_text, split_text_into_chunks
from pdfpass import remove_pdf_password
from papersearch import fetch_papers, filter_papers_by_year

# Streamlit App Config
st.set_page_config(page_title="PDF Tools Suite", page_icon="📄", layout="wide")

# Sidebar Navigation
st.sidebar.title("📄 PDF Tools Suite")
page = st.sidebar.radio(
    "Select a tool",
    [
        "Text Summarizer",
        "PDF Summarizer",
        "PDF Password Remover",
        "Research Paper Search",
        "PDF Merger",
        "PDF Splitter",
        "PDF to Text Converter",
    ],
)

# Tool: Text Summarizer
if page == "Text Summarizer":
    st.title("📝 Text Summarizer")
    user_input = st.text_area("Enter text to summarize")
    if st.button("Summarize"):
        if not user_input.strip():
            # Blank input has nothing to summarize; skip the summarizer call.
            st.warning("Please enter some text to summarize.")
        else:
            summary = summarizer(
                user_input, max_length=130, min_length=30, do_sample=False
            )
            st.subheader("Summary")
            st.write(summary[0]["summary_text"])

# Tool: PDF Summarizer
elif page == "PDF Summarizer":
    st.title("📜 PDF Summarizer")
    uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
    if uploaded_file is not None:
        pdf_text = extract_text_from_pdf(uploaded_file)
        if not pdf_text:
            # Nothing was extracted, so there is nothing to chunk or summarize.
            st.warning("No extractable text was found in this PDF.")
        else:
            chunks = split_text_into_chunks(pdf_text)
            summaries = summarize_text(chunks)
            full_summary = " ".join(summaries)
            st.subheader("Summary")
            st.write(full_summary)

# Tool: PDF Password Remover
elif page == "PDF Password Remover":
    st.title("🔑 Remove PDF Password")
    uploaded_file = st.file_uploader(
        "Choose a password-protected PDF", type=["pdf"]
    )
    password = st.text_input("Enter the PDF password", type="password")
    # Short-circuiting means the button is only rendered once both a file and
    # a password have been supplied.
    if uploaded_file and password and st.button("Remove Password"):
        output = remove_pdf_password(uploaded_file, password)
        # A BytesIO result is treated as success; anything else is shown as
        # the error detail.
        if isinstance(output, BytesIO):
            st.success("Password removed successfully!")
            st.download_button(
                "Download PDF",
                data=output,
                file_name="unlocked_pdf.pdf",
                mime="application/pdf",
            )
        else:
            st.error(f"Error: {output}")

# Tool: Research Paper Search
elif page == "Research Paper Search":
    st.title("🔍 Research Paper Search (arXiv)")
    query = st.text_input(
        "Enter topic or keywords", placeholder="e.g., machine learning"
    )
    max_results = st.slider("Number of results", 1, 50, 10)
    # Evaluate the current year once so both inputs share the same bound.
    current_year = datetime.now().year
    col1, col2 = st.columns(2)
    with col1:
        start_year = st.number_input(
            "Start Year", min_value=1900, max_value=current_year, value=2000
        )
    with col2:
        end_year = st.number_input(
            "End Year", min_value=1900, max_value=current_year, value=current_year
        )
    if st.button("Search"):
        if not query.strip():
            st.warning("Please enter a topic or keywords to search for.")
        elif start_year > end_year:
            # An inverted range can never match; report it instead of showing
            # a misleading "no papers found" message.
            st.error("Start Year must not be later than End Year.")
        else:
            papers = fetch_papers(query, max_results)
            papers_filtered = filter_papers_by_year(papers, start_year, end_year)
            if papers_filtered:
                for idx, paper in enumerate(papers_filtered, start=1):
                    st.write(f"### {idx}. {paper['title']}")
                    st.write(f"**Authors**: {', '.join(paper['authors'])}")
                    st.write(f"**Published**: {paper['published']}")
                    st.write(f"[Read More]({paper['link']})")
                    st.write("---")
            else:
                st.warning("No papers found in the selected range.")

# Tool: PDF Merger
elif page == "PDF Merger":
    st.title("📎 Merge Multiple PDFs")
    uploaded_files = st.file_uploader(
        "Upload multiple PDF files", type=["pdf"], accept_multiple_files=True
    )
    if uploaded_files and st.button("Merge PDFs"):
        pdf_writer = PdfWriter()
        # Append every page of every upload, in upload order. The loop
        # variables are named so they do not rebind the sidebar's `page`.
        for uploaded_pdf in uploaded_files:
            pdf_reader = PdfReader(uploaded_pdf)
            for pdf_page in pdf_reader.pages:
                pdf_writer.add_page(pdf_page)
        output = BytesIO()
        pdf_writer.write(output)
        output.seek(0)
        st.download_button(
            "Download Merged PDF",
            data=output,
            file_name="merged.pdf",
            mime="application/pdf",
        )

# Tool: PDF Splitter
elif page == "PDF Splitter":
    st.title("✂️ Split PDF into Pages")
    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if uploaded_file:
        pdf_reader = PdfReader(uploaded_file)
        # Offer one single-page PDF download per page of the upload.
        for i, pdf_page in enumerate(pdf_reader.pages):
            pdf_writer = PdfWriter()
            pdf_writer.add_page(pdf_page)
            output = BytesIO()
            pdf_writer.write(output)
            output.seek(0)
            st.download_button(
                f"Download Page {i+1}",
                data=output,
                file_name=f"page_{i+1}.pdf",
                mime="application/pdf",
            )

# Tool: PDF to Text Converter
elif page == "PDF to Text Converter":
    st.title("📜 Extract Text from PDF")
    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
    if uploaded_file:
        pdf_text = extract_text_from_pdf(uploaded_file)
        st.text_area("Extracted Text", pdf_text, height=300)