"""SmartDoc Analyzer: a Streamlit app for summarizing documents and Q&A.

The script authenticates the user, loads two Hugging Face text-generation
pipelines (cached with ``st.cache_resource``), and renders three tabs:
document summarization, question answering over a summary, and a
placeholder visualization / data-extraction page.
"""

from difflib import SequenceMatcher

import altair as alt
import pandas as pd
import streamlit as st
import streamlit_authenticator as stauth
import transformers

# ------------------------------
# User Authentication Setup
# ------------------------------
# Sample configuration for authentication.
# NOTE(review): the demo password and the cookie signing key are hard-coded
# here for demonstration only; load them from st.secrets or the environment
# before exposing this app to real users.
config = {
    'credentials': {
        'usernames': {
            'demo_user': {
                'name': 'Demo User',
                # hashed password
                'password': stauth.Hasher(['password123']).generate()[0],
            }
        }
    },
    'cookie': {
        'expiry_days': 30,
        'key': 'some_signature_key',
        'name': 'some_cookie_name',
    },
    'preauthorized': {
        'emails': []
    },
}

authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
)

name, authentication_status, username = authenticator.login('Login', 'main')

# The status is False after a failed attempt and None while no credentials
# have been submitted yet; only the former is an actual failure.
if authentication_status is False:
    st.error('Authentication failed. Please refresh and try again.')
    st.stop()
elif not authentication_status:
    st.warning('Please enter your username and password.')
    st.stop()

st.sidebar.write(f"Welcome *{name}*")
authenticator.logout('Logout', 'sidebar')


# ------------------------------
# Load Models
# ------------------------------
@st.cache_resource
def load_qwen():
    """Return the text-generation pipeline for ``Qwen/Qwen2.5-14B``.

    Qwen2.5 is a decoder-only (causal) model, so it must be loaded with the
    ``text-generation`` task rather than ``text2text-generation``.
    """
    return transformers.pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-14B",
        device_map="auto",
    )


@st.cache_resource
def load_phi():
    """Return the text-generation pipeline for ``microsoft/phi-4``."""
    return transformers.pipeline(
        "text-generation",
        model="microsoft/phi-4",
        model_kwargs={"torch_dtype": "auto"},
        device_map="auto",
    )


qwen_pipeline = load_qwen()
phi_pipeline = load_phi()


# ------------------------------
# Utility Functions
# ------------------------------
def summarize_document(document_text):
    """Generate a summary of ``document_text`` with the Qwen pipeline.

    Args:
        document_text: The raw document text to summarize.

    Returns:
        The generated summary text (without the prompt echoed back).
    """
    prompt = (
        "Summarize the following document and highlight key insights:"
        f"\n\n{document_text}"
    )
    # return_full_text=False keeps the prompt out of 'generated_text'.
    outputs = qwen_pipeline(
        prompt, max_new_tokens=1024, return_full_text=False
    )
    return outputs[0]['generated_text']


def answer_question(summary, question):
    """Answer ``question`` using ``summary`` as context via the Phi pipeline.

    Args:
        summary: Summary text used as the context for the answer.
        question: The user's question about the document.

    Returns:
        The generated answer text (without the prompt echoed back).
    """
    prompt = (
        f"Based on the following summary:\n\n{summary}"
        f"\n\nAnswer the question: {question}"
    )
    # return_full_text=False keeps the prompt out of 'generated_text'.
    outputs = phi_pipeline(
        prompt, max_new_tokens=256, return_full_text=False
    )
    return outputs[0]['generated_text']


def find_similar_chunks(original, output):
    """Split ``output`` into segments that do or do not occur in ``original``.

    Args:
        original: Reference text.
        output: Text to partition.

    Returns:
        A list of ``{'text': str, 'match': bool}`` dicts that, concatenated in
        order, reproduce ``output``. ``match`` is True for spans that
        ``difflib.SequenceMatcher`` reports as matching blocks.
    """
    matcher = SequenceMatcher(None, original, output)
    segments = []
    left = 0
    for _, j, n in matcher.get_matching_blocks():
        if left < j:
            segments.append({'text': output[left:j], 'match': False})
        # The final block is a zero-length sentinel; it only serves to flush
        # the unmatched tail above and must not add an empty "match" segment.
        if n:
            segments.append({'text': output[j:j + n], 'match': True})
        left = j + n
    return segments


# ------------------------------
# Streamlit App Layout
# ------------------------------
st.title("SmartDoc Analyzer")
st.markdown("Analyze Financial & Health Documents with AI")

# Tabs for different functionalities
tabs = st.tabs([
    "Document Summarization",
    "Interactive Q&A",
    "Visualization & Data Extraction",
])

# -------- Document Summarization Tab --------
with tabs[0]:
    st.header("Document Summarization")
    document_text = st.text_area("Paste Document Text:", height=300)
    if st.button("Summarize Document"):
        # Treat whitespace-only input as empty.
        if document_text.strip():
            summary = summarize_document(document_text)
            st.subheader("Summary")
            st.write(summary)
            # Save summary in session for use in Q&A tab
            st.session_state['last_summary'] = summary
        else:
            st.warning("Please paste document text to summarize.")

# -------- Interactive Q&A Tab --------
with tabs[1]:
    st.header("Interactive Q&A")
    default_summary = st.session_state.get('last_summary', '')
    summary_context = st.text_area(
        "Summary Context:", value=default_summary, height=150
    )
    question = st.text_input("Enter your question about the document:")
    if st.button("Get Answer"):
        # Treat whitespace-only input as empty.
        if summary_context.strip() and question.strip():
            answer = answer_question(summary_context, question)
            st.subheader("Answer")
            st.write(answer)
            # For session saving, one could store Q&A pairs in
            # st.session_state or database.
        else:
            st.warning("Please provide both a summary context and a question.")

# -------- Visualization & Data Extraction Tab --------
with tabs[2]:
    st.header("Visualization & Data Extraction")

    st.subheader("Visualization Placeholder")
    st.markdown("An interactive chart can be displayed here using Altair or Plotly.")
    # Example static Altair chart (replace with dynamic data extraction logic)
    data = pd.DataFrame({
        'Year': [2019, 2020, 2021, 2022],
        'Revenue': [150, 200, 250, 300],
    })
    chart = alt.Chart(data).mark_line(point=True).encode(
        x='Year:O',
        y='Revenue:Q',
        tooltip=['Year', 'Revenue'],
    ).interactive()
    st.altair_chart(chart, use_container_width=True)

    st.subheader("Data Extraction Placeholder")
    st.markdown("Implement NLP techniques or model prompts to extract structured data here.")
    # File uploader example for future data extraction features
    uploaded_file = st.file_uploader(
        "Upload a document file for extraction", type=["pdf", "docx", "txt"]
    )
    if uploaded_file is not None:
        st.info("File uploaded successfully. Data extraction logic would process this file.")
        # Add logic to extract tables, key figures, etc. from the uploaded file.

# ------------------------------
# Safety & Compliance Layer (Placeholder)
# ------------------------------
st.sidebar.markdown("### Safety & Compliance")
st.sidebar.info(
    "This tool provides AI-driven insights. "
    "Please note that summaries and answers are for informational purposes only and should not be "
    "considered professional financial or medical advice."
)

# ------------------------------
# End of Application
# ------------------------------