"""Streamlit app that summarizes medical reports with a fine-tuned HF model.

Input is either an uploaded PDF (text extracted via pdfplumber) or pasted
text; the text is wrapped in a structured prompt and run through a
Hugging Face summarization pipeline exposed via LangChain.
"""

import streamlit as st
from transformers import pipeline
import pdfplumber
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Pages with no extractable text (scanned images, blank pages) make
    pdfplumber's ``extract_text()`` return ``None``; those are treated as
    empty instead of raising ``TypeError`` on string concatenation.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # `or ''` guards the None case; join avoids quadratic `+=` builds.
        return ''.join(page.extract_text() or '' for page in pdf.pages)


def _build_summarizer(text_length, *, top_k=None, top_p=None):
    """Create a HuggingFacePipeline summarizer sized to the input text.

    max_length is half the input length in characters (the original
    heuristic), but clamped to at least ``min_length + 1`` so generation
    cannot fail with an invalid ``min_length > max_length`` configuration
    on short inputs.
    """
    min_length = 100
    max_length = max(min_length + 1, text_length // 2)
    sampling_kwargs = {}
    if top_k is not None:
        sampling_kwargs['top_k'] = top_k
    if top_p is not None:
        sampling_kwargs['top_p'] = top_p
    summarizer = pipeline(
        "summarization",
        model="fine_tuned_model",  # Ensure the path to your fine-tuned model is correct
        # NOTE(review): temperature has no effect unless sampling is enabled
        # (do_sample=True) — kept for parity with the original configuration.
        temperature=0.3,
        min_length=min_length,
        max_length=max_length,
        **sampling_kwargs,
    )
    return HuggingFacePipeline(pipeline=summarizer)


def _summarize_and_render(text, **sampling):
    """Format the prompt for *text*, run the LLM, and render the summary."""
    llm = _build_summarizer(len(text), **sampling)
    formatted_prompt = prompt.format(text=text)
    summary = llm(formatted_prompt)
    st.subheader("Summary:")
    st.write(summary)


# Prompt steering the model toward the clinically relevant sections.
template = """
You are a medical summarization expert. Focus on the following key aspects when summarizing:
1. Patient History
2. Diagnosis
3. Treatment Recommendations
4. Follow-up Plans
Here’s the medical report to summarize:
{text}
"""
prompt = PromptTemplate(
    input_variables=["text"],
    template=template
)

# --- Streamlit application layout -------------------------------------------
st.title("Medical Report Summarizer")

# Option to upload a PDF or enter text directly.
option = st.selectbox("Choose Input Method", ["Upload PDF", "Enter Text"])

if option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])
    if uploaded_file is not None:
        # PDF path: sampling knobs (top_k/top_p) were commented out in the
        # original, so greedy defaults are kept here.
        _summarize_and_render(extract_text_from_pdf(uploaded_file))
elif option == "Enter Text":
    input_text = st.text_area("Enter the text to summarize", height=300)
    if st.button("Summarize"):
        if input_text:
            # Text path: the original enabled top-k / nucleus sampling here.
            _summarize_and_render(input_text, top_k=80, top_p=0.95)
        else:
            st.warning("Please enter some text to summarize.")