# NOTE(review): the three lines below were Hugging Face Spaces UI status text
# ("Spaces: Sleeping") accidentally captured when this file was copied from the
# Spaces web view — kept here as a comment so the file remains valid Python.
# Third-party dependencies:
# - streamlit: web UI
# - pdfplumber: PDF text extraction
# - transformers.pipeline: summarization model
# - langchain: prompt templating + LLM wrapper around the HF pipeline
import streamlit as st

import pdfplumber
from transformers import pipeline

from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        pdf_file: A path or file-like object accepted by ``pdfplumber.open``
            (here: the Streamlit ``UploadedFile``).

    Returns:
        str: All page text concatenated in page order. Pages with no
        extractable text (e.g. scanned/image-only pages) contribute nothing.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() returns None for image-only pages; coalesce to ""
        # so the join never raises TypeError.
        return "".join(page.extract_text() or "" for page in pdf.pages)
# Prompt template: tells the model which report sections to cover and injects
# the report text through the single {text} variable.
template = """
You are a medical summarization expert. Focus on the following key aspects when summarizing:
1. Patient History
2. Diagnosis
3. Treatment Recommendations
4. Follow-up Plans
Here’s the medical report to summarize:
{text}
"""

prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)
# ---- Streamlit application layout ----
st.title("Medical Report Summarizer")


def _build_summarizer(text_length, use_sampling=False):
    """Create the summarization LLM with generation bounds sized to the input.

    Args:
        text_length: Character count of the input text; ``max_length`` is set
            to roughly half of it.
        use_sampling: When True, enable ``top_k``/``top_p`` sampling (the
            text-input path historically used these; the PDF path did not).

    Returns:
        HuggingFacePipeline wrapping the fine-tuned summarization pipeline.
    """
    min_length = 100
    # Half the character count, but never below min_length + 1: the original
    # code could produce max_length < min_length for short inputs (e.g.
    # max(2, len // 2) == 2), which is an invalid generation configuration.
    # NOTE(review): max_length is in tokens while text_length is characters —
    # a rough heuristic carried over from the original; confirm against the
    # model's tokenizer if exact budgets matter.
    max_length = max(min_length + 1, text_length // 2)
    generation_kwargs = {
        "model": "fine_tuned_model",  # path to the fine-tuned model
        "temperature": 0.3,
        "min_length": min_length,
        "max_length": max_length,
    }
    if use_sampling:
        generation_kwargs["top_k"] = 80
        generation_kwargs["top_p"] = 0.95
    summarizer = pipeline("summarization", **generation_kwargs)
    return HuggingFacePipeline(pipeline=summarizer)


def _summarize(text, use_sampling=False):
    """Format the prompt for *text*, run the LLM, and render the summary."""
    llm = _build_summarizer(len(text), use_sampling=use_sampling)
    formatted_prompt = prompt.format(text=text)
    summary = llm(formatted_prompt)
    st.subheader("Summary:")
    st.write(summary)


# Option to upload PDF or enter text
option = st.selectbox("Choose Input Method", ["Upload PDF", "Enter Text"])

if option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])
    if uploaded_file is not None:
        # PDF path runs without top_k/top_p sampling, matching the original.
        _summarize(extract_text_from_pdf(uploaded_file))
elif option == "Enter Text":
    input_text = st.text_area("Enter the text to summarize", height=300)
    if st.button("Summarize"):
        if input_text:
            _summarize(input_text, use_sampling=True)
        else:
            st.warning("Please enter some text to summarize.")