File size: 3,169 Bytes
ce4f606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import streamlit as st
from transformers import pipeline
import pdfplumber
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

# Function to extract text from a PDF
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``pdfplumber.open`` (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: The text of all pages joined with newlines. Pages with no
        extractable text layer (e.g. scanned images) contribute an
        empty string instead of crashing.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # page.extract_text() returns None for pages without a text
        # layer; the original `text += page.extract_text()` raised
        # TypeError on such pages. Coalesce to "" and join with "\n"
        # so words at page boundaries are not glued together.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)

# Prompt template for the summarizer. The template text is part of the
# app's runtime behavior and is kept verbatim; it instructs the model to
# cover history, diagnosis, treatment, and follow-up.
template = """
You are a medical summarization expert. Focus on the following key aspects when summarizing:

1. Patient History
2. Diagnosis
3. Treatment Recommendations
4. Follow-up Plans

Here’s the medical report to summarize:

{text}
"""

# Single input slot: the raw report text to be summarized.
prompt = PromptTemplate(template=template, input_variables=["text"])

# Streamlit application layout
st.title("Medical Report Summarizer")

# Option to upload PDF or enter text
option = st.selectbox("Choose Input Method", ["Upload PDF", "Enter Text"])


def _generation_bounds(text):
    """Return consistent ``(min_length, max_length)`` bounds for *text*.

    ``max_length`` is half the character count (a rough token budget),
    floored at 2, as in the original code. ``min_length`` is capped
    strictly below ``max_length``: the original passed ``min_length=100``
    unconditionally, which contradicts ``max_length`` for inputs shorter
    than ~200 characters and makes generation fail or warn.
    """
    max_len = max(2, len(text) // 2)
    min_len = min(100, max_len - 1)
    return max(1, min_len), max_len


def _summarize(text):
    """Build the summarization chain for *text* and return the summary.

    Both input paths (PDF upload and pasted text) previously duplicated
    this logic with inconsistent sampling settings (top_k/top_p enabled
    in one branch, commented out in the other); they are unified here.

    NOTE(review): the pipeline is rebuilt on every request — consider
    ``st.cache_resource`` if model loading becomes a bottleneck.
    """
    min_len, max_len = _generation_bounds(text)
    summarizer = pipeline(
        "summarization",
        model="fine_tuned_model",  # Ensure the path to your fine-tuned model is correct
        temperature=0.3,
        min_length=min_len,
        max_length=max_len,
        top_k=80,
        top_p=0.95,
    )
    llm = HuggingFacePipeline(pipeline=summarizer)
    return llm(prompt.format(text=text))


if option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])
    if uploaded_file is not None:
        # Extract text from the uploaded PDF and summarize immediately.
        extracted_text = extract_text_from_pdf(uploaded_file)
        st.subheader("Summary:")
        st.write(_summarize(extracted_text))

elif option == "Enter Text":
    input_text = st.text_area("Enter the text to summarize", height=300)

    if st.button("Summarize"):
        if input_text:
            st.subheader("Summary:")
            st.write(_summarize(input_text))
        else:
            st.warning("Please enter some text to summarize.")