# MedAI: a Gradio app that searches PubMed, summarizes the returned abstracts
# with a transformer model, and lists the named entities SpaCy finds in them.
import gradio as gr
from Bio import Entrez
from transformers import pipeline
import spacy
import os  # For environment variables and file paths

# ---------------------------- Configuration ----------------------------
# Contact address assigned to Bio.Entrez before each request; override it with
# the ENTREZ_EMAIL environment variable.
# NOTE(review): the fallback below looks like a redacted placeholder rather
# than a usable address -- confirm and set ENTREZ_EMAIL in deployment.
ENTREZ_EMAIL = os.environ.get("ENTREZ_EMAIL", "[email protected]")
# Hugging Face access token. Falls back to None (no token) when the variable is
# unset or empty, so a dummy placeholder string is never passed on as if it
# were a real credential.
HUGGINGFACE_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") or None
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"
SPACY_MODEL = "en_core_web_sm"

# ---------------------------- Global Variables ----------------------------
summarizer = None  # Summarization pipeline; assigned by setup()
nlp = None  # SpaCy language model; assigned by setup()
initialization_status = "Initializing..."  # Track initialization state

# ---------------------------- Helper Functions ----------------------------

def log_error(message: str) -> None:
    """Print an error to the console and append it to ``error_log.txt``.

    Writing to the file is best-effort: if the log cannot be opened or
    written, a notice is printed and the function returns normally.

    Args:
        message: Human-readable description of the error.
    """
    print(f"ERROR: {message}")
    try:
        # Explicit encoding so the log does not depend on the platform default.
        with open("error_log.txt", "a", encoding="utf-8") as log_file:
            log_file.write(f"{message}\n")
    except (OSError, ValueError):
        # OSError: path not writable / is a directory / disk problems.
        # ValueError: covers UnicodeEncodeError for unencodable text.
        # Deliberately not a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate.
        print("Couldn't write to error log file.")  # If logging fails, still print to console

# ---------------------------- Language Model Loading ----------------------------

def load_spacy_model(model_name="en_core_web_sm"):
    """Load a SpaCy language model, downloading it first if it is missing.

    Progress and failures are appended to the module-level
    ``initialization_status`` string; failures are also sent to ``log_error``.

    Args:
        model_name: Name of the SpaCy model package to load.

    Returns:
        The loaded SpaCy model, or ``None`` if it could not be loaded or
        downloaded.
    """
    global initialization_status  # To update the initialization status

    try:
        print(f"Attempting to load SpaCy model '{model_name}'...")
        nlp_model = spacy.load(model_name)
        print(f"Successfully loaded SpaCy model '{model_name}'.")
        initialization_status += f"\nSpaCy model '{model_name}' loaded."
        return nlp_model
    except OSError:
        # spacy.load raised OSError: treat it as "model not installed".
        print(f"SpaCy model '{model_name}' not found. Downloading...")
        initialization_status += f"\nSpaCy model '{model_name}' not found. Downloading..."
        try:
            import importlib
            import subprocess
            import sys

            # Use the interpreter running this process; a bare "python" may
            # resolve to a different environment or not exist on PATH.
            subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
            # The package was installed after startup, so drop cached import
            # finder state before trying to load it again.
            importlib.invalidate_caches()
            nlp_model = spacy.load(model_name)
            print(f"Successfully loaded SpaCy model '{model_name}' after downloading.")
            initialization_status += f"\nSuccessfully loaded SpaCy model '{model_name}' after downloading."
            return nlp_model

        except Exception as e:
            log_error(f"Failed to download or load SpaCy model '{model_name}': {e}")
            initialization_status += f"\nFailed to download or load SpaCy model '{model_name}': {e}"
            return None  # Indicate failure

    except Exception as e:
        log_error(f"Error loading SpaCy model '{model_name}': {e}")
        initialization_status += f"\nError loading SpaCy model '{model_name}': {e}"
        return None

# ---------------------------- Tool Functions ----------------------------

def search_pubmed(query: str, max_results: int = 5) -> list:
    """Search PubMed and return the matching article IDs.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: Maximum number of IDs to request (``retmax``).

    Returns:
        The ``IdList`` of the search result. A blank query returns an empty
        list without contacting the service. On failure the error is logged
        and a single-element list holding an ``"Error during PubMed search"``
        message is returned.
    """
    if not query or not query.strip():
        return []

    try:
        Entrez.email = ENTREZ_EMAIL
        handle = Entrez.esearch(db="pubmed", term=query, retmax=str(max_results))
        try:
            record = Entrez.read(handle)
        finally:
            # Close the handle even when parsing the response fails.
            handle.close()
        return record["IdList"]
    except Exception as e:
        log_error(f"PubMed search error: {e}")
        return [f"Error during PubMed search: {e}"]

def fetch_abstract(article_id: str) -> str:
    """Fetch the plain-text abstract record for one PubMed article.

    Args:
        article_id: PubMed ID passed to ``Entrez.efetch``.

    Returns:
        The text returned by the fetch. On failure the error is logged and a
        string starting with ``"Error fetching abstract for"`` is returned.
    """
    try:
        Entrez.email = ENTREZ_EMAIL
        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
        try:
            return handle.read()
        finally:
            # Close the handle even when reading the response fails.
            handle.close()
    except Exception as e:
        log_error(f"Error fetching abstract for {article_id}: {e}")
        return f"Error fetching abstract for {article_id}: {e}"

def summarize_abstract(abstract: str) -> str:
    """Summarize an abstract with the module-level summarization pipeline.

    Args:
        abstract: Text to summarize.

    Returns:
        The generated summary. If the pipeline has not been initialized, the
        text is blank, or summarization raises, a descriptive message string
        is returned instead (failures are also logged).
    """
    if summarizer is None:
        log_error("Summarizer not initialized.")
        return "Summarizer not initialized. Check initialization status."

    if not abstract or not abstract.strip():
        return "No abstract text available to summarize."

    try:
        # truncation=True clips inputs longer than the model's maximum length
        # instead of letting the pipeline fail on them.
        outputs = summarizer(
            abstract,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return outputs[0]["summary_text"]
    except Exception as e:
        log_error(f"Summarization error: {e}")
        return f"Error during summarization: {e}"

def extract_entities(text: str) -> list:
    """Extract named entities from text with the module-level SpaCy model.

    Args:
        text: Text to analyze.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples. If the model is not
        initialized or extraction raises, a single-element list holding a
        message string is returned, so the result is always a list.
    """
    if nlp is None:
        log_error("SpaCy model not initialized.")
        # Wrapped in a list to honor the declared return type and to match
        # the error path below.
        return ["SpaCy model not initialized. Check initialization status."]
    try:
        doc = nlp(text)
        return [(ent.text, ent.label_) for ent in doc.ents]
    except Exception as e:
        log_error(f"Entity extraction error: {e}")
        return [f"Error during entity extraction: {e}"]

# ---------------------------- Agent Function ----------------------------

def medai_agent(query: str) -> str:
    """Run the search -> fetch -> summarize -> extract pipeline for a query.

    Args:
        query: Free-text medical query forwarded to ``search_pubmed``.

    Returns:
        Markdown text with one section per article (ID, summary, entities),
        or a single message when the search failed or returned nothing.
    """
    # Prefixes of the error strings produced by search_pubmed / fetch_abstract.
    search_error_prefix = "Error during PubMed search"
    fetch_error_prefix = "Error fetching abstract"

    article_ids = search_pubmed(query)

    # search_pubmed reports failure as a one-element list holding an error
    # string; it must not be mistaken for a list of article IDs.
    search_failed = (
        isinstance(article_ids, list)
        and len(article_ids) == 1
        and str(article_ids[0]).startswith(search_error_prefix)
    )
    if not isinstance(article_ids, list) or not article_ids or search_failed:
        return f"No articles found or error occurred: {article_ids}"

    results = []
    for article_id in article_ids:
        abstract = fetch_abstract(article_id)
        # Match the error prefix rather than the bare word "Error", which can
        # legitimately occur inside a real abstract.
        if abstract.startswith(fetch_error_prefix):
            results.append(f"Error processing article {article_id}: {abstract}\n\n---\n")
            continue
        summary = summarize_abstract(abstract)
        entities = extract_entities(abstract)
        results.append(f"**Article ID:** {article_id}\n\n**Summary:** {summary}\n\n**Entities:** {entities}\n\n---\n")
    return "\n".join(results)

# ---------------------------- Initialization and Setup ----------------------------

def setup() -> str:
    """Initialize the summarization pipeline and the SpaCy model.

    Assigns the module-level ``summarizer`` and ``nlp`` objects and keeps
    ``initialization_status`` up to date while doing so.

    Returns:
        The final status message: a success message, the accumulated status
        text if the SpaCy model failed to load, or an
        ``"Initialization error: ..."`` message if an exception was raised.
    """
    global summarizer, nlp, initialization_status
    initialization_status = "Initializing..."
    try:
        print("Initializing summarization pipeline...")
        initialization_status += "\nInitializing summarization pipeline..."
        summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL, token=HUGGINGFACE_API_TOKEN)
        print("Summarization pipeline initialized.")
        initialization_status += "\nSummarization pipeline initialized."

        print("Loading SpaCy model...")
        initialization_status += "\nLoading SpaCy model..."
        # Load the configured model instead of relying on the loader's default.
        nlp = load_spacy_model(SPACY_MODEL)
        if nlp is None:
            initialization_status += "\nSpaCy model failed to load. Check the error log."
            return initialization_status

        print("SpaCy model loaded.")
        initialization_status += "\nSpaCy model loaded."

        # On full success the step-by-step log is replaced by one summary line.
        initialization_status = "MedAI Agent initialized successfully!"
        return initialization_status  # Return the status message
    except Exception as e:
        initialization_status = f"Initialization error: {e}"
        log_error(initialization_status)
        return initialization_status  # Return the error message

# ---------------------------- Gradio Interface ----------------------------

def launch_gradio():
    """Initialize the models, build the Gradio interface and launch it.

    ``setup()`` runs before the UI is constructed so the status box is
    created with the final initialization message.
    """
    status_message = setup()

    with gr.Blocks() as iface:
        gr.Markdown("# MedAI: Medical Literature Review and Summarization")
        gr.Textbox(value=status_message, interactive=False)  # Displays initialization status
        query_input = gr.Textbox(lines=3, placeholder="Enter your medical query (e.g., 'new treatments for diabetes')...")
        submit_button = gr.Button("Submit")
        output_results = gr.Markdown()

        # Route the query through the agent and render its Markdown result.
        submit_button.click(medai_agent, inputs=query_input, outputs=output_results)

    iface.launch()

# ---------------------------- Main Execution ----------------------------

# Start the interface only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    launch_gradio()