Spaces:
Sleeping
Sleeping
File size: 8,048 Bytes
1b899cb f5128b8 1b899cb 366a700 f5128b8 1b899cb f5128b8 1b899cb 68e917e 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 1b899cb f5128b8 68e917e 1b899cb f5128b8 68e917e f5128b8 68e917e f5128b8 68e917e f5128b8 1b899cb f5128b8 1b899cb 68e917e 1b899cb 68e917e 1b899cb 68e917e 1b899cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import gradio as gr
from Bio import Entrez
from transformers import pipeline
import spacy
import os # For environment variables and file paths
# ---------------------------- Configuration ----------------------------
# Contact address assigned to Entrez.email before each PubMed request.
# NOTE(review): the fallback below looks like a redacted placeholder rather than
# a usable address -- set the ENTREZ_EMAIL environment variable when deploying.
ENTREZ_EMAIL = os.environ.get("ENTREZ_EMAIL", "[email protected]")
# Hugging Face access token. Falls back to None (anonymous access) rather than a
# dummy string: a made-up value would be passed on as if it were a real
# credential, while None simply means "no token". Empty values count as unset.
HUGGINGFACE_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") or None
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"
SPACY_MODEL = "en_core_web_sm"
# ---------------------------- Global Variables ----------------------------
summarizer = None  # summarization pipeline, assigned by setup()
nlp = None  # spaCy language model, assigned by setup()
initialization_status = "Initializing..."  # human-readable start-up progress/status
# ---------------------------- Helper Functions ----------------------------
def log_error(message: str):
    """Print an error to the console and append it to ``error_log.txt``.

    File logging is best-effort: if the log file cannot be opened or written,
    a notice is printed and the function returns normally.

    Args:
        message: Text describing the error.
    """
    print(f"ERROR: {message}")
    try:
        # Explicit UTF-8 so the log does not depend on the platform's default encoding.
        with open("error_log.txt", "a", encoding="utf-8") as f:
            f.write(f"{message}\n")
    except (OSError, UnicodeError):
        # Only swallow I/O and encoding failures; a bare ``except`` would also
        # hide KeyboardInterrupt and SystemExit.
        print("Couldn't write to error log file.")  # If logging fails, still print to console
# ---------------------------- Language Model Loading ----------------------------
def load_spacy_model(model_name="en_core_web_sm"):
    """Load a spaCy model, downloading it first if it is not installed.

    Progress and failures are appended to the module-level
    ``initialization_status`` string.

    Args:
        model_name: Name of the spaCy model package to load.

    Returns:
        The loaded spaCy model, or ``None`` if loading (and, where attempted,
        downloading) failed.
    """
    global initialization_status  # To update the initialization status
    try:
        print(f"Attempting to load SpaCy model '{model_name}'...")
        nlp_model = spacy.load(model_name)
        print(f"Successfully loaded SpaCy model '{model_name}'.")
        initialization_status += f"\nSpaCy model '{model_name}' loaded."
        return nlp_model
    except OSError:
        # spacy.load raised OSError: treat the model as missing and try to download it.
        print(f"SpaCy model '{model_name}' not found. Downloading...")
        initialization_status += f"\nSpaCy model '{model_name}' not found. Downloading..."
        try:
            import subprocess
            import sys

            # Run the download with the interpreter that is executing this code,
            # so the model is installed into the same environment; a bare
            # "python" may resolve to a different installation or not exist.
            interpreter = sys.executable or "python"
            subprocess.check_call([interpreter, "-m", "spacy", "download", model_name])
            nlp_model = spacy.load(model_name)
            print(f"Successfully loaded SpaCy model '{model_name}' after downloading.")
            initialization_status += f"\nSuccessfully loaded SpaCy model '{model_name}' after downloading."
            return nlp_model
        except Exception as e:
            log_error(f"Failed to download or load SpaCy model '{model_name}': {e}")
            initialization_status += f"\nFailed to download or load SpaCy model '{model_name}': {e}"
            return None  # Indicate failure
    except Exception as e:
        log_error(f"Error loading SpaCy model '{model_name}': {e}")
        initialization_status += f"\nError loading SpaCy model '{model_name}': {e}"
        return None
# ---------------------------- Tool Functions ----------------------------
def search_pubmed(query: str) -> list:
    """Search PubMed and return the IDs of up to five matching articles.

    Args:
        query: Search term passed to the Entrez ``esearch`` call.

    Returns:
        The ``IdList`` of the search result. A blank query returns an empty
        list without contacting PubMed. On failure the error is logged and a
        one-element list containing an ``"Error during PubMed search: ..."``
        message is returned.
    """
    # Nothing to search for: skip the network round-trip.
    if not query or not query.strip():
        return []
    try:
        Entrez.email = ENTREZ_EMAIL
        handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
        try:
            record = Entrez.read(handle)
        finally:
            # Close the handle even when parsing the response raises.
            handle.close()
        return record["IdList"]
    except Exception as e:
        log_error(f"PubMed search error: {e}")
        return [f"Error during PubMed search: {e}"]
def fetch_abstract(article_id: str) -> str:
    """Fetch the plain-text abstract for one PubMed article.

    Args:
        article_id: PubMed article ID passed to the Entrez ``efetch`` call.

    Returns:
        The text returned by ``efetch``. On failure the error is logged and a
        string starting with ``"Error fetching abstract for <article_id>"`` is
        returned instead.
    """
    try:
        Entrez.email = ENTREZ_EMAIL
        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
        try:
            abstract = handle.read()
        finally:
            # Close the handle even when reading the response raises.
            handle.close()
        return abstract
    except Exception as e:
        log_error(f"Error fetching abstract for {article_id}: {e}")
        return f"Error fetching abstract for {article_id}: {e}"
def summarize_abstract(abstract: str) -> str:
    """Summarize an abstract with the module-level summarization pipeline.

    Args:
        abstract: Text to summarize.

    Returns:
        The generated summary, or an explanatory message if the pipeline has
        not been initialized or summarization fails (failures are also logged).
    """
    if summarizer is None:
        log_error("Summarizer not initialized.")
        return "Summarizer not initialized. Check initialization status."
    try:
        # truncation=True clips input that exceeds the model's maximum length
        # instead of letting the pipeline fail on long texts.
        outputs = summarizer(
            abstract,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return outputs[0]['summary_text']
    except Exception as e:
        log_error(f"Summarization error: {e}")
        return f"Error during summarization: {e}"
def extract_entities(text: str) -> list:
    """Extract named entities from text with the module-level spaCy model.

    Args:
        text: Text to analyse.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples. If the spaCy model
        is not initialized or extraction fails, the problem is logged and a
        one-element list containing an explanatory message is returned, so the
        return value is always a list.
    """
    if nlp is None:
        log_error("SpaCy model not initialized.")
        # Wrapped in a list to honour the declared return type, matching the
        # error path below.
        return ["SpaCy model not initialized. Check initialization status."]
    try:
        doc = nlp(text)
        return [(ent.text, ent.label_) for ent in doc.ents]
    except Exception as e:
        log_error(f"Entity extraction error: {e}")
        return [f"Error during entity extraction: {e}"]
# ---------------------------- Agent Function ----------------------------
def medai_agent(query: str) -> str:
    """Search PubMed for a query and report a summary and entities per article.

    For every article ID returned by ``search_pubmed`` the abstract is
    fetched, summarized and scanned for entities; the per-article sections are
    joined into one Markdown string.

    Args:
        query: Free-text medical query.

    Returns:
        Markdown text with one section per article, or a short message when
        the query is blank, nothing was found, or the search failed.
    """
    if not query or not query.strip():
        return "Please enter a medical query."

    article_ids = search_pubmed(query)

    # search_pubmed reports failure as a list whose single item is an error
    # message; that message must not be treated as an article ID.
    search_failed = (
        isinstance(article_ids, list)
        and len(article_ids) == 1
        and str(article_ids[0]).startswith("Error during PubMed search")
    )
    if not isinstance(article_ids, list) or not article_ids or search_failed:
        return f"No articles found or error occurred: {article_ids}"

    results = []
    for article_id in article_ids:
        abstract = fetch_abstract(article_id)
        # Match the exact failure prefix produced by fetch_abstract. A plain
        # substring test for "Error" would reject genuine abstracts that merely
        # contain that word (e.g. texts about medication errors).
        if isinstance(abstract, str) and abstract.startswith("Error fetching abstract for"):
            results.append(f"Error processing article {article_id}: {abstract}\n\n---\n")
            continue
        summary = summarize_abstract(abstract)
        entities = extract_entities(abstract)
        results.append(f"**Article ID:** {article_id}\n\n**Summary:** {summary}\n\n**Entities:** {entities}\n\n---\n")
    return "\n".join(results)
# ---------------------------- Initialization and Setup ----------------------------
def setup():
    """Initialize the summarization pipeline and the spaCy model.

    Assigns the module-level ``summarizer`` and ``nlp`` objects and keeps
    ``initialization_status`` up to date while doing so.

    Returns:
        The final status string: a success message, the accumulated progress
        log ending in a spaCy failure note, or an ``"Initialization error"``
        message if an exception was raised.
    """
    global summarizer, nlp, initialization_status
    initialization_status = "Initializing..."
    try:
        print("Initializing summarization pipeline...")
        initialization_status += "\nInitializing summarization pipeline..."
        summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL, token=HUGGINGFACE_API_TOKEN)
        print("Summarization pipeline initialized.")
        initialization_status += "\nSummarization pipeline initialized."

        print("Loading SpaCy model...")
        initialization_status += "\nLoading SpaCy model..."
        # Pass the configured model name so the SPACY_MODEL constant is
        # honoured instead of the loader's own default.
        nlp = load_spacy_model(SPACY_MODEL)
        if nlp is None:
            initialization_status += "\nSpaCy model failed to load. Check the error log."
            return initialization_status
        print("SpaCy model loaded.")
        initialization_status += "\nSpaCy model loaded."

        # On full success the progress log is replaced by a single summary line.
        initialization_status = "MedAI Agent initialized successfully!"
        return initialization_status  # Return the status message
    except Exception as e:
        initialization_status = f"Initialization error: {e}"
        log_error(initialization_status)
        return initialization_status  # Return the error message
# ---------------------------- Gradio Interface ----------------------------
def launch_gradio():
    """Initialize the models, build the Gradio UI and launch it.

    ``setup()`` runs before the interface is built so the status textbox is
    constructed with the final initialization message, rather than having its
    ``value`` attribute changed after the component already exists.
    """
    status = setup()  # Load the models first; returns the status text to display
    with gr.Blocks() as iface:
        gr.Markdown("# MedAI: Medical Literature Review and Summarization")
        gr.Textbox(value=status, interactive=False)  # Displays initialization status
        query_input = gr.Textbox(lines=3, placeholder="Enter your medical query (e.g., 'new treatments for diabetes')...")
        submit_button = gr.Button("Submit")
        output_results = gr.Markdown()
        submit_button.click(medai_agent, inputs=query_input, outputs=output_results)
    iface.launch()
# ---------------------------- Main Execution ----------------------------
if __name__ == "__main__":
    # Start the web UI only when this file is run as a script, not on import.
    launch_gradio()