Spaces:

CCockrum
/

Oncological-Literature-Mining-Agent

Sleeping

App Files Files Community

Oncological-Literature-Mining-Agent / app.py

CCockrum

Update app.py

63cc071 verified about 2 months ago

raw

history blame contribute delete

26.7 kB

	import gradio as gr
	import requests
	import json
	import pandas as pd
	from datetime import datetime, timedelta
	import re
	from typing import List, Dict, Tuple
	import xml.etree.ElementTree as ET
	from collections import Counter
	import plotly.express as px
	import plotly.graph_objects as go
	from transformers import pipeline
	import numpy as np

	class VeterinaryLiteratureMiner:
	    """Search PubMed for veterinary literature and summarise the results.

	    The miner queries the NCBI E-utilities (``esearch`` followed by
	    ``efetch``), parses the returned XML into plain dictionaries, derives
	    keyword-based statistics (species, specialties, conditions, procedures,
	    drug mentions) and renders them as a Markdown report and Plotly figures.

	    Errors are reported in-band: search/parse methods return a one-element
	    list ``[{"error": message}]`` and the analysis returns ``{"error": ...}``.
	    """

	    # Capitalised word ending in a common drug-name suffix. The alternation
	    # must use a bare "|"; an escaped "\|" would match a literal pipe.
	    _DRUG_SUFFIX_PATTERN = re.compile(
	        r"\b[A-Z][a-z]*(?:mab|nib|ine|ole|cin|tin|zole|pril|sartan)\b"
	    )

	    # Frequent capitalised words that carry a drug-like suffix but are not
	    # drugs (compared in lower case). Species names are excluded separately.
	    _NON_DRUG_WORDS = frozenset({
	        "canine", "feline", "equine", "bovine", "porcine", "ovine", "caprine",
	        "murine", "medicine", "vaccine", "routine", "baseline", "guideline",
	        "urine", "uterine", "intestine", "endocrine", "examine", "determine",
	        "combine", "define", "online", "marine", "saline", "spine", "nine",
	        "fine", "line", "whole", "role", "sole", "protein", "skin", "within",
	    })

	    # Drugs that are looked up by name regardless of capitalisation.
	    _KNOWN_VET_DRUGS = (
	        "prednisolone", "dexamethasone", "amoxicillin", "cephalexin",
	        "enrofloxacin", "tramadol", "gabapentin", "furosemide", "enalapril",
	        "pimobendan",
	    )

	    # Keyword stems that assign a paper to a specialty.
	    _SPECIALTY_KEYWORDS = {
	        "oncology": ["cancer", "tumor", "oncology", "chemotherapy", "radiation"],
	        "cardiology": ["heart", "cardiac", "cardiology", "arrhythmia", "murmur"],
	        "dermatology": ["skin", "dermatology", "allergy", "dermatitis", "eczema"],
	        "neurology": ["brain", "neurological", "seizure", "epilepsy", "paralysis"],
	        "orthopedics": ["bone", "joint", "fracture", "orthopedic", "lameness"],
	        "infectious_diseases": ["infection", "bacteria", "virus", "antibiotic", "pathogen"],
	        "surgery": ["surgical", "surgery", "operative", "laparoscopy", "endoscopy"],
	        "internal_medicine": ["diabetes", "kidney", "liver", "endocrine", "metabolic"],
	    }

	    # Four-digit year inside a free-form date such as "2021 Jan-Feb".
	    _YEAR_PATTERN = re.compile(r"\b(?:18|19|20)\d{2}\b")

	    def __init__(self):
	        """Load the optional NLP pipelines and the keyword vocabularies."""
	        # The transformer models are optional: when they cannot be loaded the
	        # miner still works, only the zero-shot "research_trends" stay empty.
	        try:
	            self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	            self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
	        except Exception as e:
	            print(f"Warning: Could not load transformers models: {e}")
	            self.summarizer = None
	            self.classifier = None

	        # Veterinary research categories for classification
	        self.veterinary_categories = [
	            "oncology", "cardiology", "dermatology", "neurology", "orthopedics",
	            "infectious diseases", "parasitology", "pharmacology", "toxicology",
	            "surgery", "anesthesia", "emergency medicine", "internal medicine",
	            "pathology", "radiology", "nutrition", "behavior", "reproduction",
	            "public health", "zoonoses", "immunology", "genetics", "epidemiology"
	        ]

	        # Animal species categories
	        self.animal_species = [
	            "canine", "dog", "dogs", "feline", "cat", "cats", "equine", "horse", "horses",
	            "bovine", "cattle", "cow", "cows", "porcine", "pig", "pigs", "swine",
	            "ovine", "sheep", "caprine", "goat", "goats", "avian", "bird", "birds",
	            "poultry", "chicken", "chickens", "rabbit", "rabbits", "ferret", "ferrets",
	            "reptile", "reptiles", "fish", "aquatic", "wildlife", "zoo", "exotic",
	            "laboratory animals", "mouse", "mice", "rat", "rats"
	        ]

	        # Veterinary specialties and procedures
	        self.vet_procedures = [
	            "vaccination", "spay", "neuter", "castration", "ovariohysterectomy",
	            "amputation", "biopsy", "endoscopy", "laparoscopy", "arthroscopy",
	            "radiography", "ultrasound", "CT", "MRI", "chemotherapy", "radiation",
	            "physical therapy", "rehabilitation", "dental", "ophthalmology"
	        ]

	        # Common veterinary conditions
	        self.vet_conditions = [
	            "diabetes", "epilepsy", "heart disease", "kidney disease", "liver disease",
	            "arthritis", "hip dysplasia", "allergies", "skin disease", "cancer",
	            "tumor", "infection", "parasite", "heartworm", "flea", "tick",
	            "obesity", "dental disease", "cataracts", "glaucoma", "IBD"
	        ]

	    def search_veterinary_literature(self, query: str, max_results: int = 50, database: str = "pubmed") -> List[Dict]:
	        """Search veterinary literature.

	        Args:
	            query: Free-text search terms.
	            max_results: Upper bound on the number of papers requested.
	            database: Backend name; only PubMed is implemented, so every
	                value currently resolves to the PubMed search.

	        Returns:
	            A list of paper dictionaries, or ``[{"error": message}]``.
	        """
	        # Future: other veterinary databases could be dispatched on `database`.
	        return self._search_pubmed(query, max_results)

	    def _search_pubmed(self, query: str, max_results: int) -> List[Dict]:
	        """Run an esearch + efetch round trip against PubMed.

	        Args:
	            query: User query; it is AND-ed with a veterinary term filter.
	            max_results: Maximum number of PubMed ids to request.

	        Returns:
	            Parsed paper dictionaries, or ``[{"error": message}]`` when the
	            request fails, times out, or yields no ids.
	        """
	        try:
	            print(f"Searching PubMed with query: {query}") # Debug print

	            # Enhance query with veterinary terms
	            enhanced_query = f"({query}) AND (veterinary OR animal OR pet OR livestock OR zoo OR wildlife)"
	            print(f"Enhanced query: {enhanced_query}") # Debug print

	            search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
	            search_params = {
	                "db": "pubmed",
	                "term": enhanced_query,
	                # UI sliders may deliver a float; the API expects an integer.
	                "retmax": int(max_results),
	                "retmode": "json",
	                "sort": "relevance"
	            }

	            print("Making search request...") # Debug print
	            search_response = requests.get(search_url, params=search_params, timeout=30)
	            print(f"Search response status: {search_response.status_code}") # Debug print

	            if search_response.status_code != 200:
	                return [{"error": f"PubMed search failed with status {search_response.status_code}"}]

	            search_data = search_response.json()
	            print(f"Search data received: {search_data.get('esearchresult', {}).get('count', 0)} results") # Debug print

	            if "esearchresult" not in search_data:
	                return [{"error": "Invalid response from PubMed"}]

	            if not search_data["esearchresult"].get("idlist"):
	                return [{"error": "No papers found matching your query"}]

	            # Fetch the full records for the ids returned by the search.
	            ids = search_data["esearchresult"]["idlist"]
	            print(f"Fetching details for {len(ids)} papers...") # Debug print

	            fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
	            fetch_params = {
	                "db": "pubmed",
	                "id": ",".join(ids),
	                "retmode": "xml"
	            }

	            fetch_response = requests.get(fetch_url, params=fetch_params, timeout=60)
	            print(f"Fetch response status: {fetch_response.status_code}") # Debug print

	            if fetch_response.status_code != 200:
	                return [{"error": f"Failed to fetch paper details: {fetch_response.status_code}"}]

	            papers = self._parse_pubmed_xml(fetch_response.text)
	            print(f"Parsed {len(papers)} papers successfully") # Debug print

	            return papers

	        except requests.exceptions.Timeout:
	            return [{"error": "Request timed out. Please try again with fewer results."}]
	        except requests.exceptions.ConnectionError:
	            return [{"error": "Connection error. Please check your internet connection."}]
	        except Exception as e:
	            # Boundary handler: the UI expects an in-band error, never a raise.
	            print(f"Error in _search_pubmed: {str(e)}") # Debug print
	            return [{"error": f"Search failed: {str(e)}"}]

	    @staticmethod
	    def _element_text(elem) -> str:
	        """Return all text inside *elem*, including text nested in child tags.

	        ``elem.text`` alone stops at the first child element (e.g. ``<i>``),
	        so titles with inline markup would be truncated or ``None``.
	        Whitespace runs are collapsed; a missing element yields ``""``.
	        """
	        if elem is None:
	            return ""
	        return " ".join("".join(elem.itertext()).split())

	    def _parse_pubmed_xml(self, xml_content: str) -> List[Dict]:
	        """Parse an efetch XML document into paper dictionaries.

	        Args:
	            xml_content: XML text containing ``PubmedArticle`` elements.

	        Returns:
	            One dict per article with the keys ``pmid``, ``title``,
	            ``abstract``, ``authors``, ``journal`` and ``year`` (missing
	            values are the string ``"N/A"``; ``authors`` may be empty), or
	            ``[{"error": message}]`` when the XML cannot be parsed.
	        """
	        try:
	            root = ET.fromstring(xml_content)
	        except (ET.ParseError, TypeError, ValueError) as e:
	            return [{"error": f"XML parsing failed: {str(e)}"}]

	        papers = []
	        for article in root.findall(".//PubmedArticle"):
	            paper = {}

	            # Always provide the key so consumers never hit a KeyError.
	            medline = article.find(".//MedlineCitation")
	            pmid_elem = medline.find(".//PMID") if medline is not None else None
	            paper["pmid"] = self._element_text(pmid_elem) or "N/A"

	            paper["title"] = self._element_text(article.find(".//ArticleTitle")) or "N/A"

	            # Structured abstracts consist of several labelled sections;
	            # keep all of them instead of only the first one.
	            sections = []
	            for section in article.findall(".//Abstract/AbstractText"):
	                section_text = self._element_text(section)
	                if not section_text:
	                    continue
	                label = section.get("Label")
	                sections.append(f"{label}: {section_text}" if label else section_text)
	            paper["abstract"] = " ".join(sections) or "N/A"

	            # Only authors with both a fore name and a last name are listed.
	            authors = []
	            for author in article.findall(".//Author"):
	                fore_name = self._element_text(author.find(".//ForeName"))
	                last_name = self._element_text(author.find(".//LastName"))
	                if fore_name and last_name:
	                    authors.append(f"{fore_name} {last_name}")
	            paper["authors"] = ", ".join(authors[:3]) + ("..." if len(authors) > 3 else "")

	            paper["journal"] = self._element_text(article.find(".//Journal/Title")) or "N/A"

	            # Some records carry a free-form MedlineDate instead of Year.
	            year = self._element_text(article.find(".//PubDate/Year"))
	            if not year:
	                medline_date = self._element_text(article.find(".//PubDate/MedlineDate"))
	                year_match = self._YEAR_PATTERN.search(medline_date)
	                year = year_match.group(0) if year_match else ""
	            paper["year"] = year or "N/A"

	            papers.append(paper)

	        return papers

	    @staticmethod
	    def _compile_terms(terms) -> List[Tuple]:
	        """Return ``(term, pattern)`` pairs matching each term as a whole word.

	        Matching is case-insensitive and tolerates a plural ``s``/``es`` so
	        "tumor" still counts "tumors", while "cat" no longer matches inside
	        "indicate" and upper-case terms such as "CT" can match at all.
	        """
	        return [
	            (term, re.compile(r"\b" + re.escape(term) + r"(?:e?s)?\b", re.IGNORECASE))
	            for term in terms
	        ]

	    def analyze_veterinary_papers(self, papers: List[Dict]) -> Dict:
	        """Derive keyword statistics from the retrieved papers.

	        Args:
	            papers: Paper dictionaries as produced by the PubMed parser.

	        Returns:
	            A dict with ``total_papers`` and per-category count dicts
	            (``year_distribution``, ``animal_species``,
	            ``veterinary_specialties``, ``common_conditions``,
	            ``procedures_mentioned``, ``journal_distribution``,
	            ``research_trends``) plus ``drug_mentions`` holding the 15 most
	            frequent drug names. Returns ``{"error": ...}`` when *papers* is
	            empty or carries an error entry.
	        """
	        if not papers or papers[0].get("error"):
	            return {"error": "No papers to analyze"}

	        analysis = {
	            "total_papers": len(papers),
	            "year_distribution": {},
	            "animal_species": {},
	            "veterinary_specialties": {},
	            "common_conditions": {},
	            "procedures_mentioned": {},
	            "drug_mentions": [],
	            "journal_distribution": {},
	            "research_trends": {}
	        }

	        def bump(bucket: str, key) -> None:
	            counts = analysis[bucket]
	            counts[key] = counts.get(key, 0) + 1

	        # Loop-invariant matchers are built once per call.
	        species_patterns = self._compile_terms(self.animal_species)
	        condition_patterns = self._compile_terms(self.vet_conditions)
	        procedure_patterns = self._compile_terms(self.vet_procedures)
	        # Specialty keywords are stems ("bacteria" should also cover
	        # "bacterial"), so they are anchored at the word start only.
	        specialty_patterns = {
	            specialty: re.compile(
	                r"\b(?:" + "|".join(re.escape(k) for k in keywords) + r")",
	                re.IGNORECASE,
	            )
	            for specialty, keywords in self._SPECIALTY_KEYWORDS.items()
	        }
	        species_vocabulary = {term.lower() for term in self.animal_species}

	        for paper in papers:
	            bump("year_distribution", paper.get("year", "Unknown"))
	            bump("journal_distribution", paper.get("journal", "Unknown"))

	            # Tolerate None values so one odd record cannot abort the run.
	            abstract = paper.get("abstract") or ""
	            title = paper.get("title") or ""
	            full_text = f"{title} {abstract}"
	            full_text_lower = full_text.lower()

	            for species, pattern in species_patterns:
	                if pattern.search(full_text):
	                    bump("animal_species", species.replace(" ", "_"))

	            # A paper counts at most once per specialty.
	            for specialty, pattern in specialty_patterns.items():
	                if pattern.search(full_text):
	                    bump("veterinary_specialties", specialty)

	            for condition, pattern in condition_patterns:
	                if pattern.search(full_text):
	                    bump("common_conditions", condition)

	            for procedure, pattern in procedure_patterns:
	                if pattern.search(full_text):
	                    bump("procedures_mentioned", procedure)

	            # Suffix heuristic on the abstract, minus species names and
	            # common non-drug words that share a drug-like ending.
	            drugs = [
	                word
	                for word in self._DRUG_SUFFIX_PATTERN.findall(abstract)
	                if word.lower() not in self._NON_DRUG_WORDS
	                and word.lower() not in species_vocabulary
	            ]
	            # Add well-known veterinary drugs unless the heuristic already
	            # captured them, which would double-count the same mention.
	            already_found = {word.lower() for word in drugs}
	            for drug in self._KNOWN_VET_DRUGS:
	                if drug in full_text_lower and drug not in already_found:
	                    drugs.append(drug.title())
	            analysis["drug_mentions"].extend(drugs)

	            # Zero-shot classification is best effort and optional.
	            abstract_lower = abstract.lower()
	            if self.classifier and abstract_lower not in ("", "n/a"):
	                try:
	                    result = self.classifier(abstract_lower[:512], self.veterinary_categories)
	                    bump("research_trends", result["labels"][0])
	                except Exception as e:
	                    # Keep going, but do not hide the failure completely.
	                    print(f"Warning: classification failed for PMID {paper.get('pmid', 'N/A')}: {e}")

	        # Collapse the raw mention list into the 15 most frequent names.
	        analysis["drug_mentions"] = dict(Counter(analysis["drug_mentions"]).most_common(15))

	        return analysis

	    def generate_veterinary_summary(self, papers: List[Dict], analysis: Dict) -> str:
	        """Render the analysis as a Markdown report.

	        Args:
	            papers: Paper dictionaries used for the "recent highlights" list.
	            analysis: Result of ``analyze_veterinary_papers``.

	        Returns:
	            Markdown text, or a plain message when there is nothing to report.
	        """
	        if not papers or papers[0].get("error"):
	            return "No papers found or error in retrieval."

	        def top(bucket: str, limit: int):
	            counts = analysis.get(bucket) or {}
	            return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]

	        # Every line starts at column 0: indented Markdown lines would be
	        # rendered as a code block instead of headings and bullets.
	        parts = [
	            "\n# Veterinary Literature Mining Summary\n\n",
	            "## Overview\n",
	            f"- Total Papers Found: {analysis.get('total_papers', len(papers))}\n",
	            f"- Search Date: {datetime.now().strftime('%Y-%m-%d')}\n\n",
	            "## Key Insights\n\n",
	            "### Most Studied Animal Species\n",
	        ]

	        for species, count in top("animal_species", 8):
	            parts.append(f"- {species.replace('_', ' ').title()}: {count} papers\n")

	        parts.append("\n### Veterinary Specialties Focus\n")
	        for specialty, count in top("veterinary_specialties", 6):
	            parts.append(f"- {specialty.replace('_', ' ').title()}: {count} papers\n")

	        parts.append("\n### Common Conditions Studied\n")
	        for condition, count in top("common_conditions", 8):
	            parts.append(f"- {condition.title()}: {count} papers\n")

	        parts.append("\n### Frequently Mentioned Treatments/Drugs\n")
	        # drug_mentions is already ordered by frequency.
	        for drug, count in list((analysis.get("drug_mentions") or {}).items())[:8]:
	            parts.append(f"- {drug}: {count} mentions\n")

	        parts.append("\n### Top Veterinary Journals\n")
	        for journal, count in top("journal_distribution", 5):
	            parts.append(f"- {journal}: {count} papers\n")

	        parts.append("\n### Recent Research Highlights\n")

	        # Recent papers (last 3 years); non-numeric or missing years are skipped.
	        current_year = datetime.now().year
	        recent_papers = [
	            p for p in papers
	            if str(p.get("year") or "").isdigit() and int(p["year"]) >= current_year - 3
	        ]
	        for paper in recent_papers[:4]:
	            parts.append(f"- {paper.get('title') or 'N/A'} ({paper.get('year', 'N/A')})\n")
	            parts.append(f" {paper.get('journal') or 'N/A'}\n\n")

	        return "".join(parts)

	    def create_veterinary_visualizations(self, analysis: Dict):
	        """Build Plotly figures from the analysis.

	        Args:
	            analysis: Result of ``analyze_veterinary_papers``.

	        Returns:
	            A dict that may contain the keys ``year_dist``,
	            ``animal_species``, ``specialties`` and ``conditions``; a key is
	            absent when there is no data for that chart.
	        """
	        plots = {}

	        def ranked(bucket: str, limit: int):
	            counts = analysis.get(bucket) or {}
	            return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]

	        # Year distribution, in chronological order so the line does not zigzag.
	        year_counts = analysis.get("year_distribution") or {}
	        years = sorted((y for y in year_counts if str(y).isdigit()), key=int)
	        if years:
	            fig_year = px.line(
	                x=years, y=[year_counts[y] for y in years],
	                title="Veterinary Research Publications Over Time",
	                labels={"x": "Year", "y": "Number of Papers"},
	                markers=True
	            )
	            fig_year.update_layout(showlegend=False)
	            plots["year_dist"] = fig_year

	        # Animal species: the 12 most frequent ones.
	        top_species = ranked("animal_species", 12)
	        if top_species:
	            # Horizontal bars are drawn bottom-up, so reverse the ranking to
	            # put the most studied species at the top of the chart.
	            top_species.reverse()
	            fig_species = px.bar(
	                x=[count for _, count in top_species],
	                y=[name.replace("_", " ").title() for name, _ in top_species],
	                orientation='h',
	                title="Most Studied Animal Species",
	                labels={"x": "Number of Papers", "y": "Species"}
	            )
	            plots["animal_species"] = fig_species

	        # Veterinary specialties
	        specialty_counts = analysis.get("veterinary_specialties") or {}
	        if specialty_counts:
	            fig_specialties = px.pie(
	                values=list(specialty_counts.values()),
	                names=[s.replace("_", " ").title() for s in specialty_counts],
	                title="Veterinary Specialty Distribution"
	            )
	            plots["specialties"] = fig_specialties

	        # Common conditions: the 10 most frequent ones.
	        top_conditions = ranked("common_conditions", 10)
	        if top_conditions:
	            fig_conditions = px.bar(
	                x=[name.title() for name, _ in top_conditions],
	                y=[count for _, count in top_conditions],
	                title="Most Commonly Studied Conditions",
	                labels={"x": "Condition", "y": "Number of Papers"}
	            )
	            fig_conditions.update_xaxes(tickangle=45)
	            plots["conditions"] = fig_conditions

	        return plots

	def create_veterinary_gradio_interface():
	    """Build the Gradio Blocks app for veterinary literature mining.

	    The returned app wires a query box, a result-count slider and a database
	    dropdown to one callback that searches, analyses and visualises papers.
	    """
	    miner = VeterinaryLiteratureMiner()

	    def _failure(message):
	        # The click handler feeds six outputs; on failure only the summary
	        # panel receives text and the table and plots are cleared.
	        return message, None, None, None, None, None

	    def _display_title(paper):
	        # Titles longer than 100 characters are cut and marked with "...".
	        if len(paper.get("title", "")) > 100:
	            return paper.get("title", "N/A")[:100] + "..."
	        return paper.get("title", "N/A")

	    def search_and_analyze_vet(query, max_results, database):
	        """Run search, analysis, summary and plotting for one query.

	        Returns a 6-tuple: summary text, papers dataframe and the four
	        figures (year, species, specialties, conditions).
	        """
	        try:
	            print(f"Starting search with query: {query}") # Debug print

	            if not query.strip():
	                return _failure("Please enter a search query.")

	            # Step 1: retrieve the papers.
	            print("Searching papers...") # Debug print
	            papers = miner.search_veterinary_literature(query, max_results, database)
	            print(f"Found {len(papers) if papers else 0} papers") # Debug print

	            if not papers:
	                return _failure("No papers found. Try a different search query.")

	            # Search failures arrive in-band as an "error" entry.
	            search_error = papers[0].get("error")
	            if search_error:
	                return _failure(f"Search Error: {search_error}")

	            # Step 2: keyword analysis.
	            print("Analyzing papers...") # Debug print
	            analysis = miner.analyze_veterinary_papers(papers)

	            if analysis.get("error"):
	                return _failure(f"Analysis Error: {analysis['error']}")

	            # Step 3: Markdown report.
	            print("Generating summary...") # Debug print
	            summary = miner.generate_veterinary_summary(papers, analysis)

	            # Step 4: figures.
	            print("Creating visualizations...") # Debug print
	            plots = miner.create_veterinary_visualizations(analysis)

	            # Step 5: tabular view of the papers.
	            print("Creating dataframe...") # Debug print
	            rows = []
	            for p in papers:
	                rows.append({
	                    "PMID": p.get("pmid", "N/A"),
	                    "Title": _display_title(p),
	                    "Authors": p.get("authors", "N/A"),
	                    "Journal": p.get("journal", "N/A"),
	                    "Year": p.get("year", "N/A")
	                })
	            papers_df = pd.DataFrame(rows)

	            print("Search and analysis complete!") # Debug print
	            figure_keys = ("year_dist", "animal_species", "specialties", "conditions")
	            return (summary, papers_df) + tuple(plots.get(key) for key in figure_keys)

	        except Exception as e:
	            error_message = f"Unexpected error: {str(e)}"
	            print(f"Error in search_and_analyze_vet: {error_message}") # Debug print
	            return _failure(error_message)

	    # Lay out the page; components appear in the order they are created.
	    with gr.Blocks(title="Veterinary Literature Mining Agent", theme=gr.themes.Soft()) as demo:
	        gr.Markdown("""
	# 🐾 Veterinary Literature Mining Agent

	This AI agent searches and analyzes veterinary and animal health literature across all specialties.
	It automatically extracts insights about animal species, veterinary specialties, common conditions, and treatment trends.

	Features:
	- Comprehensive veterinary literature search
	- Multi-species analysis (companion animals, livestock, wildlife, exotics)
	- Veterinary specialty categorization
	- Treatment and drug trend analysis
	- Interactive visualizations
	- Journal and publication pattern analysis
	""")

	        with gr.Row():
	            # Left column: search controls.
	            with gr.Column(scale=2):
	                query_input = gr.Textbox(
	                    label="Research Query",
	                    placeholder="e.g., 'canine diabetes management', 'equine lameness diagnosis', 'feline kidney disease', 'wildlife conservation medicine'",
	                    lines=2
	                )
	                with gr.Row():
	                    max_results = gr.Slider(
	                        minimum=10, maximum=100, value=50, step=10,
	                        label="Maximum Results"
	                    )
	                    database_choice = gr.Dropdown(
	                        choices=["pubmed"],
	                        value="pubmed",
	                        label="Database"
	                    )
	                search_btn = gr.Button("🔍 Search Veterinary Literature", variant="primary", size="lg")

	            # Right column: static usage hints.
	            with gr.Column(scale=1):
	                gr.Markdown("""
	### Search Tips:
	- Species: dog, cat, horse, cattle, pig, bird, fish, reptile, wildlife
	- Specialties: cardiology, oncology, surgery, dermatology, neurology
	- Conditions: diabetes, arthritis, cancer, infection, allergies
	- Procedures: vaccination, surgery, imaging, therapy
	- Combine terms: "feline diabetes insulin therapy"
	""")

	        # Result area, one tab per output group.
	        with gr.Tabs():
	            with gr.TabItem("📊 Analysis Summary"):
	                summary_output = gr.Markdown(label="Veterinary Literature Analysis")

	            with gr.TabItem("📋 Research Papers"):
	                papers_output = gr.Dataframe(
	                    headers=["PMID", "Title", "Authors", "Journal", "Year"],
	                    label="Retrieved Veterinary Papers"
	                )

	            with gr.TabItem("📈 Research Trends"):
	                with gr.Row():
	                    year_plot = gr.Plot(label="Publication Timeline")
	                    species_plot = gr.Plot(label="Animal Species")
	                with gr.Row():
	                    specialties_plot = gr.Plot(label="Veterinary Specialties")
	                    conditions_plot = gr.Plot(label="Common Conditions")

	        # The same three inputs drive both the button and the examples.
	        search_inputs = [query_input, max_results, database_choice]
	        search_outputs = [
	            summary_output, papers_output,
	            year_plot, species_plot, specialties_plot, conditions_plot
	        ]

	        # Connect the search function with progress indicator
	        search_btn.click(
	            fn=search_and_analyze_vet,
	            inputs=search_inputs,
	            outputs=search_outputs,
	            show_progress=True
	        )

	        # Clickable example queries that pre-fill the inputs.
	        example_rows = [
	            ["canine diabetes insulin therapy", 40, "pubmed"],
	            ["equine lameness diagnosis imaging", 35, "pubmed"],
	            ["feline chronic kidney disease treatment", 45, "pubmed"],
	            ["bovine mastitis antibiotic resistance", 30, "pubmed"],
	            ["avian influenza surveillance wild birds", 35, "pubmed"],
	            ["exotic animal anesthesia protocols", 25, "pubmed"],
	            ["wildlife conservation medicine", 40, "pubmed"],
	            ["small animal oncology chemotherapy", 50, "pubmed"]
	        ]
	        gr.Examples(
	            examples=example_rows,
	            inputs=search_inputs
	        )

	        gr.Markdown("""
	### About This Veterinary Literature Mining Agent

	This comprehensive tool is designed for veterinary professionals, researchers, and students to efficiently
	search and analyze veterinary literature across all animal species and medical specialties.

	Supported Areas:
	- Companion Animals: Dogs, cats, rabbits, ferrets, birds, reptiles, fish
	- Large Animals: Horses, cattle, pigs, sheep, goats
	- Wildlife & Zoo Medicine: All wild species and conservation medicine
	- Laboratory Animals: Research and laboratory animal medicine
	- All Veterinary Specialties: Internal medicine, surgery, oncology, cardiology, dermatology, etc.

	Data Sources: PubMed/NCBI databases with veterinary focus
	Last Updated: June 2025
	Coverage: All aspects of veterinary medicine and animal health
	""")

	    return demo

	# Script entry point: build the app and serve it on all interfaces.
	if __name__ == "__main__":
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	    }
	    interface = create_veterinary_gradio_interface()
	    interface.launch(**launch_options)