# app.py -- last commit: "Update app.py" (63cc071, verified); uploaded by CCockrum.
import gradio as gr
import requests
import json
import pandas as pd
from datetime import datetime, timedelta
import re
from typing import List, Dict, Tuple
import xml.etree.ElementTree as ET
from collections import Counter
import plotly.express as px
import plotly.graph_objects as go
from transformers import pipeline
import numpy as np
class VeterinaryLiteratureMiner:
    """Search PubMed for veterinary literature and summarise the results.

    Typical flow: ``search_veterinary_literature`` (NCBI E-utilities esearch
    followed by efetch) -> ``analyze_veterinary_papers`` (keyword statistics
    plus optional zero-shot classification) -> ``generate_veterinary_summary``
    (Markdown report) and ``create_veterinary_visualizations`` (Plotly figures).

    Errors are reported in-band rather than raised: the search and parse
    methods return ``[{"error": message}]`` and the analysis method returns
    ``{"error": message}``.
    """

    # Heuristic: a capitalised token ending in a common drug-name suffix.
    _DRUG_SUFFIX_RE = re.compile(
        r'\b[A-Z][a-z]*(?:mab|nib|ine|ole|cin|tin|zole|pril|sartan)\b'
    )

    # Ordinary words that satisfy _DRUG_SUFFIX_RE when capitalised (for
    # example at the start of a sentence). Deliberately small, not exhaustive.
    _NON_DRUG_WORDS = frozenset({
        "medicine", "vaccine", "routine", "baseline", "guideline", "urine",
        "creatinine", "examine", "determine", "combine", "define", "decline",
        "machine", "marine", "line", "nine", "fine", "role", "whole", "sole",
    })

    # Drugs that are looked up by name regardless of capitalisation.
    _KNOWN_VET_DRUGS = (
        "prednisolone", "dexamethasone", "amoxicillin", "cephalexin",
        "enrofloxacin", "tramadol", "gabapentin", "furosemide", "enalapril",
        "pimobendan",
    )

    # Specialty -> keywords; a paper counts once per specialty if any keyword
    # (or a word starting with it, e.g. "bacterial") occurs.
    _SPECIALTY_KEYWORDS = {
        "oncology": ["cancer", "tumor", "oncology", "chemotherapy", "radiation"],
        "cardiology": ["heart", "cardiac", "cardiology", "arrhythmia", "murmur"],
        "dermatology": ["skin", "dermatology", "allergy", "dermatitis", "eczema"],
        "neurology": ["brain", "neurological", "seizure", "epilepsy", "paralysis"],
        "orthopedics": ["bone", "joint", "fracture", "orthopedic", "lameness"],
        "infectious_diseases": ["infection", "bacteria", "virus", "antibiotic", "pathogen"],
        "surgery": ["surgical", "surgery", "operative", "laparoscopy", "endoscopy"],
        "internal_medicine": ["diabetes", "kidney", "liver", "endocrine", "metabolic"],
    }

    def __init__(self):
        """Load the optional NLP pipelines and set up the keyword vocabularies."""
        # Initialize NLP pipelines. Both are optional: if loading fails the
        # attributes are None and zero-shot classification is skipped.
        # Note: no method of this class calls self.summarizer.
        try:
            self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        except Exception as e:
            print(f"Warning: Could not load transformers models: {e}")
            self.summarizer = None
            self.classifier = None

        # Veterinary research categories for classification
        self.veterinary_categories = [
            "oncology", "cardiology", "dermatology", "neurology", "orthopedics",
            "infectious diseases", "parasitology", "pharmacology", "toxicology",
            "surgery", "anesthesia", "emergency medicine", "internal medicine",
            "pathology", "radiology", "nutrition", "behavior", "reproduction",
            "public health", "zoonoses", "immunology", "genetics", "epidemiology",
        ]

        # Animal species categories
        self.animal_species = [
            "canine", "dog", "dogs", "feline", "cat", "cats", "equine", "horse", "horses",
            "bovine", "cattle", "cow", "cows", "porcine", "pig", "pigs", "swine",
            "ovine", "sheep", "caprine", "goat", "goats", "avian", "bird", "birds",
            "poultry", "chicken", "chickens", "rabbit", "rabbits", "ferret", "ferrets",
            "reptile", "reptiles", "fish", "aquatic", "wildlife", "zoo", "exotic",
            "laboratory animals", "mouse", "mice", "rat", "rats",
        ]

        # Veterinary specialties and procedures
        self.vet_procedures = [
            "vaccination", "spay", "neuter", "castration", "ovariohysterectomy",
            "amputation", "biopsy", "endoscopy", "laparoscopy", "arthroscopy",
            "radiography", "ultrasound", "CT", "MRI", "chemotherapy", "radiation",
            "physical therapy", "rehabilitation", "dental", "ophthalmology",
        ]

        # Common veterinary conditions
        self.vet_conditions = [
            "diabetes", "epilepsy", "heart disease", "kidney disease", "liver disease",
            "arthritis", "hip dysplasia", "allergies", "skin disease", "cancer",
            "tumor", "infection", "parasite", "heartworm", "flea", "tick",
            "obesity", "dental disease", "cataracts", "glaucoma", "IBD",
        ]

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _term_pattern(term: str):
        """Compile a case-insensitive whole-word matcher for *term* (optional plural "s")."""
        return re.compile(r"\b" + re.escape(term) + r"s?\b", re.IGNORECASE)

    @staticmethod
    def _prefix_pattern(keywords):
        """Compile a case-insensitive matcher for words that start with any keyword."""
        alternatives = "|".join(re.escape(keyword) for keyword in keywords)
        return re.compile(r"\b(?:" + alternatives + r")", re.IGNORECASE)

    @staticmethod
    def _top_items(counts: Dict, limit: int) -> List[Tuple]:
        """Return up to *limit* ``(key, count)`` pairs, highest count first."""
        return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]

    @staticmethod
    def _sorted_year_counts(year_distribution: Dict) -> List[Tuple]:
        """Return ``(year, count)`` pairs for numeric years in chronological order."""
        numeric = [
            (year, count)
            for year, count in year_distribution.items()
            if isinstance(year, str) and year.isdigit()
        ]
        return sorted(numeric, key=lambda item: int(item[0]))

    @staticmethod
    def _element_text(element) -> str:
        """Return the full text of an XML element, or "N/A" when absent/empty.

        ``itertext`` is used instead of ``.text`` so that inline child markup
        (``<i>``, ``<sub>`` ...) neither truncates the text nor yields None.
        """
        if element is None:
            return "N/A"
        text = " ".join("".join(element.itertext()).split())
        return text or "N/A"

    @staticmethod
    def _extract_year(article) -> str:
        """Return the 4-digit publication year of an article element, or "N/A"."""
        year = article.find(".//PubDate/Year")
        if year is not None and year.text:
            return year.text.strip()
        # Some records carry a free-form MedlineDate instead of a Year element.
        medline_date = article.find(".//PubDate/MedlineDate")
        if medline_date is not None and medline_date.text:
            match = re.search(r"\b(\d{4})\b", medline_date.text)
            if match:
                return match.group(1)
        return "N/A"

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------
    def search_veterinary_literature(self, query: str, max_results: int = 50, database: str = "pubmed") -> List[Dict]:
        """Search veterinary literature.

        Args:
            query: Free-text search terms.
            max_results: Maximum number of papers to retrieve.
            database: Backend name. PubMed is the only backend implemented,
                so every value is routed to it.

        Returns:
            A list of paper dicts, or ``[{"error": message}]`` on failure.
        """
        # Future: other veterinary databases could be dispatched on `database`.
        return self._search_pubmed(query, max_results)

    def _search_pubmed(self, query: str, max_results: int) -> List[Dict]:
        """Search PubMed for veterinary papers and fetch their details.

        Runs an esearch request for the IDs, then one efetch request for the
        XML records. Never raises: every failure is returned as
        ``[{"error": message}]``.
        """
        try:
            print(f"Searching PubMed with query: {query}")  # Debug print

            # Enhance query with veterinary terms
            enhanced_query = f"({query}) AND (veterinary OR animal OR pet OR livestock OR zoo OR wildlife)"
            print(f"Enhanced query: {enhanced_query}")  # Debug print

            # Search PubMed
            search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
            search_params = {
                "db": "pubmed",
                "term": enhanced_query,
                # UI sliders may hand over a float; retmax must be an integer.
                "retmax": int(max_results),
                "retmode": "json",
                "sort": "relevance",
            }
            print("Making search request...")  # Debug print
            search_response = requests.get(search_url, params=search_params, timeout=30)
            print(f"Search response status: {search_response.status_code}")  # Debug print
            if search_response.status_code != 200:
                return [{"error": f"PubMed search failed with status {search_response.status_code}"}]

            search_data = search_response.json()
            print(f"Search data received: {search_data.get('esearchresult', {}).get('count', 0)} results")  # Debug print
            if "esearchresult" not in search_data:
                return [{"error": "Invalid response from PubMed"}]
            ids = search_data["esearchresult"].get("idlist")
            if not ids:
                return [{"error": "No papers found matching your query"}]

            # Get detailed information
            print(f"Fetching details for {len(ids)} papers...")  # Debug print
            fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
            fetch_params = {
                "db": "pubmed",
                "id": ",".join(ids),
                "retmode": "xml",
            }
            fetch_response = requests.get(fetch_url, params=fetch_params, timeout=60)
            print(f"Fetch response status: {fetch_response.status_code}")  # Debug print
            if fetch_response.status_code != 200:
                return [{"error": f"Failed to fetch paper details: {fetch_response.status_code}"}]

            # Parse XML response
            papers = self._parse_pubmed_xml(fetch_response.text)
            print(f"Parsed {len(papers)} papers successfully")  # Debug print
            return papers
        except requests.exceptions.Timeout:
            return [{"error": "Request timed out. Please try again with fewer results."}]
        except requests.exceptions.ConnectionError:
            return [{"error": "Connection error. Please check your internet connection."}]
        except Exception as e:
            # Boundary handler: the UI expects an in-band error, not a raise.
            print(f"Error in _search_pubmed: {str(e)}")  # Debug print
            return [{"error": f"Search failed: {str(e)}"}]

    def _parse_pubmed_xml(self, xml_content: str) -> List[Dict]:
        """Parse a PubMed efetch XML document into a list of paper dicts.

        Each paper has the keys ``pmid`` (when a MedlineCitation is present),
        ``title``, ``abstract``, ``authors``, ``journal`` and ``year``; missing
        values are the string "N/A" (``authors`` is "" when none are listed).

        Returns ``[{"error": message}]`` if the XML cannot be parsed.
        """
        papers = []
        try:
            root = ET.fromstring(xml_content)
            for article in root.findall(".//PubmedArticle"):
                paper = {}

                # Extract basic info
                medline = article.find(".//MedlineCitation")
                if medline is not None:
                    paper["pmid"] = self._element_text(medline.find(".//PMID"))

                # Extract title (may contain inline markup)
                paper["title"] = self._element_text(article.find(".//ArticleTitle"))

                # Extract abstract: structured abstracts have several
                # AbstractText sections, so join all of them.
                sections = [
                    self._element_text(section)
                    for section in article.findall(".//Abstract/AbstractText")
                ]
                sections = [text for text in sections if text != "N/A"]
                paper["abstract"] = " ".join(sections) if sections else "N/A"

                # Extract authors (first three are shown)
                authors = []
                for author in article.findall(".//Author"):
                    fore = (author.findtext("ForeName") or "").strip()
                    last = (author.findtext("LastName") or "").strip()
                    name = " ".join(part for part in (fore, last) if part)
                    if not name:
                        # Consortium / group authors have no personal name.
                        name = (author.findtext("CollectiveName") or "").strip()
                    if name:
                        authors.append(name)
                paper["authors"] = ", ".join(authors[:3]) + ("..." if len(authors) > 3 else "")

                # Extract journal and date
                paper["journal"] = self._element_text(article.find(".//Journal/Title"))
                paper["year"] = self._extract_year(article)

                papers.append(paper)
        except Exception as e:
            return [{"error": f"XML parsing failed: {str(e)}"}]
        return papers

    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------
    def analyze_veterinary_papers(self, papers: List[Dict]) -> Dict:
        """Compute keyword-based statistics over the retrieved papers.

        Species, conditions and procedures are matched as whole words
        (case-insensitively, with an optional plural "s") so that, for
        example, "cat" does not match "indicate" and "CT" can match at all.

        Args:
            papers: Paper dicts as produced by ``_parse_pubmed_xml``. Fields
                that are missing or None are treated as empty/unknown.

        Returns:
            A dict with ``total_papers`` and per-category count dicts
            (``year_distribution``, ``animal_species``,
            ``veterinary_specialties``, ``common_conditions``,
            ``procedures_mentioned``, ``drug_mentions`` (top 15),
            ``journal_distribution``, ``research_trends``), or
            ``{"error": "No papers to analyze"}`` when there is nothing to do.
        """
        if not papers or papers[0].get("error"):
            return {"error": "No papers to analyze"}

        # Compile every matcher once, outside the per-paper loop.
        species_patterns = [
            (species.replace(" ", "_"), self._term_pattern(species))
            for species in self.animal_species
        ]
        condition_patterns = [(c, self._term_pattern(c)) for c in self.vet_conditions]
        procedure_patterns = [(p, self._term_pattern(p)) for p in self.vet_procedures]
        specialty_patterns = [
            (specialty, self._prefix_pattern(keywords))
            for specialty, keywords in self._SPECIALTY_KEYWORDS.items()
        ]
        known_drug_patterns = [
            (drug.title(), self._term_pattern(drug)) for drug in self._KNOWN_VET_DRUGS
        ]
        # Species adjectives such as "Canine" end in "-ine" and would
        # otherwise be reported as drugs.
        non_drug_words = self._NON_DRUG_WORDS | {s.lower() for s in self.animal_species}

        year_counts = Counter()
        journal_counts = Counter()
        species_counts = Counter()
        specialty_counts = Counter()
        condition_counts = Counter()
        procedure_counts = Counter()
        drug_counts = Counter()
        trend_counts = Counter()

        for paper in papers:
            year_counts[paper.get("year") or "Unknown"] += 1
            journal_counts[paper.get("journal") or "Unknown"] += 1

            # Title/abstract can be None or absent; treat both as empty.
            title = paper.get("title") or ""
            abstract = paper.get("abstract") or ""
            full_text = f"{title} {abstract}"

            for key, pattern in species_patterns:
                if pattern.search(full_text):
                    species_counts[key] += 1

            for specialty, pattern in specialty_patterns:
                if pattern.search(full_text):
                    specialty_counts[specialty] += 1

            for condition, pattern in condition_patterns:
                if pattern.search(full_text):
                    condition_counts[condition] += 1

            for procedure, pattern in procedure_patterns:
                if pattern.search(full_text):
                    procedure_counts[procedure] += 1

            # Drug mentions: suffix heuristic on the abstract, minus known
            # false positives, plus a fixed list of common veterinary drugs.
            drugs = [
                token
                for token in self._DRUG_SUFFIX_RE.findall(abstract)
                if token.lower() not in non_drug_words
            ]
            for name, pattern in known_drug_patterns:
                # Skip names the heuristic already found to avoid double counting.
                if name not in drugs and pattern.search(full_text):
                    drugs.append(name)
            drug_counts.update(drugs)

            # Classify research category if classifier is available
            if self.classifier and abstract and abstract != "N/A":
                try:
                    result = self.classifier(abstract[:512], self.veterinary_categories)
                    trend_counts[result["labels"][0]] += 1
                except Exception as e:
                    # Best effort: one failed classification must not abort
                    # the whole analysis, but it should not vanish silently.
                    print(f"Warning: classification failed for PMID {paper.get('pmid', 'N/A')}: {e}")

        return {
            "total_papers": len(papers),
            "year_distribution": dict(year_counts),
            "animal_species": dict(species_counts),
            "veterinary_specialties": dict(specialty_counts),
            "common_conditions": dict(condition_counts),
            "procedures_mentioned": dict(procedure_counts),
            "drug_mentions": dict(drug_counts.most_common(15)),
            "journal_distribution": dict(journal_counts),
            "research_trends": dict(trend_counts),
        }

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    def generate_veterinary_summary(self, papers: List[Dict], analysis: Dict) -> str:
        """Render the analysis as a Markdown report.

        Args:
            papers: The paper dicts that were analysed.
            analysis: The dict returned by ``analyze_veterinary_papers``.

        Returns:
            Markdown text, or a short plain message when *papers* is empty or
            carries an error.
        """
        if not papers or papers[0].get("error"):
            return "No papers found or error in retrieval."

        parts = [
            "\n# Veterinary Literature Mining Summary\n",
            "## Overview\n",
            f"- **Total Papers Found**: {analysis['total_papers']}\n",
            f"- **Search Date**: {datetime.now().strftime('%Y-%m-%d')}\n",
            "## Key Insights\n",
            "### Most Studied Animal Species\n",
        ]

        # Top animal species
        for species, count in self._top_items(analysis["animal_species"], 8):
            formatted_species = species.replace("_", " ").title()
            parts.append(f"- **{formatted_species}**: {count} papers\n")

        parts.append("\n### Veterinary Specialties Focus\n")
        for specialty, count in self._top_items(analysis["veterinary_specialties"], 6):
            formatted_specialty = specialty.replace("_", " ").title()
            parts.append(f"- **{formatted_specialty}**: {count} papers\n")

        parts.append("\n### Common Conditions Studied\n")
        for condition, count in self._top_items(analysis["common_conditions"], 8):
            parts.append(f"- **{condition.title()}**: {count} papers\n")

        parts.append("\n### Frequently Mentioned Treatments/Drugs\n")
        # drug_mentions is already ordered by frequency (most_common).
        for drug, count in list(analysis["drug_mentions"].items())[:8]:
            parts.append(f"- **{drug}**: {count} mentions\n")

        parts.append("\n### Top Veterinary Journals\n")
        for journal, count in self._top_items(analysis["journal_distribution"], 5):
            parts.append(f"- **{journal}**: {count} papers\n")

        parts.append("\n### Recent Research Highlights\n")
        # Recent papers (last 3 years); non-numeric or missing years are skipped.
        current_year = datetime.now().year
        recent_papers = [
            p for p in papers
            if str(p.get("year") or "").isdigit() and int(p["year"]) >= current_year - 3
        ]
        for paper in recent_papers[:4]:
            parts.append(f"- **{paper.get('title') or 'N/A'}** ({paper.get('year', 'N/A')})\n")
            parts.append(f"  *{paper.get('journal') or 'N/A'}*\n\n")

        return "".join(parts)

    def create_veterinary_visualizations(self, analysis: Dict):
        """Create Plotly figures for the analysis.

        Returns:
            A dict that may contain the keys ``year_dist``, ``animal_species``,
            ``specialties`` and ``conditions``; a key is omitted when there is
            no data for that chart.
        """
        plots = {}

        # Year distribution, in chronological order so the line reads left
        # to right instead of following dictionary insertion order.
        year_points = self._sorted_year_counts(analysis.get("year_distribution") or {})
        if year_points:
            years = [year for year, _ in year_points]
            counts = [count for _, count in year_points]
            fig_year = px.line(
                x=years, y=counts,
                title="Veterinary Research Publications Over Time",
                labels={"x": "Year", "y": "Number of Papers"},
                markers=True,
            )
            fig_year.update_layout(showlegend=False)
            plots["year_dist"] = fig_year

        # Animal species: the 12 most frequent, not the first 12 encountered.
        top_species = self._top_items(analysis.get("animal_species") or {}, 12)
        if top_species:
            formatted_species = [name.replace("_", " ").title() for name, _ in top_species]
            species_counts = [count for _, count in top_species]
            fig_species = px.bar(
                x=species_counts, y=formatted_species,
                orientation='h',
                title="Most Studied Animal Species",
                labels={"x": "Number of Papers", "y": "Species"},
            )
            # Horizontal bars are drawn bottom-up; reverse so the largest is on top.
            fig_species.update_yaxes(autorange="reversed")
            plots["animal_species"] = fig_species

        # Veterinary specialties
        specialties_data = analysis.get("veterinary_specialties") or {}
        if specialties_data:
            formatted_specialties = [s.replace("_", " ").title() for s in specialties_data]
            fig_specialties = px.pie(
                values=list(specialties_data.values()), names=formatted_specialties,
                title="Veterinary Specialty Distribution",
            )
            plots["specialties"] = fig_specialties

        # Common conditions: the 10 most frequent.
        top_conditions = self._top_items(analysis.get("common_conditions") or {}, 10)
        if top_conditions:
            fig_conditions = px.bar(
                x=[name.title() for name, _ in top_conditions],
                y=[count for _, count in top_conditions],
                title="Most Commonly Studied Conditions",
                labels={"x": "Condition", "y": "Number of Papers"},
            )
            fig_conditions.update_xaxes(tickangle=45)
            plots["conditions"] = fig_conditions

        return plots
def create_veterinary_gradio_interface():
    """Create the Gradio interface for veterinary literature mining.

    Builds a ``gr.Blocks`` app with a query form, a summary tab, a papers
    table tab and a tab holding four plots, and wires the search button to
    the search/analysis pipeline of a ``VeterinaryLiteratureMiner``.

    Returns:
        The ``gr.Blocks`` instance (not yet launched).
    """
    miner = VeterinaryLiteratureMiner()

    def failure(message):
        """Return *message* for the summary output and None for the other five."""
        return message, None, None, None, None, None

    def short_title(title, limit=100):
        """Truncate a title for the table; None/empty becomes "N/A"."""
        title = title or "N/A"
        return title[:limit] + "..." if len(title) > limit else title

    def search_and_analyze_vet(query, max_results, database):
        """Search, analyse and format results for the six output components."""
        try:
            print(f"Starting search with query: {query}")  # Debug print
            if not query or not query.strip():
                return failure("Please enter a search query.")

            # Search papers (the slider may deliver a float)
            print("Searching papers...")  # Debug print
            papers = miner.search_veterinary_literature(query, int(max_results), database)
            print(f"Found {len(papers) if papers else 0} papers")  # Debug print
            if not papers:
                return failure("No papers found. Try a different search query.")
            if papers[0].get("error"):
                error_msg = papers[0].get("error", "Unknown error occurred")
                return failure(f"Search Error: {error_msg}")

            # Analyze papers
            print("Analyzing papers...")  # Debug print
            analysis = miner.analyze_veterinary_papers(papers)
            if analysis.get("error"):
                return failure(f"Analysis Error: {analysis['error']}")

            # Generate summary
            print("Generating summary...")  # Debug print
            summary = miner.generate_veterinary_summary(papers, analysis)

            # Create visualizations
            print("Creating visualizations...")  # Debug print
            plots = miner.create_veterinary_visualizations(analysis)

            # Create papers dataframe
            print("Creating dataframe...")  # Debug print
            papers_df = pd.DataFrame([
                {
                    "PMID": p.get("pmid", "N/A"),
                    "Title": short_title(p.get("title")),
                    "Authors": p.get("authors", "N/A"),
                    "Journal": p.get("journal", "N/A"),
                    "Year": p.get("year", "N/A"),
                }
                for p in papers
            ])
            print("Search and analysis complete!")  # Debug print
            return (
                summary,
                papers_df,
                plots.get("year_dist"),
                plots.get("animal_species"),
                plots.get("specialties"),
                plots.get("conditions"),
            )
        except Exception as e:
            # UI boundary: show the problem in the summary pane instead of crashing.
            error_message = f"Unexpected error: {str(e)}"
            print(f"Error in search_and_analyze_vet: {error_message}")  # Debug print
            return failure(error_message)

    # Markdown texts are assembled line by line so that source indentation
    # can never leak into the rendered Markdown.
    intro_md = "\n".join([
        "",
        "# 🐾 Veterinary Literature Mining Agent",
        "This AI agent searches and analyzes veterinary and animal health literature across all specialties.",
        "It automatically extracts insights about animal species, veterinary specialties, common conditions, and treatment trends.",
        "**Features:**",
        "- Comprehensive veterinary literature search",
        "- Multi-species analysis (companion animals, livestock, wildlife, exotics)",
        "- Veterinary specialty categorization",
        "- Treatment and drug trend analysis",
        "- Interactive visualizations",
        "- Journal and publication pattern analysis",
        "",
    ])
    tips_md = "\n".join([
        "",
        "### Search Tips:",
        "- **Species**: dog, cat, horse, cattle, pig, bird, fish, reptile, wildlife",
        "- **Specialties**: cardiology, oncology, surgery, dermatology, neurology",
        "- **Conditions**: diabetes, arthritis, cancer, infection, allergies",
        "- **Procedures**: vaccination, surgery, imaging, therapy",
        '- **Combine terms**: "feline diabetes insulin therapy"',
        "",
    ])
    about_md = "\n".join([
        "",
        "### About This Veterinary Literature Mining Agent",
        "This comprehensive tool is designed for veterinary professionals, researchers, and students to efficiently",
        "search and analyze veterinary literature across all animal species and medical specialties.",
        "**Supported Areas:**",
        "- **Companion Animals**: Dogs, cats, rabbits, ferrets, birds, reptiles, fish",
        "- **Large Animals**: Horses, cattle, pigs, sheep, goats",
        "- **Wildlife & Zoo Medicine**: All wild species and conservation medicine",
        "- **Laboratory Animals**: Research and laboratory animal medicine",
        "- **All Veterinary Specialties**: Internal medicine, surgery, oncology, cardiology, dermatology, etc.",
        "**Data Sources:** PubMed/NCBI databases with veterinary focus",
        "**Last Updated:** June 2025",
        "**Coverage:** All aspects of veterinary medicine and animal health",
        "",
    ])

    # Create interface
    with gr.Blocks(title="Veterinary Literature Mining Agent", theme=gr.themes.Soft()) as interface:
        gr.Markdown(intro_md)

        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="Research Query",
                    placeholder="e.g., 'canine diabetes management', 'equine lameness diagnosis', 'feline kidney disease', 'wildlife conservation medicine'",
                    lines=2,
                )
                with gr.Row():
                    max_results = gr.Slider(
                        minimum=10, maximum=100, value=50, step=10,
                        label="Maximum Results",
                    )
                    database_choice = gr.Dropdown(
                        choices=["pubmed"],
                        value="pubmed",
                        label="Database",
                    )
                # Emoji restored from mis-encoded bytes in the original label.
                search_btn = gr.Button("🔍 Search Veterinary Literature", variant="primary", size="lg")
            with gr.Column(scale=1):
                gr.Markdown(tips_md)

        with gr.Tabs():
            with gr.TabItem("📊 Analysis Summary"):
                summary_output = gr.Markdown(label="Veterinary Literature Analysis")
            with gr.TabItem("📋 Research Papers"):
                papers_output = gr.Dataframe(
                    headers=["PMID", "Title", "Authors", "Journal", "Year"],
                    label="Retrieved Veterinary Papers",
                )
            with gr.TabItem("📈 Research Trends"):
                with gr.Row():
                    year_plot = gr.Plot(label="Publication Timeline")
                    species_plot = gr.Plot(label="Animal Species")
                with gr.Row():
                    specialties_plot = gr.Plot(label="Veterinary Specialties")
                    conditions_plot = gr.Plot(label="Common Conditions")

        # Connect the search function with progress indicator
        search_btn.click(
            fn=search_and_analyze_vet,
            inputs=[query_input, max_results, database_choice],
            outputs=[summary_output, papers_output, year_plot, species_plot, specialties_plot, conditions_plot],
            show_progress=True,
        )

        # Add examples
        gr.Examples(
            examples=[
                ["canine diabetes insulin therapy", 40, "pubmed"],
                ["equine lameness diagnosis imaging", 35, "pubmed"],
                ["feline chronic kidney disease treatment", 45, "pubmed"],
                ["bovine mastitis antibiotic resistance", 30, "pubmed"],
                ["avian influenza surveillance wild birds", 35, "pubmed"],
                ["exotic animal anesthesia protocols", 25, "pubmed"],
                ["wildlife conservation medicine", 40, "pubmed"],
                ["small animal oncology chemotherapy", 50, "pubmed"],
            ],
            inputs=[query_input, max_results, database_choice],
        )

        gr.Markdown(about_md)

    return interface
# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    # Listen on all network interfaces, port 7860, and request a share link.
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
    )
    interface = create_veterinary_gradio_interface()
    interface.launch(**launch_options)