import json
import re
import xml.etree.ElementTree as ET
from collections import Counter
from datetime import datetime, timedelta
from operator import itemgetter
from typing import Dict, List, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from transformers import pipeline


class VeterinaryLiteratureMiner:
    """Search PubMed for veterinary literature and derive keyword statistics.

    The class queries the NCBI E-utilities endpoints, parses the returned
    PubMed XML into plain dictionaries, counts vocabulary hits (species,
    specialties, conditions, procedures, drugs) in titles and abstracts, and
    renders those counts as a Markdown summary and Plotly figures.
    """

    # Heuristic for drug-like names: a capitalised word with a common
    # pharmaceutical suffix. It is deliberately loose, hence the exclusions.
    _DRUG_PATTERN = re.compile(
        r'\b[A-Z][a-z]*(?:mab|nib|ine|ole|cin|tin|zole|pril|sartan)\b'
    )

    # Capitalised everyday words that satisfy _DRUG_PATTERN but are not drugs
    # (species adjectives, sentence-initial nouns). Not exhaustive.
    _NON_DRUG_WORDS = frozenset({
        "canine", "feline", "equine", "bovine", "porcine", "ovine", "caprine",
        "swine", "murine", "medicine", "vaccine", "routine", "baseline",
        "urine", "whole", "role", "marine", "machine", "online", "guideline",
    })

    # Drugs that are always looked for, regardless of capitalisation.
    _KNOWN_VET_DRUGS = (
        "prednisolone", "dexamethasone", "amoxicillin", "cephalexin",
        "enrofloxacin", "tramadol", "gabapentin", "furosemide", "enalapril",
        "pimobendan",
    )

    # Specialty -> keywords; a paper counts once per specialty if any keyword hits.
    _SPECIALTY_KEYWORDS = {
        "oncology": ["cancer", "tumor", "oncology", "chemotherapy", "radiation"],
        "cardiology": ["heart", "cardiac", "cardiology", "arrhythmia", "murmur"],
        "dermatology": ["skin", "dermatology", "allergy", "dermatitis", "eczema"],
        "neurology": ["brain", "neurological", "seizure", "epilepsy", "paralysis"],
        "orthopedics": ["bone", "joint", "fracture", "orthopedic", "lameness"],
        "infectious_diseases": ["infection", "bacteria", "virus", "antibiotic", "pathogen"],
        "surgery": ["surgical", "surgery", "operative", "laparoscopy", "endoscopy"],
        "internal_medicine": ["diabetes", "kidney", "liver", "endocrine", "metabolic"],
    }

    def __init__(self):
        """Load the optional NLP pipelines and define the keyword vocabularies."""
        # Model loading is best-effort: without the models the miner still
        # works, it just skips the zero-shot "research_trends" classification.
        try:
            self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        except Exception as e:
            print(f"Warning: Could not load transformers models: {e}")
            self.summarizer = None
            self.classifier = None

        # Candidate labels handed to the zero-shot classifier.
        self.veterinary_categories = [
            "oncology", "cardiology", "dermatology", "neurology", "orthopedics",
            "infectious diseases", "parasitology", "pharmacology", "toxicology",
            "surgery", "anesthesia", "emergency medicine", "internal medicine",
            "pathology", "radiology", "nutrition", "behavior", "reproduction",
            "public health", "zoonoses", "immunology", "genetics", "epidemiology",
        ]

        self.animal_species = [
            "canine", "dog", "dogs", "feline", "cat", "cats", "equine", "horse", "horses",
            "bovine", "cattle", "cow", "cows", "porcine", "pig", "pigs", "swine",
            "ovine", "sheep", "caprine", "goat", "goats", "avian", "bird", "birds",
            "poultry", "chicken", "chickens", "rabbit", "rabbits", "ferret", "ferrets",
            "reptile", "reptiles", "fish", "aquatic", "wildlife", "zoo", "exotic",
            "laboratory animals", "mouse", "mice", "rat", "rats",
        ]

        self.vet_procedures = [
            "vaccination", "spay", "neuter", "castration", "ovariohysterectomy",
            "amputation", "biopsy", "endoscopy", "laparoscopy", "arthroscopy",
            "radiography", "ultrasound", "CT", "MRI", "chemotherapy", "radiation",
            "physical therapy", "rehabilitation", "dental", "ophthalmology",
        ]

        self.vet_conditions = [
            "diabetes", "epilepsy", "heart disease", "kidney disease", "liver disease",
            "arthritis", "hip dysplasia", "allergies", "skin disease", "cancer",
            "tumor", "infection", "parasite", "heartworm", "flea", "tick",
            "obesity", "dental disease", "cataracts", "glaucoma", "IBD",
        ]

    @staticmethod
    def _term_pattern(term: str):
        """Compile a whole-word, plural-tolerant matcher for use on lower-cased text."""
        # Lookarounds instead of a plain substring test so that "cat" does not
        # hit "indicate" and "rat" does not hit "rate". The term is lower-cased
        # so acronyms such as "CT" can match the lower-cased text.
        return re.compile(
            rf"(?<![a-z0-9]){re.escape(term.lower())}(?:es|s)?(?![a-z0-9])"
        )

    @staticmethod
    def _element_text(element, default: str = "N/A") -> str:
        """Return all text inside *element* (including nested markup) or *default*."""
        if element is None:
            return default
        # itertext() also yields text that follows inline tags such as <i>,
        # which a bare ``.text`` silently drops.
        text = "".join(element.itertext()).strip()
        return text or default

    @staticmethod
    def _top_items(counts: Dict, limit: int = None) -> List:
        """Return ``(key, count)`` pairs ordered by descending count (stable for ties)."""
        ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)
        return ranked if limit is None else ranked[:limit]

    def search_veterinary_literature(self, query: str, max_results: int = 50, database: str = "pubmed") -> List[Dict]:
        """Search veterinary literature.

        Args:
            query: Free-text search terms.
            max_results: Maximum number of papers to retrieve.
            database: Requested backend. Only PubMed is implemented, so every
                value is currently served by PubMed.

        Returns:
            A list of paper dicts, or a single-element list holding an
            ``{"error": ...}`` dict on failure.
        """
        return self._search_pubmed(query, max_results)

    def _search_pubmed(self, query: str, max_results: int) -> List[Dict]:
        """Run an ESearch + EFetch round trip against PubMed.

        Returns:
            Parsed paper dicts, or ``[{"error": message}]`` when the request,
            the response format or the XML parsing fails.
        """
        try:
            print(f"Searching PubMed with query: {query}")

            # Bias the user's query towards veterinary / animal-health records.
            enhanced_query = f"({query}) AND (veterinary OR animal OR pet OR livestock OR zoo OR wildlife)"
            print(f"Enhanced query: {enhanced_query}")

            search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
            search_params = {
                "db": "pubmed",
                "term": enhanced_query,
                # UI sliders may deliver floats; retmax must be an integer.
                "retmax": int(max_results),
                "retmode": "json",
                "sort": "relevance",
            }

            print("Making search request...")
            search_response = requests.get(search_url, params=search_params, timeout=30)
            print(f"Search response status: {search_response.status_code}")

            if search_response.status_code != 200:
                return [{"error": f"PubMed search failed with status {search_response.status_code}"}]

            search_data = search_response.json()
            print(f"Search data received: {search_data.get('esearchresult', {}).get('count', 0)} results")

            if "esearchresult" not in search_data:
                return [{"error": "Invalid response from PubMed"}]

            ids = search_data["esearchresult"].get("idlist")
            if not ids:
                return [{"error": "No papers found matching your query"}]

            print(f"Fetching details for {len(ids)} papers...")

            fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
            fetch_params = {
                "db": "pubmed",
                "id": ",".join(ids),
                "retmode": "xml",
            }

            fetch_response = requests.get(fetch_url, params=fetch_params, timeout=60)
            print(f"Fetch response status: {fetch_response.status_code}")

            if fetch_response.status_code != 200:
                return [{"error": f"Failed to fetch paper details: {fetch_response.status_code}"}]

            papers = self._parse_pubmed_xml(fetch_response.text)
            print(f"Parsed {len(papers)} papers successfully")

            return papers

        except requests.exceptions.Timeout:
            return [{"error": "Request timed out. Please try again with fewer results."}]
        except requests.exceptions.ConnectionError:
            return [{"error": "Connection error. Please check your internet connection."}]
        except Exception as e:
            # Boundary handler: the UI expects an error dict, never an exception.
            print(f"Error in _search_pubmed: {str(e)}")
            return [{"error": f"Search failed: {str(e)}"}]

    def _parse_pubmed_xml(self, xml_content: str) -> List[Dict]:
        """Convert an EFetch XML document into a list of paper dicts.

        Each dict has the keys ``pmid``, ``title``, ``abstract``, ``authors``,
        ``journal`` and ``year``; missing values are the string ``"N/A"``
        (``authors`` is an empty string when no named author is present).

        Returns:
            The parsed papers, or ``[{"error": message}]`` if the XML is malformed.
        """
        try:
            root = ET.fromstring(xml_content)
        except ET.ParseError as e:
            return [{"error": f"XML parsing failed: {str(e)}"}]

        papers = []
        for article in root.findall(".//PubmedArticle"):
            paper = {
                "pmid": self._element_text(article.find("MedlineCitation/PMID")),
                "title": self._element_text(article.find(".//ArticleTitle")),
                "journal": self._element_text(article.find(".//Journal/Title")),
            }

            # Structured abstracts are split into several AbstractText
            # sections; keep all of them, not just the first.
            sections = [
                self._element_text(section, default="")
                for section in article.findall(".//Abstract/AbstractText")
            ]
            paper["abstract"] = " ".join(s for s in sections if s) or "N/A"

            authors = []
            for author in article.findall(".//Author"):
                fname = self._element_text(author.find(".//ForeName"), default="")
                lname = self._element_text(author.find(".//LastName"), default="")
                if fname and lname:
                    authors.append(f"{fname} {lname}")
            paper["authors"] = ", ".join(authors[:3]) + ("..." if len(authors) > 3 else "")

            # Some records carry a free-form MedlineDate ("2021 Jan-Feb")
            # instead of a Year element; take its first four-digit number.
            year = self._element_text(article.find(".//PubDate/Year"), default="")
            if not year:
                medline_date = self._element_text(
                    article.find(".//PubDate/MedlineDate"), default=""
                )
                found = re.search(r"\d{4}", medline_date)
                year = found.group(0) if found else "N/A"
            paper["year"] = year

            papers.append(paper)

        return papers

    def analyze_veterinary_papers(self, papers: List[Dict]) -> Dict:
        """Count vocabulary hits across the titles and abstracts of *papers*.

        Matching is case-insensitive, whole-word and tolerant of a trailing
        plural "s"/"es". Every vocabulary term is counted at most once per paper.

        Returns:
            A dict with ``total_papers`` and the per-category count dicts
            ``year_distribution``, ``animal_species``, ``veterinary_specialties``,
            ``common_conditions``, ``procedures_mentioned``, ``drug_mentions``
            (top 15), ``journal_distribution`` and ``research_trends``; or
            ``{"error": ...}`` when there is nothing to analyze.
        """
        if not papers or papers[0].get("error"):
            return {"error": "No papers to analyze"}

        # Compile every matcher once, outside the per-paper loop.
        species_patterns = [
            (name.replace(" ", "_"), self._term_pattern(name))
            for name in self.animal_species
        ]
        specialty_patterns = [
            (specialty, [self._term_pattern(word) for word in words])
            for specialty, words in self._SPECIALTY_KEYWORDS.items()
        ]
        condition_patterns = [(c, self._term_pattern(c)) for c in self.vet_conditions]
        procedure_patterns = [(p, self._term_pattern(p)) for p in self.vet_procedures]
        known_drug_patterns = [
            (drug.title(), self._term_pattern(drug)) for drug in self._KNOWN_VET_DRUGS
        ]
        not_drugs = self._NON_DRUG_WORDS | {s.lower() for s in self.animal_species}

        years, journals = Counter(), Counter()
        species_hits, specialty_hits = Counter(), Counter()
        condition_hits, procedure_hits = Counter(), Counter()
        drug_hits, trend_hits = Counter(), Counter()

        for paper in papers:
            years[paper.get("year") or "Unknown"] += 1
            journals[paper.get("journal") or "Unknown"] += 1

            # ``or ""`` guards against explicit None values in a paper dict.
            raw_abstract = paper.get("abstract") or ""
            abstract = raw_abstract.lower()
            title = (paper.get("title") or "").lower()
            full_text = f"{title} {abstract}"

            for key, pattern in species_patterns:
                if pattern.search(full_text):
                    species_hits[key] += 1

            for specialty, patterns in specialty_patterns:
                if any(pattern.search(full_text) for pattern in patterns):
                    specialty_hits[specialty] += 1

            for condition, pattern in condition_patterns:
                if pattern.search(full_text):
                    condition_hits[condition] += 1

            for procedure, pattern in procedure_patterns:
                if pattern.search(full_text):
                    procedure_hits[procedure] += 1

            # The suffix heuristic needs the original casing; filter out
            # species adjectives and other known false positives.
            mentioned = [
                word for word in self._DRUG_PATTERN.findall(raw_abstract)
                if word.lower() not in not_drugs
            ]
            already_found = set(mentioned)
            for drug_name, pattern in known_drug_patterns:
                # Skip drugs the heuristic already captured so one occurrence
                # is not counted twice.
                if drug_name not in already_found and pattern.search(full_text):
                    mentioned.append(drug_name)
            drug_hits.update(mentioned)

            if self.classifier and abstract and abstract != "n/a":
                try:
                    result = self.classifier(abstract[:512], self.veterinary_categories)
                    trend_hits[result["labels"][0]] += 1
                except Exception as exc:
                    # Best-effort enrichment: report and carry on with the
                    # keyword statistics rather than failing the analysis.
                    print(f"Warning: classification failed for PMID {paper.get('pmid', 'N/A')}: {exc}")

        return {
            "total_papers": len(papers),
            "year_distribution": dict(years),
            "animal_species": dict(species_hits),
            "veterinary_specialties": dict(specialty_hits),
            "common_conditions": dict(condition_hits),
            "procedures_mentioned": dict(procedure_hits),
            "drug_mentions": dict(drug_hits.most_common(15)),
            "journal_distribution": dict(journals),
            "research_trends": dict(trend_hits),
        }

    def generate_veterinary_summary(self, papers: List[Dict], analysis: Dict) -> str:
        """Render the analysis as a Markdown report.

        Args:
            papers: Paper dicts as returned by the search.
            analysis: Result of :meth:`analyze_veterinary_papers`.

        Returns:
            Markdown text, or a short plain message when there is nothing to report.
        """
        if not papers or papers[0].get("error") or analysis.get("error"):
            return "No papers found or error in retrieval."

        # Collect chunks and join once instead of repeated string concatenation.
        parts = [
            "\n"
            "# Veterinary Literature Mining Summary\n"
            "\n"
            "## Overview\n"
            f"- **Total Papers Found**: {analysis['total_papers']}\n"
            f"- **Search Date**: {datetime.now().strftime('%Y-%m-%d')}\n"
            "\n"
            "## Key Insights\n"
            "\n"
            "### Most Studied Animal Species\n"
        ]

        for species, count in self._top_items(analysis.get("animal_species", {}), 8):
            formatted_species = species.replace("_", " ").title()
            parts.append(f"- **{formatted_species}**: {count} papers\n")

        parts.append("\n### Veterinary Specialties Focus\n")
        for specialty, count in self._top_items(analysis.get("veterinary_specialties", {}), 6):
            formatted_specialty = specialty.replace("_", " ").title()
            parts.append(f"- **{formatted_specialty}**: {count} papers\n")

        parts.append("\n### Common Conditions Studied\n")
        for condition, count in self._top_items(analysis.get("common_conditions", {}), 8):
            parts.append(f"- **{condition.title()}**: {count} papers\n")

        parts.append("\n### Frequently Mentioned Treatments/Drugs\n")
        # drug_mentions is already ordered by frequency by the analysis step.
        for drug, count in list(analysis.get("drug_mentions", {}).items())[:8]:
            parts.append(f"- **{drug}**: {count} mentions\n")

        parts.append("\n### Top Veterinary Journals\n")
        for journal, count in self._top_items(analysis.get("journal_distribution", {}), 5):
            parts.append(f"- **{journal}**: {count} papers\n")

        parts.append("\n### Recent Research Highlights\n")
        # "Recent" means published within the last three calendar years.
        cutoff_year = datetime.now().year - 3
        recent_papers = [
            p for p in papers
            if str(p.get("year") or "").isdigit() and int(p["year"]) >= cutoff_year
        ]
        for paper in recent_papers[:4]:
            parts.append(f"- **{paper.get('title') or 'N/A'}** ({paper.get('year') or 'N/A'})\n")
            parts.append(f" *{paper.get('journal') or 'N/A'}*\n\n")

        return "".join(parts)

    def create_veterinary_visualizations(self, analysis: Dict):
        """Build Plotly figures from the analysis counts.

        Returns:
            A dict that may contain the keys ``year_dist``, ``animal_species``,
            ``specialties`` and ``conditions``; a key is omitted when the
            corresponding data is empty.
        """
        plots = {}

        # Publication timeline: numeric years only, in chronological order so
        # the line does not zig-zag.
        year_counts = analysis.get("year_distribution") or {}
        years = sorted((y for y in year_counts if str(y).isdigit()), key=int)
        if years:
            fig_year = px.line(
                x=years, y=[year_counts[y] for y in years],
                title="Veterinary Research Publications Over Time",
                labels={"x": "Year", "y": "Number of Papers"},
                markers=True,
            )
            fig_year.update_layout(showlegend=False)
            plots["year_dist"] = fig_year

        # Species: the 12 most frequent, not merely the first 12 encountered.
        top_species = self._top_items(analysis.get("animal_species") or {}, 12)
        if top_species:
            fig_species = px.bar(
                x=[count for _, count in top_species],
                y=[name.replace("_", " ").title() for name, _ in top_species],
                orientation='h',
                title="Most Studied Animal Species",
                labels={"x": "Number of Papers", "y": "Species"},
            )
            plots["animal_species"] = fig_species

        specialty_counts = analysis.get("veterinary_specialties") or {}
        if specialty_counts:
            fig_specialties = px.pie(
                values=list(specialty_counts.values()),
                names=[s.replace("_", " ").title() for s in specialty_counts],
                title="Veterinary Specialty Distribution",
            )
            plots["specialties"] = fig_specialties

        # Conditions: the 10 most frequent.
        top_conditions = self._top_items(analysis.get("common_conditions") or {}, 10)
        if top_conditions:
            fig_conditions = px.bar(
                x=[name.title() for name, _ in top_conditions],
                y=[count for _, count in top_conditions],
                title="Most Commonly Studied Conditions",
                labels={"x": "Condition", "y": "Number of Papers"},
            )
            fig_conditions.update_xaxes(tickangle=45)
            plots["conditions"] = fig_conditions

        return plots


def create_veterinary_gradio_interface():
    """Build the Gradio Blocks UI for veterinary literature mining.

    Creates one ``VeterinaryLiteratureMiner`` and wires a search form to a
    callback that searches, analyzes, summarises and plots the results.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """
    miner = VeterinaryLiteratureMiner()

    def failure(message):
        """Return *message* for the summary pane and clear the five other outputs."""
        return message, None, None, None, None, None

    def shorten(title, limit=100):
        """Truncate long titles for the results table; tolerate missing titles."""
        title = title or "N/A"
        return title[:limit] + "..." if len(title) > limit else title

    def search_and_analyze_vet(query, max_results, database):
        """Search, analyze and visualise; always returns six output values."""
        try:
            print(f"Starting search with query: {query}")

            if not query or not query.strip():
                return failure("Please enter a search query.")

            print("Searching papers...")
            # Slider values may arrive as floats; the search expects an int.
            papers = miner.search_veterinary_literature(query, int(max_results), database)
            print(f"Found {len(papers) if papers else 0} papers")

            if not papers:
                return failure("No papers found. Try a different search query.")

            if papers[0].get("error"):
                error_msg = papers[0].get("error", "Unknown error occurred")
                return failure(f"Search Error: {error_msg}")

            print("Analyzing papers...")
            analysis = miner.analyze_veterinary_papers(papers)

            if analysis.get("error"):
                return failure(f"Analysis Error: {analysis['error']}")

            print("Generating summary...")
            summary = miner.generate_veterinary_summary(papers, analysis)

            print("Creating visualizations...")
            plots = miner.create_veterinary_visualizations(analysis)

            print("Creating dataframe...")
            papers_df = pd.DataFrame(
                [
                    {
                        "PMID": p.get("pmid", "N/A"),
                        "Title": shorten(p.get("title")),
                        "Authors": p.get("authors", "N/A"),
                        "Journal": p.get("journal", "N/A"),
                        "Year": p.get("year", "N/A"),
                    }
                    for p in papers
                ],
                columns=["PMID", "Title", "Authors", "Journal", "Year"],
            )

            print("Search and analysis complete!")
            return (
                summary,
                papers_df,
                plots.get("year_dist"),
                plots.get("animal_species"),
                plots.get("specialties"),
                plots.get("conditions"),
            )

        except Exception as e:
            # UI boundary: surface the failure in the summary pane instead of
            # letting the callback raise.
            error_message = f"Unexpected error: {str(e)}"
            print(f"Error in search_and_analyze_vet: {error_message}")
            return failure(error_message)

    # Emoji are written as escapes because the literals in this file had been
    # corrupted by a wrong-encoding round trip. U+1F43E (paw prints) is
    # recovered from the surviving bytes; the other glyphs are inferred from
    # context -- NOTE(review): confirm against the original design.
    with gr.Blocks(title="Veterinary Literature Mining Agent", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # \U0001F43E Veterinary Literature Mining Agent

        This AI agent searches and analyzes veterinary and animal health literature across all specialties.
        It automatically extracts insights about animal species, veterinary specialties, common conditions, and treatment trends.

        **Features:**
        - Comprehensive veterinary literature search
        - Multi-species analysis (companion animals, livestock, wildlife, exotics)
        - Veterinary specialty categorization
        - Treatment and drug trend analysis
        - Interactive visualizations
        - Journal and publication pattern analysis
        """)

        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="Research Query",
                    placeholder="e.g., 'canine diabetes management', 'equine lameness diagnosis', 'feline kidney disease', 'wildlife conservation medicine'",
                    lines=2,
                )
                with gr.Row():
                    max_results = gr.Slider(
                        minimum=10, maximum=100, value=50, step=10,
                        label="Maximum Results",
                    )
                    database_choice = gr.Dropdown(
                        choices=["pubmed"],
                        value="pubmed",
                        label="Database",
                    )
                search_btn = gr.Button("\U0001F50D Search Veterinary Literature", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### Search Tips:
                - **Species**: dog, cat, horse, cattle, pig, bird, fish, reptile, wildlife
                - **Specialties**: cardiology, oncology, surgery, dermatology, neurology
                - **Conditions**: diabetes, arthritis, cancer, infection, allergies
                - **Procedures**: vaccination, surgery, imaging, therapy
                - **Combine terms**: "feline diabetes insulin therapy"
                """)

        with gr.Tabs():
            with gr.TabItem("\U0001F4CA Analysis Summary"):
                summary_output = gr.Markdown(label="Veterinary Literature Analysis")

            with gr.TabItem("\U0001F4DA Research Papers"):
                papers_output = gr.Dataframe(
                    headers=["PMID", "Title", "Authors", "Journal", "Year"],
                    label="Retrieved Veterinary Papers",
                )

            with gr.TabItem("\U0001F4C8 Research Trends"):
                with gr.Row():
                    year_plot = gr.Plot(label="Publication Timeline")
                    species_plot = gr.Plot(label="Animal Species")
                with gr.Row():
                    specialties_plot = gr.Plot(label="Veterinary Specialties")
                    conditions_plot = gr.Plot(label="Common Conditions")

        # The output order must match the six-tuple returned by the callback.
        search_btn.click(
            fn=search_and_analyze_vet,
            inputs=[query_input, max_results, database_choice],
            outputs=[summary_output, papers_output, year_plot, species_plot, specialties_plot, conditions_plot],
            show_progress=True,
        )

        gr.Examples(
            examples=[
                ["canine diabetes insulin therapy", 40, "pubmed"],
                ["equine lameness diagnosis imaging", 35, "pubmed"],
                ["feline chronic kidney disease treatment", 45, "pubmed"],
                ["bovine mastitis antibiotic resistance", 30, "pubmed"],
                ["avian influenza surveillance wild birds", 35, "pubmed"],
                ["exotic animal anesthesia protocols", 25, "pubmed"],
                ["wildlife conservation medicine", 40, "pubmed"],
                ["small animal oncology chemotherapy", 50, "pubmed"],
            ],
            inputs=[query_input, max_results, database_choice],
        )

        gr.Markdown("""
        ### About This Veterinary Literature Mining Agent

        This comprehensive tool is designed for veterinary professionals, researchers, and students to efficiently
        search and analyze veterinary literature across all animal species and medical specialties.

        **Supported Areas:**
        - **Companion Animals**: Dogs, cats, rabbits, ferrets, birds, reptiles, fish
        - **Large Animals**: Horses, cattle, pigs, sheep, goats
        - **Wildlife & Zoo Medicine**: All wild species and conservation medicine
        - **Laboratory Animals**: Research and laboratory animal medicine
        - **All Veterinary Specialties**: Internal medicine, surgery, oncology, cardiology, dermatology, etc.

        **Data Sources:** PubMed/NCBI databases with veterinary focus
        **Last Updated:** June 2025
        **Coverage:** All aspects of veterinary medicine and animal health
        """)

    return interface


if __name__ == "__main__":
    # Script entry point: build the UI and serve it on port 7860.
    interface = create_veterinary_gradio_interface()
    # NOTE(review): server_name="0.0.0.0" listens on every network interface
    # and share=True additionally requests a public Gradio share link, so the
    # app is reachable from outside this machine. Confirm that is intended.
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
    )