"""Gradio app that searches PubMed for veterinary literature and summarizes it.

The module exposes :class:`VeterinaryLiteratureMiner`, which queries the NCBI
E-utilities API, parses the returned XML, derives keyword-based statistics
and builds Plotly figures, plus :func:`create_veterinary_gradio_interface`,
which wires the miner into a Gradio Blocks UI.
"""

import json
import re
import xml.etree.ElementTree as ET
from collections import Counter
from datetime import datetime, timedelta
from typing import Dict, Iterable, List, Optional, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from transformers import pipeline

# Capitalized words ending in a typical drug-name suffix.
_DRUG_SUFFIX_RE = re.compile(
    r"\b[A-Z][a-z]*(?:mab|nib|ine|ole|cin|tin|zole|pril|sartan)\b"
)

# Frequent words that satisfy _DRUG_SUFFIX_RE but are not drugs. Without this
# list sentence-initial "Canine", "Feline", "Medicine", ... dominate the
# drug statistics.
_NON_DRUG_WORDS = frozenset({
    "canine", "feline", "equine", "bovine", "porcine", "ovine", "caprine",
    "murine", "medicine", "vaccine", "routine", "baseline", "urine",
    "creatinine", "intestine", "spine", "marine", "machine", "guideline",
    "cytokine", "chemokine", "decline", "online", "outline", "examine",
    "determine", "combine", "define", "protein", "role", "whole", "sole",
    "line", "nine", "fine",
})

# Drugs that are always looked for, independent of capitalization.
_COMMON_VET_DRUGS = (
    "prednisolone", "dexamethasone", "amoxicillin", "cephalexin",
    "enrofloxacin", "tramadol", "gabapentin", "furosemide", "enalapril",
    "pimobendan",
)

# Keywords whose presence assigns a paper to a veterinary specialty.
_SPECIALTY_KEYWORDS = {
    "oncology": ["cancer", "tumor", "oncology", "chemotherapy", "radiation"],
    "cardiology": ["heart", "cardiac", "cardiology", "arrhythmia", "murmur"],
    "dermatology": ["skin", "dermatology", "allergy", "dermatitis", "eczema"],
    "neurology": ["brain", "neurological", "seizure", "epilepsy", "paralysis"],
    "orthopedics": ["bone", "joint", "fracture", "orthopedic", "lameness"],
    "infectious_diseases": ["infection", "bacteria", "virus", "antibiotic", "pathogen"],
    "surgery": ["surgical", "surgery", "operative", "laparoscopy", "endoscopy"],
    "internal_medicine": ["diabetes", "kidney", "liver", "endocrine", "metabolic"],
}

_YEAR_RE = re.compile(r"\b(\d{4})\b")


def _element_text(elem: Optional[ET.Element]) -> str:
    """Return the whitespace-normalized text of *elem* including nested markup.

    ``elem.text`` alone is ``None`` when the element starts with a child tag
    such as ``<i>``; ``itertext`` collects the text of the whole subtree.
    A missing element yields an empty string.
    """
    if elem is None:
        return ""
    return " ".join("".join(elem.itertext()).split())


def _compile_term_patterns(terms: Iterable[str]) -> Dict[str, "re.Pattern"]:
    """Map each term to a case-insensitive whole-word pattern.

    An optional plural suffix is accepted so that "tumor" still matches
    "tumors", while "cat" no longer matches inside "indicate".
    """
    return {
        term: re.compile(rf"\b{re.escape(term)}(?:e?s)?\b", re.IGNORECASE)
        for term in terms
    }


def _compile_prefix_pattern(keywords: Iterable[str]) -> "re.Pattern":
    """Compile one pattern matching any keyword at the start of a word."""
    alternatives = "|".join(re.escape(keyword) for keyword in keywords)
    return re.compile(rf"\b(?:{alternatives})", re.IGNORECASE)


def _bump(counts: Dict, key) -> None:
    """Increment ``counts[key]``, starting from zero for a new key."""
    counts[key] = counts.get(key, 0) + 1


def _top_items(counts: Dict, limit: int) -> List[Tuple]:
    """Return the *limit* highest-count ``(key, count)`` pairs, largest first."""
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]


def _extract_drug_mentions(abstract: str, full_text_lower: str) -> List[str]:
    """Return the distinct drug names mentioned in one paper, sorted.

    Combines suffix-based detection on the original-case abstract with a
    fixed list of common veterinary drugs looked up in the lower-cased text.
    Each drug is reported once per paper so that a name caught by both
    mechanisms is not counted twice.
    """
    found = {
        match
        for match in _DRUG_SUFFIX_RE.findall(abstract)
        if match.lower() not in _NON_DRUG_WORDS
    }
    found.update(
        drug.title() for drug in _COMMON_VET_DRUGS if drug in full_text_lower
    )
    # Sorted so that ties in the final Counter are ordered deterministically.
    return sorted(found)


class VeterinaryLiteratureMiner:
    """Search PubMed for veterinary papers and derive summary statistics."""

    def __init__(self):
        """Load the optional NLP pipelines and define the keyword vocabularies.

        If the transformers models cannot be loaded, a warning is printed and
        ``summarizer`` / ``classifier`` are set to ``None``; the rest of the
        class works without them.
        """
        # Initialize NLP pipelines
        try:
            self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        except Exception as e:
            print(f"Warning: Could not load transformers models: {e}")
            self.summarizer = None
            self.classifier = None

        # Veterinary research categories for classification
        self.veterinary_categories = [
            "oncology", "cardiology", "dermatology", "neurology", "orthopedics",
            "infectious diseases", "parasitology", "pharmacology", "toxicology",
            "surgery", "anesthesia", "emergency medicine", "internal medicine",
            "pathology", "radiology", "nutrition", "behavior", "reproduction",
            "public health", "zoonoses", "immunology", "genetics", "epidemiology"
        ]

        # Animal species categories
        self.animal_species = [
            "canine", "dog", "dogs", "feline", "cat", "cats", "equine", "horse", "horses",
            "bovine", "cattle", "cow", "cows", "porcine", "pig", "pigs", "swine",
            "ovine", "sheep", "caprine", "goat", "goats", "avian", "bird", "birds",
            "poultry", "chicken", "chickens", "rabbit", "rabbits", "ferret", "ferrets",
            "reptile", "reptiles", "fish", "aquatic", "wildlife", "zoo", "exotic",
            "laboratory animals", "mouse", "mice", "rat", "rats"
        ]

        # Veterinary specialties and procedures
        self.vet_procedures = [
            "vaccination", "spay", "neuter", "castration", "ovariohysterectomy",
            "amputation", "biopsy", "endoscopy", "laparoscopy", "arthroscopy",
            "radiography", "ultrasound", "CT", "MRI", "chemotherapy", "radiation",
            "physical therapy", "rehabilitation", "dental", "ophthalmology"
        ]

        # Common veterinary conditions
        self.vet_conditions = [
            "diabetes", "epilepsy", "heart disease", "kidney disease", "liver disease",
            "arthritis", "hip dysplasia", "allergies", "skin disease", "cancer",
            "tumor", "infection", "parasite", "heartworm", "flea", "tick",
            "obesity", "dental disease", "cataracts", "glaucoma", "IBD"
        ]

    def search_veterinary_literature(self, query: str, max_results: int = 50,
                                     database: str = "pubmed") -> List[Dict]:
        """Search veterinary literature across multiple databases.

        Only PubMed is implemented; any other ``database`` value currently
        falls back to PubMed as well.

        Returns:
            A list of paper dicts, or a one-element list ``[{"error": msg}]``
            when the search failed or found nothing.
        """
        if database == "pubmed":
            return self._search_pubmed(query, max_results)
        else:
            # Future: Could add other veterinary databases here
            return self._search_pubmed(query, max_results)

    def _search_pubmed(self, query: str, max_results: int) -> List[Dict]:
        """Search PubMed for veterinary papers.

        Runs an ESearch request for the query (restricted to animal-related
        terms) and then an EFetch request for the returned IDs.

        Returns:
            Parsed paper dicts, or ``[{"error": msg}]`` on any failure. This
            method does not raise.
        """
        try:
            # UI sliders may deliver a float; E-utilities expects an integer.
            retmax = max(1, int(max_results))

            print(f"Searching PubMed with query: {query}")  # Debug print

            # Enhance query with veterinary terms
            enhanced_query = f"({query}) AND (veterinary OR animal OR pet OR livestock OR zoo OR wildlife)"
            print(f"Enhanced query: {enhanced_query}")  # Debug print

            # Search PubMed
            search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
            search_params = {
                "db": "pubmed",
                "term": enhanced_query,
                "retmax": retmax,
                "retmode": "json",
                "sort": "relevance"
            }

            print("Making search request...")  # Debug print
            search_response = requests.get(search_url, params=search_params, timeout=30)
            print(f"Search response status: {search_response.status_code}")  # Debug print

            if search_response.status_code != 200:
                return [{"error": f"PubMed search failed with status {search_response.status_code}"}]

            search_data = search_response.json()
            print(f"Search data received: {search_data.get('esearchresult', {}).get('count', 0)} results")  # Debug print

            if "esearchresult" not in search_data:
                return [{"error": "Invalid response from PubMed"}]

            ids = search_data["esearchresult"].get("idlist")
            if not ids:
                return [{"error": "No papers found matching your query"}]

            # Get detailed information
            print(f"Fetching details for {len(ids)} papers...")  # Debug print

            fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
            fetch_params = {
                "db": "pubmed",
                "id": ",".join(ids),
                "retmode": "xml"
            }

            fetch_response = requests.get(fetch_url, params=fetch_params, timeout=60)
            print(f"Fetch response status: {fetch_response.status_code}")  # Debug print

            if fetch_response.status_code != 200:
                return [{"error": f"Failed to fetch paper details: {fetch_response.status_code}"}]

            # Parse XML response
            papers = self._parse_pubmed_xml(fetch_response.text)
            print(f"Parsed {len(papers)} papers successfully")  # Debug print
            return papers

        except requests.exceptions.Timeout:
            return [{"error": "Request timed out. Please try again with fewer results."}]
        except requests.exceptions.ConnectionError:
            return [{"error": "Connection error. Please check your internet connection."}]
        except Exception as e:
            # Boundary handler: the UI expects an error dict, never an exception.
            print(f"Error in _search_pubmed: {str(e)}")  # Debug print
            return [{"error": f"Search failed: {str(e)}"}]

    def _parse_pubmed_xml(self, xml_content: str) -> List[Dict]:
        """Parse a PubMed EFetch XML response.

        Returns:
            One dict per ``PubmedArticle`` with the keys ``pmid``, ``title``,
            ``abstract``, ``authors``, ``journal`` and ``year`` (all strings;
            ``"N/A"`` marks a missing value, ``authors`` may be empty), or
            ``[{"error": msg}]`` when the document cannot be parsed.
        """
        try:
            root = ET.fromstring(xml_content)
        except (ET.ParseError, TypeError, ValueError) as e:
            return [{"error": f"XML parsing failed: {str(e)}"}]

        return [
            self._parse_pubmed_article(article)
            for article in root.findall(".//PubmedArticle")
        ]

    @staticmethod
    def _parse_pubmed_article(article: ET.Element) -> Dict:
        """Extract the fields of a single ``PubmedArticle`` element."""
        paper = {}

        medline = article.find(".//MedlineCitation")
        pmid = medline.find(".//PMID") if medline is not None else None
        paper["pmid"] = _element_text(pmid) or "N/A"

        paper["title"] = _element_text(article.find(".//ArticleTitle")) or "N/A"

        # Structured abstracts consist of several AbstractText sections.
        sections = [
            _element_text(node)
            for node in article.findall(".//Abstract/AbstractText")
        ]
        paper["abstract"] = " ".join(s for s in sections if s) or "N/A"

        authors = []
        for author in article.findall(".//Author"):
            fore = _element_text(author.find(".//ForeName"))
            last = _element_text(author.find(".//LastName"))
            name = " ".join(part for part in (fore, last) if part)
            # Consortia and working groups only carry a CollectiveName.
            name = name or _element_text(author.find(".//CollectiveName"))
            if name:
                authors.append(name)
        paper["authors"] = ", ".join(authors[:3]) + ("..." if len(authors) > 3 else "")

        paper["journal"] = _element_text(article.find(".//Journal/Title")) or "N/A"

        year = _element_text(article.find(".//PubDate/Year"))
        if not year:
            # Some records only provide a free-text MedlineDate ("2019 Jan-Feb").
            medline_date = _element_text(article.find(".//PubDate/MedlineDate"))
            match = _YEAR_RE.search(medline_date)
            year = match.group(1) if match else ""
        paper["year"] = year or "N/A"

        return paper

    def analyze_veterinary_papers(self, papers: List[Dict]) -> Dict:
        """Analyze the retrieved veterinary papers for insights.

        All keyword statistics are counted once per paper. Keyword matching
        is case-insensitive and anchored on word boundaries.

        Returns:
            A dict with the keys ``total_papers``, ``year_distribution``,
            ``animal_species``, ``veterinary_specialties``,
            ``common_conditions``, ``procedures_mentioned``, ``drug_mentions``
            (the 15 most frequent drugs mapped to the number of papers
            mentioning them), ``journal_distribution`` and
            ``research_trends``; or ``{"error": msg}`` when there is nothing
            to analyze.
        """
        if not papers or papers[0].get("error"):
            return {"error": "No papers to analyze"}

        analysis = {
            "total_papers": len(papers),
            "year_distribution": {},
            "animal_species": {},
            "veterinary_specialties": {},
            "common_conditions": {},
            "procedures_mentioned": {},
            "drug_mentions": [],
            "journal_distribution": {},
            "research_trends": {}
        }

        # Compile once per call instead of once per paper and term.
        species_patterns = _compile_term_patterns(self.animal_species)
        condition_patterns = _compile_term_patterns(self.vet_conditions)
        procedure_patterns = _compile_term_patterns(self.vet_procedures)
        specialty_patterns = {
            specialty: _compile_prefix_pattern(keywords)
            for specialty, keywords in _SPECIALTY_KEYWORDS.items()
        }

        for paper in papers:
            _bump(analysis["year_distribution"], paper.get("year") or "Unknown")
            _bump(analysis["journal_distribution"], paper.get("journal") or "Unknown")

            raw_abstract = str(paper.get("abstract") or "")
            abstract = raw_abstract.lower()
            title = str(paper.get("title") or "").lower()
            full_text = f"{title} {abstract}"

            # Animal species detection
            for species, pattern in species_patterns.items():
                if pattern.search(full_text):
                    _bump(analysis["animal_species"], species.replace(" ", "_"))

            # Veterinary specialty detection
            for specialty, pattern in specialty_patterns.items():
                if pattern.search(full_text):
                    _bump(analysis["veterinary_specialties"], specialty)

            # Common conditions detection
            for condition, pattern in condition_patterns.items():
                if pattern.search(full_text):
                    _bump(analysis["common_conditions"], condition)

            # Procedures detection
            for procedure, pattern in procedure_patterns.items():
                if pattern.search(full_text):
                    _bump(analysis["procedures_mentioned"], procedure)

            # Drug mentions (veterinary drugs and general pharmaceuticals)
            analysis["drug_mentions"].extend(
                _extract_drug_mentions(raw_abstract, full_text)
            )

            # Classify research category if classifier is available
            if self.classifier and abstract and abstract != "n/a":
                try:
                    result = self.classifier(raw_abstract[:512], self.veterinary_categories)
                    _bump(analysis["research_trends"], result["labels"][0])
                except Exception as e:
                    # Classification is best-effort; report and keep going.
                    print(f"Warning: classification failed for PMID {paper.get('pmid', 'N/A')}: {e}")

        # Process drug mentions
        drug_counter = Counter(analysis["drug_mentions"])
        analysis["drug_mentions"] = dict(drug_counter.most_common(15))

        return analysis

    def generate_veterinary_summary(self, papers: List[Dict], analysis: Dict) -> str:
        """Generate a comprehensive Markdown summary of the findings.

        Args:
            papers: Paper dicts as returned by the search.
            analysis: Result of :meth:`analyze_veterinary_papers`.

        Returns:
            Markdown text, or a short notice when ``papers`` is empty or
            carries an error entry.
        """
        if not papers or papers[0].get("error"):
            return "No papers found or error in retrieval."

        # Built line by line so that no source indentation leaks into the
        # Markdown (indented lines would render as code blocks).
        lines = [
            "",
            "# Veterinary Literature Mining Summary",
            "",
            "## Overview",
            f"- **Total Papers Found**: {analysis['total_papers']}",
            f"- **Search Date**: {datetime.now().strftime('%Y-%m-%d')}",
            "",
            "## Key Insights",
            "",
            "### Most Studied Animal Species",
        ]

        # Top animal species
        for species, count in _top_items(analysis["animal_species"], 8):
            formatted_species = species.replace("_", " ").title()
            lines.append(f"- **{formatted_species}**: {count} papers")

        lines += ["", "### Veterinary Specialties Focus"]
        for specialty, count in _top_items(analysis["veterinary_specialties"], 6):
            formatted_specialty = specialty.replace("_", " ").title()
            lines.append(f"- **{formatted_specialty}**: {count} papers")

        lines += ["", "### Common Conditions Studied"]
        for condition, count in _top_items(analysis["common_conditions"], 8):
            lines.append(f"- **{condition.title()}**: {count} papers")

        lines += ["", "### Frequently Mentioned Treatments/Drugs"]
        # drug_mentions is already ordered by frequency.
        for drug, count in list(analysis["drug_mentions"].items())[:8]:
            lines.append(f"- **{drug}**: {count} mentions")

        lines += ["", "### Top Veterinary Journals"]
        for journal, count in _top_items(analysis["journal_distribution"], 5):
            lines.append(f"- **{journal}**: {count} papers")

        lines += ["", "### Recent Research Highlights"]

        # Recent papers (last 3 years)
        current_year = datetime.now().year
        recent_papers = [
            p for p in papers
            if str(p.get("year") or "").isdigit()
            and int(p["year"]) >= current_year - 3
        ]
        for paper in recent_papers[:4]:
            lines.append(f"- **{paper.get('title', 'N/A')}** ({paper.get('year', 'N/A')})")
            lines.append(f"  *{paper.get('journal', 'N/A')}*")
            lines.append("")

        return "\n".join(lines) + "\n"

    def create_veterinary_visualizations(self, analysis: Dict):
        """Create visualization plots for veterinary data.

        Returns:
            A dict that may contain the Plotly figures ``year_dist``,
            ``animal_species``, ``specialties`` and ``conditions``. A key is
            omitted when there is no data for that figure.
        """
        plots = {}

        # Year distribution, in chronological order so the line does not
        # zig-zag in retrieval order.
        year_counts = {
            year: count
            for year, count in analysis["year_distribution"].items()
            if str(year).isdigit()
        }
        if year_counts:
            years = sorted(year_counts, key=int)
            counts = [year_counts[y] for y in years]
            fig_year = px.line(
                x=years, y=counts,
                title="Veterinary Research Publications Over Time",
                labels={"x": "Year", "y": "Number of Papers"},
                markers=True
            )
            fig_year.update_layout(showlegend=False)
            plots["year_dist"] = fig_year

        # Animal species: the 12 most frequent, not the first 12 encountered.
        if analysis["animal_species"]:
            top_species = _top_items(analysis["animal_species"], 12)
            formatted_species = [s.replace("_", " ").title() for s, _ in top_species]
            species_counts = [count for _, count in top_species]
            fig_species = px.bar(
                x=species_counts, y=formatted_species,
                orientation='h',
                title="Most Studied Animal Species",
                labels={"x": "Number of Papers", "y": "Species"}
            )
            # Largest bar at the top of the horizontal chart.
            fig_species.update_layout(yaxis={"categoryorder": "total ascending"})
            plots["animal_species"] = fig_species

        # Veterinary specialties
        if analysis["veterinary_specialties"]:
            specialties = list(analysis["veterinary_specialties"].keys())
            spec_counts = list(analysis["veterinary_specialties"].values())
            formatted_specialties = [s.replace("_", " ").title() for s in specialties]
            fig_specialties = px.pie(
                values=spec_counts, names=formatted_specialties,
                title="Veterinary Specialty Distribution"
            )
            plots["specialties"] = fig_specialties

        # Common conditions: the 10 most frequent.
        if analysis["common_conditions"]:
            top_conditions = _top_items(analysis["common_conditions"], 10)
            fig_conditions = px.bar(
                x=[c.title() for c, _ in top_conditions],
                y=[count for _, count in top_conditions],
                title="Most Commonly Studied Conditions",
                labels={"x": "Condition", "y": "Number of Papers"}
            )
            fig_conditions.update_xaxes(tickangle=45)
            plots["conditions"] = fig_conditions

        return plots


def create_veterinary_gradio_interface():
    """Create the Gradio interface for veterinary literature mining.

    Instantiates a :class:`VeterinaryLiteratureMiner` and returns a
    ``gr.Blocks`` app whose search button runs search, analysis, summary and
    plotting in sequence.
    """
    miner = VeterinaryLiteratureMiner()

    def _failure(message):
        """Return the six-output tuple for a run that produced only a message."""
        return message, None, None, None, None, None

    def search_and_analyze_vet(query, max_results, database):
        """Main function to search and analyze veterinary literature.

        Returns a tuple ``(summary, papers_df, year_plot, species_plot,
        specialties_plot, conditions_plot)``; on failure the first element is
        the message and the rest are ``None``.
        """
        try:
            print(f"Starting search with query: {query}")  # Debug print

            if not query or not query.strip():
                return _failure("Please enter a search query.")

            # Search papers
            print("Searching papers...")  # Debug print
            papers = miner.search_veterinary_literature(query, int(max_results), database)
            print(f"Found {len(papers) if papers else 0} papers")  # Debug print

            if not papers:
                return _failure("No papers found. Try a different search query.")

            if papers[0].get("error"):
                error_msg = papers[0].get("error", "Unknown error occurred")
                return _failure(f"Search Error: {error_msg}")

            # Analyze papers
            print("Analyzing papers...")  # Debug print
            analysis = miner.analyze_veterinary_papers(papers)

            if analysis.get("error"):
                return _failure(f"Analysis Error: {analysis['error']}")

            # Generate summary
            print("Generating summary...")  # Debug print
            summary = miner.generate_veterinary_summary(papers, analysis)

            # Create visualizations
            print("Creating visualizations...")  # Debug print
            plots = miner.create_veterinary_visualizations(analysis)

            # Create papers dataframe
            print("Creating dataframe...")  # Debug print
            rows = []
            for p in papers:
                title = str(p.get("title") or "N/A")
                if len(title) > 100:
                    title = title[:100] + "..."
                rows.append({
                    "PMID": p.get("pmid", "N/A"),
                    "Title": title,
                    "Authors": p.get("authors", "N/A"),
                    "Journal": p.get("journal", "N/A"),
                    "Year": p.get("year", "N/A")
                })
            papers_df = pd.DataFrame(
                rows, columns=["PMID", "Title", "Authors", "Journal", "Year"]
            )

            print("Search and analysis complete!")  # Debug print

            return (
                summary,
                papers_df,
                plots.get("year_dist"),
                plots.get("animal_species"),
                plots.get("specialties"),
                plots.get("conditions")
            )

        except Exception as e:
            # UI boundary: show the problem to the user instead of crashing.
            error_message = f"Unexpected error: {str(e)}"
            print(f"Error in search_and_analyze_vet: {error_message}")  # Debug print
            return _failure(error_message)

    # Create interface
    with gr.Blocks(title="Veterinary Literature Mining Agent", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🐾 Veterinary Literature Mining Agent

        This AI agent searches and analyzes veterinary and animal health literature across all specialties.
        It automatically extracts insights about animal species, veterinary specialties, common conditions, and treatment trends.

        **Features:**
        - Comprehensive veterinary literature search
        - Multi-species analysis (companion animals, livestock, wildlife, exotics)
        - Veterinary specialty categorization
        - Treatment and drug trend analysis
        - Interactive visualizations
        - Journal and publication pattern analysis
        """)

        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="Research Query",
                    placeholder="e.g., 'canine diabetes management', 'equine lameness diagnosis', 'feline kidney disease', 'wildlife conservation medicine'",
                    lines=2
                )

                with gr.Row():
                    max_results = gr.Slider(
                        minimum=10, maximum=100, value=50, step=10,
                        label="Maximum Results"
                    )
                    database_choice = gr.Dropdown(
                        choices=["pubmed"],
                        value="pubmed",
                        label="Database"
                    )

                search_btn = gr.Button("🔍 Search Veterinary Literature", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### Search Tips:
                - **Species**: dog, cat, horse, cattle, pig, bird, fish, reptile, wildlife
                - **Specialties**: cardiology, oncology, surgery, dermatology, neurology
                - **Conditions**: diabetes, arthritis, cancer, infection, allergies
                - **Procedures**: vaccination, surgery, imaging, therapy
                - **Combine terms**: "feline diabetes insulin therapy"
                """)

        with gr.Tabs():
            with gr.TabItem("📊 Analysis Summary"):
                summary_output = gr.Markdown(label="Veterinary Literature Analysis")

            with gr.TabItem("📋 Research Papers"):
                papers_output = gr.Dataframe(
                    headers=["PMID", "Title", "Authors", "Journal", "Year"],
                    label="Retrieved Veterinary Papers"
                )

            with gr.TabItem("📈 Research Trends"):
                with gr.Row():
                    year_plot = gr.Plot(label="Publication Timeline")
                    species_plot = gr.Plot(label="Animal Species")
                with gr.Row():
                    specialties_plot = gr.Plot(label="Veterinary Specialties")
                    conditions_plot = gr.Plot(label="Common Conditions")

        # Connect the search function with progress indicator
        search_btn.click(
            fn=search_and_analyze_vet,
            inputs=[query_input, max_results, database_choice],
            outputs=[summary_output, papers_output, year_plot, species_plot, specialties_plot, conditions_plot],
            show_progress=True
        )

        # Add examples
        gr.Examples(
            examples=[
                ["canine diabetes insulin therapy", 40, "pubmed"],
                ["equine lameness diagnosis imaging", 35, "pubmed"],
                ["feline chronic kidney disease treatment", 45, "pubmed"],
                ["bovine mastitis antibiotic resistance", 30, "pubmed"],
                ["avian influenza surveillance wild birds", 35, "pubmed"],
                ["exotic animal anesthesia protocols", 25, "pubmed"],
                ["wildlife conservation medicine", 40, "pubmed"],
                ["small animal oncology chemotherapy", 50, "pubmed"]
            ],
            inputs=[query_input, max_results, database_choice]
        )

        gr.Markdown("""
        ### About This Veterinary Literature Mining Agent

        This comprehensive tool is designed for veterinary professionals, researchers, and students to efficiently
        search and analyze veterinary literature across all animal species and medical specialties.

        **Supported Areas:**
        - **Companion Animals**: Dogs, cats, rabbits, ferrets, birds, reptiles, fish
        - **Large Animals**: Horses, cattle, pigs, sheep, goats
        - **Wildlife & Zoo Medicine**: All wild species and conservation medicine
        - **Laboratory Animals**: Research and laboratory animal medicine
        - **All Veterinary Specialties**: Internal medicine, surgery, oncology, cardiology, dermatology, etc.

        **Data Sources:** PubMed/NCBI databases with veterinary focus
        **Last Updated:** June 2025
        **Coverage:** All aspects of veterinary medicine and animal health
        """)

    return interface


# Create and launch the interface
if __name__ == "__main__":
    interface = create_veterinary_gradio_interface()
    # NOTE(review): server_name="0.0.0.0" listens on all network interfaces
    # and share=True requests a public share link - confirm this exposure is
    # intended for the deployment target.
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )