import gradio as gr
import requests
import json
import pandas as pd
from datetime import datetime, timedelta
import re
from typing import List, Dict, Tuple
import xml.etree.ElementTree as ET
from collections import Counter
import plotly.express as px
import plotly.graph_objects as go
from transformers import pipeline
import numpy as np

class VeterinaryLiteratureMiner:
    """Search PubMed for veterinary papers and derive keyword-based statistics.

    The miner queries the NCBI E-utilities endpoints (``esearch`` followed by
    ``efetch``), parses the returned XML into plain dicts, and counts how many
    papers mention given species, specialties, conditions, procedures and drugs
    in their title or abstract.

    Error convention: search and parse methods report failures by returning a
    one-element list ``[{"error": "<message>"}]`` instead of raising.
    """

    # Heuristic: a capitalised token ending in a common pharmaceutical suffix.
    _DRUG_SUFFIX_PATTERN = re.compile(
        r'\b[A-Z][a-z]*(?:mab|nib|ine|ole|cin|tin|zole|pril|sartan)\b'
    )

    # Capitalised everyday/veterinary words that satisfy the suffix heuristic
    # above but are not drugs; without this list "Canine" or "Medicine" would
    # dominate the drug statistics.
    _NON_DRUG_WORDS = frozenset({
        "Canine", "Feline", "Equine", "Bovine", "Porcine", "Ovine", "Caprine",
        "Murine", "Swine", "Marine", "Medicine", "Vaccine", "Urine", "Routine",
        "Baseline", "Saline", "Guideline", "Discipline", "Decline", "Outline",
        "Determine", "Examine", "Combine", "Define", "Intestine", "Spine",
        "Line", "Nine", "Fine", "Whole", "Role", "Sole",
    })

    # Drugs that are always looked for, regardless of capitalisation.
    _KNOWN_VET_DRUGS = (
        "prednisolone", "dexamethasone", "amoxicillin", "cephalexin",
        "enrofloxacin", "tramadol", "gabapentin", "furosemide", "enalapril",
        "pimobendan",
    )

    # Specialty -> trigger keywords; a paper counts once per specialty.
    _SPECIALTY_KEYWORDS = {
        "oncology": ["cancer", "tumor", "oncology", "chemotherapy", "radiation"],
        "cardiology": ["heart", "cardiac", "cardiology", "arrhythmia", "murmur"],
        "dermatology": ["skin", "dermatology", "allergy", "dermatitis", "eczema"],
        "neurology": ["brain", "neurological", "seizure", "epilepsy", "paralysis"],
        "orthopedics": ["bone", "joint", "fracture", "orthopedic", "lameness"],
        "infectious_diseases": ["infection", "bacteria", "virus", "antibiotic", "pathogen"],
        "surgery": ["surgical", "surgery", "operative", "laparoscopy", "endoscopy"],
        "internal_medicine": ["diabetes", "kidney", "liver", "endocrine", "metabolic"],
    }

    def __init__(self):
        """Load the optional NLP pipelines and set up the keyword vocabularies.

        If the transformers models cannot be loaded, ``summarizer`` and
        ``classifier`` are set to ``None`` and the keyword-based analysis still
        works; only the zero-shot ``research_trends`` statistic is skipped.
        """
        # Initialize NLP pipelines
        try:
            self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        except Exception as e:
            print(f"Warning: Could not load transformers models: {e}")
            self.summarizer = None
            self.classifier = None

        # Veterinary research categories for classification
        self.veterinary_categories = [
            "oncology", "cardiology", "dermatology", "neurology", "orthopedics",
            "infectious diseases", "parasitology", "pharmacology", "toxicology",
            "surgery", "anesthesia", "emergency medicine", "internal medicine",
            "pathology", "radiology", "nutrition", "behavior", "reproduction",
            "public health", "zoonoses", "immunology", "genetics", "epidemiology"
        ]

        # Animal species categories
        self.animal_species = [
            "canine", "dog", "dogs", "feline", "cat", "cats", "equine", "horse", "horses",
            "bovine", "cattle", "cow", "cows", "porcine", "pig", "pigs", "swine",
            "ovine", "sheep", "caprine", "goat", "goats", "avian", "bird", "birds",
            "poultry", "chicken", "chickens", "rabbit", "rabbits", "ferret", "ferrets",
            "reptile", "reptiles", "fish", "aquatic", "wildlife", "zoo", "exotic",
            "laboratory animals", "mouse", "mice", "rat", "rats"
        ]

        # Veterinary specialties and procedures
        self.vet_procedures = [
            "vaccination", "spay", "neuter", "castration", "ovariohysterectomy",
            "amputation", "biopsy", "endoscopy", "laparoscopy", "arthroscopy",
            "radiography", "ultrasound", "CT", "MRI", "chemotherapy", "radiation",
            "physical therapy", "rehabilitation", "dental", "ophthalmology"
        ]

        # Common veterinary conditions
        self.vet_conditions = [
            "diabetes", "epilepsy", "heart disease", "kidney disease", "liver disease",
            "arthritis", "hip dysplasia", "allergies", "skin disease", "cancer",
            "tumor", "infection", "parasite", "heartworm", "flea", "tick",
            "obesity", "dental disease", "cataracts", "glaucoma", "IBD"
        ]

    # ------------------------------------------------------------------
    # Small internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _element_text(element) -> str:
        """Return all text inside *element*, including text of nested tags.

        ``Element.text`` stops at the first child tag, so titles such as
        ``<i>Toxoplasma</i> in cats`` would come back as ``None``. Whitespace
        is collapsed; a missing element yields an empty string.
        """
        if element is None:
            return ""
        return " ".join("".join(element.itertext()).split())

    @staticmethod
    def _term_pattern(term: str):
        """Compile a whole-word matcher for *term* against lowercased text.

        Word boundaries stop "cat" from matching "indicated"; the optional
        trailing "s" keeps simple plurals ("tumors") counting as before.
        """
        return re.compile(rf"\b{re.escape(term.lower())}s?\b")

    @staticmethod
    def _display_label(key: str) -> str:
        """Turn a counter key into a heading, keeping acronyms such as "CT"."""
        label = key.replace("_", " ")
        return label if label.isupper() else label.title()

    @staticmethod
    def _top_items(counts: Dict, limit: int) -> List[Tuple]:
        """Return the *limit* highest-count ``(key, count)`` pairs, largest first."""
        return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]

    # ------------------------------------------------------------------
    # Retrieval
    # ------------------------------------------------------------------

    def search_veterinary_literature(self, query: str, max_results: int = 50, database: str = "pubmed") -> List[Dict]:
        """Search veterinary literature and return one dict per paper.

        Args:
            query: Free-text search terms.
            max_results: Upper bound on the number of papers to retrieve.
            database: Source to query. Only PubMed is implemented, so every
                value is currently served by PubMed.

        Returns:
            A list of paper dicts (keys ``pmid``, ``title``, ``abstract``,
            ``authors``, ``journal``, ``year``), or ``[{"error": msg}]``.
        """
        if not query or not query.strip():
            return [{"error": "Please enter a search query."}]

        # Future: dispatch on `database` once other veterinary sources exist.
        return self._search_pubmed(query, max_results)

    def _search_pubmed(self, query: str, max_results: int) -> List[Dict]:
        """Run ``esearch`` + ``efetch`` against PubMed for *query*.

        The query is AND-ed with a set of veterinary terms to bias results
        towards animal health. All failures are converted to the
        ``[{"error": msg}]`` form.
        """
        try:
            print(f"Searching PubMed with query: {query}")  # Debug print

            # UI sliders may deliver floats; retmax must be a positive integer.
            retmax = max(1, int(max_results))

            # Enhance query with veterinary terms
            enhanced_query = f"({query}) AND (veterinary OR animal OR pet OR livestock OR zoo OR wildlife)"
            print(f"Enhanced query: {enhanced_query}")  # Debug print

            # Search PubMed
            search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
            search_params = {
                "db": "pubmed",
                "term": enhanced_query,
                "retmax": retmax,
                "retmode": "json",
                "sort": "relevance"
            }

            print("Making search request...")  # Debug print
            search_response = requests.get(search_url, params=search_params, timeout=30)
            print(f"Search response status: {search_response.status_code}")  # Debug print

            if search_response.status_code != 200:
                return [{"error": f"PubMed search failed with status {search_response.status_code}"}]

            search_data = search_response.json()
            print(f"Search data received: {search_data.get('esearchresult', {}).get('count', 0)} results")  # Debug print

            if "esearchresult" not in search_data:
                return [{"error": "Invalid response from PubMed"}]

            ids = search_data["esearchresult"].get("idlist")
            if not ids:
                return [{"error": "No papers found matching your query"}]

            # Get detailed information
            print(f"Fetching details for {len(ids)} papers...")  # Debug print

            fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
            fetch_params = {
                "db": "pubmed",
                "id": ",".join(ids),
                "retmode": "xml"
            }

            fetch_response = requests.get(fetch_url, params=fetch_params, timeout=60)
            print(f"Fetch response status: {fetch_response.status_code}")  # Debug print

            if fetch_response.status_code != 200:
                return [{"error": f"Failed to fetch paper details: {fetch_response.status_code}"}]

            # Parse XML response
            papers = self._parse_pubmed_xml(fetch_response.text)
            print(f"Parsed {len(papers)} papers successfully")  # Debug print

            return papers

        except requests.exceptions.Timeout:
            return [{"error": "Request timed out. Please try again with fewer results."}]
        except requests.exceptions.ConnectionError:
            return [{"error": "Connection error. Please check your internet connection."}]
        except Exception as e:
            # Boundary handler: the UI expects an error dict, never an exception.
            print(f"Error in _search_pubmed: {str(e)}")  # Debug print
            return [{"error": f"Search failed: {str(e)}"}]

    def _parse_pubmed_xml(self, xml_content: str) -> List[Dict]:
        """Parse an ``efetch`` XML payload into a list of paper dicts.

        Every paper dict always carries the keys ``pmid``, ``title``,
        ``abstract``, ``authors``, ``journal`` and ``year``; values that are
        absent in the XML are reported as ``"N/A"`` (``authors`` as ``""``).

        Returns:
            The parsed papers, or ``[{"error": msg}]`` if the XML is malformed.
        """
        try:
            root = ET.fromstring(xml_content)
        except (ET.ParseError, TypeError, ValueError) as e:
            return [{"error": f"XML parsing failed: {str(e)}"}]

        papers = []
        for article in root.findall(".//PubmedArticle"):
            paper = {}

            # The article's own PMID is a direct child of MedlineCitation;
            # nested PMIDs belong to referenced/commented articles.
            paper["pmid"] = self._element_text(article.find("MedlineCitation/PMID")) or "N/A"

            paper["title"] = self._element_text(article.find(".//ArticleTitle")) or "N/A"

            # Structured abstracts consist of several labelled sections.
            sections = []
            for section in article.findall(".//Abstract/AbstractText"):
                text = self._element_text(section)
                if not text:
                    continue
                label = section.get("Label")
                sections.append(f"{label}: {text}" if label else text)
            paper["abstract"] = " ".join(sections) or "N/A"

            # Authors: "ForeName LastName", falling back to whatever is present.
            authors = []
            for author in article.findall(".//Author"):
                fore = self._element_text(author.find("ForeName"))
                last = self._element_text(author.find("LastName"))
                name = " ".join(part for part in (fore, last) if part)
                if not name:
                    name = self._element_text(author.find("CollectiveName"))
                if name:
                    authors.append(name)
            paper["authors"] = ", ".join(authors[:3]) + ("..." if len(authors) > 3 else "")

            paper["journal"] = self._element_text(article.find(".//Journal/Title")) or "N/A"

            # Some records carry a free-form MedlineDate ("2021 Jan-Feb")
            # instead of a Year element; take its first four-digit number.
            year = self._element_text(article.find(".//PubDate/Year"))
            if not year:
                medline_date = self._element_text(article.find(".//PubDate/MedlineDate"))
                found = re.search(r"\b\d{4}\b", medline_date)
                year = found.group(0) if found else ""
            paper["year"] = year or "N/A"

            papers.append(paper)

        return papers

    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------

    def analyze_veterinary_papers(self, papers: List[Dict]) -> Dict:
        """Count keyword mentions across the retrieved papers.

        Matching is case-insensitive and on whole words (with an optional
        plural "s"). Every statistic counts *papers*: a paper contributes at
        most one to any given key, including each drug in ``drug_mentions``.

        Args:
            papers: Paper dicts as produced by ``_parse_pubmed_xml``.

        Returns:
            A dict with ``total_papers`` and the count mappings
            ``year_distribution``, ``animal_species``,
            ``veterinary_specialties``, ``common_conditions``,
            ``procedures_mentioned``, ``drug_mentions`` (top 15),
            ``journal_distribution`` and ``research_trends``; or
            ``{"error": "No papers to analyze"}`` for empty/failed input.
        """
        if not papers or papers[0].get("error"):
            return {"error": "No papers to analyze"}

        # Compile every matcher once, outside the per-paper loop.
        species_matchers = [(s.replace(" ", "_"), self._term_pattern(s)) for s in self.animal_species]
        condition_matchers = [(c, self._term_pattern(c)) for c in self.vet_conditions]
        procedure_matchers = [(p, self._term_pattern(p)) for p in self.vet_procedures]
        drug_matchers = [(d.title(), self._term_pattern(d)) for d in self._KNOWN_VET_DRUGS]
        specialty_matchers = [
            (specialty, [self._term_pattern(k) for k in keywords])
            for specialty, keywords in self._SPECIALTY_KEYWORDS.items()
        ]

        years = Counter()
        journals = Counter()
        species_counts = Counter()
        specialty_counts = Counter()
        condition_counts = Counter()
        procedure_counts = Counter()
        drug_counts = Counter()
        trend_counts = Counter()

        for paper in papers:
            years[paper.get("year") or "Unknown"] += 1
            journals[paper.get("journal") or "Unknown"] += 1

            # `or ""` guards against explicit None values in the paper dict.
            raw_title = paper.get("title") or ""
            raw_abstract = paper.get("abstract") or ""
            full_text = f"{raw_title} {raw_abstract}".lower()

            for key, matcher in species_matchers:
                if matcher.search(full_text):
                    species_counts[key] += 1

            for specialty, matchers in specialty_matchers:
                if any(m.search(full_text) for m in matchers):
                    specialty_counts[specialty] += 1

            for key, matcher in condition_matchers:
                if matcher.search(full_text):
                    condition_counts[key] += 1

            for key, matcher in procedure_matchers:
                if matcher.search(full_text):
                    procedure_counts[key] += 1

            # Drugs: suffix heuristic on the original-case abstract plus the
            # known-drug list, merged in a set so a drug found by both routes
            # (e.g. "Gabapentin") is counted once for this paper.
            drugs_in_paper = {
                token for token in self._DRUG_SUFFIX_PATTERN.findall(raw_abstract)
                if token not in self._NON_DRUG_WORDS
            }
            drugs_in_paper.update(name for name, matcher in drug_matchers if matcher.search(full_text))
            drug_counts.update(sorted(drugs_in_paper))

            # Zero-shot classification is optional and best-effort.
            if self.classifier and raw_abstract and raw_abstract.lower() != "n/a":
                try:
                    result = self.classifier(raw_abstract[:512], self.veterinary_categories)
                    trend_counts[result["labels"][0]] += 1
                except Exception as e:
                    # One bad abstract must not abort the whole analysis, but
                    # the failure should be visible rather than swallowed.
                    print(f"Warning: classification failed for PMID {paper.get('pmid', 'N/A')}: {e}")

        return {
            "total_papers": len(papers),
            "year_distribution": dict(years),
            "animal_species": dict(species_counts),
            "veterinary_specialties": dict(specialty_counts),
            "common_conditions": dict(condition_counts),
            "procedures_mentioned": dict(procedure_counts),
            "drug_mentions": dict(drug_counts.most_common(15)),
            "journal_distribution": dict(journals),
            "research_trends": dict(trend_counts),
        }

    def generate_veterinary_summary(self, papers: List[Dict], analysis: Dict) -> str:
        """Render the analysis as a Markdown report.

        Args:
            papers: The retrieved paper dicts (used for the recent-papers list).
            analysis: The dict returned by ``analyze_veterinary_papers``.

        Returns:
            Markdown text, or a short message when there is nothing to report.
        """
        if not papers or papers[0].get("error") or not analysis or analysis.get("error"):
            return "No papers found or error in retrieval."

        parts = [f"""
# Veterinary Literature Mining Summary

## Overview
- **Total Papers Found**: {analysis.get('total_papers', len(papers))}
- **Search Date**: {datetime.now().strftime('%Y-%m-%d')}

## Key Insights

### Most Studied Animal Species
"""]

        for species, count in self._top_items(analysis.get("animal_species") or {}, 8):
            parts.append(f"- **{self._display_label(species)}**: {count} papers\n")

        parts.append("\n### Veterinary Specialties Focus\n")
        for specialty, count in self._top_items(analysis.get("veterinary_specialties") or {}, 6):
            parts.append(f"- **{self._display_label(specialty)}**: {count} papers\n")

        parts.append("\n### Common Conditions Studied\n")
        for condition, count in self._top_items(analysis.get("common_conditions") or {}, 8):
            parts.append(f"- **{self._display_label(condition)}**: {count} papers\n")

        parts.append("\n### Frequently Mentioned Treatments/Drugs\n")
        # drug_mentions is already ordered most-common first by the analysis.
        for drug, count in list((analysis.get("drug_mentions") or {}).items())[:8]:
            parts.append(f"- **{drug}**: {count} mentions\n")

        parts.append("\n### Top Veterinary Journals\n")
        for journal, count in self._top_items(analysis.get("journal_distribution") or {}, 5):
            parts.append(f"- **{journal}**: {count} papers\n")

        parts.append("\n### Recent Research Highlights\n")

        # Recent papers (last 3 years)
        current_year = datetime.now().year
        recent_papers = [
            p for p in papers
            if str(p.get("year") or "").isdigit() and int(p["year"]) >= current_year - 3
        ]
        for paper in recent_papers[:4]:
            parts.append(f"- **{paper.get('title', 'N/A')}** ({paper.get('year', 'N/A')})\n")
            parts.append(f"  *{paper.get('journal', 'N/A')}*\n\n")

        return "".join(parts)

    def create_veterinary_visualizations(self, analysis: Dict) -> Dict:
        """Build Plotly figures from the analysis counts.

        Returns:
            A dict that may contain the keys ``year_dist``, ``animal_species``,
            ``specialties`` and ``conditions``; a key is omitted when there is
            no data for that chart.
        """
        plots = {}

        # Year distribution: chronological order, otherwise the line zigzags.
        year_counts = analysis.get("year_distribution") or {}
        years = sorted((y for y in year_counts if str(y).isdigit()), key=int)
        if years:
            fig_year = px.line(
                x=years, y=[year_counts[y] for y in years],
                title="Veterinary Research Publications Over Time",
                labels={"x": "Year", "y": "Number of Papers"},
                markers=True
            )
            fig_year.update_layout(showlegend=False)
            plots["year_dist"] = fig_year

        # Animal species: the 12 most frequent, not the first 12 encountered.
        top_species = self._top_items(analysis.get("animal_species") or {}, 12)
        if top_species:
            # Horizontal bars are drawn bottom-up, so reverse to put the
            # most frequent species at the top of the chart.
            top_species.reverse()
            fig_species = px.bar(
                x=[count for _, count in top_species],
                y=[self._display_label(name) for name, _ in top_species],
                orientation='h',
                title="Most Studied Animal Species",
                labels={"x": "Number of Papers", "y": "Species"}
            )
            plots["animal_species"] = fig_species

        # Veterinary specialties
        specialty_counts = analysis.get("veterinary_specialties") or {}
        if specialty_counts:
            fig_specialties = px.pie(
                values=list(specialty_counts.values()),
                names=[self._display_label(s) for s in specialty_counts],
                title="Veterinary Specialty Distribution"
            )
            plots["specialties"] = fig_specialties

        # Common conditions: the 10 most frequent, largest first.
        top_conditions = self._top_items(analysis.get("common_conditions") or {}, 10)
        if top_conditions:
            fig_conditions = px.bar(
                x=[self._display_label(name) for name, _ in top_conditions],
                y=[count for _, count in top_conditions],
                title="Most Commonly Studied Conditions",
                labels={"x": "Condition", "y": "Number of Papers"}
            )
            fig_conditions.update_xaxes(tickangle=45)
            plots["conditions"] = fig_conditions

        return plots

def create_veterinary_gradio_interface():
    """Create the Gradio interface for veterinary literature mining.

    Instantiates a ``VeterinaryLiteratureMiner``, lays out the search form,
    the result tabs and the example queries, and wires the search button to
    the search/analyze callback.

    Returns:
        The configured ``gr.Blocks`` interface; the caller is responsible for
        launching it.
    """
    miner = VeterinaryLiteratureMiner()

    table_columns = ["PMID", "Title", "Authors", "Journal", "Year"]

    def failure(message):
        """Show *message* in the summary pane and clear the five other outputs."""
        return message, None, None, None, None, None

    def shorten(title, limit=100):
        """Truncate long titles for the table; missing titles become "N/A"."""
        title = title or "N/A"
        return title[:limit] + "..." if len(title) > limit else title

    def search_and_analyze_vet(query, max_results, database):
        """Search, analyze and visualize; returns one value per output widget.

        The tuple order matches the ``outputs`` list of the button click:
        summary markdown, papers dataframe, then the four plots.
        """
        try:
            print(f"Starting search with query: {query}")  # Debug print

            # The textbox value may be None as well as blank.
            if not query or not query.strip():
                return failure("Please enter a search query.")

            # Search papers (slider values may arrive as floats)
            print("Searching papers...")  # Debug print
            papers = miner.search_veterinary_literature(query, int(max_results), database)
            print(f"Found {len(papers) if papers else 0} papers")  # Debug print

            if not papers:
                return failure("No papers found. Try a different search query.")

            if papers[0].get("error"):
                error_msg = papers[0].get("error", "Unknown error occurred")
                return failure(f"Search Error: {error_msg}")

            # Analyze papers
            print("Analyzing papers...")  # Debug print
            analysis = miner.analyze_veterinary_papers(papers)

            if analysis.get("error"):
                return failure(f"Analysis Error: {analysis['error']}")

            # Generate summary
            print("Generating summary...")  # Debug print
            summary = miner.generate_veterinary_summary(papers, analysis)

            # Create visualizations
            print("Creating visualizations...")  # Debug print
            plots = miner.create_veterinary_visualizations(analysis)

            # Create papers dataframe
            print("Creating dataframe...")  # Debug print
            papers_df = pd.DataFrame(
                [
                    {
                        "PMID": p.get("pmid", "N/A"),
                        "Title": shorten(p.get("title")),
                        "Authors": p.get("authors", "N/A"),
                        "Journal": p.get("journal", "N/A"),
                        "Year": p.get("year", "N/A")
                    }
                    for p in papers
                ],
                columns=table_columns,
            )

            print("Search and analysis complete!")  # Debug print
            return (
                summary,
                papers_df,
                plots.get("year_dist"),
                plots.get("animal_species"),
                plots.get("specialties"),
                plots.get("conditions")
            )

        except Exception as e:
            # UI boundary: surface the problem in the summary pane instead of
            # letting the callback raise.
            error_message = f"Unexpected error: {str(e)}"
            print(f"Error in search_and_analyze_vet: {error_message}")  # Debug print
            return failure(error_message)

    # Create interface
    with gr.Blocks(title="Veterinary Literature Mining Agent", theme=gr.themes.Soft()) as interface:
        gr.Markdown("""
        # 🐾 Veterinary Literature Mining Agent
        
        This AI agent searches and analyzes veterinary and animal health literature across all specialties.
        It automatically extracts insights about animal species, veterinary specialties, common conditions, and treatment trends.
        
        **Features:**
        - Comprehensive veterinary literature search
        - Multi-species analysis (companion animals, livestock, wildlife, exotics)
        - Veterinary specialty categorization
        - Treatment and drug trend analysis
        - Interactive visualizations
        - Journal and publication pattern analysis
        """)

        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="Research Query",
                    placeholder="e.g., 'canine diabetes management', 'equine lameness diagnosis', 'feline kidney disease', 'wildlife conservation medicine'",
                    lines=2
                )
                with gr.Row():
                    max_results = gr.Slider(
                        minimum=10, maximum=100, value=50, step=10,
                        label="Maximum Results"
                    )
                    database_choice = gr.Dropdown(
                        choices=["pubmed"],
                        value="pubmed",
                        label="Database"
                    )
                search_btn = gr.Button("🔍 Search Veterinary Literature", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### Search Tips:
                - **Species**: dog, cat, horse, cattle, pig, bird, fish, reptile, wildlife
                - **Specialties**: cardiology, oncology, surgery, dermatology, neurology
                - **Conditions**: diabetes, arthritis, cancer, infection, allergies
                - **Procedures**: vaccination, surgery, imaging, therapy
                - **Combine terms**: "feline diabetes insulin therapy"
                """)

        with gr.Tabs():
            with gr.TabItem("📊 Analysis Summary"):
                summary_output = gr.Markdown(label="Veterinary Literature Analysis")

            with gr.TabItem("📋 Research Papers"):
                papers_output = gr.Dataframe(
                    headers=["PMID", "Title", "Authors", "Journal", "Year"],
                    label="Retrieved Veterinary Papers"
                )

            with gr.TabItem("📈 Research Trends"):
                with gr.Row():
                    year_plot = gr.Plot(label="Publication Timeline")
                    species_plot = gr.Plot(label="Animal Species")
                with gr.Row():
                    specialties_plot = gr.Plot(label="Veterinary Specialties")
                    conditions_plot = gr.Plot(label="Common Conditions")

        # Connect the search function with progress indicator
        search_btn.click(
            fn=search_and_analyze_vet,
            inputs=[query_input, max_results, database_choice],
            outputs=[summary_output, papers_output, year_plot, species_plot, specialties_plot, conditions_plot],
            show_progress=True
        )

        # Add examples
        gr.Examples(
            examples=[
                ["canine diabetes insulin therapy", 40, "pubmed"],
                ["equine lameness diagnosis imaging", 35, "pubmed"],
                ["feline chronic kidney disease treatment", 45, "pubmed"],
                ["bovine mastitis antibiotic resistance", 30, "pubmed"],
                ["avian influenza surveillance wild birds", 35, "pubmed"],
                ["exotic animal anesthesia protocols", 25, "pubmed"],
                ["wildlife conservation medicine", 40, "pubmed"],
                ["small animal oncology chemotherapy", 50, "pubmed"]
            ],
            inputs=[query_input, max_results, database_choice]
        )

        gr.Markdown("""
        ### About This Veterinary Literature Mining Agent
        
        This comprehensive tool is designed for veterinary professionals, researchers, and students to efficiently 
        search and analyze veterinary literature across all animal species and medical specialties.
        
        **Supported Areas:**
        - **Companion Animals**: Dogs, cats, rabbits, ferrets, birds, reptiles, fish
        - **Large Animals**: Horses, cattle, pigs, sheep, goats
        - **Wildlife & Zoo Medicine**: All wild species and conservation medicine
        - **Laboratory Animals**: Research and laboratory animal medicine
        - **All Veterinary Specialties**: Internal medicine, surgery, oncology, cardiology, dermatology, etc.
        
        **Data Sources:** PubMed/NCBI databases with veterinary focus  
        **Last Updated:** June 2025  
        **Coverage:** All aspects of veterinary medicine and animal health
        """)

    return interface

# Create and launch the interface
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all network
    # interfaces at port 7860 with a public Gradio share link enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    interface = create_veterinary_gradio_interface()
    interface.launch(**launch_options)