import os
import json
import tempfile
from datetime import datetime
from flask import Flask, render_template, request, jsonify, session, redirect, url_for
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
# Vector store: Qdrant (replaces the earlier ChromaDB backend)
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, Filter, FieldCondition, MatchValue, PointStruct, SearchParams
# LangChain splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
import arxiv
import PyPDF2
from docx import Document
import requests
from werkzeug.utils import secure_filename
from dotenv import load_dotenv
import uuid
import re
from bs4 import BeautifulSoup
import logging
import numpy as np

# Load environment variables before any os.getenv() lookup below
load_dotenv()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# The session cookie is signed with this key. The built-in fallback is a
# publicly known constant, so it is only acceptable for local development;
# warn loudly whenever it is in use.
_secret_key = os.getenv('SECRET_KEY')
if not _secret_key:
    logger.warning(
        "⚠️ SECRET_KEY is not set; using the built-in development key. "
        "Set SECRET_KEY in the environment for any shared deployment."
    )
    _secret_key = 'research-radar-secret-key-2024'
app.secret_key = _secret_key

# Configuration
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'docx'}
MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max file size

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH

# Ensure directories exist
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Initialize models and services.
# Every service starts as None ("unavailable") and is set up in its own try
# block, so a failure in one (e.g. Qdrant unreachable) does not switch off
# the others.
gemini_model = None
embedding_model = None
qdrant_client = None
VECTOR_SIZE = None

# --- Gemini (text generation) ---
try:
    gemini_api_key = os.getenv('GEMINI_API_KEY')
    if gemini_api_key:
        genai.configure(api_key=gemini_api_key)
        gemini_model = genai.GenerativeModel('gemini-2.5-flash')
        logger.info("✅ Gemini API initialized successfully")
    else:
        logger.warning("⚠️ Gemini API key not found. AI features will be limited.")
except Exception as e:
    logger.error(f"❌ Initialization error (Gemini): {e}")
    gemini_model = None

# --- Sentence-transformer embeddings (local model preferred) ---
try:
    from config import Config
    local_model_path = Config.LOCAL_MODEL_PATH

    if os.path.exists(local_model_path):
        embedding_model = SentenceTransformer(local_model_path)
        logger.info(f"✅ Local sentence transformer model loaded from: {local_model_path}")
    else:
        # Fallback to downloading if local model not found
        embedding_model = SentenceTransformer(Config.EMBEDDING_MODEL)
        logger.warning(f"⚠️ Local model not found at {local_model_path}, downloading {Config.EMBEDDING_MODEL} from HuggingFace")

    # Determine vector size dynamically by embedding a probe sentence
    try:
        _probe_vec = embedding_model.encode(["probe text"])
        VECTOR_SIZE = int(_probe_vec.shape[-1]) if hasattr(_probe_vec, 'shape') else len(_probe_vec[0])
    except Exception:
        VECTOR_SIZE = 384  # fallback for all-MiniLM-L6-v2
except Exception as e:
    logger.error(f"❌ Initialization error (embedding model): {e}")
    embedding_model = None
    VECTOR_SIZE = None

# --- Qdrant (vector database) ---
try:
    qdrant_url = os.getenv('QDRANT_URL')
    qdrant_api_key = os.getenv('QDRANT_API_KEY')
    qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key, timeout=120)

    logger.info("✅ Qdrant client initialized")

    def ensure_qdrant_collection(collection_name: str, vector_size: int) -> None:
        """Create the collection and its 'document_id' payload index if missing.

        Args:
            collection_name: Name of the Qdrant collection.
            vector_size: Dimensionality of the stored vectors.

        Raises:
            Exception: Whatever the client raises when the collection can be
                neither fetched nor created.
        """
        try:
            qdrant_client.get_collection(collection_name)
        except Exception:
            # create_collection refuses to overwrite an existing collection,
            # so a spurious get_collection failure can never wipe stored data
            # (recreate_collection would drop and rebuild it).
            qdrant_client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
            )
            logger.info(f"✅ Created Qdrant collection: {collection_name}")
        # Ensure payload index for document_id exists
        try:
            qdrant_client.create_payload_index(
                collection_name=collection_name,
                field_name="document_id",
                field_schema="keyword"
            )
            logger.info("✅ Ensured payload index for 'document_id'")
        except Exception:
            # Likely already exists
            pass

    # Without a known vector size the collection cannot be created yet.
    if VECTOR_SIZE:
        ensure_qdrant_collection('research_papers', VECTOR_SIZE)

except Exception as e:
    logger.error(f"❌ Initialization error (Qdrant): {e}")
    qdrant_client = None


def allowed_file(filename):
    """Tell whether *filename* carries an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    Names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    _, extension = filename.rsplit('.', 1)
    return extension.lower() in ALLOWED_EXTENSIONS


def extract_text_from_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages, each followed by a newline, or "" when the
        file cannot be opened or parsed (the error is logged).
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # `or ''` guards against a page that yields no text at all, so
            # one such page cannot discard the whole document.
            return "".join(
                f"{page.extract_text() or ''}\n" for page in pdf_reader.pages
            )
    except Exception as e:
        logger.error(f"PDF extraction error: {e}")
        return ""


def extract_text_from_docx(file_path):
    """Extract the paragraph text of a DOCX file.

    Args:
        file_path: Path of the DOCX file to read.

    Returns:
        The text of all paragraphs, each followed by a newline, or "" when
        the file cannot be opened or parsed (the error is logged).
    """
    try:
        doc = Document(file_path)
        return "".join(f"{paragraph.text}\n" for paragraph in doc.paragraphs)
    except Exception as e:
        logger.error(f"DOCX extraction error: {e}")
        return ""


def extract_text_from_txt(file_path):
    """Read a plain-text file as UTF-8.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    failing, so a file in a legacy encoding still yields its readable text
    rather than an empty string.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file content, or "" when the file cannot be read (the
        error is logged).
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            return file.read()
    except Exception as e:
        logger.error(f"TXT extraction error: {e}")
        return ""


def process_document(file_path, filename):
    """Extract text from an uploaded document, chosen by file extension.

    Args:
        file_path: Path of the file on disk.
        filename: Name whose extension (text after the last dot, compared
            case-insensitively) selects the extractor.

    Returns:
        The extracted text, or "" when the name has no extension, the
        extension is not pdf/docx/txt, or extraction fails.
    """
    # rpartition never raises on a name without a dot (e.g. what
    # secure_filename leaves behind when it strips a non-ASCII stem).
    _, dot, extension = (filename or '').rpartition('.')
    if not dot:
        return ""

    file_extension = extension.lower()

    if file_extension == 'pdf':
        return extract_text_from_pdf(file_path)
    elif file_extension == 'docx':
        return extract_text_from_docx(file_path)
    elif file_extension == 'txt':
        return extract_text_from_txt(file_path)
    else:
        return ""


def search_arxiv_papers(query, max_results=10):
    """Search arXiv and return the matching papers as plain dictionaries.

    Args:
        query: Search string passed to arXiv.
        max_results: Maximum number of papers requested.

    Returns:
        A list of dicts with the keys 'title', 'authors', 'summary', 'url',
        'pdf_url', 'published' (YYYY-MM-DD) and 'category', sorted by
        relevance. An empty list is returned when the search fails (the
        error is logged).
    """
    try:
        client = arxiv.Client()
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=arxiv.SortCriterion.Relevance
        )

        return [
            {
                'title': result.title,
                'authors': [author.name for author in result.authors],
                'summary': result.summary,
                'url': result.entry_id,
                'pdf_url': result.pdf_url,
                'published': result.published.strftime('%Y-%m-%d'),
                'category': result.primary_category
            }
            for result in client.results(search)
        ]
    except Exception as e:
        logger.error(f"arXiv search error: {e}")
        return []


def generate_summary(text, max_length=500):
    """Ask Gemini for a summary of *text* of roughly *max_length* words.

    Only the first 80000 characters of *text* are sent. A fixed message is
    returned instead of a summary when Gemini is not configured or the
    request fails (failures are logged).
    """
    try:
        if not gemini_model:
            return "Summary generation unavailable - API not configured"

        excerpt = text[:80000]
        # The prompt is spelled out line by line; the leading spaces and the
        # blank-looking lines are part of the prompt text.
        summary_prompt = (
            "\n"
            "        Please provide a comprehensive summary of this research paper/document"
            f" in approximately {max_length} words. \n"
            "        Focus on:\n"
            "        1. Main research question/objective\n"
            "        2. Key methodology\n"
            "        3. Important findings\n"
            "        4. Conclusions and implications\n"
            "        \n"
            "        Text to summarize:\n"
            f"        {excerpt}\n"
            "        "
        )

        reply = gemini_model.generate_content(summary_prompt)
        return reply.text
    except Exception as err:
        logger.error(f"Summary generation error: {err}")
        return "Error generating summary. Please try again."

# Text chunking using LangChain

def chunk_text(text: str, chunk_size: int = 1000, chunk_overlap: int = 200):
    """Split *text* into overlapping chunks with RecursiveCharacterTextSplitter.

    The splitter is given the separators paragraph break, line break, space
    and empty string, in that order, together with *chunk_size* and
    *chunk_overlap*. Returns whatever ``split_text`` produces.
    """
    separator_priority = ["\n\n", "\n", " ", ""]
    text_splitter = RecursiveCharacterTextSplitter(
        separators=separator_priority,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    pieces = text_splitter.split_text(text)
    return pieces

# Qdrant helpers

def ensure_qdrant_collection(collection_name: str, vector_size: int) -> None:
    """Create the Qdrant collection and its 'document_id' index if missing.

    Does nothing when no Qdrant client is available.

    Args:
        collection_name: Name of the collection to check or create.
        vector_size: Dimensionality of the vectors (cosine distance is used).

    Raises:
        Exception: Whatever the client raises when the collection can be
            neither fetched nor created.
    """
    if not qdrant_client:
        return
    try:
        qdrant_client.get_collection(collection_name)
    except Exception:
        # create_collection refuses to overwrite an existing collection, so a
        # spurious get_collection failure can never wipe stored documents
        # (recreate_collection would drop and rebuild the collection).
        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
        )
    # Ensure payload index for document_id exists for efficient filtering/scrolling
    try:
        qdrant_client.create_payload_index(
            collection_name=collection_name,
            field_name="document_id",
            field_schema="keyword"
        )
    except Exception as e:
        # Best effort: most likely the index already exists.
        logger.debug(f"Payload index for 'document_id' not created: {e}")


def add_document_to_vector_db(text, metadata, doc_id, collection_name="research_papers"):
    """Chunk, embed and store a document in Qdrant for chat functionality.

    Every chunk becomes one point whose payload holds the document metadata
    plus 'document_id', 'chunk_index', 'total_chunks' and 'content'.

    Args:
        text: Full document text.
        metadata: Optional dict of document-level fields copied into every
            chunk payload. Entries whose value is None are skipped so that
            readers' defaults apply instead of a stored null.
        doc_id: Identifier shared by all chunks of this document.
        collection_name: Target Qdrant collection.

    Returns:
        True when the chunks were stored; False when the services are
        unavailable, the text yields no chunks, or storing fails (the error
        is logged).
    """
    try:
        if not embedding_model or not qdrant_client or not VECTOR_SIZE:
            return False

        ensure_qdrant_collection(collection_name, VECTOR_SIZE)

        # Split text using recursive text splitter
        chunks = chunk_text(text, chunk_size=1200, chunk_overlap=250)
        if not chunks:
            return False

        embeddings = embedding_model.encode(chunks)
        vectors = embeddings.tolist() if hasattr(embeddings, 'tolist') else embeddings

        base_payload = {
            key: value for key, value in (metadata or {}).items() if value is not None
        }
        total_chunks = len(chunks)

        points = [
            PointStruct(
                id=str(uuid.uuid4()),
                vector=vector,
                payload={
                    **base_payload,
                    'document_id': doc_id,
                    'chunk_index': i,
                    'total_chunks': total_chunks,
                    'content': chunk,
                }
            )
            for i, (chunk, vector) in enumerate(zip(chunks, vectors))
        ]

        qdrant_client.upsert(collection_name=collection_name, points=points, wait=True)
        return True
    except Exception as e:
        logger.error(f"Vector DB error: {e}")
        return False


def query_vector_db(query, doc_id, collection_name="research_papers", n_results=3):
    """Find the chunks of one document that are most similar to *query*.

    Args:
        query: Question or search text to embed.
        doc_id: Only chunks whose payload 'document_id' equals this value
            are searched.
        collection_name: Qdrant collection to search.
        n_results: Maximum number of chunks returned.

    Returns:
        ``{'documents': [[chunk_text, ...]]}`` on success. An empty list is
        returned instead when the services are unavailable or the query
        fails (the error is logged), so callers must check the type.
    """
    try:
        if not embedding_model or not qdrant_client or not VECTOR_SIZE:
            return []

        ensure_qdrant_collection(collection_name, VECTOR_SIZE)

        query_embedding = embedding_model.encode([query])
        # Test the row itself for tolist(): that is the object being converted.
        first_row = query_embedding[0]
        query_vector = first_row.tolist() if hasattr(first_row, 'tolist') else list(first_row)

        flt = Filter(must=[FieldCondition(key="document_id", match=MatchValue(value=doc_id))])
        results = qdrant_client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=n_results,
            query_filter=flt,
            with_payload=True,
            with_vectors=False
        )

        documents = [
            (getattr(r, 'payload', None) or {}).get('content', '')
            for r in results or []
        ]
        return {'documents': [documents]}
    except Exception as e:
        logger.error(f"Vector DB query error: {e}")
        return []


def get_all_chunks_for_document(doc_id: str, collection_name: str = "research_papers"):
    """Retrieve all chunk texts of a document from Qdrant, ordered by chunk_index.

    Args:
        doc_id: Value of the 'document_id' payload field to match.
        collection_name: Qdrant collection to read.

    Returns:
        The 'content' of every matching chunk in chunk order, or an empty
        list when no client is available, nothing matches, or the read fails
        (the error is logged).
    """
    if not qdrant_client:
        return []
    try:
        all_points = []
        next_offset = None
        flt = Filter(must=[FieldCondition(key="document_id", match=MatchValue(value=doc_id))])
        while True:
            points, next_offset = qdrant_client.scroll(
                collection_name=collection_name,
                scroll_filter=flt,
                limit=500,
                offset=next_offset,
                with_payload=True,
                with_vectors=False
            )
            all_points.extend(points)
            # scroll signals the last page with a None offset
            if next_offset is None:
                break
        # A point without payload must not break ordering or extraction.
        payloads = [p.payload or {} for p in all_points]
        payloads.sort(key=lambda payload: payload.get('chunk_index', 0))
        return [payload.get('content', '') for payload in payloads]
    except Exception as e:
        logger.error(f"Qdrant scroll error: {e}")
        return []


def get_all_documents(collection_name: str = "research_papers"):
    """List every distinct document stored in Qdrant with its metadata.

    All points of the collection are scrolled; the first chunk seen for each
    'document_id' supplies that document's metadata. Fields that are missing
    or stored as null fall back to their defaults.

    Args:
        collection_name: Qdrant collection to read.

    Returns:
        A list of metadata dicts sorted by 'upload_date', newest first, or
        an empty list when no client is available or the read fails (the
        error is logged).
    """
    try:
        if not qdrant_client:
            return []

        # Get all points to extract unique documents
        all_points = []
        next_offset = None
        while True:
            points, next_offset = qdrant_client.scroll(
                collection_name=collection_name,
                limit=1000,
                offset=next_offset,
                with_payload=True,
                with_vectors=False
            )
            all_points.extend(points)
            if next_offset is None:
                break

        # Group by document_id and extract metadata
        documents = {}
        for point in all_points:
            payload = point.payload or {}
            doc_id = payload.get('document_id')
            if not doc_id or doc_id in documents:
                continue

            # Create document metadata from first chunk.
            # `or default` (not .get(key, default)) so that a stored null
            # also falls back to the default.
            doc_type = payload.get('type') or 'document'
            pdf_url = payload.get('pdf_url') or ''
            filename = payload.get('filename') or ''

            # Generate proper title based on type
            title = payload.get('title') or 'Untitled Document'
            if doc_type == 'arxiv_paper' and pdf_url:
                # Extract arXiv ID from URL for better title
                if 'arxiv.org/pdf/' in pdf_url:
                    arxiv_id = pdf_url.split('/')[-1].replace('.pdf', '')
                    title = f"arXiv:{arxiv_id}"
                elif 'arxiv.org/abs/' in pdf_url:
                    arxiv_id = pdf_url.split('/')[-1]
                    title = f"arXiv:{arxiv_id}"
            elif doc_type == 'uploaded_document' and filename:
                title = filename

            documents[doc_id] = {
                'document_id': doc_id,
                'title': title,
                'authors': payload.get('authors') or ['Unknown'],
                'published': payload.get('published') or 'Unknown Date',
                'category': payload.get('category') or 'Research',
                'filename': filename,
                'pdf_url': pdf_url,
                'type': doc_type,
                # Always a string, so the sort below cannot hit None vs str.
                'upload_date': payload.get('upload_date') or '',
                'total_chunks': payload.get('total_chunks') or 0,
                'word_count': payload.get('word_count') or 0
            }

        # Convert to list and sort by upload date (newest first)
        doc_list = list(documents.values())
        doc_list.sort(key=lambda x: x['upload_date'], reverse=True)

        return doc_list
    except Exception as e:
        logger.error(f"Error getting documents: {e}")
        return []


def get_document_metadata(doc_id: str, collection_name: str = "research_papers"):
    """Return the metadata of one document, read from one of its chunks.

    Fields that are missing or stored as null fall back to their defaults.

    Args:
        doc_id: Value of the 'document_id' payload field to match.
        collection_name: Qdrant collection to read.

    Returns:
        A metadata dict, or None when no client is available, no chunk
        matches, or the read fails (the error is logged).
    """
    try:
        if not qdrant_client:
            return None

        # One chunk is enough: every chunk carries the document metadata
        flt = Filter(must=[FieldCondition(key="document_id", match=MatchValue(value=doc_id))])
        points, _next_offset = qdrant_client.scroll(
            collection_name=collection_name,
            scroll_filter=flt,
            limit=1,
            with_payload=True,
            with_vectors=False
        )

        if not points:
            return None

        payload = points[0].payload or {}
        # `or default` (not .get(key, default)) so that a stored null also
        # falls back to the default.
        return {
            'document_id': doc_id,
            'title': payload.get('title') or 'Untitled Document',
            'authors': payload.get('authors') or ['Unknown'],
            'published': payload.get('published') or 'Unknown Date',
            'category': payload.get('category') or 'Research',
            'filename': payload.get('filename') or '',
            'pdf_url': payload.get('pdf_url') or '',
            'type': payload.get('type') or 'document',
            'upload_date': payload.get('upload_date') or '',
            'total_chunks': payload.get('total_chunks') or 0,
            'word_count': payload.get('word_count') or 0
        }
    except Exception as e:
        logger.error(f"Error getting document metadata: {e}")
        return None

# Paper ingestion helpers

def resolve_pdf_url(url_or_pdf: str) -> str:
    """Turn an arXiv abstract or PDF link into a direct ``.pdf`` URL.

    Both identifier styles are recognised: new-style (``2301.00001v2``) and
    old-style, which contains a slash (``hep-th/9901001``).

    Args:
        url_or_pdf: An arXiv ``/abs/`` or ``/pdf/`` URL, or any other URL.

    Returns:
        ``https://arxiv.org/pdf/<id>.pdf`` for arXiv links, the input
        unchanged for anything else, and '' for empty input.
    """
    if not url_or_pdf:
        return ''
    if 'arxiv.org/pdf/' in url_or_pdf and url_or_pdf.endswith('.pdf'):
        return url_or_pdf
    # convert arXiv abs to pdf; the optional "<archive>/" prefix (which
    # starts with a letter) keeps old-style identifiers intact
    m = re.search(
        r"arxiv\.org/(?:abs|pdf)/((?:[A-Za-z][\w.-]*/)?[\w.-]+)",
        url_or_pdf
    )
    if m:
        arxiv_id = m.group(1)
        if not arxiv_id.endswith('.pdf'):
            return f"https://arxiv.org/pdf/{arxiv_id}.pdf"
        return f"https://arxiv.org/pdf/{arxiv_id}"
    return url_or_pdf


def download_pdf_to_temp(pdf_url: str) -> str:
    """Stream a PDF from *pdf_url* into a temporary ``.pdf`` file.

    The caller owns the returned file and must delete it.

    NOTE(review): *pdf_url* can come straight from a request body; this
    function fetches whatever it is given. Consider restricting the allowed
    hosts if the service is exposed to untrusted users.

    Args:
        pdf_url: URL to download.

    Returns:
        Path of the temporary file holding the downloaded bytes.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-success HTTP status. No temporary file is left behind.
    """
    tmp_path = None
    try:
        # The context manager releases the streamed connection even when the
        # download is aborted half-way.
        with requests.get(pdf_url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
                tmp_path = tmp.name
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        tmp.write(chunk)
        return tmp_path
    except Exception:
        # Do not leak a partially written file when the download fails.
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        raise


def ingest_paper(pdf_url: str, paper_meta: dict = None) -> tuple:
    """Download a PDF, extract its text, and store its chunks in Qdrant.

    On success the new document also becomes the session's active document,
    so this must run inside a request context.

    Args:
        pdf_url: PDF or arXiv abstract URL; resolved with resolve_pdf_url.
        paper_meta: Optional extra metadata (e.g. title, authors, published)
            merged over the defaults. Entries whose value is None are
            ignored so they cannot blank out a default.

    Returns:
        ``(doc_id, word_count)`` on success, ``(None, 0)`` when no text
        could be extracted or the chunks could not be stored.

    Raises:
        Exception: Download errors from download_pdf_to_temp propagate.
    """
    pdf_url = resolve_pdf_url(pdf_url)
    doc_id = str(uuid.uuid4())
    tmp_path = None
    try:
        tmp_path = download_pdf_to_temp(pdf_url)
        text_content = extract_text_from_pdf(tmp_path)
        if not text_content.strip():
            return None, 0
        word_count = len(text_content.split())
        metadata = {
            'source': 'arxiv',
            'pdf_url': pdf_url,
            'type': 'arxiv_paper',
            # Read back by the document listing (sorting and display);
            # uploads store upload_date the same way.
            'upload_date': datetime.now().isoformat(),
            'word_count': word_count
        }
        if paper_meta:
            metadata.update(
                {key: value for key, value in paper_meta.items() if value is not None}
            )
        ok = add_document_to_vector_db(text_content, metadata, doc_id)
        if not ok:
            return None, 0
        # set active document
        session['active_document_id'] = doc_id
        return doc_id, word_count
    finally:
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except Exception:
                # Best effort: a leftover temp file must not fail ingestion.
                pass


def generate_summary_from_qdrant(doc_id: str, max_chars: int = 80000) -> str:
    """Summarize a stored document from its Qdrant chunks.

    Chunks are concatenated in order (newline-separated) until adding the
    next one would exceed *max_chars*; the result is passed to
    generate_summary.

    Args:
        doc_id: Document whose chunks are summarized.
        max_chars: Upper bound on the amount of text sent for summarizing.

    Returns:
        The generated summary, or a fixed message when the document has no
        stored content.
    """
    chunks = get_all_chunks_for_document(doc_id)
    if not chunks:
        return "No content available to summarize."
    # Concatenate up to max_chars
    selected = []
    used = 0
    for chunk in chunks:
        if used + len(chunk) > max_chars:
            break
        selected.append(chunk + '\n')
        used += len(chunk) + 1
    if not selected:
        # The very first chunk is already too long: send its head rather
        # than an empty prompt.
        selected.append(chunks[0][:max_chars])
    return generate_summary(''.join(selected))


def generate_chat_response(question, context_docs):
    """Answer *question* with Gemini, grounded in the given context chunks.

    *context_docs* is joined with blank lines into one context block (empty
    when no chunks are given). A fixed message is returned instead of an
    answer when Gemini is not configured or the request fails (failures are
    logged).
    """
    try:
        if not gemini_model:
            return "Chat functionality unavailable - API not configured"

        context = ""
        if context_docs:
            context = "\n\n".join(context_docs)

        # The prompt is spelled out line by line; the leading spaces and the
        # blank-looking lines are part of the prompt text.
        chat_prompt = (
            "\n"
            "        You are a research assistant helping users understand academic papers. \n"
            "        Answer the following question based on the provided context from research papers.\n"
            "        If the context doesn't contain relevant information, say so politely"
            " and suggest what information would be needed.\n"
            "        \n"
            "        Context from research papers:\n"
            f"        {context}\n"
            "        \n"
            f"        Question: {question}\n"
            "        \n"
            "        Please provide a clear, accurate, and helpful response.\n"
            "        "
        )

        reply = gemini_model.generate_content(chat_prompt)
        return reply.text
    except Exception as err:
        logger.error(f"Chat response error: {err}")
        return "Error generating response. Please try again."

# Routes
@app.route('/')
def index():
    """Render the 'index.html' template as the application's main page."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/search', methods=['POST'])
def search_papers():
    """Search arXiv papers.

    Expects a JSON body ``{"query": "..."}``.

    Returns:
        200 with ``{'papers': [...]}``; 400 when the query is missing or
        the body is not a JSON object; 500 on unexpected errors.
    """
    try:
        # silent=True: a missing or malformed JSON body becomes a clean 400
        # below instead of an exception that surfaces as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        query = str(data.get('query') or '').strip()

        if not query:
            return jsonify({'error': 'Query is required'}), 400

        papers = search_arxiv_papers(query, max_results=10)
        return jsonify({'papers': papers})

    except Exception as e:
        logger.error(f"Search failed: {e}", exc_info=True)
        return jsonify({'error': f'Search failed: {str(e)}'}), 500

@app.route('/ingest-paper', methods=['POST'])
def ingest_paper_endpoint():
    """Ingest a paper PDF by URL: download, chunk, embed, store in Qdrant.

    Expects a JSON body with 'pdf_url' (or 'url') and optionally 'title',
    'authors' and 'published'.

    Returns:
        200 with ``{'success': True, 'doc_id', 'word_count'}``; 400 when no
        URL is given or the body is not a JSON object; 500 when ingestion
        fails.
    """
    try:
        # silent=True: a missing or malformed JSON body becomes a clean 400
        # below instead of an exception that surfaces as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        pdf_url = data.get('pdf_url') or data.get('url')
        title = data.get('title')
        authors = data.get('authors')
        published = data.get('published')
        if not pdf_url:
            return jsonify({'error': 'pdf_url is required'}), 400
        doc_id, word_count = ingest_paper(pdf_url, paper_meta={'title': title, 'authors': authors, 'published': published})
        if not doc_id:
            return jsonify({'error': 'Failed to ingest paper'}), 500
        return jsonify({'success': True, 'doc_id': doc_id, 'word_count': word_count})
    except Exception as e:
        logger.error(f"Ingestion failed: {e}", exc_info=True)
        return jsonify({'error': f'Ingestion failed: {str(e)}'}), 500

@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle a document upload: extract text, summarize, and index for chat.

    Expects a multipart form with a 'file' part whose extension is allowed.
    The new document becomes the session's active document.

    Returns:
        200 with ``{'success', 'filename', 'summary', 'word_count',
        'doc_id'}``; 400 when no file is sent, the type is not allowed, or
        no text can be extracted; 500 on unexpected errors.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file selected'}), 400

        file = request.files['file']
        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not (file and allowed_file(file.filename)):
            return jsonify({'error': 'Invalid file type'}), 400

        # The extension was validated on the original name. secure_filename
        # can strip the stem and the dot (e.g. for a non-ASCII name), so
        # rebuild a safe name that keeps the validated extension.
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = secure_filename(file.filename)
        if not filename.lower().endswith(f'.{extension}'):
            filename = f'upload.{extension}'

        # Generate a unique ID for this document session
        doc_id = str(uuid.uuid4())

        # Use a temporary file to avoid cluttering the upload folder; it is
        # removed even when saving or text extraction raises.
        tmp_file_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{filename}") as tmp_file:
                tmp_file_path = tmp_file.name
                file.save(tmp_file_path)

            # Extract text from document
            text_content = process_document(tmp_file_path, filename)
        finally:
            if tmp_file_path and os.path.exists(tmp_file_path):
                try:
                    os.remove(tmp_file_path)
                except OSError as cleanup_error:
                    logger.warning(f"Could not remove temp file {tmp_file_path}: {cleanup_error}")

        if not text_content.strip():
            return jsonify({'error': 'Could not extract text from file'}), 400

        word_count = len(text_content.split())

        # Generate summary
        summary = generate_summary(text_content)

        # Add to vector database for chat
        metadata = {
            'filename': file.filename,
            'upload_date': datetime.now().isoformat(),
            'type': 'uploaded_document',
            # Read back by the document listing and metadata endpoints.
            'word_count': word_count
        }
        if not add_document_to_vector_db(text_content, metadata, doc_id):
            # The summary is still returned; only chat context is missing.
            logger.warning(f"Document {doc_id} could not be indexed; chat will have no context for it")

        # Store the active document ID in the session
        session['active_document_id'] = doc_id

        return jsonify({
            'success': True,
            'filename': file.filename,
            'summary': summary,
            'word_count': word_count,
            'doc_id': doc_id # Send doc_id to frontend
        })

    except Exception as e:
        logger.error(f"Upload failed: {e}", exc_info=True)
        return jsonify({'error': f'Upload failed: {str(e)}'}), 500

@app.route('/summarize-paper', methods=['POST'])
def summarize_paper():
    """Summarize paper: if doc_id provided, summarize from Qdrant; else ingest then summarize.

    Expects a JSON body with 'doc_id', or with 'url' and/or 'pdf_url'. When
    a paper is ingested here it becomes the session's active document.

    Returns:
        200 with ``{'success', 'summary', 'doc_id', 'paper'}`` ('paper' is
        the arXiv metadata looked up during ingestion, else None); 400 when
        neither a doc_id nor a URL is given; 500 when ingestion or the
        request fails.
    """
    try:
        # silent=True: a missing or malformed JSON body becomes a clean 400
        # below instead of an exception that surfaces as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        doc_id = data.get('doc_id')
        # `or ''` also covers an explicit null, which has no .strip()
        paper_url = str(data.get('url') or '').strip()
        pdf_url = data.get('pdf_url')

        if not doc_id and not (paper_url or pdf_url):
            return jsonify({'error': 'doc_id or url/pdf_url is required'}), 400

        # If doc_id not provided, ingest first
        paper_data = None
        if not doc_id:
            # If only abs URL provided, try resolve via arxiv client for metadata
            try:
                # Extract arXiv ID from URL
                arxiv_id = None
                if paper_url:
                    arxiv_id = paper_url.split('/')[-1].replace('.pdf', '')
                if arxiv_id:
                    client = arxiv.Client()
                    search = arxiv.Search(id_list=[arxiv_id])
                    for result in client.results(search):
                        paper_data = {
                            'title': result.title,
                            'authors': [author.name for author in result.authors],
                            'summary': result.summary,
                            'url': result.entry_id,
                            'pdf_url': result.pdf_url,
                            'published': result.published.strftime('%Y-%m-%d')
                        }
                        break
            except Exception as lookup_error:
                # The metadata lookup is optional; ingestion continues without it.
                logger.warning(f"arXiv metadata lookup failed: {lookup_error}")
                paper_data = None
            ingest_pdf = pdf_url or (paper_data['pdf_url'] if paper_data and paper_data.get('pdf_url') else resolve_pdf_url(paper_url))
            new_doc_id, _ = ingest_paper(ingest_pdf, paper_meta=paper_data or {})
            if not new_doc_id:
                return jsonify({'error': 'Failed to ingest paper'}), 500
            doc_id = new_doc_id
            session['active_document_id'] = doc_id

        # Summarize from Qdrant chunks
        summary = generate_summary_from_qdrant(doc_id)

        return jsonify({
            'success': True,
            'summary': summary,
            'doc_id': doc_id,
            'paper': paper_data
        })
    except Exception as e:
        logger.error(f"Summarize request failed: {e}", exc_info=True)
        return jsonify({'error': f'Request failed: {str(e)}'}), 500

@app.route('/chat', methods=['POST'])
def chat():
    """Handle chat queries for the active document.

    Expects a JSON body with 'message' (or the older 'question'). With an
    active document in the session, the answer is grounded in that
    document's most similar chunks; without one, a general help answer is
    produced.

    Returns:
        200 with ``{'success', 'response', 'context_found'}`` (plus
        'no_document' when no document is active); 400 when the message is
        missing; 500 when the AI service is unavailable or on unexpected
        errors.
    """
    try:
        # silent=True: a missing or malformed JSON body becomes a clean 400
        # below instead of an exception that surfaces as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        # Accept both 'message' and 'question' for backward compatibility
        question = str(data.get('message') or data.get('question') or '').strip()
        doc_id = session.get('active_document_id')

        if not question:
            return jsonify({'error': 'Message is required'}), 400

        # If no active document, provide general help
        if not doc_id:
            if not gemini_model:
                return jsonify({'error': 'AI service is not available. Please check your API configuration.'}), 500

            # Generate a general response without document context
            try:
                prompt = f"""
                You are a helpful AI research assistant for Research Radar. The user asked: "{question}"
                
                Since no document is currently loaded, provide a helpful response about:
                1. How to use Research Radar (search papers, upload documents, chat features)
                2. General research guidance if the question is research-related
                3. Suggest they upload a document or search for papers to get more specific help
                
                Keep your response friendly and informative.
                """

                response = gemini_model.generate_content(prompt)
                return jsonify({
                    'success': True,
                    'response': response.text,
                    'context_found': False,
                    'no_document': True
                })
            except Exception as e:
                # Fall back to a canned greeting, but keep a trace of why.
                logger.error(f"General chat response error: {e}")
                return jsonify({
                    'success': True,
                    'response': "Hello! I'm your AI research assistant. To get started, please upload a document or search for papers using the navigation above. Then I can help you analyze content, answer questions, and provide insights about your research materials.",
                    'context_found': False,
                    'no_document': True
                })

        # Query vector database for relevant context from the active document
        search_results = query_vector_db(question, doc_id)

        # query_vector_db returns a dict on success and a list on failure
        context_docs = []
        if search_results and isinstance(search_results, dict) and 'documents' in search_results:
            context_docs = search_results['documents'][0]

        # Generate response
        response = generate_chat_response(question, context_docs)

        return jsonify({
            'success': True,
            'response': response,
            'context_found': len(context_docs) > 0
        })

    except Exception as e:
        logger.error(f"Chat failed: {e}", exc_info=True)
        return jsonify({'error': f'Chat failed: {str(e)}'}), 500

@app.route('/documents', methods=['GET'])
def get_documents():
    """Return the list of documents held in the vector database as JSON."""
    try:
        listing = get_all_documents()
        body = {'success': True, 'documents': listing}
        return jsonify(body)
    except Exception as exc:
        failure = {'error': f'Failed to get documents: {str(exc)}'}
        return jsonify(failure), 500


@app.route('/documents/<doc_id>', methods=['GET'])
def get_document(doc_id):
    """Return one document's metadata as JSON, or 404 when it is unknown."""
    try:
        found = get_document_metadata(doc_id)
        if found:
            return jsonify({'success': True, 'document': found})
        return jsonify({'error': 'Document not found'}), 404
    except Exception as exc:
        failure = {'error': f'Failed to get document: {str(exc)}'}
        return jsonify(failure), 500


@app.route('/documents/<doc_id>/summary', methods=['GET'])
def get_document_summary(doc_id):
    """Get summary for a specific document.

    Returns:
        200 with ``{'success', 'summary', 'document'}``; 404 when the
        document is unknown; 500 on unexpected errors.
    """
    try:
        # Check existence first so an unknown id costs one lookup and never
        # reaches the summarizer.
        metadata = get_document_metadata(doc_id)
        if not metadata:
            return jsonify({'error': 'Document not found'}), 404
        summary = generate_summary_from_qdrant(doc_id)
        return jsonify({
            'success': True,
            'summary': summary,
            'document': metadata
        })
    except Exception as e:
        return jsonify({'error': f'Failed to get summary: {str(e)}'}), 500


@app.route('/documents/<doc_id>/activate', methods=['POST'])
def activate_document(doc_id):
    """Make *doc_id* the session's active chat document; 404 when it is unknown."""
    try:
        found = get_document_metadata(doc_id)
        if found:
            session['active_document_id'] = doc_id
            body = {
                'success': True,
                'message': 'Document activated',
                'document': found
            }
            return jsonify(body)
        return jsonify({'error': 'Document not found'}), 404
    except Exception as exc:
        failure = {'error': f'Failed to activate document: {str(exc)}'}
        return jsonify(failure), 500


@app.route('/documents/<doc_id>', methods=['DELETE'])
def delete_document(doc_id):
    """Delete a document (all of its chunks) from Qdrant.

    If the deleted document is the session's active document, the session
    entry is cleared as well.

    Returns:
        200 with a success message; 500 when the vector database is
        unavailable or the deletion fails.
    """
    try:
        if not qdrant_client:
            return jsonify({'error': 'Vector database not available'}), 500

        # Delete all points for this document
        flt = Filter(must=[FieldCondition(key="document_id", match=MatchValue(value=doc_id))])
        qdrant_client.delete(
            collection_name="research_papers",
            points_selector=flt
        )

        # Do not leave the chat pointing at a document that no longer exists.
        if session.get('active_document_id') == doc_id:
            session.pop('active_document_id', None)

        return jsonify({
            'success': True,
            'message': 'Document deleted successfully'
        })
    except Exception as e:
        return jsonify({'error': f'Failed to delete document: {str(e)}'}), 500


@app.route('/documents', methods=['DELETE'])
def clear_all_documents():
    """Clear all documents from Qdrant.

    The session's active document is cleared too, since it no longer exists.

    Returns:
        200 with a success message; 500 when the vector database is
        unavailable or the deletion fails.
    """
    try:
        if not qdrant_client:
            return jsonify({'error': 'Vector database not available'}), 500

        # Delete all points. The client rejects points_selector=None, so use
        # a filter with no conditions, which matches every point.
        qdrant_client.delete(
            collection_name="research_papers",
            points_selector=Filter(must=[])
        )

        session.pop('active_document_id', None)

        return jsonify({
            'success': True,
            'message': 'All documents cleared successfully'
        })
    except Exception as e:
        return jsonify({'error': f'Failed to clear documents: {str(e)}'}), 500


@app.route('/clear-session', methods=['POST'])
def clear_session():
    """Drop the active document id from the session, if one is set."""
    session.pop('active_document_id', None)
    confirmation = {'success': True, 'message': 'Session cleared.'}
    return jsonify(confirmation)

@app.route('/health')
def health_check():
    """Report which backing services were initialised.

    'status' is always 'healthy'; each '*_available' flag is True when the
    corresponding module-level object is not None.
    """
    has_gemini = gemini_model is not None
    has_embeddings = embedding_model is not None
    has_vector_db = qdrant_client is not None
    report = {
        'status': 'healthy',
        'gemini_available': has_gemini,
        'embeddings_available': has_embeddings,
        'vector_db_available': has_vector_db
    }
    return jsonify(report)

# Script entry point: runs the Flask server when the file is executed directly.
if __name__ == '__main__':
    print("🚀 Research Radar - Starting Flask Application...")
    print("📚 Features: arXiv search, document upload, AI summaries, chat functionality")
    print("🔑 Make sure to set GEMINI_API_KEY in your .env file")
    print("🗄  Using Qdrant as Vector DB. Ensure Qdrant is reachable via QDRANT_URL")
    
    # Get port from environment variable (for Hugging Face Spaces); defaults to 5000
    port = int(os.environ.get('PORT', 5000))
    # Debug mode is enabled only when FLASK_ENV is exactly 'development'
    debug = os.environ.get('FLASK_ENV') == 'development'
    
    print(f"🌐 Access the app at: http://localhost:{port}")
    
    # host='0.0.0.0' listens on all network interfaces, not just localhost
    app.run(debug=debug, host='0.0.0.0', port=port)