# main.py (HF Space FastAPI)
from contextlib import asynccontextmanager
from fastapi import FastAPI
from document_processor import DocumentProcessor
from vector_store import vector_store
from models import *
import time
import hashlib
import os
import google.generativeai as genai

# Initialize processors
processor = DocumentProcessor()

# Initialize Gemini
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup events
    print("🚀 Initializing Document Processor...")
    await processor.initialize()
    print("📚 Initializing Vector Store...")
    vector_store.clause_tagger = processor.clause_tagger
    print("✅ Application startup complete")
    yield
    print("🛑 Shutting down application...")


# Create FastAPI app
app = FastAPI(
    title="Legal Document Analysis API",
    version="1.0.0",
    lifespan=lifespan
)


@app.post("/analyze_document")
async def analyze_document(data: AnalyzeDocumentInput):
    """Unified endpoint for complete document analysis WITH optimized vector storage"""
    try:
        start_time = time.time()

        if not data.document_text:
            return {"error": "No document text provided"}

        # Generate document ID
        doc_id = hashlib.sha256(data.document_text.encode()).hexdigest()[:16]

        # Process document completely with pre-computed embeddings
        result, chunk_data = await processor.process_document(data.document_text, doc_id)

        # Save embeddings to Pinecone using pre-computed vectors (NO RE-EMBEDDING)
        try:
            success = vector_store.save_document_embeddings_optimized(
                chunk_data=chunk_data,
                document_id=doc_id,
                analysis_results=result
            )
            if success:
                result["vector_storage"] = "success"
                result["chat_ready"] = True
                print(f"✅ Embeddings saved for doc {doc_id}")
            else:
                result["vector_storage"] = "failed"
                result["chat_ready"] = False
        except Exception as e:
            print(f"⚠️ Vector storage failed: {e}")
            result["vector_storage"] = "failed"
            result["chat_ready"] = False

        processing_time = time.time() - start_time
        result["total_processing_time"] = f"{processing_time:.2f}s"
        return result

    except Exception as e:
        return {"error": str(e)}


async def generate_response_with_context(user_question: str, relevant_context: str, document_id: str):
    """Send relevant chunks to Gemini for response generation"""
    try:
        prompt = f"""You are a legal document assistant. Answer the user's question based ONLY on the provided context from their legal document.

Context from document {document_id}:
{relevant_context}

User Question: {user_question}

Instructions:
- Provide a clear, accurate answer based on the context above
- If the answer isn't in the context, say "I cannot find information about this in the provided document"
- Include specific quotes from the document when relevant
- Keep your answer focused on legal implications and key details

Answer:"""

        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"Error generating response: {str(e)}"


@app.post("/chat")
async def chat_with_document(data: ChatInput):
    """Chat with a specific legal document using RAG"""
    try:
        if not data.message or not data.document_id:
            return {"error": "Message and document_id are required"}

        # Get retriever for specific document
        retriever = vector_store.get_retriever(
            clause_tagger=processor.clause_tagger,
            document_id=data.document_id
        )
        if not retriever:
            return {"error": "Failed to create retriever or document not found"}

        # Get relevant chunks based on similarity
        relevant_chunks = retriever.get_relevant_documents(data.message)
        if not relevant_chunks:
            return {
                "response": "I couldn't find relevant information in the document to answer your question.",
                "sources": [],
                "document_id": data.document_id
            }

        # Prepare context from relevant chunks
        context = "\n\n".join([doc.page_content for doc in relevant_chunks])

        # Generate response using Gemini
        llm_response = await generate_response_with_context(
            user_question=data.message,
            relevant_context=context,
            document_id=data.document_id
        )

        # Prepare sources
        sources = []
        for doc in relevant_chunks:
            sources.append({
                "chunk_index": doc.metadata.get("chunk_index", 0),
                "similarity_score": doc.metadata.get("similarity_score", 0),
                "text_preview": doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content
            })

        return {
            "response": llm_response,
            "sources": sources,
            "document_id": data.document_id,
            "chunks_used": len(relevant_chunks)
        }

    except Exception as e:
        return {"error": f"Chat failed: {str(e)}"}


# Keep backward compatibility endpoints
@app.post("/chunk")
def chunk_text(data: ChunkInput):
    return processor.chunk_text(data)


@app.post("/summarize_batch")
def summarize_batch(data: SummarizeBatchInput):
    return processor.summarize_batch(data)


@app.get("/health")
def health_check():
    return {
        "status": "healthy",
        "services": {
            "document_processor": "active",
            "vector_store": "active",
            "gemini_llm": "active"
        }
    }


@app.get("/cache_stats")
def get_cache_stats():
    return processor.get_cache_stats()


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
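
# --- Example client usage (sketch) ---
# A minimal sketch of how a client might exercise this API once uvicorn is serving
# on port 7860 as configured above. The `requests` dependency, the base URL, and the
# sample document/question strings are assumptions for illustration only; the JSON
# field names (document_text, message, document_id) mirror the Pydantic inputs used
# by the endpoints.
#
#   import hashlib
#   import requests
#
#   BASE_URL = "http://localhost:7860"
#   document_text = "This Agreement is made between ..."
#
#   # 1. Run the full analysis; on success the response carries
#   #    vector_storage="success" and chat_ready=True.
#   analysis = requests.post(
#       f"{BASE_URL}/analyze_document",
#       json={"document_text": document_text},
#   ).json()
#
#   # 2. Chat against the stored embeddings. The document ID is the first 16 hex
#   #    characters of the SHA-256 of the text, matching how /analyze_document
#   #    derives doc_id.
#   doc_id = hashlib.sha256(document_text.encode()).hexdigest()[:16]
#   chat = requests.post(
#       f"{BASE_URL}/chat",
#       json={"message": "What are the termination terms?", "document_id": doc_id},
#   ).json()
#   print(chat["response"], chat["sources"])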