Update main.py
main.py CHANGED
```diff
@@ -2,7 +2,7 @@
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from document_processor import DocumentProcessor
-from vector_store import LegalDocumentVectorStore
+from vector_store import vector_store
 from models import *
 import time
 import hashlib
@@ -11,7 +11,6 @@ import google.generativeai as genai
 
 # Initialize processors
 processor = DocumentProcessor()
-vector_store = LegalDocumentVectorStore()
 
 # Initialize Gemini
 genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
@@ -36,7 +35,7 @@ app = FastAPI(
 
 @app.post("/analyze_document")
 async def analyze_document(data: AnalyzeDocumentInput):
-    """Unified endpoint for complete document analysis WITH vector storage"""
+    """Unified endpoint for complete document analysis WITH optimized vector storage"""
     try:
         start_time = time.time()
 
@@ -46,16 +45,15 @@ async def analyze_document(data: AnalyzeDocumentInput):
         # Generate document ID
         doc_id = hashlib.sha256(data.document_text.encode()).hexdigest()[:16]
 
-        # Process document completely
-        result = await processor.process_document(data.document_text, doc_id)
+        # Process document completely with pre-computed embeddings
+        result, chunk_data = await processor.process_document(data.document_text, doc_id)
 
-        # Save embeddings to Pinecone
+        # Save embeddings to Pinecone using pre-computed vectors (NO RE-EMBEDDING)
         try:
-            success = vector_store.save_document_embeddings(
+            success = vector_store.save_document_embeddings_optimized(
+                chunk_data=chunk_data,
                 document_id=doc_id,
-                analysis_results=result,
-                clause_tagger=processor.clause_tagger
+                analysis_results=result
             )
             if success:
                 result["vector_storage"] = "success"
@@ -70,8 +68,7 @@
             result["chat_ready"] = False
 
         processing_time = time.time() - start_time
-        result["processing_time"] = f"{processing_time:.2f}s"
-        result["doc_id"] = doc_id
+        result["total_processing_time"] = f"{processing_time:.2f}s"
 
         return result
 
@@ -178,6 +175,11 @@ def health_check():
         }
     }
 
+@app.get("/cache_stats")
+def get_cache_stats():
+    return processor.get_cache_stats()
+
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
+
```
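The substance of this commit: `process_document` now returns the chunk embeddings it already computed during analysis, and the store upserts those vectors as-is instead of embedding every chunk a second time. Below is a minimal sketch of what `save_document_embeddings_optimized` might look like on the `vector_store` side, assuming `chunk_data` is a list of dicts carrying each chunk's `text` and pre-computed `embedding` and that the store wraps a Pinecone index (the class internals and metadata fields are assumptions, not this repo's actual code):

```python
# Sketch only: the method name and keyword arguments come from the diff above;
# the chunk_data shape and the Pinecone wiring are assumptions.
from typing import Any


class LegalDocumentVectorStore:
    def __init__(self, index: Any) -> None:
        self.index = index  # e.g. a Pinecone Index created once in vector_store.py

    def save_document_embeddings_optimized(
        self,
        chunk_data: list[dict],
        document_id: str,
        analysis_results: dict,
    ) -> bool:
        """Upsert pre-computed chunk vectors; nothing is re-embedded here."""
        try:
            vectors = [
                {
                    "id": f"{document_id}_chunk_{i}",
                    "values": chunk["embedding"],  # vector computed during analysis
                    "metadata": {
                        "document_id": document_id,
                        "text": chunk["text"],
                    },
                }
                for i, chunk in enumerate(chunk_data)
            ]
            # Pinecone's upsert accepts a list of dicts with id/values/metadata
            self.index.upsert(vectors=vectors)
            return True
        except Exception:
            return False
```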
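Two smaller changes ride along: the import now pulls a module-level `vector_store` instance rather than the `LegalDocumentVectorStore` class, so `main.py` no longer constructs its own store (presumably `vector_store.py` creates the shared instance once at import time), and the new `/cache_stats` endpoint exposes whatever counters `DocumentProcessor.get_cache_stats()` keeps, which is handy for verifying that the embedding cache is actually being hit.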