sagar008 committed on
Commit
64e01bf
·
verified ·
1 Parent(s): 2ecb6b7

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +14 -12
main.py CHANGED
@@ -2,7 +2,7 @@
2
  from contextlib import asynccontextmanager
3
  from fastapi import FastAPI
4
  from document_processor import DocumentProcessor
5
- from vector_store import LegalDocumentVectorStore
6
  from models import *
7
  import time
8
  import hashlib
@@ -11,7 +11,6 @@ import google.generativeai as genai
11
 
12
  # Initialize processors
13
  processor = DocumentProcessor()
14
- vector_store = LegalDocumentVectorStore()
15
 
16
  # Initialize Gemini
17
  genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
@@ -36,7 +35,7 @@ app = FastAPI(
36
 
37
  @app.post("/analyze_document")
38
  async def analyze_document(data: AnalyzeDocumentInput):
39
- """Unified endpoint for complete document analysis WITH vector storage"""
40
  try:
41
  start_time = time.time()
42
 
@@ -46,16 +45,15 @@ async def analyze_document(data: AnalyzeDocumentInput):
46
  # Generate document ID
47
  doc_id = hashlib.sha256(data.document_text.encode()).hexdigest()[:16]
48
 
49
- # Process document completely
50
- result = await processor.process_document(data.document_text, doc_id)
51
 
52
- # Save embeddings to Pinecone for chat functionality
53
  try:
54
- success = vector_store.save_document_embeddings(
55
- document_text=data.document_text,
56
  document_id=doc_id,
57
- analysis_results=result,
58
- clause_tagger=processor.clause_tagger
59
  )
60
  if success:
61
  result["vector_storage"] = "success"
@@ -70,8 +68,7 @@ async def analyze_document(data: AnalyzeDocumentInput):
70
  result["chat_ready"] = False
71
 
72
  processing_time = time.time() - start_time
73
- result["processing_time"] = f"{processing_time:.2f}s"
74
- result["doc_id"] = doc_id
75
 
76
  return result
77
 
@@ -178,6 +175,11 @@ def health_check():
178
  }
179
  }
180
 
 
 
 
 
181
  if __name__ == "__main__":
182
  import uvicorn
183
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
2
  from contextlib import asynccontextmanager
3
  from fastapi import FastAPI
4
  from document_processor import DocumentProcessor
5
+ from vector_store import vector_store
6
  from models import *
7
  import time
8
  import hashlib
 
11
 
12
  # Initialize processors
13
  processor = DocumentProcessor()
 
14
 
15
  # Initialize Gemini
16
  genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
 
35
 
36
  @app.post("/analyze_document")
37
  async def analyze_document(data: AnalyzeDocumentInput):
38
+ """Unified endpoint for complete document analysis WITH optimized vector storage"""
39
  try:
40
  start_time = time.time()
41
 
 
45
  # Generate document ID
46
  doc_id = hashlib.sha256(data.document_text.encode()).hexdigest()[:16]
47
 
48
+ # Process document completely with pre-computed embeddings
49
+ result, chunk_data = await processor.process_document(data.document_text, doc_id)
50
 
51
+ # Save embeddings to Pinecone using pre-computed vectors (NO RE-EMBEDDING)
52
  try:
53
+ success = vector_store.save_document_embeddings_optimized(
54
+ chunk_data=chunk_data,
55
  document_id=doc_id,
56
+ analysis_results=result
 
57
  )
58
  if success:
59
  result["vector_storage"] = "success"
 
68
  result["chat_ready"] = False
69
 
70
  processing_time = time.time() - start_time
71
+ result["total_processing_time"] = f"{processing_time:.2f}s"
 
72
 
73
  return result
74
 
 
175
  }
176
  }
177
 
178
+ @app.get("/cache_stats")
179
+ def get_cache_stats():
180
+ return processor.get_cache_stats()
181
+
182
  if __name__ == "__main__":
183
  import uvicorn
184
  uvicorn.run(app, host="0.0.0.0", port=7860)
185
+