Update main.py
main.py CHANGED
```diff
@@ -2,7 +2,7 @@
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from document_processor import DocumentProcessor
-from vector_store import LegalDocumentVectorStore
+from vector_store import vector_store
 from models import *
 import time
 import hashlib
@@ -11,7 +11,6 @@ import google.generativeai as genai
 
 # Initialize processors
 processor = DocumentProcessor()
-vector_store = LegalDocumentVectorStore()
 
 # Initialize Gemini
 genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
@@ -36,7 +35,7 @@ app = FastAPI(
 
 @app.post("/analyze_document")
 async def analyze_document(data: AnalyzeDocumentInput):
-    """Unified endpoint for complete document analysis WITH vector storage"""
+    """Unified endpoint for complete document analysis WITH optimized vector storage"""
     try:
         start_time = time.time()
 
@@ -46,16 +45,15 @@ async def analyze_document(data: AnalyzeDocumentInput):
         # Generate document ID
         doc_id = hashlib.sha256(data.document_text.encode()).hexdigest()[:16]
 
-        # Process document completely
-        result = await processor.process_document(data.document_text, doc_id)
+        # Process document completely with pre-computed embeddings
+        result, chunk_data = await processor.process_document(data.document_text, doc_id)
 
-        # Save embeddings to Pinecone
+        # Save embeddings to Pinecone using pre-computed vectors (NO RE-EMBEDDING)
         try:
-            success = vector_store.save_document_embeddings(
+            success = vector_store.save_document_embeddings_optimized(
+                chunk_data=chunk_data,
                 document_id=doc_id,
-                analysis_results=result,
-                clause_tagger=processor.clause_tagger
+                analysis_results=result
             )
             if success:
                 result["vector_storage"] = "success"
@@ -70,8 +68,7 @@
             result["chat_ready"] = False
 
         processing_time = time.time() - start_time
-        result["processing_time"] = f"{processing_time:.2f}s"
-        result["doc_id"] = doc_id
+        result["total_processing_time"] = f"{processing_time:.2f}s"
 
         return result
 
@@ -178,6 +175,11 @@ def health_check():
         }
     }
 
+@app.get("/cache_stats")
+def get_cache_stats():
+    return processor.get_cache_stats()
+
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
+
```
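The substance of this commit: `process_document` now returns the chunk embeddings it already computed during analysis, and the store upserts those vectors as-is instead of embedding every chunk a second time. Below is a minimal sketch of what `save_document_embeddings_optimized` might look like on the `vector_store` side, assuming `chunk_data` is a list of dicts carrying each chunk's `text` and pre-computed `embedding` and that the store wraps a Pinecone index (the class internals and metadata fields are assumptions, not this repo's actual code):

```python
# Sketch only: the method name and keyword arguments come from the diff above;
# the chunk_data shape and the Pinecone wiring are assumptions.
from typing import Any


class LegalDocumentVectorStore:
    def __init__(self, index: Any) -> None:
        self.index = index  # e.g. a Pinecone Index created once in vector_store.py

    def save_document_embeddings_optimized(
        self,
        chunk_data: list[dict],
        document_id: str,
        analysis_results: dict,
    ) -> bool:
        """Upsert pre-computed chunk vectors; nothing is re-embedded here."""
        try:
            vectors = [
                {
                    "id": f"{document_id}_chunk_{i}",
                    "values": chunk["embedding"],  # vector computed during analysis
                    "metadata": {
                        "document_id": document_id,
                        "text": chunk["text"],
                    },
                }
                for i, chunk in enumerate(chunk_data)
            ]
            # Pinecone's upsert accepts a list of dicts with id/values/metadata
            self.index.upsert(vectors=vectors)
            return True
        except Exception:
            return False
```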
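Two smaller changes ride along: the import now pulls a module-level `vector_store` instance rather than the `LegalDocumentVectorStore` class, so `main.py` no longer constructs its own store (presumably `vector_store.py` creates the shared instance once at import time), and the new `/cache_stats` endpoint exposes whatever counters `DocumentProcessor.get_cache_stats()` keeps, which is handy for verifying that the embedding cache is actually being hit.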