# main.py (HF Space FastAPI)
from contextlib import asynccontextmanager
from fastapi import FastAPI
from document_processor import DocumentProcessor
from vector_store import vector_store
from models import AnalyzeDocumentInput, ChatInput, ChunkInput, SummarizeBatchInput
import time
import hashlib
import os
import google.generativeai as genai

# Initialize processors
processor = DocumentProcessor()

# Initialize Gemini (expects GEMINI_API_KEY to be set in the environment)
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: load models and wire shared components
    print("🚀 Initializing Document Processor...")
    await processor.initialize()
    print("📚 Initializing Vector Store...")
    # Reuse the processor's clause tagger inside the vector store
    vector_store.clause_tagger = processor.clause_tagger
    print("✅ Application startup complete")
    yield
    print("🛑 Shutting down application...")

# Create FastAPI app
app = FastAPI(
    title="Legal Document Analysis API", 
    version="1.0.0", 
    lifespan=lifespan
)

@app.post("/analyze_document")
async def analyze_document(data: AnalyzeDocumentInput):
    """Unified endpoint for complete document analysis WITH optimized vector storage"""
    try:
        start_time = time.time()
        
        if not data.document_text:
            return {"error": "No document text provided"}
        
        # Deterministic document ID: first 16 hex chars of the SHA-256 of the text
        doc_id = hashlib.sha256(data.document_text.encode()).hexdigest()[:16]
        
        # Process document completely with pre-computed embeddings
        result, chunk_data = await processor.process_document(data.document_text, doc_id)
        
        # Save the pre-computed vectors to Pinecone (no re-embedding pass)
        try:
            success = vector_store.save_document_embeddings_optimized(
                chunk_data=chunk_data,
                document_id=doc_id,
                analysis_results=result
            )
            if success:
                result["vector_storage"] = "success"
                result["chat_ready"] = True
                print(f"βœ… Embeddings saved for doc {doc_id}")
            else:
                result["vector_storage"] = "failed"
                result["chat_ready"] = False
        except Exception as e:
            print(f"⚠️ Vector storage failed: {e}")
            result["vector_storage"] = "failed"
            result["chat_ready"] = False
        
        processing_time = time.time() - start_time
        result["total_processing_time"] = f"{processing_time:.2f}s"
        
        return result
        
    except Exception as e:
        return {"error": str(e)}

async def generate_response_with_context(user_question: str, relevant_context: str, document_id: str):
    """Send relevant chunks to Gemini for response generation"""
    try:
        prompt = f"""You are a legal document assistant. Answer the user's question based ONLY on the provided context from their legal document.

Context from document {document_id}:
{relevant_context}

User Question: {user_question}

Instructions:
- Provide a clear, accurate answer based on the context above
- If the answer isn't in the context, say "I cannot find information about this in the provided document"
- Include specific quotes from the document when relevant
- Keep your answer focused on legal implications and key details

Answer:"""
        
        model = genai.GenerativeModel('gemini-1.5-flash')
        # Use the async call so the request doesn't block the event loop
        response = await model.generate_content_async(prompt)
        return response.text
        
    except Exception as e:
        return f"Error generating response: {str(e)}"

@app.post("/chat")
async def chat_with_document(data: ChatInput):
    """Chat with a specific legal document using RAG"""
    try:
        if not data.message or not data.document_id:
            return {"error": "Message and document_id are required"}
        
        # Get retriever for specific document
        retriever = vector_store.get_retriever(
            clause_tagger=processor.clause_tagger,
            document_id=data.document_id
        )
        
        if not retriever:
            return {"error": "Failed to create retriever or document not found"}
        
        # Get relevant chunks based on similarity
        relevant_chunks = retriever.get_relevant_documents(data.message)
        
        if not relevant_chunks:
            return {
                "response": "I couldn't find relevant information in the document to answer your question.",
                "sources": [],
                "document_id": data.document_id
            }
        
        # Prepare context from relevant chunks
        context = "\n\n".join([doc.page_content for doc in relevant_chunks])
        
        # Generate response using Gemini
        llm_response = await generate_response_with_context(
            user_question=data.message,
            relevant_context=context,
            document_id=data.document_id
        )
        
        # Prepare sources
        sources = []
        for doc in relevant_chunks:
            sources.append({
                "chunk_index": doc.metadata.get("chunk_index", 0),
                "similarity_score": doc.metadata.get("similarity_score", 0),
                "text_preview": doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content
            })
        
        return {
            "response": llm_response,
            "sources": sources,
            "document_id": data.document_id,
            "chunks_used": len(relevant_chunks)
        }
        
    except Exception as e:
        return {"error": f"Chat failed: {str(e)}"}

# Backward-compatibility endpoints (kept for existing clients)
@app.post("/chunk")
def chunk_text(data: ChunkInput):
    return processor.chunk_text(data)

@app.post("/summarize_batch")
def summarize_batch(data: SummarizeBatchInput):
    return processor.summarize_batch(data)

@app.get("/health")
def health_check():
    return {
        "status": "healthy",
        "services": {
            "document_processor": "active",
            "vector_store": "active",
            "gemini_llm": "active"
        }
    }

@app.get("/cache_stats")
def get_cache_stats():
    return processor.get_cache_stats()

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
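
# Minimal end-to-end client (sketch; assumes the Space is reachable on
# localhost:7860 and that `requests` is installed):
#
#   import hashlib
#   import requests
#
#   text = open("contract.txt").read()
#   # Same ID derivation as /analyze_document, so the client can address
#   # the document in /chat without parsing the analysis response:
#   doc_id = hashlib.sha256(text.encode()).hexdigest()[:16]
#
#   requests.post("http://localhost:7860/analyze_document",
#                 json={"document_text": text}).raise_for_status()
#   reply = requests.post("http://localhost:7860/chat",
#                         json={"message": "Who are the parties?",
#                               "document_id": doc_id}).json()
#   print(reply["response"])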