Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -133,99 +133,4 @@
|
|
133 |
# if __name__ == "__main__":
|
134 |
# main()
|
135 |
|
136 |
-
from sentence_transformers import SentenceTransformer
|
137 |
-
from transformers import pipeline
|
138 |
-
import faiss
|
139 |
-
import numpy as np
|
140 |
-
import streamlit as st
|
141 |
-
import os
|
142 |
-
from dotenv import load_dotenv
|
143 |
-
import google.generativeai as genai
|
144 |
-
import torch
|
145 |
-
|
146 |
-
# All inference runs on CPU.
device = "cpu"


# Streamlit re-executes this script on every user interaction, so plain
# module-level construction would rebuild both models on each rerun.
# st.cache_resource keeps a single instance per server process.
@st.cache_resource
def _load_embedding_model():
    """Build the sentence-embedding model used to encode queries."""
    return SentenceTransformer("all-MiniLM-L6-v2-int8", device=device)


@st.cache_resource
def _load_summarizer():
    """Build the BART summarization pipeline (device=-1 selects CPU)."""
    return pipeline("summarization", model="facebook/bart-large-cnn", device=-1)


embedding_model = _load_embedding_model()
summarizer = _load_summarizer()

# Load the Gemini API key from the environment (optionally via a .env file).
# NOTE(review): os.getenv returns None when API_KEY is unset; presumably the
# Gemini call then fails at request time -- confirm and surface a clear error.
load_dotenv()
api_key = os.getenv("API_KEY")
genai.configure(api_key=api_key)
gemini_model = genai.GenerativeModel(model_name="gemini-2.0-flash")
|
158 |
-
|
159 |
-
# Cache FAISS index & document texts
|
160 |
-
@st.cache_resource
def load_faiss_index():
    """Load the persisted FAISS index and its parallel array of document texts.

    Returns:
        ``(index, doc_texts)`` when both ``faiss_index.idx`` and
        ``doc_texts.npy`` exist in the working directory, otherwise
        ``(None, None)``.
    """
    index_path = "faiss_index.idx"
    texts_path = "doc_texts.npy"

    # Both artifacts are required; bail out if either one is missing.
    if not (os.path.exists(index_path) and os.path.exists(texts_path)):
        return None, None

    loaded_index = faiss.read_index(index_path)
    # SECURITY NOTE: allow_pickle=True unpickles the file's contents, which can
    # execute arbitrary code -- only load a doc_texts.npy from a trusted source.
    loaded_texts = np.load(texts_path, allow_pickle=True)
    return loaded_index, loaded_texts


faiss_index, doc_texts = load_faiss_index()
|
169 |
-
|
170 |
-
# Query Legal Documents
|
171 |
-
def query_legal_documents(query: str, top_k=3):
    """Return the stored document texts nearest to ``query``.

    Args:
        query: Free-text user question to embed and search for.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of at most ``top_k`` entries from ``doc_texts``. Returns an
        empty list (after showing a Streamlit error) when the index or the
        document texts were not loaded.
    """
    if faiss_index is None or doc_texts is None:
        st.error("FAISS index or document data not found.")
        return []

    # faiss.normalize_L2 works in place and expects a contiguous float32 matrix.
    query_embedding = np.ascontiguousarray(
        embedding_model.encode([query]), dtype=np.float32
    )
    faiss.normalize_L2(query_embedding)  # Normalize embeddings manually

    _distances, indices = faiss_index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when fewer than top_k neighbours exist.
    # A bare upper-bound check would let -1 through and silently return the
    # LAST document, so the lower bound is checked as well.
    doc_count = len(doc_texts)
    return [doc_texts[i] for i in indices[0] if 0 <= i < doc_count]
|
182 |
-
|
183 |
-
# Summarization Agent (Batch Processing)
|
184 |
-
def summarize_text(text_sections):
    """Summarize a batch of text sections with the module-level ``summarizer``.

    Args:
        text_sections: Iterable of strings to summarize.

    Returns:
        A list with one summary string per input section, in input order.
        An empty input yields an empty list without invoking the model.
    """
    texts = list(text_sections)
    if not texts:
        # Nothing to summarize; skip the model call entirely.
        return []

    # truncation=True clips sections longer than the model's maximum input
    # length instead of letting the pipeline fail on over-long documents.
    summaries = summarizer(
        texts,
        max_length=100,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return [summary["summary_text"] for summary in summaries]
|
188 |
-
|
189 |
-
# LLM Agent (Skip if Summaries are Sufficient)
|
190 |
-
def generate_llm_response(summary_text):
    """Return an LLM-generated answer for ``summary_text``.

    Summaries shorter than 200 characters are returned unchanged, skipping
    the Gemini call. Longer summaries are sent to ``gemini_model`` and the
    generated text is returned.

    Args:
        summary_text: Combined summary string to use as the prompt.

    Returns:
        The generated text, or ``summary_text`` itself when the summary is
        short or the model response contains no text.
    """
    if len(summary_text) < 200:
        return summary_text  # Skip LLM for short summaries

    response = gemini_model.generate_content(summary_text)
    try:
        return response.text
    except ValueError:
        # ``response.text`` raises ValueError when the response carries no
        # text candidate (for example when it was blocked); fall back to the
        # summary instead of crashing the app.
        return summary_text
|
195 |
-
|
196 |
-
# Streamlit App
|
197 |
-
def main():
    """Render the Streamlit UI: take a query, retrieve, summarize, and answer.

    On submit, the query is passed to ``query_legal_documents``; the retrieved
    sections are summarized with ``summarize_text``, the summaries are joined
    with newlines and passed to ``generate_llm_response``, and all three
    stages are displayed.
    """
    st.set_page_config(page_title="Legal Chatbot", layout="wide")
    st.title("🧑‍⚖️ Legal Chatbot")
    user_query = st.text_input("Enter your legal query:")

    if not st.button("Submit"):
        return

    if not user_query.strip():
        # Give feedback instead of silently ignoring an empty submission.
        st.warning("Please enter a legal query.")
        return

    # A spinner disappears once the work finishes, unlike a persistent banner.
    with st.spinner("Processing your request..."):
        retrieved_sections = query_legal_documents(user_query)

        if not retrieved_sections:
            st.warning("No relevant legal documents found.")
            return

        summarized_sections = summarize_text(retrieved_sections)
        combined_summary = "\n".join(summarized_sections)
        final_response = generate_llm_response(combined_summary)

    # NOTE(review): the original heading emoji was garbled beyond recovery;
    # a document emoji is substituted here -- confirm the intended glyph.
    st.markdown("### 📄 Retrieved Data")
    for section in retrieved_sections:
        st.markdown(f"🔹 {section}")

    st.markdown("### ✨ Summarized Response")
    for summary in summarized_sections:
        st.markdown(f"✅ {summary}")

    st.markdown("### 🤖 AI-Enhanced Response")
    st.text_area("Final Answer:", final_response, height=150)


if __name__ == "__main__":
    main()
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
|
|
|
133 |
# if __name__ == "__main__":
|
134 |
# main()
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|