piyushmadhukar committed on
Commit
7c22b31
·
verified ·
1 Parent(s): 580f51d

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ faiss_index.idx filter=lfs diff=lfs merge=lfs -text
37
+ Guide_to_Litigation_India.pdf filter=lfs diff=lfs merge=lfs -text
38
+ Legal_Compliance_Corporate_Laws_ICAI.pdf filter=lfs diff=lfs merge=lfs -text
Guide_to_Litigation_India.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9f97e58c194f220d8a41e91b9d8fc429cff11a397e61e192d3be3443830d67
3
+ size 2083921
Legal_Compliance_Corporate_Laws_ICAI.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4622640ff9442adbb70fcffc5ac56756c7dfc7bffba2219fde8d646498efd1
3
+ size 913275
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from transformers import pipeline
3
+ from pydantic import BaseModel
4
+ import faiss
5
+ import numpy as np
6
+ import streamlit as st
7
+ from typing import List
8
+ import os
9
+ from dotenv import load_dotenv
10
+ import google.generativeai as genai
11
+ import torch
12
+ import asyncio
13
+
14
+
15
# If no event loop is currently running, create one and register it as the
# current loop. NOTE(review): presumably a workaround for libraries that look
# up an asyncio loop when executed under Streamlit -- confirm it is still needed.
try:
    asyncio.get_running_loop()
except RuntimeError:
    asyncio.set_event_loop(asyncio.new_event_loop())


device = torch.device("cpu")
print("Device set to use CPU")


# Streamlit re-executes this script on every user interaction. Without caching,
# both models below would be rebuilt on each rerun. show_spinner=False keeps the
# cached loaders from drawing any UI element before st.set_page_config() runs
# inside main().
@st.cache_resource(show_spinner=False)
def _load_embedding_model():
    """Build the sentence-embedding model once per server process (CPU only)."""
    return SentenceTransformer("all-MiniLM-L6-v2", device="cpu")


@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Build the BART summarization pipeline once per server process."""
    # device=-1 forces CPU usage
    return pipeline("summarization", model="facebook/bart-large-cnn", device=-1)


embedding_model = _load_embedding_model()

summarizer = _load_summarizer()


# Read the Gemini API key from the environment (optionally via a .env file).
load_dotenv()
api_key = os.getenv("API_KEY")
if not api_key:
    # Surface the misconfiguration early instead of failing on the first query.
    print("WARNING: API_KEY is not set; Gemini requests will fail.")

genai.configure(api_key=api_key)


gemini_model = genai.GenerativeModel(model_name="gemini-2.0-flash")
38
+
39
+
40
# --- Data models passed between the pipeline stages -------------------------

# The raw question typed by the user.
class UserQuery(BaseModel):
    query: str


# One text chunk returned by the FAISS lookup.
class RetrievedSection(BaseModel):
    text: str


# The summarizer output for a single retrieved chunk.
class SummarizedResponse(BaseModel):
    summary: str


# The final answer text produced by the Gemini model.
class FinalLLMResponse(BaseModel):
    response: str
51
+
52
+ # Query Agent
53
def query_legal_documents(query: UserQuery, top_k=3) -> List[RetrievedSection]:
    """Return the stored text chunks closest to the query embedding.

    Args:
        query: The user's question.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        Up to ``top_k`` retrieved sections in the order FAISS returned them,
        or an empty list (after showing a Streamlit error) when the index file
        or the chunk file is missing.
    """
    index_path = "faiss_index.idx"
    texts_path = "doc_texts.npy"

    if not (os.path.exists(index_path) and os.path.exists(texts_path)):
        st.error("FAISS index or document data not found.")
        return []

    index = faiss.read_index(index_path)
    # NOTE(security): the chunks are saved as an object array, so loading them
    # needs allow_pickle=True. Unpickling can execute arbitrary code; only load
    # a doc_texts.npy that this project generated itself.
    doc_texts = np.load(texts_path, allow_pickle=True)

    query_embedding = embedding_model.encode([query.query], convert_to_numpy=True)

    _distances, indices = index.search(query_embedding, top_k)

    # FAISS fills missing result slots with -1 when fewer than top_k neighbours
    # exist. A bare `i < len(doc_texts)` check lets -1 through and silently
    # returns the LAST chunk, so the lower bound must be checked as well.
    return [
        RetrievedSection(text=doc_texts[i])
        for i in indices[0]
        if 0 <= i < len(doc_texts)
    ]
74
+
75
+ # Summarization Agent
76
def summarize_text(text_sections: List[RetrievedSection]) -> List[SummarizedResponse]:
    """Summarize each retrieved section on its own, preserving input order."""
    results = []
    for section in text_sections:
        # The pipeline returns a list of dicts; the first entry holds the summary.
        output = summarizer(section.text, max_length=100, min_length=30, do_sample=False)
        results.append(SummarizedResponse(summary=output[0]["summary_text"]))
    return results
84
+
85
+ # LLM Agent to refine response
86
def generate_llm_response(summary_text: str) -> FinalLLMResponse:
    """Ask the Gemini model for a brief plain-text answer based on the summaries.

    Args:
        summary_text: The combined summaries to hand to the model.

    Returns:
        The model's answer, or a fallback message when the model returned no
        usable text.
    """
    prompt = (
        "Provide a **brief** response. Do not use any special formatting like **. "
        f"Here is the input:\n\n{summary_text}"
    )
    response = gemini_model.generate_content(prompt)

    try:
        answer = response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response carries no
        # text part (for example when the candidate was blocked). Return a
        # readable message instead of crashing the Streamlit page.
        answer = "The model did not return a response for this query. Please try rephrasing it."

    return FinalLLMResponse(response=answer)
89
+
90
+
91
def main():
    """Render the Streamlit page and run retrieve -> summarize -> answer on submit."""
    st.set_page_config(page_title="Legal Chatbot", layout="wide")

    st.sidebar.title("Legal Chatbot Settings")
    st.sidebar.write("This chatbot helps with legal queries by retrieving relevant legal documents, summarizing them, and generating AI-enhanced responses.")

    st.title("🧑‍⚖️ Legal Chatbot")
    st.markdown("### Ask your legal question below:")

    user_query = st.text_input("Enter your legal query:")

    if not st.button("Submit", use_container_width=True):
        return

    # Previously an empty submission did nothing at all; tell the user why.
    cleaned_query = (user_query or "").strip()
    if not cleaned_query:
        st.warning("Please enter a legal query before submitting.")
        return

    st.info("Processing your request...")

    query_obj = UserQuery(query=cleaned_query)
    retrieved_sections = query_legal_documents(query_obj)

    if not retrieved_sections:
        st.warning("No relevant legal documents found. Try refining your query.")
        return

    summarized_sections = summarize_text(retrieved_sections)

    # Combine summaries for LLM
    combined_summary = "\n".join(res.summary for res in summarized_sections)
    llm_response = generate_llm_response(combined_summary)

    # Display results
    st.markdown("### 📖 Retrieved Data from Knowledge Base")
    for section in retrieved_sections:
        st.markdown(f"🔹 {section.text}")

    st.markdown("### ✨ Summarized Response")
    for res in summarized_sections:
        st.markdown(f"✅ {res.summary}")

    st.markdown("### 🤖 AI-Enhanced Response")
    st.text_area("Final Answer:", llm_response.response, height=150)


if __name__ == "__main__":
    main()
create_faiss_index.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PyPDF2
3
+ import faiss
4
+ import numpy as np
5
+ import streamlit as st
6
+ from sentence_transformers import SentenceTransformer
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+
9
+ from pydantic import BaseModel
10
+ from typing import List, Tuple
11
+
12
+
13
+
14
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
15
+
16
+
17
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page texts as a single string.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Join once instead of growing a string with += per page. The `or ""`
        # keeps a page with no extractable text from raising TypeError should
        # extract_text() yield None (TODO confirm for the installed PyPDF2).
        return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
24
+
25
+
26
def chunk_text(text, chunk_size=500, chunk_overlap=50):
    """Split text into overlapping chunks with RecursiveCharacterTextSplitter."""
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_text(text)
30
+
31
+
32
# Source PDFs whose text is chunked, embedded and indexed.
legal_docs = ["Guide_to_Litigation_India.pdf", "Legal_Compliance_Corporate_Laws_ICAI.pdf"]
doc_texts = []
for doc in legal_docs:
    text = extract_text_from_pdf(doc)
    doc_texts.extend(chunk_text(text))

# With no chunks the embedding array has no second dimension, and the
# `embeddings.shape[1]` lookup below would fail with an opaque IndexError.
if not doc_texts:
    raise SystemExit("No text could be extracted from the PDFs; FAISS index not written.")

# FAISS indexes operate on float32 vectors; the cast is a no-op when the
# encoder already returns float32.
embeddings = np.asarray(
    embedding_model.encode(doc_texts, convert_to_numpy=True),
    dtype=np.float32,
)

# Exact L2 index sized to the embedding dimension.
d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)

# Persist the index and the chunk texts side by side; row i of the index
# corresponds to doc_texts[i].
faiss.write_index(index, "faiss_index.idx")
np.save("doc_texts.npy", np.array(doc_texts, dtype=object))

print("Document processing completed. FAISS index saved.")
doc_texts.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67dcdd129e320bf371100377ef7c8916ebe407901c4dbca0d1e7d7629b4cc2e4
3
+ size 461168
faiss_index.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9a568597e10ddd927c36258b2afff49192dea86829e252eb07aa2d1321762a9
3
+ size 1665069
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ faiss-cpu
2
+ numpy
3
+ pypdf2
4
+ streamlit
5
+ sentence-transformers
6
+ langchain
7
+ transformers
8
+ pydantic
9
+ google-generativeai
10
+ python-dotenv
11
+ requests
12
+ langchain_google_genai
13
+ langchain-community
14
+ torch