# DigitalLawyer / checkvectordb.py
import os
import json
import re

from flask import Flask, request, jsonify, render_template, render_template_string, send_from_directory

import openai
import langchain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain import OpenAI
from langchain.schema import Document
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer, LTChar
import pdfplumber
import fitz  # PyMuPDF
import chromadb
from chromadb.config import Settings
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'
# Read the OpenAI API key from the environment rather than hard-coding it in source.
openapi_key = os.environ.get("OPENAI_API_KEY", "")
os.environ["OPENAI_API_KEY"] = openapi_key
fpath = "./SCHOOL ADMISSI0N TEST 2025-2026.pdf"
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=50,
                                               length_function=len, separators=["\n\n", "\n", " "])
global_var = "I am global"
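# Illustrative sketch (an assumption, not wired into any route): the module-level
# text_splitter above would typically be applied to loaded PDF pages like this before
# embedding; trainpdf1 below currently embeds the whole order as one document instead.
# The helper name split_pages_example is hypothetical.
def split_pages_example(pages):
    # pages: list of LangChain Document objects, e.g. from PyPDFLoader(...).load()
    chunks = text_splitter.split_documents(pages)
    print(f"split {len(pages)} pages into {len(chunks)} chunks")
    return chunks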
def loadmiyapurcourtorders(pathtosjon):
    """Load Miyapur court cases from a JSON file into a persistent Chroma store."""
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing, please wait---")
            courtcontent = json.load(f)
        docs = []
        for content in courtcontent:
            content1 = f"""
            Court Level: {content["court_level"]}
            Case No: {content["caseno"]}
            Year: {content["year"]}
            Prayer: {content["prayer"]}
            Verdict: {content["verdict"]}
            Judgement Date: {content["verdictdate"]}
            Status: {content["status"]}
            Case Type: {content["casetype"]}
            Petitioner: {content["petitioner"]}
            Respondent 1: {content["Respondent 1"]}
            Judge: {content["judge"]}
            """
            metadata = {
                "case no": str(content["caseno"]),
                "year": content["year"]
            }
            docs.append(Document(page_content=content1, metadata=metadata))
        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all Miyapur court case documents in a persistent Chroma collection
        vectorstore = Chroma.from_documents(docs, embeddings, persist_directory="./MiyapurCase_db1")
    except Exception as er:
        print("--exception---", er)
        return jsonify("An exception occurred while processing the court cases.")
    return "Miyapur court cases were loaded successfully"
def loadIPCsections(pathtosjon):
    """Load IPC sections from a JSON file into a persistent Chroma store."""
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing, please wait---")
            ipc_sections = json.load(f)
        docs = []
        for section in ipc_sections:
            content = f"Section {section['Section']}: {section['section_title']}\n{section['section_desc']}"
            metadata = {
                "chapter": str(section["chapter"]),
                "chapter_title": section["chapter_title"],
                "section": str(section["Section"]),
                "section_title": section["section_title"]
            }
            docs.append(Document(page_content=content, metadata=metadata))
        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all IPC section articles in a persistent Chroma collection
        vectorstore = Chroma.from_documents(docs, embeddings, persist_directory="./ChromaIPC_db")
    except Exception as er:
        print("--exception---", er)
        return jsonify("An exception occurred while processing the IPC sections.")
    return "IPC sections were loaded successfully"
def trainpdf1(fpath1):
    """Save an uploaded court-order PDF, embed it, and map it to likely IPC sections."""
    print("- fpath1---", fpath1)
    try:
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], fpath1.filename)
        fpath1.save(filepath)
        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        loader = PyPDFLoader(filepath)
        pages = loader.load()  # Returns a list of Document objects, one per page
        court_text = "\n".join([page.page_content for page in pages])
        # Wrap the full order text as a single LangChain document
        doc = Document(
            page_content=court_text,
            metadata={"source": "court order"}
        )
        print("- filepath---", filepath)
        #text_splitter = RecursiveCharacterTextSplitter(
        #    chunk_size=800,
        #    chunk_overlap=200
        #)
        #documents = text_splitter.split_documents(pages)
        # Store all court order documents in a persistent Chroma collection
        vectorstore = Chroma.from_documents([doc], embeddings, persist_directory="./ChromaCOURT_db")
        vectorstore.persist()
        # Ask GPT-4 to name the legal areas this order involves
        llm = ChatOpenAI(model="gpt-4", temperature=0)
        prompt = PromptTemplate.from_template("""
        You are a legal assistant. Given the following court order, list the top 5 relevant legal issues or areas that this case involves (e.g., property rights, public nuisance, fundamental rights, illegal construction, etc.)
        Court Order:
        {order}
        List 5 legal areas:
        """)
        chain = LLMChain(llm=llm, prompt=prompt)
        response = chain.run(order=court_text)
        # Match each extracted legal area against the persisted IPC section store
        IPCsearch = Chroma(persist_directory="./ChromaIPC_db", embedding_function=embeddings)
        areas = [area.strip("1234567890. ").strip() for area in response.split('\n') if area.strip()]
        ipc_matches = []
        ipc_results = []
        for area in areas[:5]:  # limit to the top 5 areas
            results = IPCsearch.similarity_search(area, k=1)
            if results:
                ipc_matches.append((area, results[0]))
        # 🖨️ Print matched IPC sections
        for topic, doc in ipc_matches:
            print(f"\n📘 Legal Area: {topic}")
            print(f"🔗 IPC Section: {doc.metadata.get('section')} - {doc.metadata.get('section_title')}")
            print(f"📄 Description: {doc.page_content}")
        for topic, doc in ipc_matches:
            ipc_results.append({
                "legal_area": topic,
                "ipc_section": doc.metadata.get("section"),
                "section_title": doc.metadata.get("section_title"),
                "description": doc.page_content,
                "Orderdocumentation": response
            })
    except Exception as er:
        print("--exception---", er)
        return jsonify("This PDF cannot be trained")
    return ipc_results
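# Hypothetical upload route (an assumption): trainpdf1 expects a Werkzeug FileStorage
# object (it calls .filename and .save), so it would typically receive request.files
# from a multipart form. The route path, handler name, and "file" field are illustrative.
@app.route('/trainpdf', methods=['POST'])
def trainpdf_route():
    if 'file' not in request.files:
        return jsonify("no file uploaded"), 400
    result = trainpdf1(request.files['file'])
    # trainpdf1 already returns a Flask response on failure; only wrap the list case
    if isinstance(result, list):
        return jsonify(result)
    return result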
def getdata(query):
    """Answer a query against the Miyapur case store using a RetrievalQA chain."""
    embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
    os.environ['OPENAI_API_KEY'] = openapi_key
    your_case_db = Chroma(persist_directory="./ChromaIPC_db", embedding_function=embeddings)
    your_case_text = your_case_db.similarity_search("relevant IPC section for bribe", k=1)[0].page_content
    print("---your_case_text-----", your_case_text)
    # Load the Miyapur case database (loadmiyapurcourtorders persists to ./MiyapurCase_db1)
    supreme_db = Chroma(persist_directory="./MiyapurCase_db1", embedding_function=embeddings)
    retriever = supreme_db.as_retriever(search_kwargs={"k": 5})
    # Ask for relevant judgments
    llm = ChatOpenAI(model="gpt-4", temperature=1)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    response = qa_chain.run(query)
    return response
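# Hypothetical query route (an assumption): exposes getdata over HTTP. The route path,
# handler name, and "q" query parameter are illustrative, not part of the original code.
@app.route('/ask', methods=['GET'])
def ask_route():
    query = request.args.get('q', '')
    if not query:
        return jsonify("missing query parameter 'q'"), 400
    return jsonify({"answer": getdata(query)})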
if __name__ == '__main__':
app.run(port=8080)