# NOTE(review): removed Hugging Face Spaces page-scrape artifact that was
# pasted above the code ("Spaces: / Sleeping / Sleeping") — not Python.
import os | |
from flask import Flask, request,render_template, send_from_directory | |
from flask import Flask, request, jsonify, render_template_string | |
import openai | |
import langchain | |
import os | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.vectorstores import Chroma | |
from langchain import OpenAI | |
from langchain.schema import Document | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.chains import RetrievalQA | |
from langchain.chat_models import ChatOpenAI | |
import re | |
from pdfminer.high_level import extract_pages | |
from pdfminer.layout import LTTextContainer, LTChar | |
from langchain.prompts import PromptTemplate | |
from langchain.chains import LLMChain | |
import json | |
import pdfplumber | |
import fitz # PyMuPDF | |
import chromadb | |
from chromadb.config import Settings | |
app = Flask(__name__)

# SECURITY: an OpenAI API key was previously hard-coded here (twice) and
# committed to source control. That key must be treated as leaked and revoked.
# Read the key from the environment instead of embedding it in the file.
openapi_key = os.environ.get("OPENAI_API_KEY", "")
if openapi_key:
    # ChatOpenAI/OpenAI pick the key up from the environment at call time.
    os.environ["OPENAI_API_KEY"] = openapi_key

# Directory where uploaded PDFs are saved by trainpdf1().
app.config['UPLOAD_FOLDER'] = 'uploads'

# Default/demo PDF path (note: "ADMISSI0N" with a zero is the real filename).
fpath = "./SCHOOL ADMISSI0N TEST 2025-2026.pdf"

# Shared splitter: 800-char chunks, 50-char overlap, splitting on paragraph,
# then line, then word boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", " "],
)

global_var = "I am global"
def loadmiyapurcourtorders(pathtosjon):
    """Load Miyapur court-case records from a JSON file and index them in Chroma.

    Each case dict becomes one Document whose text summarises the case fields;
    the metadata keeps the case number and year. The store is persisted to
    ./MiyapurCase_db — the directory getdata() queries. (Bug fix: this
    previously wrote to ./MiyapurCase_db1, so getdata() could never see the
    indexed cases.)

    Args:
        pathtosjon: path to a JSON file holding a list of case dicts with keys
            court_level, caseno, year, prayer, verdict, verdictdate, status,
            casetype, petitioner, "Respondent 1", judge.

    Returns:
        A status string (success message, or an error description on failure).
    """
    try:
        # Load and close the file before doing any embedding work.
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing pls wait---")
            courtcontent = json.load(f)

        docs = []
        for content in courtcontent:
            content1 = f"""
Court Level: {content["court_level"]}
Case No: {content["caseno"]}
Year: {content["year"]}
Prayer: {content["prayer"]}
Verdict: {content["verdict"]}
Judgement Date: {content["verdictdate"]}
Status: {content["status"]}
Case Type: {content["casetype"]}
petitioner: {content["petitioner"]}
respondent 1: {content["Respondent 1"]}
judge: {content["judge"]}
"""
            metadata = {
                "case no": str(content["caseno"]),
                "year": content["year"],
            }
            docs.append(Document(page_content=content1, metadata=metadata))

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Persist into the same directory getdata() reads from (was
        # "./MiyapurCase_db1", which nothing ever queried).
        Chroma.from_documents(docs, embeddings, persist_directory="./MiyapurCase_db")
    except Exception as er:
        print("--exception---", er)
        # Return a plain string: jsonify() raises RuntimeError when called
        # outside a Flask request/application context.
        return "exception occured while processing..."
    return "Miyapur court cases are loaded sucessfully"
def loadIPCsections(pathtosjon):
    """Load IPC sections from a JSON file and index them into ./ChromaIPC_db.

    Each section dict becomes one Document ("Section N: title\\ndescription");
    chapter and section identifiers are kept in the metadata so trainpdf1()
    can report the matched section number and title.

    Args:
        pathtosjon: path to a JSON file holding a list of section dicts with
            keys Section, section_title, section_desc, chapter, chapter_title.

    Returns:
        A status string (success message, or an error description on failure).
    """
    try:
        # Load and close the file before doing any embedding work.
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing pls wait---")
            ipc_sections = json.load(f)

        docs = []
        for section in ipc_sections:
            content = f"Section {section['Section']}: {section['section_title']}\n{section['section_desc']}"
            metadata = {
                "chapter": str(section["chapter"]),
                "chapter_title": section["chapter_title"],
                "section": str(section["Section"]),
                "section_title": section["section_title"],
            }
            docs.append(Document(page_content=content, metadata=metadata))

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all IPC section articles here; trainpdf1() and getdata()
        # read this directory back.
        Chroma.from_documents(docs, embeddings, persist_directory="./ChromaIPC_db")
    except Exception as er:
        print("--exception---", er)
        # Return a plain string: jsonify() raises RuntimeError when called
        # outside a Flask request/application context.
        return "expcetion occured while processing..."
    return "IPC sections are loaded sucessfully"
def trainpdf1(fpath1):
    """Save an uploaded court-order PDF, index it, and map it to IPC sections.

    Workflow: save the upload into UPLOAD_FOLDER, extract the full text with
    PyPDFLoader, persist it as a single document in ./ChromaCOURT_db, ask
    GPT-4 for the top-5 legal areas the order involves, then look up the
    closest IPC section for each area in ./ChromaIPC_db.

    Args:
        fpath1: the uploaded file object (must provide .filename and .save(),
            e.g. a werkzeug FileStorage from a Flask request).

    Returns:
        A list of dicts with keys legal_area, ipc_section, section_title,
        description, Orderdocumentation — or an error string on failure.
    """
    print("- fpath1---", fpath1)
    try:
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], fpath1.filename)
        fpath1.save(filepath)
        print("- filepath---", filepath)

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)

        # Extract the whole order as one document — no chunking, because the
        # LLM prompt below needs the complete text in a single string.
        loader = PyPDFLoader(filepath)
        pages = loader.load()  # list of per-page Document objects
        court_text = "\n".join(page.page_content for page in pages)
        doc = Document(page_content=court_text, metadata={"source": "court order"})

        # Store the court-order document for later retrieval.
        vectorstore = Chroma.from_documents([doc], embeddings, persist_directory="./ChromaCOURT_db")
        vectorstore.persist()

        # Ask the model which legal areas the order touches.
        llm = ChatOpenAI(model="gpt-4", temperature=0)
        prompt = PromptTemplate.from_template("""
You are a legal assistant. Given the following court order, list the top 5 relevant legal issues or areas that this case involves (e.g., property rights, public nuisance, fundamental rights, illegal construction, etc.)
Court Order:
{order}
List 5 legal areas:
""")
        chain = LLMChain(llm=llm, prompt=prompt)
        response = chain.run(order=court_text)

        # Strip list numbering ("1. ", "2. ", ...) from each returned line.
        areas = [area.strip("1234567890. ").strip() for area in response.split('\n') if area.strip()]

        # Match each legal area against the IPC-section store; one merged loop
        # both logs and collects the results (previously two duplicate loops).
        IPCsearch = Chroma(persist_directory="./ChromaIPC_db", embedding_function=embeddings)
        ipc_results = []
        for area in areas[:5]:  # limit to top 5 areas
            results = IPCsearch.similarity_search(area, k=1)
            if not results:
                continue
            match = results[0]
            print(f"\nLegal Area: {area}")
            print(f"IPC Section: {match.metadata.get('section')} - {match.metadata.get('section_title')}")
            print(f"Description: {match.page_content}")
            ipc_results.append({
                "legal_area": area,
                "ipc_section": match.metadata.get("section"),
                "section_title": match.metadata.get("section_title"),
                "description": match.page_content,
                "Orderdocumentation": response,
            })
    except Exception as er:
        print("--exception---", er)
        # Plain string instead of jsonify(): jsonify raises outside a request
        # context, and the function otherwise returns a list, not a Response.
        return "This pdf cannot be trained"
    return ipc_results
def getdata(query):
    """Answer a question over the persisted Miyapur case store via RetrievalQA.

    Retrieves the 5 nearest case documents from ./MiyapurCase_db and lets
    GPT-4 answer the query against them.

    Args:
        query: natural-language question about the stored court cases.

    Returns:
        The model's answer as a string.

    Note: removed a hard-coded debug similarity_search against ./ChromaIPC_db
    ("relavant IPC section for bribe") whose result was only printed — it cost
    an extra embedding API call per request and was unrelated to `query`.
    """
    # ChatOpenAI reads the key from the environment, so set it up front.
    os.environ['OPENAI_API_KEY'] = openapi_key
    embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)

    # Load the persisted case database and retrieve the 5 closest cases.
    supreme_db = Chroma(persist_directory="./MiyapurCase_db", embedding_function=embeddings)
    retriever = supreme_db.as_retriever(search_kwargs={"k": 5})

    # NOTE(review): temperature=1 makes answers non-deterministic — consider 0
    # for reproducible legal answers.
    llm = ChatOpenAI(model="gpt-4", temperature=1)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa_chain.run(query)
if __name__ == '__main__':
    # Start Flask's built-in development server on port 8080
    # (debug server only — use a WSGI server such as gunicorn in production).
    app.run(port=8080)