import os
import json
import re

from flask import Flask, request, jsonify, render_template, render_template_string, send_from_directory

import openai
import langchain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain import OpenAI
from langchain.schema import Document
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer, LTChar
import pdfplumber
import fitz  # PyMuPDF
import chromadb
from chromadb.config import Settings

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'

# SECURITY FIX: the original file hard-coded a live OpenAI API key here (twice).
# Never commit secrets to source control -- the old key must be revoked and the
# replacement supplied via the OPENAI_API_KEY environment variable instead.
openapi_key = os.environ.get("OPENAI_API_KEY", "")

# NOTE(review): this path is set but never read in the visible code -- confirm
# whether it is still needed before removing.
fpath = "./SCHOOL ADMISSI0N TEST 2025-2026.pdf"

# Module-level splitter; currently unused by the functions below (the splitting
# code in trainpdf1 is commented out) but kept for backward compatibility.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", " "],
)

global_var = "I am global"


def loadmiyapurcourtorders(pathtosjon):
    """Load Miyapur court-case records from a JSON file and index them in Chroma.

    Each record in the JSON array is flattened into one text document (with
    case number and year kept as metadata) and embedded into a persistent
    Chroma store so that getdata() can retrieve it later.

    Args:
        pathtosjon: Path to a JSON file containing a list of case dicts with
            keys such as "court_level", "caseno", "year", "prayer", etc.

    Returns:
        A status string ("loaded successfully" or an error notice).
    """
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing pls wait---")
            courtcontent = json.load(f)

        docs = []
        for content in courtcontent:
            # Flatten the structured record into prose so the embedding model
            # sees all fields in one searchable chunk.
            content1 = f"""
Court Level: {content["court_level"]}
Case No: {content["caseno"]}
Year: {content["year"]}
Prayer: {content["prayer"]}
Verdict: {content["verdict"]}
Judgement Date: {content["verdictdate"]}
Status: {content["status"]}
Case Type: {content["casetype"]}
petitioner: {content["petitioner"]}
respondent 1: {content["Respondent 1"]}
judge: {content["judge"]}
"""
            metadata = {
                "case no": str(content["caseno"]),
                "year": content["year"],
            }
            docs.append(Document(page_content=content1, metadata=metadata))

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # BUG FIX: the original persisted to "./MiyapurCase_db1" while getdata()
        # reads from "./MiyapurCase_db", so queries always hit an empty store.
        # Both now use the same directory.
        Chroma.from_documents(docs, embeddings, persist_directory="./MiyapurCase_db")
    except Exception as er:
        print("--exception---", er)
        # FIX: jsonify() requires a Flask request/app context and this is a
        # plain helper, so return a plain string on both paths.
        return "exception occurred while processing..."
    return "Miyapur court cases are loaded successfully"


def loadIPCsections(pathtosjon):
    """Load IPC (Indian Penal Code) sections from JSON and index them in Chroma.

    Each section becomes one document ("Section N: title\\ndescription") with
    chapter/section identifiers preserved as metadata, persisted to
    "./ChromaIPC_db" for later similarity search in trainpdf1()/getdata().

    Args:
        pathtosjon: Path to a JSON file containing a list of section dicts with
            keys "Section", "section_title", "section_desc", "chapter",
            "chapter_title".

    Returns:
        A status string ("loaded successfully" or an error notice).
    """
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing pls wait---")
            ipc_sections = json.load(f)

        docs = []
        for section in ipc_sections:
            content = (
                f"Section {section['Section']}: {section['section_title']}\n"
                f"{section['section_desc']}"
            )
            metadata = {
                "chapter": str(section["chapter"]),
                "chapter_title": section["chapter_title"],
                "section": str(section["Section"]),
                "section_title": section["section_title"],
            }
            docs.append(Document(page_content=content, metadata=metadata))

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all IPC section articles here.
        Chroma.from_documents(docs, embeddings, persist_directory="./ChromaIPC_db")
    except Exception as er:
        print("--exception---", er)
        # FIX: plain string instead of jsonify() (no request context here);
        # also fixes the "expcetion" typo in the message.
        return "exception occurred while processing..."
    return "IPC sections are loaded successfully"


def trainpdf1(fpath1):
    """Ingest an uploaded court-order PDF, index it, and map it to IPC sections.

    Pipeline:
      1. Save the uploaded file and extract its full text with PyPDFLoader.
      2. Persist the order as a single document in "./ChromaCOURT_db".
      3. Ask GPT-4 for the top 5 legal areas the order involves.
      4. Similarity-search each area against the IPC store ("./ChromaIPC_db").

    Args:
        fpath1: A Flask uploaded-file object (has .filename and .save()).

    Returns:
        A list of dicts, one per matched legal area, each with keys
        "legal_area", "ipc_section", "section_title", "description" and
        "Orderdocumentation" (the raw LLM response); or an error string on
        failure.
    """
    print("- fpath1---", fpath1)
    try:
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], fpath1.filename)
        fpath1.save(filepath)

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        loader = PyPDFLoader(filepath)
        pages = loader.load()  # Returns a list of Document objects.
        court_text = "\n".join(page.page_content for page in pages)

        # Wrap the whole order as one LangChain document (the chunked/splitter
        # approach was deliberately disabled in the original).
        doc = Document(page_content=court_text, metadata={"source": "court order"})
        print("- filepath---", filepath)

        # Store the court-order document.
        vectorstore = Chroma.from_documents(
            [doc], embeddings, persist_directory="./ChromaCOURT_db"
        )
        vectorstore.persist()

        llm = ChatOpenAI(model="gpt-4", temperature=0)
        prompt = PromptTemplate.from_template("""
You are a legal assistant. Given the following court order, list the top 5 relevant legal issues or areas that this case involves (e.g., property rights, public nuisance, fundamental rights, illegal construction, etc.)

Court Order:
{order}

List 5 legal areas:
""")
        chain = LLMChain(llm=llm, prompt=prompt)
        response = chain.run(order=court_text)

        IPCsearch = Chroma(
            persist_directory="./ChromaIPC_db", embedding_function=embeddings
        )

        # The LLM returns a numbered list; strip the "1. " style prefixes.
        areas = [
            area.strip("1234567890. ").strip()
            for area in response.split('\n')
            if area.strip()
        ]

        ipc_matches = []
        ipc_results = []
        for area in areas[:5]:  # Limit to the top 5 areas.
            results = IPCsearch.similarity_search(area, k=1)
            if results:
                ipc_matches.append((area, results[0]))

        # Debug output: print the matched IPC sections.
        for topic, match in ipc_matches:
            print(f"\nšŸ“˜ Legal Area: {topic}")
            print(
                f"šŸ”— IPC Section: {match.metadata.get('section')} - "
                f"{match.metadata.get('section_title')}"
            )
            print(f"šŸ“„ Description: {match.page_content}")

        for topic, match in ipc_matches:
            ipc_results.append({
                "legal_area": topic,
                "ipc_section": match.metadata.get("section"),
                "section_title": match.metadata.get("section_title"),
                "description": match.page_content,
                "Orderdocumentation": response,
            })
    except Exception as er:
        print("--exception---", er)
        # FIX: plain string instead of jsonify() (no request context here).
        return "This pdf cannot be trained"
    return ipc_results


def getdata(query):
    """Answer a legal query using RetrievalQA over the Miyapur case store.

    Args:
        query: Free-text question to answer against the indexed court cases.

    Returns:
        The LLM's answer string.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
    os.environ['OPENAI_API_KEY'] = openapi_key

    # NOTE(review): this IPC lookup uses a hard-coded query and its result is
    # only printed, never used in the answer -- looks like leftover debug code.
    your_case_db = Chroma(
        persist_directory="./ChromaIPC_db", embedding_function=embeddings
    )
    your_case_text = your_case_db.similarity_search(
        "relavant IPC section for bribe", k=1
    )[0].page_content
    print("---your_case_text-----", your_case_text)

    # Load the court-case database (same directory loadmiyapurcourtorders
    # persists to) and answer the query over the top-5 retrieved documents.
    supreme_db = Chroma(
        persist_directory="./MiyapurCase_db", embedding_function=embeddings
    )
    retriever = supreme_db.as_retriever(search_kwargs={"k": 5})

    # NOTE(review): temperature=1 makes answers non-deterministic; consider 0
    # for reproducible legal answers (trainpdf1 uses 0).
    llm = ChatOpenAI(model="gpt-4", temperature=1)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    response = qa_chain.run(query)
    return response


if __name__ == '__main__':
    app.run(port=8080)