Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Upload 7 files
Browse files- AI_legal/.streamlit/config.toml +6 -0
- AI_legal/Ingest.py +61 -0
- AI_legal/app.py +127 -0
- AI_legal/data/ipc_law.txt +0 -0
- AI_legal/footer.py +68 -0
- AI_legal/images/banner.png +0 -0
- AI_legal/requirements.txt +13 -0
    	
        AI_legal/.streamlit/config.toml
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [theme]
         | 
| 2 | 
            +
            base="dark"
         | 
| 3 | 
            +
            primaryColor="#4ba3ff"
         | 
| 4 | 
            +
            backgroundColor="#0e1721"
         | 
| 5 | 
            +
            secondaryBackgroundColor="#142433"
         | 
| 6 | 
            +
            font = "sans serif"
         | 
    	
        AI_legal/Ingest.py
    ADDED
    
    | @@ -0,0 +1,61 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import ray
         | 
| 2 | 
            +
            import logging
         | 
| 3 | 
            +
            from langchain_community.document_loaders import DirectoryLoader
         | 
| 4 | 
            +
            from langchain_community.embeddings import HuggingFaceEmbeddings
         | 
| 5 | 
            +
            from langchain.text_splitter import RecursiveCharacterTextSplitter
         | 
| 6 | 
            +
            from langchain_community.vectorstores import FAISS
         | 
| 7 | 
            +
            from faiss import IndexFlatL2  # Assuming using L2 distance for simplicity
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Initialize Ray.
            # NOTE(review): nothing in this script schedules Ray work; the init/shutdown
            # pair is kept only so the script's runtime side effects stay the same.
            ray.init()

            try:
                # Set up basic configuration for logging
                logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

                # Load every .txt file found directly under ./data
                logging.info("Loading documents...")
                loader = DirectoryLoader('data', glob="./*.txt")
                documents = loader.load()

                # Split the loaded documents into overlapping chunks. The loader returns
                # LangChain Document objects (text lives in `page_content`, there is no
                # `get_text()`), so the splitter is applied to the documents directly;
                # this also carries each document's metadata over to its chunks.
                logging.info("Extracting and splitting texts from documents...")
                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
                texts = text_splitter.split_documents(documents)
                if not texts:
                    # Fail loudly instead of exporting an empty index the app cannot answer from.
                    raise ValueError("No text chunks were produced from the 'data' directory.")

                # Load the embedding model once and reuse it for every chunk.
                embeddings_model = HuggingFaceEmbeddings(model_name="law-ai/InLegalBERT")

                # Embed all chunks and build the FAISS index, docstore and id mapping together.
                logging.info("Storing embeddings in FAISS...")
                faiss_db = FAISS.from_documents(texts, embeddings_model)

                # Exporting the vector embeddings database with logging
                logging.info("Exporting the vector embeddings database...")
                faiss_db.save_local("ipc_embed_db")

                # Log a message to indicate the completion of the process
                logging.info("Process completed successfully.")
            finally:
                # Shutdown Ray even when a step above fails
                ray.shutdown()
         | 
    	
        AI_legal/app.py
    ADDED
    
    | @@ -0,0 +1,127 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import time
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
            import streamlit as st
         | 
| 4 | 
            +
            from langchain_community.vectorstores import FAISS
         | 
| 5 | 
            +
            from langchain_community.embeddings import HuggingFaceEmbeddings
         | 
| 6 | 
            +
            from langchain.prompts import PromptTemplate
         | 
| 7 | 
            +
            from langchain.memory import ConversationBufferWindowMemory
         | 
| 8 | 
            +
            from langchain.chains import ConversationalRetrievalChain
         | 
| 9 | 
            +
            from langchain_together import Together
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            from footer import footer
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # Page title and centered layout for the Streamlit app.
            st.set_page_config(layout="centered", page_title="BharatLAW")

            # Banner image goes in the wide middle column; the two narrow outer
            # columns only provide side margins.
            BANNER_URL = "https://github.com/Nike-one/BharatLAW/blob/master/images/banner.png?raw=true"
            col1, col2, col3 = st.columns([1, 30, 1])
            with col2:
                st.image(BANNER_URL, use_column_width=True)
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            def hide_hamburger_menu():
                """Inject CSS that hides the ``#MainMenu`` element and the page ``footer``."""
                hidden_chrome_css = """
                    <style>
                        #MainMenu {visibility: hidden;}
                        footer {visibility: hidden;}
                    </style>
                    """
                # Raw HTML is required so the <style> tag is emitted instead of escaped.
                st.markdown(hidden_chrome_css, unsafe_allow_html=True)


            hide_hamburger_menu()
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            # Seed the session state once: an empty transcript and a conversation
            # memory that keeps the last two exchanges. Keys already present are
            # left untouched, and a factory only runs when its key is missing.
            _session_defaults = {
                "messages": list,
                "memory": lambda: ConversationBufferWindowMemory(k=2, memory_key="chat_history", return_messages=True),
            }
            for _state_key, _make_default in _session_defaults.items():
                if _state_key not in st.session_state:
                    st.session_state[_state_key] = _make_default()
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            @st.cache_resource
            def load_embeddings():
                """Create the ``law-ai/InLegalBERT`` embedding model (wrapped by ``st.cache_resource``)."""
                embedding_model = HuggingFaceEmbeddings(model_name="law-ai/InLegalBERT")
                return embedding_model
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            # Open the FAISS index saved under "ipc_embed_db" with the embedding model.
            # NOTE(review): allow_dangerous_deserialization=True presumably permits
            # unpickling the saved store - confirm the index only ever comes from Ingest.py.
            embeddings = load_embeddings()
            db = FAISS.load_local(
                "ipc_embed_db",
                embeddings,
                allow_dangerous_deserialization=True,
            )

            # Similarity search returning the three closest chunks per query.
            _retriever_options = {"search_type": "similarity", "search_kwargs": {"k": 3}}
            db_retriever = db.as_retriever(**_retriever_options)
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            # Prompt for the answer-generation step. It is filled with three
            # placeholders: {context}, {chat_history} and {question}.
            # The instruction block is closed with [/INST]; it previously ended with
            # "</s>[INST]", which opened a new instruction instead of closing this one.
            prompt_template = """
            <s>[INST]
            As a legal chatbot specializing in the Indian Penal Code, you are tasked with providing highly accurate and contextually appropriate responses. Ensure your answers meet these criteria:
            - Respond in a bullet-point format to clearly delineate distinct aspects of the legal query.
            - Each point should accurately reflect the breadth of the legal provision in question, avoiding over-specificity unless directly relevant to the user's query.
            - Clarify the general applicability of the legal rules or sections mentioned, highlighting any common misconceptions or frequently misunderstood aspects.
            - Limit responses to essential information that directly addresses the user's question, providing concise yet comprehensive explanations.
            - Avoid assuming specific contexts or details not provided in the query, focusing on delivering universally applicable legal interpretations unless otherwise specified.
            - Conclude with a brief summary that captures the essence of the legal discussion and corrects any common misinterpretations related to the topic.

            CONTEXT: {context}
            CHAT HISTORY: {chat_history}
            QUESTION: {question}
            ANSWER:
            - [Detail the first key aspect of the law, ensuring it reflects general application]
            - [Provide a concise explanation of how the law is typically interpreted or applied]
            - [Correct a common misconception or clarify a frequently misunderstood aspect]
            - [Detail any exceptions to the general rule, if applicable]
            - [Include any additional relevant information that directly relates to the user's query]
            [/INST]
            """
         | 
| 68 | 
            +
             | 
| 69 | 
            +
             | 
| 70 | 
            +
             | 
| 71 | 
            +
            # Wrap the template text so the chain can fill its three variables.
            prompt = PromptTemplate(
                input_variables=['context', 'question', 'chat_history'],
                template=prompt_template,
            )

            # The Together API key is read from the environment (None when unset).
            api_key = os.environ.get('TOGETHER_API_KEY')
            llm = Together(
                model="mistralai/Mixtral-8x22B-Instruct-v0.1",
                temperature=0.5,
                max_tokens=1024,
                together_api_key=api_key,
            )

            # Conversational retrieval chain: uses the session's memory, the FAISS
            # retriever, and the custom prompt for the combine-documents step.
            qa = ConversationalRetrievalChain.from_llm(
                llm=llm,
                retriever=db_retriever,
                memory=st.session_state.memory,
                combine_docs_chain_kwargs={'prompt': prompt},
            )
         | 
| 78 | 
            +
             | 
| 79 | 
            +
            def extract_answer(full_response):
                """Return the text that follows the first ``"Response:"`` marker.

                When the marker is present, everything after its first occurrence is
                returned with leading and trailing whitespace stripped. When it is
                absent, ``full_response`` is returned unchanged (not stripped).
                """
                _, marker, remainder = full_response.partition("Response:")
                if not marker:
                    return full_response
                return remainder.strip()
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            def reset_conversation():
                """Empty the stored chat transcript, then clear the conversation memory."""
                session = st.session_state
                session.messages = []
                session.memory.clear()
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            # Replay the stored transcript so earlier turns are drawn on every rerun.
            for message in st.session_state.messages:
                with st.chat_message(message["role"]):
                    st.write(message["content"])


            input_prompt = st.chat_input("Say something...")
            if input_prompt:
                with st.chat_message("user"):
                    st.markdown(f"**You:** {input_prompt}")

                st.session_state.messages.append({"role": "user", "content": input_prompt})
                with st.chat_message("assistant"):
                    with st.spinner("Thinking 💡..."):
                        result = qa.invoke(input=input_prompt)
                        message_placeholder = st.empty()
                        answer = extract_answer(result["answer"])

                        # Every reply starts with the disclaimer; the answer is appended to it.
                        full_response = "⚠️ **_Gentle reminder: We generally ensure precise information, but do double-check._** \n\n\n"
                        for char in answer:
                            # Simulate typing: reveal one more character per tick,
                            # followed by a " |" cursor.
                            full_response += char
                            time.sleep(0.02)  # Adjust the sleep time to control the "typing" speed
                            message_placeholder.markdown(full_response + " |", unsafe_allow_html=True)

                        # Final render without the fake cursor. This also shows the
                        # disclaimer when the answer is empty and the loop never ran.
                        message_placeholder.markdown(full_response, unsafe_allow_html=True)

                    # Only the answer itself is stored; the disclaimer is not part of the transcript.
                    st.session_state.messages.append({"role": "assistant", "content": answer})

                    # reset_conversation runs as the click callback; st.rerun() replaces
                    # the deprecated st.experimental_rerun().
                    if st.button('🗑️ Reset All Chat', on_click=reset_conversation):
                        st.rerun()



            # Render the page footer.
            footer()
         | 
| 126 | 
            +
             | 
| 127 | 
            +
             | 
    	
        AI_legal/data/ipc_law.txt
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        AI_legal/footer.py
    ADDED
    
    | @@ -0,0 +1,68 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import streamlit as st
         | 
| 2 | 
            +
            from htbuilder import HtmlElement, div, a, p, img, styles
         | 
| 3 | 
            +
            from htbuilder.units import percent, px
         | 
| 4 | 
            +
             | 
| 5 | 
            +
             | 
| 6 | 
            +
            def image(src_as_string, **style):
                """Build an ``img`` element for ``src_as_string``; keyword arguments become its inline style."""
                inline_style = styles(**style)
                return img(src=src_as_string, style=inline_style)
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            def link(link, text, **style):
                """Build an ``a`` element pointing at ``link`` with target ``_blank`` and ``text`` as its child.

                Keyword arguments become the anchor's inline style.
                """
                anchor = a(_href=link, _target="_blank", style=styles(**style))
                return anchor(text)
         | 
| 12 | 
            +
             | 
| 13 | 
            +
             | 
| 14 | 
            +
            def layout(*args):
                """Render a fixed, full-width footer holding ``args``.

                Emits a ``<style>`` block through ``st.markdown`` and then a ``div``
                positioned ``fixed`` at the bottom-left of the page. The div wraps a
                single ``p`` element that receives every ``str`` or ``HtmlElement``
                argument in order; arguments of any other type are ignored.
                """

                # "#MainMenu" must be written without a space after "#": "# MainMenu"
                # is not a valid selector, so that rule never applied.
                # NOTE(review): ".st-emotion-cache-139wi93" looks like a generated
                # class name - confirm it still matches the installed Streamlit.
                style = """
                <style>
                  #MainMenu {visibility: hidden;}
                  footer {visibility: hidden;}
                 .stApp { bottom: 40px; }
                 .st-emotion-cache-139wi93 {
                width: 100%;
                padding: 1rem 1rem 15px;
                max-width: 46rem;
                }
                </style>
                """

                style_div = styles(
                    position="fixed",
                    left=0,
                    bottom=0,
                    margin=px(0, 0, 0, 0),
                    width=percent(100),
                    color="white",
                    text_align="center",
                    height="auto",
                    opacity=1
                )

                # `foot` wraps `body`; children are added to `body` below, before
                # `foot` is converted to a string.
                body = p()
                foot = div(
                    style=style_div
                )(
                    body
                )

                st.markdown(style, unsafe_allow_html=True)

                for arg in args:
                    # Strings and htbuilder elements are added the same way.
                    if isinstance(arg, (str, HtmlElement)):
                        body(arg)

                st.markdown(str(foot), unsafe_allow_html=True)
         | 
| 58 | 
            +
             | 
| 59 | 
            +
             | 
| 60 | 
            +
            def footer():
                """Render the credit line by passing it to ``layout``."""
                layout("Made with ❤️ by Nikhil, Mihir, Nilay")


            if __name__ == "__main__":
                footer()
         | 
    	
        AI_legal/images/banner.png
    ADDED
    
    |   | 
    	
        AI_legal/requirements.txt
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            langchain==0.1.15
         | 
| 2 | 
            +
            pypdf
         | 
| 3 | 
            +
            transformers==4.39.3
         | 
| 4 | 
            +
            sentence-transformers
         | 
| 5 | 
            +
            accelerate
         | 
| 6 | 
            +
            faiss-cpu
         | 
| 7 | 
            +
            streamlit==1.33.0
         | 
| 8 | 
            +
            langchain-fireworks
         | 
| 9 | 
            +
            einops
         | 
| 10 | 
            +
            langchain-together
         | 
| 11 | 
            +
            ray==2.10.0
         | 
| 12 | 
            +
            unstructured
         | 
| 13 | 
            +
            htbuilder
         | 
