milvus update
Changed files:
- .gitignore +107 -0
- app.py +145 -130
- chat_handler.py +34 -29
- file_handler.py +55 -22
- requirements.txt +0 -0
.gitignore
ADDED
@@ -0,0 +1,107 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Ignore Streamlit secrets
+.streamlit/secrets.toml
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+*.egg
+*.egg-info/
+dist/
+build/
+eggs/
+lib/
+libs/
+parts/
+var/
+wheels/
+share/python-wheels/
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# PyInstaller
+# Usually these files are written by a Python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+nosetests.xml
+coverage.xml
+*.log
+
+# Environments
+.env
+.venv
+ENV/
+env/
+venv/
+
+# PyCharm
+.idea/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery
+celerybeat-schedule
+*.pid
+
+# SageMath
+*.sage.py
+
+# Encrypted credentials in the dev environment
+*.key
+
+# log files
+*.log
+
+# vscode settings
+.vscode/
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# profiler
+profiler.log
+
+# Other files and directories
+*.swp
+*~
+*.bak
+*.tmp
+*.temp
+*.orig
+*.lock
+*.log
+
+# Backup files
+*_backup.*
app.py
CHANGED
@@ -1,130 +1,145 @@
+import logging
+import streamlit as st
+import os
+from dotenv import load_dotenv
+from file_handler import FileHandler
+from chat_handler import ChatHandler
+# Initialize Milvus connection
+from pymilvus import connections
+
+# Load environment variables
+load_dotenv()
+
+# Static credentials for login
+USERNAME = os.environ.get("USERNAME")
+PASSWORD = os.environ.get("PASSWORD")
+
+# Configure logging
+LOG_PATH = os.environ.get("LOG_PATH")
+os.makedirs(LOG_PATH, exist_ok=True)
+
+LOG_FILE = os.path.join(LOG_PATH, "chatbot.log")
+logging.basicConfig(
+    filename=LOG_FILE,
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger("AI_Connect")
+
+# Initialize handlers
+MILVUS_HOST = os.environ.get("MILVUS_HOST")
+MILVUS_PORT = os.environ.get("MILVUS_PORT")
+HUGGINGFACE_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
+GROQ_API_KEY_TOKEN = os.environ.get("GROQ_API_KEY")
+
+connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
+
+file_handler = FileHandler(HUGGINGFACE_API_TOKEN, logger)
+chat_handler = ChatHandler(HUGGINGFACE_API_TOKEN, GROQ_API_KEY_TOKEN, logger)
+
+# Streamlit UI
+st.set_page_config(layout="wide", page_title="AI Connect - Smarter Network Planning for the Future")
+
+# Session state to track login status
+if "logged_in" not in st.session_state:
+    st.session_state["logged_in"] = False
+
+# Login page
+if not st.session_state["logged_in"]:
+    # Customize page title
+    st.markdown(
+        """
+        <style>
+        .title {
+            font-size: 2.5rem;
+            color: #1f77b4;
+            font-weight: bold;
+            text-align: center;
+            margin-bottom: 10px;
+        }
+        .subtitle {
+            font-size: 1.2rem;
+            color: #555;
+            text-align: center;
+            margin-bottom: 20px;
+        }
+        .login-box {
+            margin: auto;
+            width: 50%;
+            padding: 20px;
+            background: #f9f9f9;
+            border: 1px solid #ddd;
+            border-radius: 10px;
+        }
+        .login-box input {
+            margin-bottom: 10px;
+        }
+        </style>
+        <div>
+            <div class="title">Welcome to AI Connect</div>
+            <div class="subtitle">Smarter Network Planning for the Future</div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+
+    # Centered login box
+    st.subheader("Login to Continue")
+    username = st.text_input("Username")
+    password = st.text_input("Password", type="password")
+    if st.button("Login"):
+        if username == USERNAME and password == PASSWORD:
+            st.session_state["logged_in"] = True
+            st.success("Login successful!")
+            logger.info("User logged in successfully")
+            st.rerun()
+        else:
+            st.error("Invalid username or password.")
+    st.markdown("</div>", unsafe_allow_html=True)
+else:
+    # Main app (chat interface)
+    st.title("Chatbot - Smarter Network Planning for the Future")
+    st.sidebar.header("Upload Documents")
+    uploaded_file = st.sidebar.file_uploader("Upload PDF, Excel, Docx, or Txt", type=["pdf", "xlsx", "docx", "txt", "csv"])
+    document_name = st.sidebar.text_input("Document Name", "")
+    document_description = st.sidebar.text_area("Document Description", "")
+
+    if st.sidebar.button("Process File"):
+        if uploaded_file:
+            with st.spinner("Processing your file..."):
+                response = file_handler.handle_file_upload(
+                    file=uploaded_file,
+                    document_name=document_name,
+                    document_description=document_description,
+                )
+                st.sidebar.success(f"File processed: {response['message']}")
+        else:
+            st.sidebar.warning("Please upload a file before processing.")
+
+    # Chat interface
+    if "messages" not in st.session_state:
+        st.session_state["messages"] = []
+
+    # Display chat messages from history
+    for message in st.session_state["messages"]:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    # Accept user input
+    if prompt := st.chat_input("Type your question here..."):
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        st.session_state["messages"].append({"role": "user", "content": prompt})
+
+        with st.spinner("Processing your question..."):
+            response = chat_handler.answer_question(prompt)
+        with st.chat_message("assistant"):
+            st.markdown(response)
+        st.session_state["messages"].append({"role": "assistant", "content": response})
+
+# Logout button
+if st.session_state["logged_in"]:
+    if st.sidebar.button("Logout"):
+        st.session_state["logged_in"] = False
+        st.rerun()
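Note: the rewritten app.py takes all configuration from the environment. For reference, a local .env consistent with the os.environ.get calls above might look like the following sketch (variable names are from the code; the values are placeholders, not from this commit). The new .gitignore already excludes .env, so this file stays out of the repo.

USERNAME=admin
PASSWORD=change-me
LOG_PATH=./logs
MILVUS_HOST=localhost
MILVUS_PORT=19530
HUGGINGFACE_API_TOKEN=hf_xxx
GROQ_API_KEY=gsk_xxx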
chat_handler.py
CHANGED
@@ -1,26 +1,19 @@
 import os
-from langchain_community.vectorstores import FAISS
-from langchain_openai import ChatOpenAI
 from langchain_huggingface import HuggingFaceEmbeddings
 from groq import Groq
-import
+from pymilvus import connections, Collection

 class ChatHandler:
-    def __init__(self,
-        self.
+    def __init__(self, api_token, grok_api_token, logger):
+        self.logger = logger
+        self.logger.info("Initializing ChatHandler...")
         self.groq_client = Groq(api_key=grok_api_token)
         # Initialize the embedding model using Hugging Face
         self.embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2",
             model_kwargs={"token": api_token},
         )
-            model_name="gpt-4",
-            api_key=open_api_key,
-            max_tokens=500,
-            temperature=0.2,
-        )
+
     def _query_groq_model(self, prompt):
         """
         Query Groq's Llama model using the SDK.

@@ -33,29 +26,38 @@ class ChatHandler:
             # Return the assistant's response
             return chat_completion.choices[0].message.content
         except Exception as e:
+            self.logger.error(f"Error querying Groq API: {e}")
             return f"Error querying Groq API: {e}"

    def answer_question(self, question):
        # Generate embedding for the question
+        self.logger.info(f"Received question: {question}")
+        collections = connections._fetch_handler().list_collections()
        responses = []
+
+        for collection_name in collections:
+            collection = Collection(name=collection_name)
+            embeddings = self.embeddings.embed_query(question)
+
+            search_params = {
+                "metric_type": "IP",
+                "params": {"nprobe": 10},
+            }
+
+            results = collection.search(
+                data=[embeddings],
+                anns_field="embedding",
+                param=search_params,
+                limit=5,
+            )
+            # Extract the embeddings or metadata (if needed)
+            for res in results[0]:
+                # Store the ID or use res.distance if needed for similarity score
+                responses.append({"id": res.id, "distance": res.distance, "content": res.entity})

        if responses:
-            # if hasattr(response, "content"):
-            #     return response.content.strip()  # Ensure clean output
-            # else:
-            #     return "Error: 'content' attribute not found in the AI's response."
+            sorted_responses = sorted(responses, key=lambda x: x["distance"], reverse=True)
+            prompt = self._generate_prompt(question, sorted_responses[:5])
            response = self._query_groq_model(prompt)
            return response

@@ -68,7 +70,10 @@
         and answer questions effectively using the provided documents.
         """
         context = "\n".join(
-            [
+            [
+                f"Document {i + 1}:\nID: {doc['id']}\nSimilarity: {doc['distance']:.4f}\nContent: {doc['content']}"
+                for i, doc in enumerate(documents[:5])
+            ]
         )

         prompt = f"""
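A hedged aside on the lookup above: connections._fetch_handler() is a private pymilvus helper and can change between releases. A minimal sketch of the same checks via the public utility module, assuming pymilvus 2.x (the endpoint and collection name are placeholders):

# Sketch only: public pymilvus equivalents of the private calls used in this commit.
from pymilvus import connections, utility

connections.connect("default", host="localhost", port="19530")  # placeholder endpoint

collection_names = utility.list_collections()           # vs connections._fetch_handler().list_collections()
exists = utility.has_collection("collection_abc123")    # vs the has_collection() check in file_handler.py
print(collection_names, exists)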
file_handler.py
CHANGED
@@ -1,16 +1,17 @@
 import os
 import hashlib
 import io
-import json
 import pandas as pd
-from langchain_community.vectorstores import FAISS
 from PyPDF2 import PdfReader
 from docx import Document
 from langchain_huggingface import HuggingFaceEmbeddings
+from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
+import json

 class FileHandler:
-    def __init__(self,
-        self.
+    def __init__(self, api_token, logger):
+        self.logger = logger
+        self.logger.info("Initializing FileHandler...")
         # Initialize the embedding model using Hugging Face
         self.embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2",

@@ -21,12 +22,11 @@ class FileHandler:
         try:
             content = file.read()
             file_hash = hashlib.md5(content).hexdigest()
-            vector_store_dir = os.path.join(self.vector_db_path, file_key)
-            os.makedirs(vector_store_dir, exist_ok=True)
-            vector_store_path = os.path.join(vector_store_dir, "index.faiss")
+            collection_name = f"collection_{file_hash}"

-            if
+            # Check if the collection exists
+            if connections._fetch_handler().has_collection(collection_name):
+                self.logger.info(f"Collection '{collection_name}' already exists.")
                 return {"message": "File already processed."}

             # Process file based on type

@@ -41,30 +41,63 @@ class FileHandler:
             elif file.name.endswith(".csv"):
                 texts, metadatas = self.load_and_split_csv(content)
             else:
+                self.logger.info("Unsupported file format.")
                 raise ValueError("Unsupported file format.")

             if not texts:
                 return {"message": "No text extracted from the file. Check the file content."}

-            #
-                "filename": file.name,
-                "document_name": document_name,
-                "document_description": document_description,
-                "file_size": len(content),
-            }
-            metadata_path = os.path.join(vector_store_dir, "metadata.json")
-            with open(metadata_path, 'w') as md_file:
-                json.dump(metadata, md_file)
+            # self._store_vectors(collection_name, texts, metadatas)
+            filename = file.name
+            filelen = len(content)
+            self._store_vectors(collection_name, texts, metadatas, document_name, document_description, filename, filelen)
+            self.logger.info(f"File processed successfully. Collection name: {collection_name}")

             return {"message": "File processed successfully."}
         except Exception as e:
+            self.logger.error(f"Error processing file: {str(e)}")
             return {"message": f"Error processing file: {str(e)}"}

+    def _store_vectors(self, collection_name, texts, metadatas, document_name, document_description, file_name, file_len):
+        fields = [
+            FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
+            FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=384),
+            FieldSchema(name="file_name_hash", dtype=DataType.INT64),  # Hash of file name
+            FieldSchema(name="document_name_hash", dtype=DataType.INT64),  # Hash of document name
+            FieldSchema(name="document_description_hash", dtype=DataType.INT64),  # Hash of document description
+            FieldSchema(name="file_meta_hash", dtype=DataType.INT64),
+            FieldSchema(name="file_size", dtype=DataType.INT64),
+        ]
+        schema = CollectionSchema(fields, description="Document embeddings with metadata")
+        collection = Collection(name=collection_name, schema=schema)
+        # Generate embeddings
+        embeddings = [self.embeddings.embed_query(text) for text in texts]
+
+        # Convert metadata to hashed values
+        file_name_hash = int(hashlib.md5(file_name.encode('utf-8')).hexdigest(), 16) % (10 ** 12)
+        document_name_hash = int(hashlib.md5((document_name or "Unknown Document").encode('utf-8')).hexdigest(), 16) % (10 ** 12)
+        document_description_hash = int(hashlib.md5((document_description or "No Description Provided").encode('utf-8')).hexdigest(), 16) % (10 ** 12)
+        # Convert metadata list to JSON string and hash it
+        metadata_string = json.dumps(metadatas, ensure_ascii=False)
+        file_meta_hash = int(hashlib.md5(metadata_string.encode('utf-8')).hexdigest(), 16) % (10 ** 12)
+
+        # Prepare data for insertion
+        data = [
+            embeddings,
+            [file_name_hash] * len(embeddings),
+            [document_name_hash] * len(embeddings),
+            [document_description_hash] * len(embeddings),
+            [file_meta_hash] * len(embeddings),
+            [file_len or 0] * len(embeddings),
+        ]

+        # Insert data into collection
+        collection.insert(data)
+        collection.load()

     def load_and_split_pdf(self, file):
         reader = PdfReader(file)
         texts = []
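One hedged caveat on _store_vectors: recent Milvus releases (2.2 and later) refuse to load() a collection whose vector field has no index, so the collection.load() call above may fail on a freshly created collection. A sketch of an index step that could sit just before it, matching the IP metric used by the search in chat_handler.py (the index type and nlist values are illustrative choices, not from this commit):

# Hypothetical addition inside _store_vectors, before collection.load():
collection.create_index(
    field_name="embedding",        # the FLOAT_VECTOR field defined in the schema above
    index_params={
        "metric_type": "IP",       # same metric chat_handler.py searches with
        "index_type": "IVF_FLAT",  # illustrative choice
        "params": {"nlist": 128},  # illustrative choice
    },
)
collection.load()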
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
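The requirements diff is rendered as binary, so the actual contents are not visible here. Judging only from the imports in the files above, the file presumably covers roughly these distributions (an inference, not the real list; versions omitted):

streamlit
python-dotenv
pymilvus
groq
langchain-huggingface
sentence-transformers
PyPDF2
python-docx
pandas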