main files

- .env +4 -0
- app.py +130 -0
- chat_handler.py +121 -0
- file_handler.py +123 -0
- requirements.txt +0 -0
.env
ADDED
@@ -0,0 +1,4 @@
OPENAI_API_KEY=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
VECTOR_DB_PATH_DB=D:\\rajesh\\python\\doge_hackathon\\vectordb\\openai_dbstore\\db
LOG_PATH=D:\\rajesh\\python\\doge_hackathon\\logs\\
GROK_API_KEY=xai-mrBds1WpANksRr9CA8k57BGUiWGF8spR0STmgKo9iWTAvmW62K0WulQ1CUKiP1sRMhOg0a6IVr7aOB8t
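
These entries follow the python-dotenv format that app.py loads at startup. A minimal sketch of reading them directly, assuming the .env sits in the working directory (note that app.py itself resolves configuration through st.secrets rather than os.getenv):

import os
from dotenv import load_dotenv

load_dotenv()  # populate os.environ from the .env file

openai_key = os.getenv("OPENAI_API_KEY")
vector_db_path = os.getenv("VECTOR_DB_PATH_DB")
log_path = os.getenv("LOG_PATH")
grok_key = os.getenv("GROK_API_KEY")  # despite the name, chat_handler.py passes this key to the Groq SDK
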
app.py
ADDED
@@ -0,0 +1,130 @@
import streamlit as st
import os
from dotenv import load_dotenv
from file_handler import FileHandler
from chat_handler import ChatHandler

# Load environment variables
load_dotenv()

# Static credentials
USERNAME = st.secrets["USERNAME"]
PASSWORD = st.secrets["PASSWORD"]

# Initialize handlers
VECTOR_DB_PATH = st.secrets["VECTOR_DB_PATH_DB"]
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
HUGGINGFACE_API_TOKEN = st.secrets["HUGGINGFACE_API_TOKEN"]
GROQ_API_KEY_TOKEN = st.secrets["GROQ_API_KEY"]

os.makedirs(VECTOR_DB_PATH, exist_ok=True)

file_handler = FileHandler(VECTOR_DB_PATH, HUGGINGFACE_API_TOKEN)
chat_handler = ChatHandler(VECTOR_DB_PATH, HUGGINGFACE_API_TOKEN, OPENAI_API_KEY, GROQ_API_KEY_TOKEN)

# Streamlit UI
st.set_page_config(layout="wide", page_title="AI Connect - Smarter Network Planning for the Future")

# Session state to track login status
if "logged_in" not in st.session_state:
    st.session_state["logged_in"] = False

# Login page
if not st.session_state["logged_in"]:
    # Customize page title
    st.markdown(
        """
        <style>
        .title {
            font-size: 2.5rem;
            color: #1f77b4;
            font-weight: bold;
            text-align: center;
            margin-bottom: 10px;
        }
        .subtitle {
            font-size: 1.2rem;
            color: #555;
            text-align: center;
            margin-bottom: 20px;
        }
        .login-box {
            margin: auto;
            width: 50%;
            padding: 20px;
            background: #f9f9f9;
            border: 1px solid #ddd;
            border-radius: 10px;
        }
        .login-box input {
            margin-bottom: 10px;
        }
        </style>
        <div>
            <div class="title">Welcome to AI Connect</div>
            <div class="subtitle">Smarter Network Planning for the Future</div>
        </div>
        """,
        unsafe_allow_html=True,
    )

    # Centered login box (the wrapping div is disabled, so its closing tag
    # is commented out as well)
    # st.markdown('<div class="login-box">', unsafe_allow_html=True)
    st.subheader("Login to Continue")
    username = st.text_input("Username")
    password = st.text_input("Password", type="password")
    if st.button("Login"):
        if username == USERNAME and password == PASSWORD:
            st.session_state["logged_in"] = True
            st.success("Login successful!")
            st.rerun()
        else:
            st.error("Invalid username or password.")
    # st.markdown("</div>", unsafe_allow_html=True)
else:
    # Main app (chat interface)
    st.title("Chatbot - Smarter Network Planning for the Future")
    st.sidebar.header("Upload Documents")
    uploaded_file = st.sidebar.file_uploader("Upload PDF, Excel, Docx, or Txt", type=["pdf", "xlsx", "docx", "txt", "csv"])
    document_name = st.sidebar.text_input("Document Name", "")
    document_description = st.sidebar.text_area("Document Description", "")

    if st.sidebar.button("Process File"):
        if uploaded_file:
            with st.spinner("Processing your file..."):
                response = file_handler.handle_file_upload(
                    file=uploaded_file,
                    document_name=document_name,
                    document_description=document_description,
                )
            st.sidebar.success(f"File processed: {response['message']}")
        else:
            st.sidebar.warning("Please upload a file before processing.")

    # Chat interface
    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    # Display chat messages from history
    for message in st.session_state["messages"]:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("Type your question here..."):
        with st.chat_message("user"):
            st.markdown(prompt)
        st.session_state["messages"].append({"role": "user", "content": prompt})

        with st.spinner("Processing your question..."):
            response = chat_handler.answer_question(prompt)
        with st.chat_message("assistant"):
            st.markdown(response)
        st.session_state["messages"].append({"role": "assistant", "content": response})

    # Logout button
    if st.session_state["logged_in"]:
        if st.sidebar.button("Logout"):
            st.session_state["logged_in"] = False
            st.rerun()
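
app.py calls load_dotenv() but then reads every value through st.secrets, which resolves keys from .streamlit/secrets.toml; keys defined only in the committed .env, plus the extra USERNAME, PASSWORD, and HUGGINGFACE_API_TOKEN entries, must therefore exist in that file too. A minimal sketch of a fallback lookup, assuming the same key names in either location:

import os
import streamlit as st

def get_setting(key: str) -> str:
    """Look up a setting in st.secrets first, then the environment.

    Sketch only: assumes load_dotenv() has already populated os.environ
    and that secrets.toml, when present, uses the same key names.
    """
    try:
        if key in st.secrets:
            return st.secrets[key]
    except FileNotFoundError:
        pass  # no secrets.toml on this machine; fall back to the environment
    value = os.getenv(key)
    if value is None:
        raise KeyError(f"Missing required setting: {key}")
    return value

OPENAI_API_KEY = get_setting("OPENAI_API_KEY")
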
chat_handler.py
ADDED
@@ -0,0 +1,121 @@
import os
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from groq import Groq
import requests


class ChatHandler:
    def __init__(self, vector_db_path, api_token, open_api_key, grok_api_token):
        self.vector_db_path = vector_db_path
        self.groq_client = Groq(api_key=grok_api_token)
        # Initialize the embedding model using Hugging Face
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"token": api_token},
        )
        self.llm = ChatOpenAI(
            model_name="gpt-4",
            api_key=open_api_key,
            max_tokens=500,
            temperature=0.2,
        )

    def _query_groq_model(self, prompt):
        """
        Query Groq's Llama model using the SDK.
        """
        try:
            chat_completion = self.groq_client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model="llama-3.1-8b-instant",  # Ensure the model name is correct
            )
            # Return the assistant's response
            return chat_completion.choices[0].message.content
        except Exception as e:
            return f"Error querying Groq API: {e}"

    def answer_question(self, question):
        # Search every per-document FAISS index saved under the vector DB root
        responses = []
        for root, dirs, files in os.walk(self.vector_db_path):
            for dir_name in dirs:
                index_path = os.path.join(root, dir_name, "index.faiss")
                if os.path.exists(index_path):
                    vector_store = FAISS.load_local(
                        os.path.join(root, dir_name), self.embeddings, allow_dangerous_deserialization=True
                    )
                    response_with_scores = vector_store.similarity_search_with_relevance_scores(question, k=100)
                    # Note: no score threshold is applied; every retrieved chunk is kept
                    filtered_responses = [doc.page_content for doc, score in response_with_scores]
                    responses.extend(filtered_responses)

        if responses:
            prompt = self._generate_prompt(question, responses)
            # response = self.llm.invoke(prompt)
            # if hasattr(response, "content"):
            #     return response.content.strip()  # Ensure clean output
            # else:
            #     return "Error: 'content' attribute not found in the AI's response."
            response = self._query_groq_model(prompt)
            return response

        return "No relevant documents found or context is insufficient to answer your question."

    def _generate_prompt(self, question, documents):
        """
        Generate a structured prompt tailored to analyzing 5G network deployment data
        and answering questions effectively using the provided documents.
        """
        # Only the first five retrieved chunks are included in the context
        context = "\n".join(
            [f"Document {i + 1}:\n{doc.strip()}" for i, doc in enumerate(documents[:5])]
        )

        prompt = f"""
        You are an advanced AI assistant with expertise in 5G network optimization, deployment strategies,
        and resource allocation. Your role is to analyze network datasets to identify inefficiencies,
        propose actionable deployment and optimization strategies, and quantify potential improvements.

        ### Data Provided:
        The following documents contain detailed information about 5G network deployment, resource utilization,
        and operational metrics:
        {context}

        ### Question:
        {question}

        ### Instructions:
        1. **Highlight Areas of Network Inefficiencies**:
           - Identify inefficiencies such as underutilized network nodes, high latency areas, or
             imbalanced resource allocation.
           - Use data points from the documents to back your observations.

        2. **Suggest Strategies for Network Optimization**:
           - Recommend actionable steps such as adjusting network configurations, deploying additional nodes,
             or reallocating bandwidth.
           - Ensure suggestions are feasible and aligned with the provided datasets.

        3. **Quantify Cost-Saving and Performance Benefits**:
           - Provide quantitative estimates of potential cost savings from the suggested strategies.
           - Highlight the performance benefits, such as improved latency, higher throughput, or enhanced user experience.

        4. **Present the Response Clearly**:
           - Organize your findings in a step-by-step format.
           - Use tables, bullet points, or concise paragraphs for clarity.

        ### Example Output Format:
        - **Network Inefficiencies Identified**:
          1. ...
          2. ...

        - **Optimization Strategies**:
          1. ...
          2. ...

        - **Cost-Saving and Performance Benefits**:
          - Cost Savings: $...
          - Performance Improvements: ...

        Please ensure the response is data-driven, actionable, and easy to understand.
        """
        return prompt
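
answer_question keeps every one of the k=100 chunks it retrieves, even though the variable is named filtered_responses. A minimal sketch of applying an actual relevance cutoff, assuming LangChain's 0-to-1 relevance scores and a hypothetical MIN_RELEVANCE threshold that is not part of the committed code:

from langchain_community.vectorstores import FAISS

MIN_RELEVANCE = 0.5  # hypothetical cutoff, tune against your data

def search_with_cutoff(vector_store: FAISS, question: str, k: int = 100) -> list[str]:
    """Return page contents of chunks whose relevance score clears the cutoff."""
    response_with_scores = vector_store.similarity_search_with_relevance_scores(question, k=k)
    return [
        doc.page_content
        for doc, score in response_with_scores
        if score >= MIN_RELEVANCE  # drop weakly related chunks instead of keeping all k
    ]
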
file_handler.py
ADDED
@@ -0,0 +1,123 @@
import os
import hashlib
import io
import json
import pandas as pd
from langchain_community.vectorstores import FAISS
from PyPDF2 import PdfReader
from docx import Document
from langchain_huggingface import HuggingFaceEmbeddings

class FileHandler:
    def __init__(self, vector_db_path, api_token):
        self.vector_db_path = vector_db_path
        # Initialize the embedding model using Hugging Face
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"token": api_token},
        )

    def handle_file_upload(self, file, document_name, document_description):
        try:
            content = file.read()
            file_hash = hashlib.md5(content).hexdigest()
            file_key = f"{file.name}_{file_hash}"
            vector_store_dir = os.path.join(self.vector_db_path, file_key)
            os.makedirs(vector_store_dir, exist_ok=True)
            vector_store_path = os.path.join(vector_store_dir, "index.faiss")

            if os.path.exists(vector_store_path):
                return {"message": "File already processed."}

            # Process file based on type
            if file.name.endswith(".pdf"):
                texts, metadatas = self.load_and_split_pdf(file)
            elif file.name.endswith(".docx"):
                texts, metadatas = self.load_and_split_docx(file)
            elif file.name.endswith(".txt"):
                texts, metadatas = self.load_and_split_txt(content)
            elif file.name.endswith(".xlsx"):
                texts, metadatas = self.load_and_split_table(content)
            elif file.name.endswith(".csv"):
                texts, metadatas = self.load_and_split_csv(content)
            else:
                raise ValueError("Unsupported file format.")

            if not texts:
                return {"message": "No text extracted from the file. Check the file content."}

            # Create FAISS vector store using LangChain's from_texts method
            vector_store = FAISS.from_texts(texts, embedding=self.embeddings, metadatas=metadatas)
            vector_store.save_local(vector_store_dir)

            metadata = {
                "filename": file.name,
                "document_name": document_name,
                "document_description": document_description,
                "file_size": len(content),
            }
            metadata_path = os.path.join(vector_store_dir, "metadata.json")
            with open(metadata_path, 'w') as md_file:
                json.dump(metadata, md_file)

            return {"message": "File processed successfully."}
        except Exception as e:
            return {"message": f"Error processing file: {str(e)}"}

    def load_and_split_pdf(self, file):
        reader = PdfReader(file)
        texts = []
        metadatas = []
        for page_num, page in enumerate(reader.pages):
            text = page.extract_text()
            if text:
                texts.append(text)
                metadatas.append({"page_number": page_num + 1})
        return texts, metadatas

    def load_and_split_docx(self, file):
        doc = Document(file)
        texts = []
        metadatas = []
        for para_num, paragraph in enumerate(doc.paragraphs):
            if paragraph.text:
                texts.append(paragraph.text)
                metadatas.append({"paragraph_number": para_num + 1})
        return texts, metadatas

    def load_and_split_txt(self, content):
        text = content.decode("utf-8")
        lines = text.split('\n')
        texts = [line for line in lines if line.strip()]
        metadatas = [{}] * len(texts)
        return texts, metadatas

    def load_and_split_table(self, content):
        excel_data = pd.read_excel(io.BytesIO(content), sheet_name=None)
        texts = []
        metadatas = []
        for sheet_name, df in excel_data.items():
            df = df.dropna(how='all', axis=0).dropna(how='all', axis=1)
            df = df.fillna('N/A')
            for _, row in df.iterrows():
                row_dict = row.to_dict()
                # Combine key-value pairs into a string
                row_text = ', '.join([f"{key}: {value}" for key, value in row_dict.items()])
                texts.append(row_text)
                metadatas.append({"sheet_name": sheet_name})
        return texts, metadatas

    def load_and_split_csv(self, content):
        csv_data = pd.read_csv(io.StringIO(content.decode('utf-8')))
        texts = []
        metadatas = []
        csv_data = csv_data.dropna(how='all', axis=0).dropna(how='all', axis=1)
        csv_data = csv_data.fillna('N/A')
        for row_index, row in csv_data.iterrows():
            row_dict = row.to_dict()
            row_text = ', '.join([f"{key}: {value}" for key, value in row_dict.items()])
            texts.append(row_text)
            metadatas.append({"row_index": row_index})
        return texts, metadatas
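
handle_file_upload only touches .read() and .name on the uploaded file, so it can be exercised outside Streamlit. A minimal smoke-test sketch, assuming a local sample.txt and a hypothetical LocalUpload stand-in for Streamlit's UploadedFile:

import io

from file_handler import FileHandler

class LocalUpload(io.BytesIO):
    """Hypothetical stand-in for UploadedFile: .read() plus .name is all
    handle_file_upload uses."""
    def __init__(self, path: str):
        with open(path, "rb") as f:
            super().__init__(f.read())
        self.name = path

handler = FileHandler("vectordb", api_token="hf_...")  # placeholder token
result = handler.handle_file_upload(
    file=LocalUpload("sample.txt"),  # assumed local file
    document_name="Sample",
    document_description="Smoke test upload",
)
print(result["message"])
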
requirements.txt
ADDED
Binary file (4.33 kB).
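
requirements.txt is committed as a binary blob, so its contents are not recoverable here. Judging from the imports across app.py, chat_handler.py, and file_handler.py, it presumably pins at least the following (package names inferred, versions unknown):

streamlit
python-dotenv
langchain-community
langchain-openai
langchain-huggingface
sentence-transformers
groq
requests
PyPDF2
python-docx
pandas
openpyxl
faiss-cpu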