rajeshthangaraj1 committed on
Commit 6d55408 · verified · 1 Parent(s): 4112ebb

main files

Files changed (5):
  1. .env +4 -0
  2. app.py +130 -0
  3. chat_handler.py +121 -0
  4. file_handler.py +123 -0
  5. requirements.txt +0 -0
.env ADDED
@@ -0,0 +1,4 @@
+ OPENAI_API_KEY=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
+ VECTOR_DB_PATH_DB=D:\\rajesh\\python\\doge_hackathon\\vectordb\\openai_dbstore\\db
+ LOG_PATH=D:\\rajesh\\python\\doge_hackathon\\logs\\
+ GROK_API_KEY=xai-mrBds1WpANksRr9CA8k57BGUiWGF8spR0STmgKo9iWTAvmW62K0WulQ1CUKiP1sRMhOg0a6IVr7aOB8t
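These values are picked up by the load_dotenv() call in app.py, although the app itself reads its configuration from st.secrets. If the .env route were used directly, the lookups would be thin wrappers over os.getenv — a minimal sketch, not part of this commit:

import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file above into the process environment
vector_db_path = os.getenv("VECTOR_DB_PATH_DB")
log_path = os.getenv("LOG_PATH")
openai_api_key = os.getenv("OPENAI_API_KEY")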
app.py ADDED
@@ -0,0 +1,130 @@
+ import streamlit as st
+ import os
+ from dotenv import load_dotenv
+ from file_handler import FileHandler
+ from chat_handler import ChatHandler
+
+ # Load environment variables
+ load_dotenv()
+
+ # Static login credentials
+ USERNAME = st.secrets["USERNAME"]
+ PASSWORD = st.secrets["PASSWORD"]
+
+ # Configuration and API keys
+ VECTOR_DB_PATH = st.secrets["VECTOR_DB_PATH_DB"]
+ OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
+ HUGGINGFACE_API_TOKEN = st.secrets["HUGGINGFACE_API_TOKEN"]
+ GROQ_API_KEY_TOKEN = st.secrets["GROQ_API_KEY"]
+
+ os.makedirs(VECTOR_DB_PATH, exist_ok=True)
+
+ # Initialize handlers
+ file_handler = FileHandler(VECTOR_DB_PATH, HUGGINGFACE_API_TOKEN)
+ chat_handler = ChatHandler(VECTOR_DB_PATH, HUGGINGFACE_API_TOKEN, OPENAI_API_KEY, GROQ_API_KEY_TOKEN)
+
+ # Streamlit UI
+ st.set_page_config(layout="wide", page_title="AI Connect - Smarter Network Planning for the Future")
+
+ # Session state to track login status
+ if "logged_in" not in st.session_state:
+     st.session_state["logged_in"] = False
+
+ # Login page
+ if not st.session_state["logged_in"]:
+     # Page title and styling
+     st.markdown(
+         """
+         <style>
+         .title {
+             font-size: 2.5rem;
+             color: #1f77b4;
+             font-weight: bold;
+             text-align: center;
+             margin-bottom: 10px;
+         }
+         .subtitle {
+             font-size: 1.2rem;
+             color: #555;
+             text-align: center;
+             margin-bottom: 20px;
+         }
+         .login-box {
+             margin: auto;
+             width: 50%;
+             padding: 20px;
+             background: #f9f9f9;
+             border: 1px solid #ddd;
+             border-radius: 10px;
+         }
+         .login-box input {
+             margin-bottom: 10px;
+         }
+         </style>
+         <div>
+             <div class="title">Welcome to AI Connect</div>
+             <div class="subtitle">Smarter Network Planning for the Future</div>
+         </div>
+         """,
+         unsafe_allow_html=True,
+     )
+
+     # Login form
+     st.subheader("Login to Continue")
+     username = st.text_input("Username")
+     password = st.text_input("Password", type="password")
+     if st.button("Login"):
+         if username == USERNAME and password == PASSWORD:
+             st.session_state["logged_in"] = True
+             st.success("Login successful!")
+             st.rerun()
+         else:
+             st.error("Invalid username or password.")
+ else:
+     # Main app (chat interface)
+     st.title("Chatbot - Smarter Network Planning for the Future")
+     st.sidebar.header("Upload Documents")
+     uploaded_file = st.sidebar.file_uploader("Upload PDF, Excel, Docx, or Txt", type=["pdf", "xlsx", "docx", "txt", "csv"])
+     document_name = st.sidebar.text_input("Document Name", "")
+     document_description = st.sidebar.text_area("Document Description", "")
+
+     if st.sidebar.button("Process File"):
+         if uploaded_file:
+             with st.spinner("Processing your file..."):
+                 response = file_handler.handle_file_upload(
+                     file=uploaded_file,
+                     document_name=document_name,
+                     document_description=document_description,
+                 )
+             st.sidebar.success(f"File processed: {response['message']}")
+         else:
+             st.sidebar.warning("Please upload a file before processing.")
+
+     # Chat interface
+     if "messages" not in st.session_state:
+         st.session_state["messages"] = []
+
+     # Display chat messages from history
+     for message in st.session_state["messages"]:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # Accept user input
+     if prompt := st.chat_input("Type your question here..."):
+         with st.chat_message("user"):
+             st.markdown(prompt)
+         st.session_state["messages"].append({"role": "user", "content": prompt})
+
+         with st.spinner("Processing your question..."):
+             response = chat_handler.answer_question(prompt)
+         with st.chat_message("assistant"):
+             st.markdown(response)
+         st.session_state["messages"].append({"role": "assistant", "content": response})
+
+ # Logout button
+ if st.session_state["logged_in"]:
+     if st.sidebar.button("Logout"):
+         st.session_state["logged_in"] = False
+         st.rerun()
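app.py pulls its configuration from st.secrets rather than from the committed .env. A minimal startup guard — shown here only as a sketch, using the secret names the code above actually looks up — could fail fast if any of them is missing:

import streamlit as st

REQUIRED_SECRETS = [
    "USERNAME", "PASSWORD", "VECTOR_DB_PATH_DB",
    "OPENAI_API_KEY", "HUGGINGFACE_API_TOKEN", "GROQ_API_KEY",
]

# Report any secret the app expects but cannot find, then stop the script.
missing = [key for key in REQUIRED_SECRETS if key not in st.secrets]
if missing:
    st.error(f"Missing secrets: {', '.join(missing)}")
    st.stop()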
chat_handler.py ADDED
@@ -0,0 +1,121 @@
+ import os
+
+ from groq import Groq
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_openai import ChatOpenAI
+
+
+ class ChatHandler:
+     def __init__(self, vector_db_path, api_token, open_api_key, groq_api_token):
+         self.vector_db_path = vector_db_path
+         self.groq_client = Groq(api_key=groq_api_token)
+         # Initialize the embedding model using Hugging Face
+         self.embeddings = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2",
+             model_kwargs={"token": api_token},
+         )
+         # OpenAI chat model, kept as an alternative answer path (currently unused)
+         self.llm = ChatOpenAI(
+             model_name="gpt-4",
+             api_key=open_api_key,
+             max_tokens=500,
+             temperature=0.2,
+         )
+
+     def _query_groq_model(self, prompt):
+         """Query Groq's Llama model using the SDK."""
+         try:
+             chat_completion = self.groq_client.chat.completions.create(
+                 messages=[{"role": "user", "content": prompt}],
+                 model="llama-3.1-8b-instant",
+             )
+             # Return the assistant's response
+             return chat_completion.choices[0].message.content
+         except Exception as e:
+             return f"Error querying Groq API: {e}"
+
+     def answer_question(self, question):
+         # Search every FAISS index stored under the vector DB path
+         responses = []
+         for root, dirs, files in os.walk(self.vector_db_path):
+             for sub_dir in dirs:
+                 index_path = os.path.join(root, sub_dir, "index.faiss")
+                 if os.path.exists(index_path):
+                     vector_store = FAISS.load_local(
+                         os.path.join(root, sub_dir), self.embeddings, allow_dangerous_deserialization=True
+                     )
+                     response_with_scores = vector_store.similarity_search_with_relevance_scores(question, k=100)
+                     responses.extend(doc.page_content for doc, score in response_with_scores)
+
+         if responses:
+             prompt = self._generate_prompt(question, responses)
+             # Alternative: answer with the OpenAI model instead of Groq
+             # response = self.llm.invoke(prompt)
+             # if hasattr(response, "content"):
+             #     return response.content.strip()
+             # else:
+             #     return "Error: 'content' attribute not found in the AI's response."
+             return self._query_groq_model(prompt)
+
+         return "No relevant documents found or context is insufficient to answer your question."
+
+     def _generate_prompt(self, question, documents):
+         """
+         Generate a structured prompt that asks the model to analyze the retrieved
+         5G network documents and answer the question with actionable recommendations.
+         """
+         context = "\n".join(
+             [f"Document {i + 1}:\n{doc.strip()}" for i, doc in enumerate(documents[:5])]
+         )
+
+         prompt = f"""
+         You are an advanced AI assistant with expertise in 5G network optimization, deployment strategies,
+         and resource allocation. Your role is to analyze network datasets to identify inefficiencies,
+         propose actionable deployment and optimization strategies, and quantify potential improvements.
+
+         ### Data Provided:
+         The following documents contain detailed information about 5G network deployment, resource utilization,
+         and operational metrics:
+         {context}
+
+         ### Question:
+         {question}
+
+         ### Instructions:
+         1. **Highlight Areas of Network Inefficiencies**:
+            - Identify inefficiencies such as underutilized network nodes, high-latency areas, or
+              imbalanced resource allocation.
+            - Use data points from the documents to back your observations.
+
+         2. **Suggest Strategies for Network Optimization**:
+            - Recommend actionable steps such as adjusting network configurations, deploying additional nodes,
+              or reallocating bandwidth.
+            - Ensure suggestions are feasible and aligned with the provided datasets.
+
+         3. **Quantify Cost-Saving and Performance Benefits**:
+            - Provide quantitative estimates of potential cost savings from the suggested strategies.
+            - Highlight the performance benefits, such as improved latency, higher throughput, or enhanced user experience.
+
+         4. **Present the Response Clearly**:
+            - Organize your findings in a step-by-step format.
+            - Use tables, bullet points, or concise paragraphs for clarity.
+
+         ### Example Output Format:
+         - **Network Inefficiencies Identified**:
+           1. ...
+           2. ...
+
+         - **Optimization Strategies**:
+           1. ...
+           2. ...
+
+         - **Cost-Saving and Performance Benefits**:
+           - Cost Savings: $...
+           - Performance Improvements: ...
+
+         Please ensure the response is data-driven, actionable, and easy to understand.
+         """
+         return prompt
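A minimal usage sketch for ChatHandler outside Streamlit; the path and keys below are placeholders, and it assumes FileHandler has already written at least one FAISS index under the vector DB path:

from chat_handler import ChatHandler

handler = ChatHandler(
    vector_db_path="./vectordb",   # placeholder path
    api_token="hf_xxx",            # Hugging Face token (placeholder)
    open_api_key="sk-xxx",         # OpenAI key (placeholder; only used by the commented-out path)
    groq_api_token="gsk_xxx",      # Groq key (placeholder)
)
print(handler.answer_question("Which network nodes are underutilized?"))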
file_handler.py ADDED
@@ -0,0 +1,123 @@
+ import hashlib
+ import io
+ import json
+ import os
+
+ import pandas as pd
+ from docx import Document
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from PyPDF2 import PdfReader
+
+
+ class FileHandler:
+     def __init__(self, vector_db_path, api_token):
+         self.vector_db_path = vector_db_path
+         # Initialize the embedding model using Hugging Face
+         self.embeddings = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2",
+             model_kwargs={"token": api_token},
+         )
+
+     def handle_file_upload(self, file, document_name, document_description):
+         try:
+             content = file.read()
+             # Hash the content so the same file is not indexed twice
+             file_hash = hashlib.md5(content).hexdigest()
+             file_key = f"{file.name}_{file_hash}"
+             vector_store_dir = os.path.join(self.vector_db_path, file_key)
+             os.makedirs(vector_store_dir, exist_ok=True)
+             vector_store_path = os.path.join(vector_store_dir, "index.faiss")
+
+             if os.path.exists(vector_store_path):
+                 return {"message": "File already processed."}
+
+             # Process file based on type
+             if file.name.endswith(".pdf"):
+                 texts, metadatas = self.load_and_split_pdf(file)
+             elif file.name.endswith(".docx"):
+                 texts, metadatas = self.load_and_split_docx(file)
+             elif file.name.endswith(".txt"):
+                 texts, metadatas = self.load_and_split_txt(content)
+             elif file.name.endswith(".xlsx"):
+                 texts, metadatas = self.load_and_split_table(content)
+             elif file.name.endswith(".csv"):
+                 texts, metadatas = self.load_and_split_csv(content)
+             else:
+                 raise ValueError("Unsupported file format.")
+
+             if not texts:
+                 return {"message": "No text extracted from the file. Check the file content."}
+
+             # Create FAISS vector store using LangChain's from_texts method
+             vector_store = FAISS.from_texts(texts, embedding=self.embeddings, metadatas=metadatas)
+             vector_store.save_local(vector_store_dir)
+
+             # Store descriptive metadata alongside the index
+             metadata = {
+                 "filename": file.name,
+                 "document_name": document_name,
+                 "document_description": document_description,
+                 "file_size": len(content),
+             }
+             metadata_path = os.path.join(vector_store_dir, "metadata.json")
+             with open(metadata_path, "w") as md_file:
+                 json.dump(metadata, md_file)
+
+             return {"message": "File processed successfully."}
+         except Exception as e:
+             return {"message": f"Error processing file: {str(e)}"}
+
+     def load_and_split_pdf(self, file):
+         reader = PdfReader(file)
+         texts = []
+         metadatas = []
+         for page_num, page in enumerate(reader.pages):
+             text = page.extract_text()
+             if text:
+                 texts.append(text)
+                 metadatas.append({"page_number": page_num + 1})
+         return texts, metadatas
+
+     def load_and_split_docx(self, file):
+         doc = Document(file)
+         texts = []
+         metadatas = []
+         for para_num, paragraph in enumerate(doc.paragraphs):
+             if paragraph.text:
+                 texts.append(paragraph.text)
+                 metadatas.append({"paragraph_number": para_num + 1})
+         return texts, metadatas
+
+     def load_and_split_txt(self, content):
+         text = content.decode("utf-8")
+         lines = text.split("\n")
+         texts = [line for line in lines if line.strip()]
+         metadatas = [{}] * len(texts)
+         return texts, metadatas
+
+     def load_and_split_table(self, content):
+         # One text chunk per spreadsheet row, tagged with its sheet name
+         excel_data = pd.read_excel(io.BytesIO(content), sheet_name=None)
+         texts = []
+         metadatas = []
+         for sheet_name, df in excel_data.items():
+             df = df.dropna(how="all", axis=0).dropna(how="all", axis=1)
+             df = df.fillna("N/A")
+             for _, row in df.iterrows():
+                 # Combine key-value pairs into a single string
+                 row_text = ", ".join([f"{key}: {value}" for key, value in row.to_dict().items()])
+                 texts.append(row_text)
+                 metadatas.append({"sheet_name": sheet_name})
+         return texts, metadatas
+
+     def load_and_split_csv(self, content):
+         # One text chunk per CSV row, tagged with its row index
+         csv_data = pd.read_csv(io.StringIO(content.decode("utf-8")))
+         csv_data = csv_data.dropna(how="all", axis=0).dropna(how="all", axis=1)
+         csv_data = csv_data.fillna("N/A")
+         texts = []
+         metadatas = []
+         for index, row in csv_data.iterrows():
+             row_text = ", ".join([f"{key}: {value}" for key, value in row.to_dict().items()])
+             texts.append(row_text)
+             metadatas.append({"row_index": index})
+         return texts, metadatas
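handle_file_upload expects a Streamlit UploadedFile-like object, i.e. something exposing .read() and .name. For local testing outside Streamlit, a file on disk can be adapted with a small wrapper — a sketch with placeholder paths, not part of this commit:

import io
from file_handler import FileHandler

class LocalUpload:
    """Adapts a file on disk to the .read()/.name interface FileHandler expects."""
    def __init__(self, path):
        self.name = path
        with open(path, "rb") as f:
            self._buffer = io.BytesIO(f.read())

    def read(self):
        return self._buffer.read()

handler = FileHandler(vector_db_path="./vectordb", api_token="hf_xxx")  # placeholder values
result = handler.handle_file_upload(
    file=LocalUpload("network_metrics.xlsx"),  # placeholder file
    document_name="Network metrics",
    document_description="Sample 5G utilisation export",
)
print(result["message"])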
requirements.txt ADDED
Binary file (4.33 kB).