File size: 11,819 Bytes
1112aa6
d657a45
 
c6d1e68
d657a45
8656f6b
 
d657a45
 
 
 
 
a76ab68
 
d657a45
 
 
 
 
 
 
 
 
 
 
 
3b5ec02
d657a45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2881a49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d657a45
 
 
 
 
 
 
2881a49
d657a45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b5ec02
d657a45
 
 
 
 
 
 
 
 
 
 
a76ab68
b0b96fa
d657a45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import os
import streamlit as st
import PyPDF2
import subprocess
import openai
from openai import OpenAI
from langchain_openai import ChatOpenAI
from io import BytesIO
from typing import List, Dict
from dotenv import load_dotenv
# Load environment variables. A local .env file, when present, only fills in
# variables that are not already set in the process environment.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API")
TOKEN = os.getenv('HF_TOKEN')
# Log in to Hugging Face only when a token is configured: a None entry in the
# argument list would make subprocess.run raise TypeError at import time and
# take the whole app down with it.
if TOKEN:
    subprocess.run(["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"])
st.sidebar.title("Welcome to MBAL Chatbot")
class PDFChatbot:
    """Answer questions about one PDF document through the OpenAI chat API.

    The document text is split into word-bounded chunks, the chunks sharing
    the most words with the question are selected as context, and that
    context is sent to the chat model together with the recent conversation.
    """

    # Chat model requested for every completion.
    MODEL_NAME = "gpt-3.5-turbo-0125"
    # How many of the most recent history messages are replayed per request.
    MAX_HISTORY_MESSAGES = 6
    # Instructions sent as the system message of every request.
    SYSTEM_PROMPT = """You are an experienced insurance agent assistant who helps customers understand their insurance policies and coverage details. Follow these guidelines:
                   1. Only provide information based on the PDF content provided
                   2. If the answer is not in the PDF, clearly state that the information is not available in the document
                   3. Provide clear, concise, and helpful responses in a professional manner
                   4. Always respond in Vietnamese using proper grammar and formatting
                   5. When possible, reference specific sections or clauses from the policy
                   6. Use insurance terminology appropriately but explain complex terms when necessary
                   7. Be empathetic and patient, as insurance can be confusing for customers
                   8. If asked about claims, coverage limits, deductibles, or policy terms, provide accurate information from the document
                   9. Always prioritize customer understanding and satisfaction
                   10. If multiple interpretations are possible, explain the different scenarios clearly
                   Remember: You are here to help customers understand their insurance coverage better."""

    def __init__(self):
        """Create the API client and start with no document and no history."""
        # The attribute keeps its historical name, but this is a plain OpenAI
        # client. The key comes from the OPENAI_API variable this app reads;
        # when that is unset, passing None lets the SDK fall back to its own
        # default environment variable.
        self.azure_client = openai.OpenAI(api_key=os.getenv("OPENAI_API") or None)
        # Alternating {"role": ..., "content": ...} dicts from earlier turns.
        self.conversation_history = []
        # Extracted text of the currently loaded PDF.
        self.pdf_content = ""

    def extract_text_from_pdf(self, pdf_file):
        """Extract the text of every page of an uploaded PDF.

        Args:
            pdf_file: File-like object (or path) accepted by PyPDF2.PdfReader.

        Returns:
            The page texts joined by newlines with surrounding whitespace
            stripped, or None when the PDF could not be read (the error is
            also reported in the Streamlit UI).
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # `or ""` keeps a page that yields no text from raising TypeError.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            return "\n".join(page_texts).strip()
        except Exception as e:
            # UI boundary: report the failure instead of crashing the app.
            st.error(f"Error reading PDF: {str(e)}")
            return None

    def chunk_text(self, text: str, chunk_size: int = 3000) -> List[str]:
        """Split text into whitespace-delimited chunks of bounded length.

        Words are never split. Each chunk, joined with single spaces, is at
        most ``chunk_size`` characters long; the only exception is a single
        word longer than ``chunk_size``, which becomes a chunk of its own
        rather than being dropped.

        Args:
            text: Text to split.
            chunk_size: Maximum chunk length in characters.

        Returns:
            The chunks in document order; an empty list for blank text.
        """
        chunks = []
        current_chunk = []
        current_length = 0  # length of " ".join(current_chunk)
        for word in text.split():
            # Joining needs one separating space unless the chunk is empty.
            needed = len(word) + (1 if current_chunk else 0)
            if current_chunk and current_length + needed > chunk_size:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length += needed
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    def get_relevant_context(self, query: str, chunks: List[str], max_chunks: int = 3) -> str:
        """Pick the chunks that share the most words with the query.

        Relevance is the number of distinct lower-cased, whitespace-separated
        words a chunk has in common with the query. Ties keep document order,
        and chunks with no overlap can still be returned when fewer than
        ``max_chunks`` chunks match.

        Args:
            query: The user's question.
            chunks: Candidate text chunks.
            max_chunks: Maximum number of chunks to return.

        Returns:
            The selected chunks joined by blank lines, best match first.
        """
        query_words = set(query.lower().split())

        def overlap(chunk: str) -> int:
            return len(query_words & set(chunk.lower().split()))

        # sorted() is stable, also with reverse=True, so equal scores stay in
        # their original order.
        ranked = sorted(chunks, key=overlap, reverse=True)
        return "\n\n".join(ranked[:max_chunks])

    def chat_with_pdf(self, user_question: str, pdf_content: str) -> str:
        """Answer a question from the PDF content using the OpenAI chat API.

        On success the question and the answer are appended to
        ``conversation_history``.

        Args:
            user_question: The question typed by the user.
            pdf_content: Full extracted text of the document.

        Returns:
            The model's answer, or a message starting with
            "Error generating response:" when the request fails.
        """
        try:
            chunks = self.chunk_text(pdf_content)
            relevant_context = self.get_relevant_context(user_question, chunks)
            current_turn = {
                "role": "user",
                "content": f"""Insurance Document Content:
{relevant_context}
Customer Question: {user_question}
Please provide a helpful response based on the insurance document content above."""
            }
            # Earlier turns go between the system prompt and the current
            # question, so the message the model answers is the new question
            # and not a previous assistant reply.
            messages = [
                {"role": "system", "content": self.SYSTEM_PROMPT},
                *self.conversation_history[-self.MAX_HISTORY_MESSAGES:],
                current_turn,
            ]
            response = self.azure_client.chat.completions.create(
                model=self.MODEL_NAME,
                messages=messages,
                max_tokens=1000,
                temperature=0.7
            )
            bot_response = response.choices[0].message.content
            # Only the bare question is stored; the document context is
            # re-selected for every new question.
            self.conversation_history.append({"role": "user", "content": user_question})
            self.conversation_history.append({"role": "assistant", "content": bot_response})
            return bot_response
        except Exception as e:
            # UI boundary: surface the failure as the chat answer.
            return f"Error generating response: {str(e)}"
def main():
    """Render the Streamlit page: sidebar upload controls and the chat area.

    Per-session state (the chatbot, whether a PDF has been processed, and the
    displayed question/answer pairs) lives in ``st.session_state``.
    """
    st.title("🛡️ Insurance Policy Assistant")
    st.markdown("Upload your insurance policy PDF and ask questions about your coverage, claims, deductibles, and more!")

    state = st.session_state
    # First run of a session: create the chatbot and the UI bookkeeping.
    if 'chatbot' not in state:
        state.chatbot = PDFChatbot()
        state.pdf_processed = False
        state.chat_history = []

    with st.sidebar:
        st.header("📁 Upload Insurance Document")
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
        # The button is only drawn once a file has been chosen.
        if uploaded_file is not None and st.button("Process PDF"):
            with st.spinner("Processing your insurance document..."):
                text_content = state.chatbot.extract_text_from_pdf(uploaded_file)
                if not text_content:
                    st.error("Failed to process PDF")
                else:
                    state.chatbot.pdf_content = text_content
                    state.pdf_processed = True
                    st.success("Insurance document processed successfully!")
                    # Short preview of the extracted text.
                    st.subheader("Document Preview")
                    if len(text_content) > 500:
                        preview = text_content[:500] + "..."
                    else:
                        preview = text_content
                    st.text_area("First 500 characters:", preview, height=100)
        # Reset both the model-side history and the displayed transcript.
        if st.button("Clear Conversation"):
            state.chatbot.conversation_history = []
            state.chat_history = []
            st.rerun()

    if not state.pdf_processed:
        # Nothing to chat about yet: show guidance instead of the chat UI.
        st.info("👆 Please upload and process an insurance PDF document to start chatting!")
        st.subheader("Example questions you can ask:")
        st.markdown("""
       - What is my coverage limit for property damage?
       - What is my deductible amount?
       - What types of incidents are covered under this policy?
       - What is excluded from my coverage?
       - How do I file a claim?
       - What is the process for claim settlement?
       - What are my premium payment options?
       - When does my policy expire?
       - Is flood damage covered?
       - What documentation do I need for a claim?
       """)
        st.subheader("💡 Insurance Tips")
        st.markdown("""
       - Review your policy regularly to understand your coverage
       - Keep your policy documents in a safe place
       - Update your coverage when your circumstances change
       - Document any incidents immediately
       - Contact your insurance agent if you have questions
       """)
        return

    st.header("💬 Ask About Your Insurance Policy")
    # Replay the transcript accumulated in earlier reruns.
    for question, answer in state.chat_history:
        with st.container():
            st.markdown(f"**You:** {question}")
            st.markdown(f"**Insurance Assistant:** {answer}")
            st.divider()

    user_question = st.chat_input("Ask about your insurance coverage, claims, deductibles, or any policy details...")
    if not user_question:
        return
    with st.spinner("Analyzing your policy..."):
        response = state.chatbot.chat_with_pdf(user_question, state.chatbot.pdf_content)
        # Remember the exchange so the next rerun shows it in the transcript.
        state.chat_history.append((user_question, response))
        st.markdown(f"**You:** {user_question}")
        st.markdown(f"**Insurance Assistant:** {response}")
# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
   main()