# Copied from a Hugging Face Space file view (Space status: Sleeping; file size: 11,819 bytes).
# Revision identifiers shown alongside the file (presumably blame-gutter commit hashes):
# 1112aa6 d657a45 c6d1e68 d657a45 8656f6b d657a45 a76ab68 d657a45 3b5ec02 d657a45
# 2881a49 d657a45 2881a49 d657a45 3b5ec02 d657a45 a76ab68 b0b96fa d657a45
# The viewer's line-number gutter (1-234) that followed was page chrome, not program text.
import os
import re
import subprocess
from io import BytesIO
from typing import List, Dict

import openai
import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from openai import OpenAI
# Load environment variables from a local .env file (if one exists) before
# reading them; load_dotenv was imported but never called.
load_dotenv()

# The OpenAI key is read from the "OPENAI_API" variable.
OPENAI_API_KEY = os.getenv("OPENAI_API")
TOKEN = os.getenv("HF_TOKEN")

# Log in to the Hugging Face Hub only when a token is configured: a None entry
# in the argument list makes subprocess.run raise TypeError while the module
# is still being imported.
if TOKEN:
    subprocess.run(
        ["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"],
        check=False,  # best effort: a failed login must not stop the app
    )

st.sidebar.title("Welcome to MBAL Chatbot")
class PDFChatbot:
    """Answer questions about an uploaded PDF with an OpenAI chat model.

    The chatbot extracts the text of a PDF, splits it into word-aligned
    chunks, selects the chunks sharing the most words with the question, and
    sends them to the chat-completions API together with the most recent
    conversation turns.
    """

    # Chat model used for every completion request.
    MODEL_NAME = "gpt-3.5-turbo-0125"
    # Number of most recent history messages replayed to the model.
    HISTORY_WINDOW = 6
    # Instructions sent as the system message of every request.
    SYSTEM_PROMPT = (
        "You are an experienced insurance agent assistant who helps customers understand their insurance policies and coverage details. Follow these guidelines:\n"
        "1. Only provide information based on the PDF content provided\n"
        "2. If the answer is not in the PDF, clearly state that the information is not available in the document\n"
        "3. Provide clear, concise, and helpful responses in a professional manner\n"
        "4. Always respond in Vietnamese using proper grammar and formatting\n"
        "5. When possible, reference specific sections or clauses from the policy\n"
        "6. Use insurance terminology appropriately but explain complex terms when necessary\n"
        "7. Be empathetic and patient, as insurance can be confusing for customers\n"
        "8. If asked about claims, coverage limits, deductibles, or policy terms, provide accurate information from the document\n"
        "9. Always prioritize customer understanding and satisfaction\n"
        "10. If multiple interpretations are possible, explain the different scenarios clearly\n"
        "Remember: You are here to help customers understand their insurance coverage better."
    )
    # Runs of word characters; used so punctuation does not block a match.
    _WORD_PATTERN = re.compile(r"\w+")

    def __init__(self):
        """Create the API client and start with an empty conversation."""
        # The attribute keeps its historical name but holds a plain OpenAI
        # client. The key read from the "OPENAI_API" variable is passed
        # explicitly; when it is None the client falls back to its own
        # environment lookup, as the argument-less constructor did.
        self.azure_client = openai.OpenAI(api_key=OPENAI_API_KEY)
        # Alternating {"role", "content"} dicts for past user/assistant turns.
        self.conversation_history = []
        # Full text of the most recently processed PDF.
        self.pdf_content = ""

    def extract_text_from_pdf(self, pdf_file):
        """Extract text content from uploaded PDF file.

        Args:
            pdf_file: The object handed to ``PyPDF2.PdfReader`` (the app
                passes the Streamlit upload).

        Returns:
            The text of all pages joined by newlines with surrounding
            whitespace stripped, or ``None`` if reading failed (the error is
            shown through ``st.error``).
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # A page without extractable text contributes an empty string.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
        except Exception as e:
            st.error(f"Error reading PDF: {str(e)}")
            return None
        return "\n".join(page_texts).strip()

    def chunk_text(self, text: str, chunk_size: int = 3000) -> List[str]:
        """Split text into smaller chunks for better processing.

        Words are never split. Each chunk, once joined with single spaces, is
        at most ``chunk_size`` characters long unless a single word alone is
        longer than that, in which case the word forms its own chunk.

        Args:
            text: Text to split on whitespace.
            chunk_size: Maximum length of a chunk in characters.

        Returns:
            The chunks in document order; an empty list for blank text.
        """
        chunks = []
        current_chunk = []
        current_length = 0
        for word in text.split():
            # A joining space is only needed when the chunk already has words.
            projected = current_length + len(word) + (1 if current_chunk else 0)
            if current_chunk and projected > chunk_size:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length = projected
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    @classmethod
    def _tokenize(cls, text: str) -> set:
        """Return the set of lower-cased words in ``text``, ignoring punctuation."""
        return set(cls._WORD_PATTERN.findall(text.lower()))

    def get_relevant_context(self, query: str, chunks: List[str], max_chunks: int = 3) -> str:
        """Get the most relevant chunks for the query (simple keyword matching).

        A chunk's score is the number of distinct words it shares with the
        query. Punctuation is ignored, so "deductible?" matches "deductible,".

        Args:
            query: The user's question.
            chunks: Candidate text chunks in document order.
            max_chunks: How many of the best-scoring chunks to keep.

        Returns:
            The selected chunks, best first, separated by blank lines. Chunks
            with equal scores keep their document order.
        """
        query_words = self._tokenize(query)
        scored = [(len(query_words & self._tokenize(chunk)), chunk) for chunk in chunks]
        # list.sort is stable even with reverse=True, so ties stay in order.
        scored.sort(key=lambda item: item[0], reverse=True)
        return "\n\n".join(chunk for _, chunk in scored[:max_chunks])

    def _build_messages(self, user_question: str, relevant_context: str) -> List[dict]:
        """Assemble the request: system prompt, recent history, current question."""
        current_turn = {
            "role": "user",
            "content": (
                "Insurance Document Content:\n"
                f"{relevant_context}\n"
                f"Customer Question: {user_question}\n"
                "Please provide a helpful response based on the insurance document content above."
            ),
        }
        # History goes BEFORE the current question so the question is the last
        # message the model sees; appending it afterwards left an old
        # assistant reply at the end of the conversation.
        recent_history = self.conversation_history[-self.HISTORY_WINDOW:]
        return [
            {"role": "system", "content": self.SYSTEM_PROMPT},
            *recent_history,
            current_turn,
        ]

    def chat_with_pdf(self, user_question: str, pdf_content: str) -> str:
        """Generate a response from the chat model based on PDF content and a question.

        Args:
            user_question: The question typed by the user.
            pdf_content: Full text of the document to answer from.

        Returns:
            The model's answer. If anything fails, a string starting with
            "Error generating response:" is returned instead of raising, and
            the conversation history is left unchanged.
        """
        try:
            chunks = self.chunk_text(pdf_content)
            relevant_context = self.get_relevant_context(user_question, chunks)
            messages = self._build_messages(user_question, relevant_context)

            response = self.azure_client.chat.completions.create(
                model=self.MODEL_NAME,
                messages=messages,
                max_tokens=1000,
                temperature=0.7,
            )
            bot_response = response.choices[0].message.content

            # Only the bare question is stored, not the document excerpt, so
            # replayed history stays small.
            self.conversation_history.append({"role": "user", "content": user_question})
            self.conversation_history.append({"role": "assistant", "content": bot_response})
            return bot_response
        except Exception as e:
            return f"Error generating response: {str(e)}"
# Markdown shown on the landing view before a document has been processed.
_EXAMPLE_QUESTIONS_MARKDOWN = """
- What is my coverage limit for property damage?
- What is my deductible amount?
- What types of incidents are covered under this policy?
- What is excluded from my coverage?
- How do I file a claim?
- What is the process for claim settlement?
- What are my premium payment options?
- When does my policy expire?
- Is flood damage covered?
- What documentation do I need for a claim?
"""

_INSURANCE_TIPS_MARKDOWN = """
- Review your policy regularly to understand your coverage
- Keep your policy documents in a safe place
- Update your coverage when your circumstances change
- Document any incidents immediately
- Contact your insurance agent if you have questions
"""


def _process_upload(uploaded_file):
    """Extract the uploaded PDF's text into session state and show a preview."""
    chatbot = st.session_state.chatbot
    text_content = chatbot.extract_text_from_pdf(uploaded_file)
    if not text_content:
        st.error("Failed to process PDF")
        return

    chatbot.pdf_content = text_content
    # Earlier turns were about the previous document; replaying them against
    # a new one would mix unrelated answers into the context.
    chatbot.conversation_history = []
    st.session_state.chat_history = []
    st.session_state.pdf_processed = True
    st.success("Insurance document processed successfully!")

    # Show PDF summary
    st.subheader("Document Preview")
    if len(text_content) > 500:
        preview = text_content[:500] + "..."
    else:
        preview = text_content
    st.text_area("First 500 characters:", preview, height=100)


def _render_sidebar():
    """Draw the sidebar: PDF upload/processing and the clear-conversation button."""
    with st.sidebar:
        st.header("📁 Upload Insurance Document")
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
        # The button is only drawn once a file has been chosen.
        if uploaded_file is not None and st.button("Process PDF"):
            with st.spinner("Processing your insurance document..."):
                _process_upload(uploaded_file)

        # Clear conversation
        if st.button("Clear Conversation"):
            st.session_state.chatbot.conversation_history = []
            st.session_state.chat_history = []
            st.rerun()


def _render_chat():
    """Draw the stored conversation and handle a newly submitted question."""
    st.header("💬 Ask About Your Insurance Policy")

    # Display chat history
    for question, answer in st.session_state.chat_history:
        with st.container():
            st.markdown(f"**You:** {question}")
            st.markdown(f"**Insurance Assistant:** {answer}")
            st.divider()

    user_question = st.chat_input("Ask about your insurance coverage, claims, deductibles, or any policy details...")
    if not user_question:
        return

    with st.spinner("Analyzing your policy..."):
        response = st.session_state.chatbot.chat_with_pdf(
            user_question,
            st.session_state.chatbot.pdf_content,
        )
        st.session_state.chat_history.append((user_question, response))
        # Show the new exchange right away; it is only in the history loop
        # above on the next rerun.
        st.markdown(f"**You:** {user_question}")
        st.markdown(f"**Insurance Assistant:** {response}")


def _render_landing():
    """Draw the view shown before any document has been processed."""
    st.info("👆 Please upload and process an insurance PDF document to start chatting!")

    # Show example questions
    st.subheader("Example questions you can ask:")
    st.markdown(_EXAMPLE_QUESTIONS_MARKDOWN)

    # Add insurance tips
    st.subheader("💡 Insurance Tips")
    st.markdown(_INSURANCE_TIPS_MARKDOWN)


def main():
    """Render the Streamlit page: sidebar, then either the chat or the landing view."""
    # st.set_page_config(page_title="Insurance PDF Chatbot", page_icon="🛡️", layout="wide")
    st.title("🛡️ Insurance Policy Assistant")
    st.markdown("Upload your insurance policy PDF and ask questions about your coverage, claims, deductibles, and more!")

    # Session state survives Streamlit reruns, so set it up only once.
    if 'chatbot' not in st.session_state:
        st.session_state.chatbot = PDFChatbot()
        st.session_state.pdf_processed = False
        st.session_state.chat_history = []

    _render_sidebar()

    if st.session_state.pdf_processed:
        _render_chat()
    else:
        _render_landing()
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()