MBAL_chatbot / app_abhi.py
ngcanh's picture
Rename app.py to app_abhi.py
49055b0 verified
import os
import streamlit as st
import PyPDF2
import subprocess
import openai
from openai import OpenAI
from langchain_openai import ChatOpenAI
from io import BytesIO
from typing import List, Dict
from dotenv import load_dotenv
# Load environment variables. A local .env file is optional; by default
# load_dotenv() does not override variables already set in the process.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API")
TOKEN = os.getenv('HF_TOKEN')
# Log in to the Hugging Face Hub only when a token is configured: putting
# None into the argv list makes subprocess.run raise TypeError at import time.
if TOKEN:
    subprocess.run(["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"])
st.sidebar.title("Welcome to MBAL Chatbot")
class PDFChatbot:
    """Answer questions about an uploaded insurance PDF with an OpenAI chat model.

    The PDF text is split into word-aligned chunks, the chunks sharing the
    most words with the question are selected as context, and that context is
    sent to the chat-completions endpoint together with recent conversation
    history.
    """

    # Number of trailing history messages replayed to the model on each call.
    HISTORY_WINDOW = 6

    # Instructions sent as the system message on every request. The assistant
    # is told to answer in Vietnamese.
    SYSTEM_PROMPT = """You are an experienced insurance agent assistant who helps customers understand their insurance policies and coverage details. Follow these guidelines:
1. Only provide information based on the PDF content provided
2. If the answer is not in the PDF, clearly state that the information is not available in the document
3. Provide clear, concise, and helpful responses in a professional manner
4. Always respond in Vietnamese using proper grammar and formatting
5. When possible, reference specific sections or clauses from the policy
6. Use insurance terminology appropriately but explain complex terms when necessary
7. Be empathetic and patient, as insurance can be confusing for customers
8. If asked about claims, coverage limits, deductibles, or policy terms, provide accurate information from the document
9. Always prioritize customer understanding and satisfaction
10. If multiple interpretations are possible, explain the different scenarios clearly
Remember: You are here to help customers understand their insurance coverage better."""

    def __init__(self, model_name: str = "gpt-3.5-turbo-0125"):
        """Create the OpenAI client and reset conversation state.

        Args:
            model_name: Chat model passed to the completions endpoint.
        """
        # Despite the attribute name (kept for backward compatibility), this
        # is the standard OpenAI client. The key read from the OPENAI_API
        # variable is passed explicitly; when it is None the client falls
        # back to its own OPENAI_API_KEY environment lookup.
        self.azure_client = openai.OpenAI(api_key=OPENAI_API_KEY)
        self.model_name = model_name
        # Alternating {"role": ..., "content": ...} dicts for past turns.
        self.conversation_history = []
        self.pdf_content = ""

    def extract_text_from_pdf(self, pdf_file):
        """Extract text content from an uploaded PDF file.

        Args:
            pdf_file: File-like object or path accepted by PyPDF2.PdfReader.

        Returns:
            The concatenated page text with surrounding whitespace stripped,
            or None when the PDF could not be read (an error is shown in the
            Streamlit UI in that case).
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # `or ""` guards against pages whose extraction yields None.
            page_texts = [(page.extract_text() or "") + "\n" for page in pdf_reader.pages]
            return "".join(page_texts).strip()
        except Exception as e:
            # UI boundary: report any parsing failure instead of crashing the app.
            st.error(f"Error reading PDF: {str(e)}")
            return None

    def chunk_text(self, text: str, chunk_size: int = 3000) -> List[str]:
        """Split text into word-aligned chunks of at most chunk_size characters.

        Words are never split, so a single word longer than chunk_size becomes
        its own (oversized) chunk.

        Args:
            text: Text to split; whitespace runs are collapsed to one space.
            chunk_size: Maximum length of each joined chunk in characters.

        Returns:
            The list of chunks, empty when text contains no words.
        """
        chunks = []
        current_chunk = []
        current_length = 0  # length of " ".join(current_chunk)
        for word in text.split():
            # A separating space is only needed when the chunk already has a word.
            separator = 1 if current_chunk else 0
            if current_chunk and current_length + separator + len(word) > chunk_size:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length += separator + len(word)
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    def get_relevant_context(self, query: str, chunks: List[str], max_chunks: int = 3) -> str:
        """Return the chunks that share the most distinct words with the query.

        Scoring is a case-insensitive count of whitespace-separated words
        common to the query and the chunk. Ties keep the original chunk order
        because the sort is stable.

        Args:
            query: The user's question.
            chunks: Candidate text chunks.
            max_chunks: Maximum number of chunks to return.

        Returns:
            The selected chunks joined by blank lines ("" when chunks is empty).
        """
        query_words = set(query.lower().split())
        ranked = sorted(
            chunks,
            key=lambda chunk: len(query_words.intersection(chunk.lower().split())),
            reverse=True,
        )
        return "\n\n".join(ranked[:max_chunks])

    def chat_with_pdf(self, user_question: str, pdf_content: str) -> str:
        """Generate an answer to user_question grounded in pdf_content.

        On success the question and the answer are appended to
        self.conversation_history.

        Args:
            user_question: The customer's question.
            pdf_content: Full text of the processed PDF.

        Returns:
            The model's reply, or an "Error generating response: ..." string
            when anything fails (history is left unchanged in that case).
        """
        try:
            chunks = self.chunk_text(pdf_content)
            relevant_context = self.get_relevant_context(user_question, chunks)

            # Order matters: system prompt, then earlier turns, then the new
            # question last so the model answers it rather than continuing
            # after an old assistant message.
            messages = [{"role": "system", "content": self.SYSTEM_PROMPT}]
            messages.extend(self.conversation_history[-self.HISTORY_WINDOW:])
            messages.append(
                {
                    "role": "user",
                    "content": f"""Insurance Document Content:
{relevant_context}
Customer Question: {user_question}
Please provide a helpful response based on the insurance document content above."""
                }
            )

            response = self.azure_client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=1000,
                temperature=0.7
            )
            bot_response = response.choices[0].message.content

            # Store only the bare question (not the document context) so the
            # replayed history stays small.
            self.conversation_history.append({"role": "user", "content": user_question})
            self.conversation_history.append({"role": "assistant", "content": bot_response})
            return bot_response
        except Exception as e:
            # UI boundary: surface the failure as the chat reply.
            return f"Error generating response: {str(e)}"
def _render_sidebar():
    """Draw the sidebar: PDF upload/processing controls and the reset button."""
    bot = st.session_state.chatbot
    with st.sidebar:
        st.header("📁 Upload Insurance Document")
        pdf_upload = st.file_uploader("Choose a PDF file", type="pdf")
        # The button is only rendered once a file has been chosen.
        if pdf_upload is not None and st.button("Process PDF"):
            with st.spinner("Processing your insurance document..."):
                extracted = bot.extract_text_from_pdf(pdf_upload)
                if not extracted:
                    st.error("Failed to process PDF")
                else:
                    bot.pdf_content = extracted
                    st.session_state.pdf_processed = True
                    st.success("Insurance document processed successfully!")
                    # Short preview so the user can confirm the right file was read.
                    if len(extracted) > 500:
                        preview = extracted[:500] + "..."
                    else:
                        preview = extracted
                    st.subheader("Document Preview")
                    st.text_area("First 500 characters:", preview, height=100)
        # Reset both the model-side history and the on-screen transcript.
        if st.button("Clear Conversation"):
            bot.conversation_history = []
            st.session_state.chat_history = []
            st.rerun()


def _render_landing():
    """Show the prompt to upload a PDF plus example questions and tips."""
    st.info("👆 Please upload and process an insurance PDF document to start chatting!")
    st.subheader("Example questions you can ask:")
    st.markdown("""
- What is my coverage limit for property damage?
- What is my deductible amount?
- What types of incidents are covered under this policy?
- What is excluded from my coverage?
- How do I file a claim?
- What is the process for claim settlement?
- What are my premium payment options?
- When does my policy expire?
- Is flood damage covered?
- What documentation do I need for a claim?
""")
    st.subheader("💡 Insurance Tips")
    st.markdown("""
- Review your policy regularly to understand your coverage
- Keep your policy documents in a safe place
- Update your coverage when your circumstances change
- Document any incidents immediately
- Contact your insurance agent if you have questions
""")


def _render_chat():
    """Replay the stored transcript, then handle a newly submitted question."""
    st.header("💬 Ask About Your Insurance Policy")
    for asked, answered in st.session_state.chat_history:
        with st.container():
            st.markdown(f"**You:** {asked}")
            st.markdown(f"**Insurance Assistant:** {answered}")
            st.divider()
    user_question = st.chat_input("Ask about your insurance coverage, claims, deductibles, or any policy details...")
    if not user_question:
        return
    bot = st.session_state.chatbot
    with st.spinner("Analyzing your policy..."):
        reply = bot.chat_with_pdf(user_question, bot.pdf_content)
        st.session_state.chat_history.append((user_question, reply))
        # Show the new exchange immediately; later reruns replay it from history.
        st.markdown(f"**You:** {user_question}")
        st.markdown(f"**Insurance Assistant:** {reply}")


def main():
    """Streamlit entry point: page header, sidebar, then chat or landing view."""
    # st.set_page_config(page_title="Insurance PDF Chatbot", page_icon="🛡️", layout="wide")
    st.title("🛡️ Insurance Policy Assistant")
    st.markdown("Upload your insurance policy PDF and ask questions about your coverage, claims, deductibles, and more!")
    # First run of a session: create the chatbot and its companion state.
    if 'chatbot' not in st.session_state:
        st.session_state.chatbot = PDFChatbot()
        st.session_state.pdf_processed = False
        st.session_state.chat_history = []
    _render_sidebar()
    if not st.session_state.pdf_processed:
        _render_landing()
        return
    _render_chat()
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()