"""Streamlit chatbot that answers questions about an uploaded insurance PDF."""

import os
import subprocess
from io import BytesIO
from typing import Dict, List, Optional

import openai
import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from openai import OpenAI

# Load variables from a local .env file (if present) before they are read
# below. Variables already set in the process environment are not overridden.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API")
TOKEN = os.getenv("HF_TOKEN")

# Chat model used when the caller does not choose one.
DEFAULT_MODEL = "gpt-3.5-turbo-0125"

# Number of most recent history messages replayed to the model on each turn.
HISTORY_WINDOW = 6

SYSTEM_PROMPT = """You are an experienced insurance agent assistant who helps customers understand their insurance policies and coverage details. Follow these guidelines:
1. Only provide information based on the PDF content provided
2. If the answer is not in the PDF, clearly state that the information is not available in the document
3. Provide clear, concise, and helpful responses in a professional manner
4. Always respond in Vietnamese using proper grammar and formatting
5. When possible, reference specific sections or clauses from the policy
6. Use insurance terminology appropriately but explain complex terms when necessary
7. Be empathetic and patient, as insurance can be confusing for customers
8. If asked about claims, coverage limits, deductibles, or policy terms, provide accurate information from the document
9. Always prioritize customer understanding and satisfaction
10. If multiple interpretations are possible, explain the different scenarios clearly

Remember: You are here to help customers understand their insurance coverage better."""

# Log in to Hugging Face only when a token is configured: a None entry in the
# argument list would make subprocess.run raise TypeError.
if TOKEN:
    subprocess.run(
        ["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"]
    )

st.sidebar.title("Welcome to MBAL Chatbot")


class PDFChatbot:
    """Answer questions about a PDF document through the OpenAI chat API.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, model_name: str = DEFAULT_MODEL):
        """Create the API client and empty conversation state.

        Args:
            model_name: Chat model passed to the completions endpoint.
        """
        # The attribute keeps its historical name for compatibility, although
        # it holds a plain OpenAI client. When OPENAI_API_KEY is None the
        # client falls back to the OPENAI_API_KEY environment variable.
        self.azure_client = openai.OpenAI(api_key=OPENAI_API_KEY)
        self.model_name = model_name
        # Alternating {"role": ..., "content": ...} user/assistant messages.
        self.conversation_history: List[Dict[str, str]] = []
        # Extracted text of the currently loaded PDF.
        self.pdf_content = ""

    def extract_text_from_pdf(self, pdf_file) -> Optional[str]:
        """Extract the text of every page of an uploaded PDF.

        Args:
            pdf_file: File-like object accepted by ``PyPDF2.PdfReader``.

        Returns:
            The page texts joined by newlines with outer whitespace stripped,
            or ``None`` when the file cannot be read (the error is shown in
            the Streamlit UI).
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Guard against pages for which no text is returned.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            return "\n".join(page_texts).strip()
        except Exception as e:
            st.error(f"Error reading PDF: {str(e)}")
            return None

    def chunk_text(self, text: str, chunk_size: int = 3000) -> List[str]:
        """Split text into whitespace-delimited chunks of bounded length.

        Words are never split; a single word longer than ``chunk_size``
        becomes a chunk of its own.

        Args:
            text: Text to split.
            chunk_size: Approximate maximum number of characters per chunk.

        Returns:
            The chunks in document order; empty when ``text`` has no words.
        """
        chunks = []
        current_chunk = []
        current_length = 0

        for word in text.split():
            # The +1 accounts for the space that joins this word to the chunk.
            if current_length + len(word) + 1 > chunk_size:
                if current_chunk:
                    chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length += len(word) + 1

        if current_chunk:
            chunks.append(" ".join(current_chunk))

        return chunks

    def get_relevant_context(self, query: str, chunks: List[str], max_chunks: int = 3) -> str:
        """Select the chunks sharing the most words with the query.

        Relevance is the number of distinct lower-cased, whitespace-separated
        words that a chunk has in common with the query.

        Args:
            query: The user's question.
            chunks: Candidate text chunks.
            max_chunks: Maximum number of chunks to return.

        Returns:
            The selected chunks, most relevant first, joined by blank lines.
        """
        query_words = set(query.lower().split())

        def overlap(chunk: str) -> int:
            return len(query_words.intersection(chunk.lower().split()))

        # sorted() is stable, so equally scored chunks keep document order.
        ranked = sorted(chunks, key=overlap, reverse=True)
        return "\n\n".join(ranked[:max_chunks])

    def chat_with_pdf(self, user_question: str, pdf_content: str) -> str:
        """Answer a question using the most relevant parts of the PDF text.

        On success the question and the answer are appended to
        ``conversation_history``.

        Args:
            user_question: The question typed by the user.
            pdf_content: Full extracted text of the document.

        Returns:
            The model's answer, or a message starting with
            ``"Error generating response:"`` when anything fails.
        """
        try:
            chunks = self.chunk_text(pdf_content)
            relevant_context = self.get_relevant_context(user_question, chunks)

            user_prompt = f"""Insurance Document Content:
{relevant_context}

Customer Question: {user_question}

Please provide a helpful response based on the insurance document content above."""

            # Earlier turns go between the system prompt and the current
            # question so that the request ends with the message to answer.
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                *self.conversation_history[-HISTORY_WINDOW:],
                {"role": "user", "content": user_prompt},
            ]

            response = self.azure_client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=1000,
                temperature=0.7,
            )

            bot_response = response.choices[0].message.content

            # Store the bare question (without the document context) so the
            # replayed history stays small.
            self.conversation_history.append({"role": "user", "content": user_question})
            self.conversation_history.append({"role": "assistant", "content": bot_response})

            return bot_response

        except Exception as e:
            return f"Error generating response: {str(e)}"


def main():
    """Render the Streamlit page: PDF upload sidebar and chat interface."""
    st.title("🛡️ Insurance Policy Assistant")
    st.markdown("Upload your insurance policy PDF and ask questions about your coverage, claims, deductibles, and more!")

    # Create the per-session state on the first run of the script.
    if 'chatbot' not in st.session_state:
        st.session_state.chatbot = PDFChatbot()
        st.session_state.pdf_processed = False
        st.session_state.chat_history = []

    # Sidebar for PDF upload and settings
    with st.sidebar:
        st.header("📁 Upload Insurance Document")
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

        if uploaded_file is not None:
            if st.button("Process PDF"):
                with st.spinner("Processing your insurance document..."):
                    text_content = st.session_state.chatbot.extract_text_from_pdf(uploaded_file)

                    if text_content:
                        st.session_state.chatbot.pdf_content = text_content
                        st.session_state.pdf_processed = True
                        st.success("Insurance document processed successfully!")

                        # Show a short preview of the extracted text.
                        st.subheader("Document Preview")
                        if len(text_content) > 500:
                            preview = text_content[:500] + "..."
                        else:
                            preview = text_content
                        st.text_area(
                            "First 500 characters:",
                            preview,
                            height=100
                        )
                    else:
                        st.error("Failed to process PDF")

        # Clear conversation
        if st.button("Clear Conversation"):
            st.session_state.chatbot.conversation_history = []
            st.session_state.chat_history = []
            st.rerun()

    # Main chat interface
    if st.session_state.pdf_processed:
        st.header("💬 Ask About Your Insurance Policy")

        # Display chat history
        for question, answer in st.session_state.chat_history:
            with st.container():
                st.markdown(f"**You:** {question}")
                st.markdown(f"**Insurance Assistant:** {answer}")
                st.divider()

        # Chat input
        user_question = st.chat_input("Ask about your insurance coverage, claims, deductibles, or any policy details...")

        if user_question:
            with st.spinner("Analyzing your policy..."):
                response = st.session_state.chatbot.chat_with_pdf(
                    user_question,
                    st.session_state.chatbot.pdf_content
                )

                st.session_state.chat_history.append((user_question, response))

                # The history loop above ran before this exchange existed, so
                # render the new exchange explicitly for this run.
                st.markdown(f"**You:** {user_question}")
                st.markdown(f"**Insurance Assistant:** {response}")

    else:
        st.info("👆 Please upload and process an insurance PDF document to start chatting!")

        # Show example questions
        st.subheader("Example questions you can ask:")
        st.markdown("""
        - What is my coverage limit for property damage?
        - What is my deductible amount?
        - What types of incidents are covered under this policy?
        - What is excluded from my coverage?
        - How do I file a claim?
        - What is the process for claim settlement?
        - What are my premium payment options?
        - When does my policy expire?
        - Is flood damage covered?
        - What documentation do I need for a claim?
        """)

        # Add insurance tips
        st.subheader("💡 Insurance Tips")
        st.markdown("""
        - Review your policy regularly to understand your coverage
        - Keep your policy documents in a safe place
        - Update your coverage when your circumstances change
        - Document any incidents immediately
        - Contact your insurance agent if you have questions
        """)


if __name__ == "__main__":
    main()