Spaces:
Running
Running
| import os | |
| import streamlit as st | |
| import PyPDF2 | |
| import subprocess | |
| import openai | |
| from openai import OpenAI | |
| from langchain_openai import ChatOpenAI | |
| from io import BytesIO | |
| from typing import List, Dict | |
| from dotenv import load_dotenv | |
# Load variables from a local .env file (if one exists) before reading the
# environment; without this call the imported load_dotenv is never used.
load_dotenv()

# API key for OpenAI, stored under the "OPENAI_API" environment variable.
OPENAI_API_KEY = os.getenv("OPENAI_API")
# Hugging Face access token used for the CLI login below.
TOKEN = os.getenv("HF_TOKEN")

# Only attempt the Hugging Face login when a token is configured: a None
# entry in the argument list makes subprocess.run raise TypeError, which
# would abort the whole app at import time.
# NOTE(review): Streamlit re-executes this script on every interaction, so
# this login command runs on each rerun -- confirm that is intended.
if TOKEN:
    subprocess.run(
        ["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"]
    )

st.sidebar.title("Welcome to MBAL Chatbot")
class PDFChatbot:
    """Answer questions about an uploaded insurance PDF with an OpenAI chat model.

    The PDF text is split into word-aligned chunks, the chunks sharing the
    most words with the question are selected as context, and the model is
    asked to answer from that context only.

    Attributes:
        azure_client: OpenAI client used for chat completions (the name is
            kept from an earlier Azure-based version for compatibility).
        model_name: Chat model identifier sent with every request.
        conversation_history: Alternating user/assistant message dicts from
            previous turns (raw questions and answers, without PDF context).
        pdf_content: Extracted text of the currently loaded PDF.
    """

    # Number of trailing history messages replayed to the model per request.
    HISTORY_WINDOW = 6

    SYSTEM_PROMPT = (
        "You are an experienced insurance agent assistant who helps customers "
        "understand their insurance policies and coverage details. "
        "Follow these guidelines:\n"
        "1. Only provide information based on the PDF content provided\n"
        "2. If the answer is not in the PDF, clearly state that the information "
        "is not available in the document\n"
        "3. Provide clear, concise, and helpful responses in a professional manner\n"
        "4. Always respond in Vietnamese using proper grammar and formatting\n"
        "5. When possible, reference specific sections or clauses from the policy\n"
        "6. Use insurance terminology appropriately but explain complex terms "
        "when necessary\n"
        "7. Be empathetic and patient, as insurance can be confusing for customers\n"
        "8. If asked about claims, coverage limits, deductibles, or policy terms, "
        "provide accurate information from the document\n"
        "9. Always prioritize customer understanding and satisfaction\n"
        "10. If multiple interpretations are possible, explain the different "
        "scenarios clearly\n"
        "Remember: You are here to help customers understand their insurance "
        "coverage better."
    )

    def __init__(self, model_name: str = "gpt-3.5-turbo-0125"):
        """Create the OpenAI client and reset conversation state.

        Args:
            model_name: Chat model to use for completions.
        """
        # The app stores its key under "OPENAI_API", which the OpenAI client
        # does not look at by itself, so pass it explicitly. When neither
        # variable is set, api_key is None and the client applies its own
        # default lookup, exactly as a bare OpenAI() call would.
        api_key = os.getenv("OPENAI_API") or os.getenv("OPENAI_API_KEY")
        self.azure_client = openai.OpenAI(api_key=api_key)
        self.model_name = model_name
        self.conversation_history = []
        self.pdf_content = ""

    def extract_text_from_pdf(self, pdf_file):
        """Extract the text of every page of an uploaded PDF.

        Args:
            pdf_file: File-like object accepted by ``PyPDF2.PdfReader``.

        Returns:
            The page texts joined by newlines with surrounding whitespace
            stripped, or ``None`` if the PDF could not be read (the error is
            also shown in the Streamlit UI).
        """
        try:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # A page without extractable text may yield None/empty; treat it
            # as empty instead of failing the whole document.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            return "\n".join(page_texts).strip()
        except Exception as e:
            # UI boundary: report the problem to the user rather than crash.
            st.error(f"Error reading PDF: {str(e)}")
            return None

    def chunk_text(self, text: str, chunk_size: int = 3000) -> List[str]:
        """Split text into whitespace-aligned chunks of bounded length.

        Words are never split. A chunk's joined length does not exceed
        ``chunk_size`` characters unless a single word is itself longer, in
        which case that word forms its own chunk.

        Args:
            text: Text to split; whitespace runs are collapsed to one space.
            chunk_size: Maximum number of characters per chunk.

        Returns:
            The list of chunks in document order (empty for blank text).
        """
        chunks = []
        current_chunk = []
        current_length = 0
        for word in text.split():
            # A joining space is only needed when the chunk already has words.
            separator = 1 if current_chunk else 0
            if current_chunk and current_length + separator + len(word) > chunk_size:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word)
            else:
                current_chunk.append(word)
                current_length += separator + len(word)
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    def get_relevant_context(self, query: str, chunks: List[str], max_chunks: int = 3) -> str:
        """Select the chunks that share the most distinct words with the query.

        Scoring is a case-insensitive count of whitespace-separated words
        common to the query and the chunk. Ties keep document order, and
        chunks with a zero score are still eligible, so some context is
        returned whenever any chunk exists.

        Args:
            query: The user's question.
            chunks: Candidate text chunks.
            max_chunks: Maximum number of chunks to include.

        Returns:
            The selected chunks, best first, separated by blank lines.
        """
        query_words = set(query.lower().split())

        def overlap(chunk: str) -> int:
            return len(query_words & set(chunk.lower().split()))

        # sorted() is stable, also with reverse=True, so equal scores keep
        # their original relative order.
        ranked = sorted(chunks, key=overlap, reverse=True)
        return "\n\n".join(ranked[:max_chunks])

    def _build_messages(self, user_question: str, relevant_context: str) -> List[dict]:
        """Assemble the chat payload: system prompt, recent history, new question."""
        messages = [{"role": "system", "content": self.SYSTEM_PROMPT}]
        # Earlier turns must come BEFORE the new question; otherwise the
        # request ends on an old assistant message and the model is not
        # actually answering the current question.
        messages.extend(self.conversation_history[-self.HISTORY_WINDOW:])
        messages.append(
            {
                "role": "user",
                "content": (
                    "Insurance Document Content:\n"
                    f"{relevant_context}\n"
                    f"Customer Question: {user_question}\n"
                    "Please provide a helpful response based on the insurance "
                    "document content above."
                ),
            }
        )
        return messages

    def chat_with_pdf(self, user_question: str, pdf_content: str) -> str:
        """Answer a question using the PDF content as context.

        Args:
            user_question: The question typed by the user.
            pdf_content: Full extracted text of the PDF.

        Returns:
            The model's answer, or a string starting with
            ``"Error generating response:"`` if anything fails.
        """
        try:
            chunks = self.chunk_text(pdf_content)
            relevant_context = self.get_relevant_context(user_question, chunks)

            response = self.azure_client.chat.completions.create(
                model=self.model_name,
                messages=self._build_messages(user_question, relevant_context),
                max_tokens=1000,
                temperature=0.7,
            )
            bot_response = response.choices[0].message.content

            # Only the raw question/answer pair is remembered; the document
            # context is re-selected for every new question.
            self.conversation_history.append({"role": "user", "content": user_question})
            self.conversation_history.append({"role": "assistant", "content": bot_response})
            return bot_response
        except Exception as e:
            # Callers display the returned string directly, so failures are
            # reported in-band instead of being raised.
            return f"Error generating response: {str(e)}"
def _render_sidebar():
    """Draw the sidebar: PDF upload/processing and the clear-conversation button."""
    bot = st.session_state.chatbot
    with st.sidebar:
        st.header("📁 Upload Insurance Document")
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

        # The button is only created once a file has been chosen.
        if uploaded_file is not None and st.button("Process PDF"):
            with st.spinner("Processing your insurance document..."):
                extracted = bot.extract_text_from_pdf(uploaded_file)
                if not extracted:
                    st.error("Failed to process PDF")
                else:
                    bot.pdf_content = extracted
                    st.session_state.pdf_processed = True
                    st.success("Insurance document processed successfully!")

                    # Short preview so the user can check the extraction.
                    st.subheader("Document Preview")
                    if len(extracted) > 500:
                        preview = extracted[:500] + "..."
                    else:
                        preview = extracted
                    st.text_area("First 500 characters:", preview, height=100)

        # Reset both the model-side history and the displayed transcript.
        if st.button("Clear Conversation"):
            bot.conversation_history = []
            st.session_state.chat_history = []
            st.rerun()


def _render_chat():
    """Show the stored transcript and handle a newly submitted question."""
    st.header("💬 Ask About Your Insurance Policy")

    for asked, replied in st.session_state.chat_history:
        with st.container():
            st.markdown(f"**You:** {asked}")
            st.markdown(f"**Insurance Assistant:** {replied}")
            st.divider()

    user_question = st.chat_input(
        "Ask about your insurance coverage, claims, deductibles, or any policy details..."
    )
    if not user_question:
        return

    with st.spinner("Analyzing your policy..."):
        bot = st.session_state.chatbot
        response = bot.chat_with_pdf(user_question, bot.pdf_content)

        # Persist the exchange so it is redrawn on the next rerun.
        st.session_state.chat_history.append((user_question, response))

        # Show the new exchange immediately in this run.
        st.markdown(f"**You:** {user_question}")
        st.markdown(f"**Insurance Assistant:** {response}")


def _render_welcome():
    """Show the upload hint, example questions and general tips."""
    st.info("👆 Please upload and process an insurance PDF document to start chatting!")

    st.subheader("Example questions you can ask:")
    st.markdown(
        "\n"
        "- What is my coverage limit for property damage?\n"
        "- What is my deductible amount?\n"
        "- What types of incidents are covered under this policy?\n"
        "- What is excluded from my coverage?\n"
        "- How do I file a claim?\n"
        "- What is the process for claim settlement?\n"
        "- What are my premium payment options?\n"
        "- When does my policy expire?\n"
        "- Is flood damage covered?\n"
        "- What documentation do I need for a claim?\n"
    )

    st.subheader("💡 Insurance Tips")
    st.markdown(
        "\n"
        "- Review your policy regularly to understand your coverage\n"
        "- Keep your policy documents in a safe place\n"
        "- Update your coverage when your circumstances change\n"
        "- Document any incidents immediately\n"
        "- Contact your insurance agent if you have questions\n"
    )


def main():
    """Render the Streamlit page for the insurance policy assistant.

    Creates the per-session chatbot state on first run, draws the sidebar,
    then shows either the chat view (once a PDF has been processed) or the
    welcome screen.
    """
    st.title("🛡️ Insurance Policy Assistant")
    st.markdown("Upload your insurance policy PDF and ask questions about your coverage, claims, deductibles, and more!")

    # First run of this session: create the chatbot and its UI state.
    if 'chatbot' not in st.session_state:
        st.session_state.chatbot = PDFChatbot()
        st.session_state.pdf_processed = False
        st.session_state.chat_history = []

    _render_sidebar()

    if st.session_state.pdf_processed:
        _render_chat()
    else:
        _render_welcome()
# Script entry point: build the UI only when this file is executed directly.
if __name__ == "__main__":
    main()