# Updated app.py with torch import and error handling
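# Assumed runtime dependencies (not pinned in the original), roughly:
#   pip install gradio torch transformers accelerate bitsandbytes \
#       langchain langchain-community faiss-cpu sentence-transformers pypdf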
import gradio as gr
import os
import torch  # Missing import added here
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Configuration
DOCS_DIR = "business_docs"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME = "microsoft/phi-2"
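# microsoft/phi-2 is a ~2.7B-parameter causal LM; all-MiniLM-L6-v2 produces
# compact 384-dimensional sentence embeddings, which keeps the FAISS index small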

def initialize_system():
    try:
        # Verify documents
        if not os.path.exists(DOCS_DIR):
            raise FileNotFoundError(f"Missing {DOCS_DIR} folder")
            
        pdf_files = [os.path.join(DOCS_DIR, f) 
                    for f in os.listdir(DOCS_DIR) 
                    if f.endswith(".pdf")]
        if not pdf_files:
            raise ValueError(f"No PDFs found in {DOCS_DIR}")
        
        # Process documents
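        # RecursiveCharacterTextSplitter counts characters by default;
        # 800-char chunks with 100-char overlap trade retrieval granularity
        # against the limited context window of a small model like Phi-2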
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=100
        )
        
        texts = []
        for pdf in pdf_files:
            loader = PyPDFLoader(pdf)
            pages = loader.load_and_split(text_splitter)
            texts.extend(pages)
        
        # Create embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': False}
        )
        
        # Create vector store
        vector_store = FAISS.from_documents(texts, embeddings)
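        # The index is rebuilt on every startup; for larger corpora it could
        # be persisted once with vector_store.save_local(...) and reloaded
        # with FAISS.load_local(...) instead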
        
        # Load Phi-2 model
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            padding_side="left"
        )
        
        # Passing load_in_4bit directly to from_pretrained is deprecated;
        # BitsAndBytesConfig is the current API. 4-bit loading requires the
        # bitsandbytes package and a CUDA GPU.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            device_map="auto",
            quantization_config=BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16
            ),
            torch_dtype=torch.float16
        )
        
        return vector_store, model, tokenizer
        
    except Exception as e:
        # Chain the original exception so the full traceback is preserved
        raise RuntimeError(f"Initialization failed: {e}") from e

try:
    vector_store, model, tokenizer = initialize_system()
    print("✅ System initialized successfully")
except Exception as e:
    print(f"❌ Initialization error: {str(e)}")
    raise

def generate_response(query):
    try:
        # Retrieve context
        docs = vector_store.similarity_search(query, k=2)
        context = "\n".join([d.page_content for d in docs])
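        # k=2 keeps the prompt short; raising k widens recall but lengthens
        # the prompt and slows generation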
        
        # Phi-2 optimized prompt
        prompt = f"""<|system|>
        You are a customer service assistant. Answer ONLY using the context below.
        Keep responses under 3 sentences. If unsure, say "I'll check with the team".
        
        Context: {context}</s>
        <|user|>
        {query}</s>
        <|assistant|>"""
        
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
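        # temperature only takes effect when do_sample=True; 0.1 keeps the
        # output nearly deterministic and close to the retrieved context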
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response.split("<|assistant|>")[-1].strip()
        
    except Exception as e:
        # Log the real error server-side, but show the user a generic message
        print(f"Generation error: {e}")
        return "I'm having trouble answering that. Please try again later."

# Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Customer Support Chatbot")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Your question", placeholder="Type here...")
    clear = gr.Button("Clear History")
    
    def respond(message, history):
        response = generate_response(message)
        # gr.Chatbot expects the full history as a list of (user, bot)
        # pairs rather than a bare string; also clear the input textbox
        history = (history or []) + [(message, response)]
        return "", history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(server_port=7860)
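# 7860 is Gradio's default server port and the one Hugging Face Spaces
# expects by default, so hardcoding it here is redundant but harmless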