import gradio as gr
import os
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Configuration
DOCS_DIR = "business_docs"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME = "microsoft/phi-2"

def initialize_system():
    # Verify documents
    if not os.path.exists(DOCS_DIR):
        raise FileNotFoundError(f"Missing {DOCS_DIR} folder")
        
    pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) 
                if f.endswith(".pdf")]
    if not pdf_files:
        raise ValueError(f"No PDFs found in {DOCS_DIR}")
    
    # Process documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,  # Reduced for Phi-2's context window
        chunk_overlap=100
    )
    
    texts = []
    for pdf in pdf_files:
        loader = PyPDFLoader(pdf)
        pages = loader.load_and_split(text_splitter)
        texts.extend(pages)
    
    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},  # Force CPU for compatibility
        encode_kwargs={'normalize_embeddings': False}
    )
    
    # Create vector store
    vector_store = FAISS.from_documents(texts, embeddings)
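    # Note: the index is rebuilt from scratch on every startup. For larger
    # corpora it could be persisted once with vector_store.save_local(...)
    # and reloaded later via FAISS.load_local; the path name is your choice.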
    
    # Load Phi-2 with 4-bit quantization (requires bitsandbytes and a CUDA GPU)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        device_map="auto",
        quantization_config=quant_config
    )
    
    return vector_store, model, tokenizer

try:
    vector_store, model, tokenizer = initialize_system()
    print("System initialized successfully")
except Exception as e:
    raise RuntimeError(f"Initialization error: {e}") from e

def generate_response(query):
    # Retrieve context
    docs = vector_store.similarity_search(query, k=2)  # Fewer docs for Phi-2
    context = "\n".join([d.page_content for d in docs])
    
    # Phi-2 specific prompt format (dedented so the source indentation
    # doesn't leak into the prompt text)
    prompt = f"""Question: {query}
Context: {context}
Instructions:
- Answer only using the context
- Keep responses under 3 sentences
- If unsure, say "I'll need to check with the team"

Answer:"""
    
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("Answer:")[-1].strip()
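
# Quick sanity check, assuming at least one PDF sits in business_docs/
# (the question below is only an illustrative example):
# print(generate_response("What are your support hours?"))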

# Simplified Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Customer Service Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your question")
    clear = gr.ClearButton([msg, chatbot])
    
    def respond(message, history):
        try:
            response = generate_response(message)
        except Exception:
            response = "I'm having trouble answering that right now. Please try again later."
        # gr.Chatbot renders the full history as a list of (user, bot) pairs,
        # so append the new exchange instead of returning a bare string
        return history + [(message, response)]
    
    msg.submit(respond, [msg, chatbot], chatbot)
    msg.submit(lambda: "", None, msg)

demo.launch(server_port=7860)
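# launch() binds to localhost by default; to reach the app from another
# machine you could also pass server_name="0.0.0.0" or share=True (optional
# Gradio parameters, not part of the original configuration).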