import os
import gradio as gr
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
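# Expected dependencies (approximate list): gradio, torch, transformers, accelerate,
# bitsandbytes, langchain, langchain-community, sentence-transformers, faiss-cpu, pypdf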

# Configuration
DOCS_DIR = "business_docs"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
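# Zephyr-7B-beta is an instruction-tuned chat model; with the 4-bit NF4 quantization
# configured below it should fit on a single consumer GPU (roughly 6 GB of VRAM).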

# System Initialization
def initialize_system():
    # Validate documents folder
    if not os.path.exists(DOCS_DIR):
        raise FileNotFoundError(f"Business documents folder '{DOCS_DIR}' not found")
    
    # Load and process PDFs
    pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.lower().endswith(".pdf")]
    if not pdf_files:
        raise ValueError(f"No PDF files found in {DOCS_DIR} folder")

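    # Split PDFs into 1000-character chunks with 200-character overlap so retrieved
    # passages stay self-contained without losing context at chunk boundaries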
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    
    documents = []
    for pdf_path in pdf_files:
        loader = PyPDFLoader(pdf_path)
        documents.extend(loader.load_and_split(text_splitter))
    
    # Create embeddings
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    vector_store = FAISS.from_documents(documents, embeddings)
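    # The FAISS index lives in memory and is rebuilt from the PDFs on every startup;
    # FAISS.save_local / FAISS.load_local could persist it between runs.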
    
    # Quantization config: store weights as 4-bit NormalFloat (NF4) and run
    # computations in float16, cutting memory use roughly 4x vs. full precision
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    
    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    
    return vector_store, model, tokenizer

# Initialize system components
try:
    vector_store, model, tokenizer = initialize_system()
    print("✅ System initialized with business documents")
except Exception as e:
    print(f"❌ Initialization failed: {str(e)}")
    raise

# Response Generation
def generate_response(query):
    try:
        # Retrieve the three most similar chunks and join them into one context block
        docs = vector_store.similarity_search(query, k=3)
        context = "\n".join(doc.page_content for doc in docs)
        
        # Build a Zephyr-format prompt. The role tags and </s> markers must start
        # at column 0, so the prompt is assembled without the source indentation.
        prompt = (
            "<|system|>\n"
            "You are a customer support assistant. Answer ONLY using the provided business documents.\n"
            "If the answer isn't in the documents, respond: \"I don't have that information.\"\n\n"
            f"Context: {context}</s>\n"
            "<|user|>\n"
            f"{query}</s>\n"
            "<|assistant|>\n"
        )
        
        # Generate with light sampling; passing the full tokenizer output supplies
        # the attention mask alongside input_ids and avoids a transformers warning
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Extract only the assistant's response
        return response.split("<|assistant|>")[-1].strip()
    
    except Exception as e:
        return f"⚠️ Error: {str(e)}"

# Chat Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 Business Document Assistant")
    
    with gr.Row():
        gr.Image("https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png", 
                width=100)
        gr.Markdown("Ask questions about our policies, products, and services!")
    
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Your Question", placeholder="Type your question here...")
    clear = gr.Button("Clear History")
    
    def respond(message, chat_history):
        response = generate_response(message)
        chat_history.append((message, response))
        return "", chat_history
    
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
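
# launch() binds to all network interfaces on port 7860; open http://localhost:7860
# locally, or pass share=True to launch() for a temporary public Gradio link.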