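"""Business Documentation Assistant.

Loads PDFs from a local folder, splits them into chunks, indexes them in a
FAISS vector store with MiniLM embeddings, answers questions with a locally
loaded Phi-3 model grounded on the retrieved chunks, and serves the chat
through a Gradio interface.
"""
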
import os
import gradio as gr
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer

# Configuration
DOCS_DIR = ".business_docs"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"  # small instruct model, practical for CPU inference

# System Initialization
def initialize_system():
    # Validate documents folder
    if not os.path.exists(DOCS_DIR):
        raise FileNotFoundError(f"Missing documents folder: {DOCS_DIR}")
    
    # Process PDFs
    pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.lower().endswith(".pdf")]
    if not pdf_files:
        raise ValueError(f"No PDFs found in {DOCS_DIR}")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=512,   # small chunks keep embedding fast and retrieval focused
        chunk_overlap=50  # overlap preserves context across chunk boundaries
    )
    
    documents = []
    for pdf_path in pdf_files:
        try:
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load_and_split(text_splitter))
        except Exception as e:
            print(f"Error processing {pdf_path}: {str(e)}")
    
    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True}
    )
    
    vector_store = FAISS.from_documents(documents, embeddings)
    
    # Load CPU-optimized model
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            torch_dtype=torch.float32,
            device_map="cpu"
        )
    except Exception as e:
        raise RuntimeError(f"Model loading failed: {str(e)}")
    
    return vector_store, model, tokenizer

# Initialize once at startup; the vector store, model, and tokenizer are reused by every request
try:
    vector_store, model, tokenizer = initialize_system()
    print("βœ… System ready with business documents")
except Exception as e:
    print(f"❌ Initialization failed: {str(e)}")
    raise

# Response Generation
def generate_response(query):
    try:
        # Context retrieval
        docs = vector_store.similarity_search(query, k=2)
        context = "\n".join([d.page_content for d in docs])
        
        # Phi-3 chat format: each turn ends with <|end|>; building the string
        # piecewise keeps indentation out of the prompt
        prompt = (
            "<|system|>\n"
            "Answer ONLY using the business documents. Respond to unknown queries with: "
            '"This information is not available in our current documentation."\n'
            f"\nContext: {context}<|end|>\n"
            f"<|user|>\n{query}<|end|>\n"
            "<|assistant|>\n"
        )
        
        # Generate a response, then decode only the newly generated tokens so
        # the prompt is not echoed back in the answer
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
        new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    
    except Exception as e:
        return f"Error: Please try again. ({str(e)[:50]})"

# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ“š Business Documentation Assistant")
    
    chatbot = gr.Chatbot(height=300)
    msg = gr.Textbox(placeholder="Ask about our services...", label="")
    clear = gr.Button("Clear History")
    
    def respond(message, history):
        response = generate_response(message)
        history.append((message, response))
        return "", history
    
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
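
# A minimal way to try this locally, assuming PDFs are placed in .business_docs/:
#   pip install gradio torch transformers accelerate langchain langchain-community \
#       faiss-cpu pypdf sentence-transformers
#   python <this script>  # then open http://localhost:7860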