# NOTE(review): the lines that were here ("Spaces:", "Build error", commit
# hashes, and a line-number gutter) were Hugging Face Spaces web-page residue
# captured during extraction, not part of the source file; removed so the
# module parses. Reported file size was 3,510 bytes.
import os

import gradio as gr
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer
# Configuration
DOCS_DIR = "business_docs"  # folder of source PDFs, relative to the app root
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # sentence-embedding model backing the FAISS index
MODEL_NAME = "microsoft/phi-2"  # causal LM used for answer generation
def initialize_system():
    """Load business PDFs, build a FAISS index, and load the Phi-2 model.

    Returns:
        tuple: ``(vector_store, model, tokenizer)`` — a FAISS index over the
        document chunks plus the Phi-2 causal-LM/tokenizer pair used for
        generation.

    Raises:
        FileNotFoundError: if the ``DOCS_DIR`` folder does not exist.
        ValueError: if ``DOCS_DIR`` contains no PDF files.
    """
    # Verify documents exist before doing any heavy model loading.
    if not os.path.exists(DOCS_DIR):
        raise FileNotFoundError(f"Missing {DOCS_DIR} folder")
    # sorted() makes chunk ordering (and thus the index) deterministic across
    # runs; .lower() also accepts ".PDF" files, which the original missed.
    pdf_files = sorted(
        os.path.join(DOCS_DIR, f)
        for f in os.listdir(DOCS_DIR)
        if f.lower().endswith(".pdf")
    )
    if not pdf_files:
        raise ValueError(f"No PDFs found in {DOCS_DIR}")

    # Split documents into overlapping chunks sized for Phi-2's context window.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,  # reduced for Phi-2's context window
        chunk_overlap=100,
    )
    texts = []
    for pdf in pdf_files:
        loader = PyPDFLoader(pdf)
        texts.extend(loader.load_and_split(text_splitter))

    # Embeddings forced onto CPU for compatibility across Space hardware.
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
    )
    vector_store = FAISS.from_documents(texts, embeddings)

    # Load Phi-2 with 4-bit quantization (requires bitsandbytes at runtime).
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        device_map="auto",
        load_in_4bit=True,
        torch_dtype=torch.float16,  # torch is imported at the top of the file
    )
    return vector_store, model, tokenizer
# Build the retrieval index and load the model once at import time; a failure
# here is fatal for the app, so surface it as a RuntimeError.
try:
    vector_store, model, tokenizer = initialize_system()
    print("System initialized successfully")
except Exception as e:
    # Chain the original exception ("from e") so the real cause — missing
    # docs folder, model download failure, etc. — stays in the traceback.
    raise RuntimeError(f"Initialization error: {str(e)}") from e
def generate_response(query):
    """Answer a customer question grounded in the retrieved document context.

    Looks up the two most similar chunks in the vector store, formats a
    Phi-2 prompt around them, and returns only the text the model produced
    after the final "Answer:" marker.
    """
    # Retrieve a small amount of context — k=2 keeps the prompt within
    # Phi-2's context window.
    matches = vector_store.similarity_search(query, k=2)
    ctx = "\n".join(doc.page_content for doc in matches)

    # Phi-2 specific prompt format
    prompt = f"""Question: {query}
Context: {ctx}
Instructions:
- Answer only using the context
- Keep responses under 3 sentences
- If unsure, say "I'll need to check with the team"
Answer:"""

    encoded = tokenizer(
        prompt, return_tensors="pt", return_attention_mask=False
    ).to(model.device)
    generated = model.generate(
        **encoded,
        max_new_tokens=200,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; everything after the last
    # "Answer:" marker is the model's reply.
    return decoded.split("Answer:")[-1].strip()
# Simplified Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Customer Service Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your question")
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, history):
        """Handle one chat turn: generate a reply and append it to history.

        Bug fix: the original returned a bare string into the Chatbot
        output, but gr.Chatbot expects the whole conversation as a list of
        (user, bot) pairs — so every message errored. Append the new turn
        to the incoming history instead.
        """
        try:
            response = generate_response(message)
        except Exception:
            response = "I'm having trouble answering that right now. Please try again later."
        return (history or []) + [(message, response)]

    msg.submit(respond, [msg, chatbot], chatbot)
    msg.submit(lambda: "", None, msg)  # clear the textbox after each send

# NOTE(review): a stray "|" fused onto this line by extraction was removed.
demo.launch(server_port=7860)