Spaces:

random2222
/

tryagain

Build error

File size: 3,362 Bytes

21a2e46
1051bf9
2d8c319
8550dc5
91b268b
2e62dd1
91b268b
 
21a2e46
0e5b4a4
2e62dd1
569e45d
2e62dd1
2d8c319
0e5b4a4
21a2e46
 
 
 
 
 
 
 
2e62dd1
21a2e46
8550dc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21a2e46
8550dc5
 
 
 
0e5b4a4
2e62dd1
 
21a2e46
2e62dd1
21a2e46
f8c1ecf
0e5b4a4
2e62dd1
f8c1ecf
 
 
 
 
21a2e46
 
 
 
f8c1ecf
 
 
 
 
21a2e46
f8c1ecf
 
 
 
21a2e46
8550dc5
f8c1ecf
8550dc5
0e5b4a4
f8c1ecf
21a2e46
 
8550dc5
21a2e46
8550dc5
2e62dd1
91b268b
f8c1ecf
8550dc5
 
2e62dd1
8550dc5
0e5b4a4
8550dc5

# app.py
import logging
import os

import gradio as gr
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Configuration
DOCS_DIR = "business_docs"  # folder scanned for PDF source documents
MODEL_NAME = "microsoft/phi-2"  # causal language model that writes the answers
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # sentence embedder for retrieval

# Quantization config: load the model weights in 4-bit NF4 form, compute in
# float16, and leave double quantization switched off.
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

def initialize_system():
    """Build the document index and load the language model.

    Loads every PDF found directly inside ``DOCS_DIR``, splits the pages into
    overlapping chunks, embeds the chunks on CPU into a FAISS vector store,
    and loads ``MODEL_NAME`` (4-bit quantized via ``quant_config``) together
    with its tokenizer.

    Returns:
        A ``(vector_store, model, tokenizer)`` tuple.

    Raises:
        FileNotFoundError: If ``DOCS_DIR`` is not an existing directory or
            contains no PDF files.
        ValueError: If the PDFs yielded no text chunks to index.
    """
    # Document processing
    if not os.path.isdir(DOCS_DIR):
        raise FileNotFoundError(f"Missing {DOCS_DIR} folder")

    # Match the extension case-insensitively (".PDF" is common) and sort so
    # the documents are always processed in the same order.
    pdf_files = sorted(
        os.path.join(DOCS_DIR, name)
        for name in os.listdir(DOCS_DIR)
        if name.lower().endswith(".pdf")
    )
    if not pdf_files:
        raise FileNotFoundError(f"No PDF files found in {DOCS_DIR} folder")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
    )

    texts = []
    for pdf in pdf_files:
        texts.extend(PyPDFLoader(pdf).load_and_split(text_splitter))

    # Fail early with a clear message instead of handing the vector store an
    # empty document list (e.g. when the PDFs contain only scanned images).
    if not texts:
        raise ValueError(f"No text could be extracted from the PDFs in {DOCS_DIR}")

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},
    )

    # Vector store
    vector_store = FAISS.from_documents(texts, embeddings)

    # Model loading
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        padding_side="left",
    )

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        device_map="auto",
        quantization_config=quant_config,
        torch_dtype=torch.float16,
    )

    return vector_store, model, tokenizer

# Build the index and load the model once at import time; report the outcome
# on stdout and let any failure propagate so the app does not start half-ready.
try:
    vector_store, model, tokenizer = initialize_system()
    print("✅ System initialized successfully")
except Exception as init_error:
    print(f"❌ Initialization failed: {init_error!s}")
    raise

def generate_response(query):
    """Answer a customer question from the indexed documents.

    Retrieves the two chunks most similar to ``query`` from the module-level
    ``vector_store``, places them in the prompt as context, and generates a
    reply with the module-level ``model`` and ``tokenizer``.

    Args:
        query: The user's question as text.

    Returns:
        The generated answer with surrounding whitespace stripped.
        "Please enter a question." when ``query`` is not a string or is
        empty/blank. "Please try again later." when retrieval or generation
        fails; the underlying error is logged with its traceback.
    """
    # Nothing to search for: skip retrieval and generation entirely.
    if not isinstance(query, str) or not query.strip():
        return "Please enter a question."

    try:
        docs = vector_store.similarity_search(query, k=2)
        context = "\n".join(d.page_content for d in docs)

        prompt = f"""<|system|>
        Answer using only this context: {context}
        - Max 2 sentences
        - If unsure: "I'll check with the team"</s>
        <|user|>{query}</s>
        <|assistant|>"""

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        # Greedy decoding, stated explicitly: a temperature has no effect
        # unless sampling is enabled, so it is not passed here.
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

        # The generated sequence starts with the prompt tokens; decode only
        # what follows them instead of searching the text for a marker.
        prompt_length = inputs["input_ids"].shape[-1]
        answer_ids = outputs[0][prompt_length:]
        return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

    except Exception:
        # Keep the user-facing message generic but record the real cause.
        logging.getLogger(__name__).exception("Failed to generate a response")
        return "Please try again later."

# Gradio interface: a chat window, a question box and a button that clears both.
with gr.Blocks() as demo:
    gr.Markdown("# Customer Service Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your question")
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, history):
        """Record the question and its generated answer in the chat history.

        Returns an empty string (written back to the textbox) and the
        updated history (written back to the chatbot).
        """
        answer = generate_response(message)
        history.append((message, answer))
        return "", history

    # Submitting the textbox runs `respond` and refreshes both components.
    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])

demo.launch()