random2222 committed on
Commit
92e169f
·
verified ·
1 Parent(s): 8223211

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -43
app.py CHANGED
@@ -16,66 +16,76 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
16
  def initialize_system():
17
  # Validate documents folder
18
  if not os.path.exists(DOCS_DIR):
19
- raise FileNotFoundError(f"Business documents folder '{DOCS_DIR}' not found")
20
 
21
  # Load and process PDFs
22
  pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.endswith(".pdf")]
23
  if not pdf_files:
24
- raise ValueError(f"No PDF files found in {DOCS_DIR} folder")
25
 
 
26
  text_splitter = RecursiveCharacterTextSplitter(
27
- chunk_size=1000,
28
- chunk_overlap=200
29
  )
30
 
31
  documents = []
32
  for pdf_path in pdf_files:
33
- loader = PyPDFLoader(pdf_path)
34
- documents.extend(loader.load_and_split(text_splitter))
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- # Create embeddings
37
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
38
  vector_store = FAISS.from_documents(documents, embeddings)
39
 
40
- # Quantization config
41
  bnb_config = BitsAndBytesConfig(
42
  load_in_4bit=True,
43
- bnb_4bit_use_double_quant=True,
44
  bnb_4bit_quant_type="nf4",
45
  bnb_4bit_compute_dtype=torch.float16,
46
  )
47
 
48
- # Load model and tokenizer
49
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
50
- model = AutoModelForCausalLM.from_pretrained(
51
- MODEL_NAME,
52
- quantization_config=bnb_config,
53
- device_map="auto",
54
- trust_remote_code=True,
55
- use_flash_attention_2=True
56
- )
 
 
57
 
58
  return vector_store, model, tokenizer
59
 
60
- # Initialize system components
61
  try:
62
  vector_store, model, tokenizer = initialize_system()
63
- print("βœ… System initialized with business documents")
64
  except Exception as e:
65
- print(f"❌ Initialization failed: {str(e)}")
66
  raise
67
 
68
  # Response Generation
69
  def generate_response(query):
70
  try:
71
- # Retrieve relevant context
72
- docs = vector_store.similarity_search(query, k=3)
73
- context = "\n".join([doc.page_content for doc in docs])
74
 
75
- # Create formatted prompt
76
  prompt = f"""<|system|>
77
- You are a customer support assistant. Answer ONLY using the provided business documents.
78
- If the answer isn't in the documents, respond: "I don't have that information."
79
 
80
  Context: {context}</s>
81
  <|user|>
@@ -87,36 +97,33 @@ def generate_response(query):
87
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
88
  outputs = model.generate(
89
  inputs.input_ids,
90
- max_new_tokens=512,
91
- temperature=0.3,
92
  do_sample=True,
93
  pad_token_id=tokenizer.eos_token_id
94
  )
95
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
96
-
97
- # Extract only the assistant's response
98
  return response.split("<|assistant|>")[-1].strip()
99
 
100
  except Exception as e:
101
- return f"⚠️ Error: {str(e)}"
102
 
103
- # Chat Interface
104
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
105
- gr.Markdown("# πŸ“š Business Document Assistant")
106
 
107
  with gr.Row():
108
- gr.Image("https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png",
109
- width=100)
110
- gr.Markdown("Ask questions about our policies, products, and services!")
111
 
112
- chatbot = gr.Chatbot(height=400)
113
- msg = gr.Textbox(label="Your Question", placeholder="Type your question here...")
114
  clear = gr.Button("Clear History")
115
 
116
- def respond(message, chat_history):
117
  response = generate_response(message)
118
- chat_history.append((message, response))
119
- return "", chat_history
120
 
121
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
122
  clear.click(lambda: None, None, chatbot, queue=False)
 
def initialize_system():
    """Build the document index and load the quantized chat model.

    Collects every PDF in ``DOCS_DIR``, splits the pages into overlapping
    chunks, embeds the chunks into a FAISS vector store, and loads
    ``MODEL_NAME`` with a 4-bit NF4 quantization config.

    Returns:
        A ``(vector_store, model, tokenizer)`` tuple.

    Raises:
        FileNotFoundError: ``DOCS_DIR`` is not an existing directory.
        ValueError: ``DOCS_DIR`` contains no PDF files, or none of the PDFs
            produced any chunks.
        RuntimeError: The tokenizer or model failed to load; the original
            exception is chained as ``__cause__``.
    """
    # Validate documents folder
    if not os.path.isdir(DOCS_DIR):
        raise FileNotFoundError(f"📁 Missing business documents folder: {DOCS_DIR}")

    # Match the extension case-insensitively (".PDF" is common) and sort so
    # the index is built in a stable order; os.listdir order is arbitrary.
    pdf_files = sorted(
        os.path.join(DOCS_DIR, name)
        for name in os.listdir(DOCS_DIR)
        if name.lower().endswith(".pdf")
    )
    if not pdf_files:
        raise ValueError(f"❌ No PDFs found in {DOCS_DIR}")

    # Process documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,  # Reduced for free tier memory
        chunk_overlap=100,
    )

    documents = []
    for pdf_path in pdf_files:
        try:
            loader = PyPDFLoader(pdf_path)
            documents.extend(loader.load_and_split(text_splitter))
        except Exception as e:
            # Best effort: one unreadable PDF must not abort start-up.
            print(f"⚠️ Error processing {pdf_path}: {e}")

    # Fail with a clear message instead of letting an empty list reach the
    # vector store, which cannot build an index from zero documents.
    if not documents:
        raise ValueError(f"❌ No readable content extracted from PDFs in {DOCS_DIR}")

    # Create embeddings with explicit settings
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
        cache_folder="/tmp/sentence_transformers",
    )
    vector_store = FAISS.from_documents(documents, embeddings)

    # 4-bit quantization config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    # Load model with error handling
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # NOTE(review): trust_remote_code=True executes code shipped with the
        # model repository -- confirm it is actually required for MODEL_NAME.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
        )
    except Exception as e:
        # Chain the cause so the underlying traceback is not lost.
        raise RuntimeError(f"🤖 Model loading failed: {e}") from e

    return vector_store, model, tokenizer
70
 
# Initialize system
# Runs once at import time. Any failure is reported and then re-raised so the
# app never starts without the vector store, model and tokenizer in place.
try:
    vector_store, model, tokenizer = initialize_system()
except Exception as e:
    print(f"🔥 Critical error: {e}")
    raise
else:
    print("🚀 System ready with business documents")
78
 
79
  # Response Generation
80
  def generate_response(query):
81
  try:
82
+ # Context retrieval
83
+ docs = vector_store.similarity_search(query, k=2) # Reduced context chunks
84
+ context = "\n".join([d.page_content for d in docs])
85
 
86
+ # Zephyr prompt template
87
  prompt = f"""<|system|>
88
+ Answer ONLY using the business documents. Unknown answers: "I don't have that information."
 
89
 
90
  Context: {context}</s>
91
  <|user|>
 
97
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
98
  outputs = model.generate(
99
  inputs.input_ids,
100
+ max_new_tokens=256, # Reduced for faster responses
101
+ temperature=0.2,
102
  do_sample=True,
103
  pad_token_id=tokenizer.eos_token_id
104
  )
105
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
106
  return response.split("<|assistant|>")[-1].strip()
107
 
108
  except Exception as e:
109
+ return f"⚠️ Error: Please try again. Details: {str(e)[:100]}"
110
 
111
+ # Gradio Interface
112
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
113
+ gr.Markdown("# πŸ“ž Business Support Assistant")
114
 
115
  with gr.Row():
116
+ gr.Image(value="https://placehold.co/100x30?text=Company+Logo", width=100)
117
+ gr.Markdown("Ask questions about our services and policies")
 
118
 
119
+ chatbot = gr.Chatbot(height=350)
120
+ msg = gr.Textbox(placeholder="Type your question...", label="")
121
  clear = gr.Button("Clear History")
122
 
def respond(message, history):
    """Handle one chat submission and return the updated conversation.

    Args:
        message: Text the user submitted.
        history: List of ``(user_message, assistant_reply)`` pairs shown in
            the chatbot; ``None`` is treated as an empty conversation.

    Returns:
        A ``("", history)`` tuple: the empty string is written back to the
        input box and ``history`` is the conversation including the new turn.
    """
    if history is None:
        history = []

    # Skip retrieval and generation for blank submissions so an accidental
    # Enter does not add an empty turn or spend a model call.
    if not message or not message.strip():
        return "", history

    response = generate_response(message)
    history.append((message, response))
    return "", history
127
 
128
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
129
  clear.click(lambda: None, None, chatbot, queue=False)