random2222 committed on
Commit
21a2e46
·
verified ·
1 Parent(s): 142c7d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -24
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import os
3
  import torch
@@ -5,15 +6,23 @@ from langchain_community.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
- from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
10
  # Configuration
11
  DOCS_DIR = "business_docs"
12
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
13
  MODEL_NAME = "microsoft/phi-2"
14
 
 
 
 
 
 
 
 
 
15
  def initialize_system():
16
- # Document verification
17
  if not os.path.exists(DOCS_DIR):
18
  raise FileNotFoundError(f"Missing {DOCS_DIR} folder")
19
 
@@ -21,10 +30,6 @@ def initialize_system():
21
  for f in os.listdir(DOCS_DIR)
22
  if f.endswith(".pdf")]
23
 
24
- if not pdf_files:
25
- raise ValueError(f"No PDFs found in {DOCS_DIR}")
26
-
27
- # Document processing
28
  text_splitter = RecursiveCharacterTextSplitter(
29
  chunk_size=800,
30
  chunk_overlap=100
@@ -56,7 +61,7 @@ def initialize_system():
56
  MODEL_NAME,
57
  trust_remote_code=True,
58
  device_map="auto",
59
- load_in_4bit=True,
60
  torch_dtype=torch.float16
61
  )
62
 
@@ -64,47 +69,41 @@ def initialize_system():
64
 
65
  try:
66
  vector_store, model, tokenizer = initialize_system()
67
- print("System initialized successfully")
68
  except Exception as e:
69
- print(f"Initialization failed ❌: {str(e)}")
70
  raise
71
 
72
  def generate_response(query):
73
  try:
74
- # Context retrieval
75
  docs = vector_store.similarity_search(query, k=2)
76
  context = "\n".join([d.page_content for d in docs])
77
 
78
- # Phi-2 optimized prompt
79
  prompt = f"""<|system|>
80
- You are a customer service bot. Answer only using:
81
- {context}
82
- - Max 3 sentences
83
- - If unsure: "I'll check with the team"
84
- </s>
85
- <|user|>
86
- {query}</s>
87
  <|assistant|>"""
88
 
89
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
90
  outputs = model.generate(
91
  **inputs,
92
- max_new_tokens=200,
93
  temperature=0.1,
94
  pad_token_id=tokenizer.eos_token_id
95
  )
96
 
97
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
98
- return response.split("<|assistant|>")[-1].strip()
99
 
100
  except Exception as e:
101
  return "Please try again later."
102
 
103
  # Gradio interface
104
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
105
- gr.Markdown("# Customer Support Chatbot")
106
  chatbot = gr.Chatbot()
107
- msg = gr.Textbox(label="Ask about our services")
108
  clear = gr.ClearButton([msg, chatbot])
109
 
110
  def respond(message, history):
 
1
+ # app.py
2
  import gradio as gr
3
  import os
4
  import torch
 
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import FAISS
9
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
10
 
11
  # Configuration
12
  DOCS_DIR = "business_docs"
13
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
14
  MODEL_NAME = "microsoft/phi-2"
15
 
16
+ # Quantization config
17
+ quant_config = BitsAndBytesConfig(
18
+ load_in_4bit=True,
19
+ bnb_4bit_quant_type="nf4",
20
+ bnb_4bit_compute_dtype=torch.float16,
21
+ bnb_4bit_use_double_quant=False
22
+ )
23
+
24
  def initialize_system():
25
+ # Document processing
26
  if not os.path.exists(DOCS_DIR):
27
  raise FileNotFoundError(f"Missing {DOCS_DIR} folder")
28
 
 
30
  for f in os.listdir(DOCS_DIR)
31
  if f.endswith(".pdf")]
32
 
 
 
 
 
33
  text_splitter = RecursiveCharacterTextSplitter(
34
  chunk_size=800,
35
  chunk_overlap=100
 
61
  MODEL_NAME,
62
  trust_remote_code=True,
63
  device_map="auto",
64
+ quantization_config=quant_config,
65
  torch_dtype=torch.float16
66
  )
67
 
 
69
 
70
  try:
71
  vector_store, model, tokenizer = initialize_system()
72
+ print("System initialized successfully")
73
  except Exception as e:
74
+ print(f"Initialization failed: {str(e)}")
75
  raise
76
 
77
  def generate_response(query):
78
  try:
 
79
  docs = vector_store.similarity_search(query, k=2)
80
  context = "\n".join([d.page_content for d in docs])
81
 
 
82
  prompt = f"""<|system|>
83
+ Answer using only this context: {context}
84
+ - Max 2 sentences
85
+ - If unsure: "I'll check with the team"</s>
86
+ <|user|>{query}</s>
 
 
 
87
  <|assistant|>"""
88
 
89
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
90
  outputs = model.generate(
91
  **inputs,
92
+ max_new_tokens=150,
93
  temperature=0.1,
94
  pad_token_id=tokenizer.eos_token_id
95
  )
96
 
97
+ return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
 
98
 
99
  except Exception as e:
100
  return "Please try again later."
101
 
102
  # Gradio interface
103
+ with gr.Blocks() as demo:
104
+ gr.Markdown("# Customer Service Chatbot")
105
  chatbot = gr.Chatbot()
106
+ msg = gr.Textbox(label="Your question")
107
  clear = gr.ClearButton([msg, chatbot])
108
 
109
  def respond(message, history):