random2222 committed
Commit eb90789 · verified · 1 Parent(s): 21a2e46

Update app.py

Files changed (1): app.py (+14 -24)
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py
+# app.py (CPU-optimized)
 import gradio as gr
 import os
 import torch
@@ -6,21 +6,13 @@ from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Configuration
 DOCS_DIR = "business_docs"
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 MODEL_NAME = "microsoft/phi-2"
 
-# Quantization config
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=False
-)
-
 def initialize_system():
     # Document processing
     if not os.path.exists(DOCS_DIR):
@@ -31,8 +23,8 @@ def initialize_system():
             if f.endswith(".pdf")]
 
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=800,
-        chunk_overlap=100
+        chunk_size=500,  # Smaller chunks for CPU
+        chunk_overlap=50
     )
 
     texts = []
@@ -50,7 +42,7 @@ def initialize_system():
     # Vector store
     vector_store = FAISS.from_documents(texts, embeddings)
 
-    # Model loading
+    # Load model without quantization
     tokenizer = AutoTokenizer.from_pretrained(
         MODEL_NAME,
         trust_remote_code=True,
@@ -60,9 +52,8 @@ def initialize_system():
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         trust_remote_code=True,
-        device_map="auto",
-        quantization_config=quant_config,
-        torch_dtype=torch.float16
+        torch_dtype=torch.float16,
+        device_map="cpu"  # Force CPU
     )
 
     return vector_store, model, tokenizer
@@ -76,22 +67,21 @@ except Exception as e:
 
 def generate_response(query):
     try:
-        docs = vector_store.similarity_search(query, k=2)
+        docs = vector_store.similarity_search(query, k=1)  # Less context
         context = "\n".join([d.page_content for d in docs])
 
         prompt = f"""<|system|>
-Answer using only this context: {context}
-- Max 2 sentences
+Answer using: {context}
+- Max 1 sentence
 - If unsure: "I'll check with the team"</s>
 <|user|>{query}</s>
 <|assistant|>"""
 
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
         outputs = model.generate(
             **inputs,
-            max_new_tokens=150,
-            temperature=0.1,
-            pad_token_id=tokenizer.eos_token_id
+            max_new_tokens=100,
+            temperature=0.1
         )
 
         return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
@@ -99,7 +89,7 @@ def generate_response(query):
     except Exception as e:
         return "Please try again later."
 
-# Gradio interface
+# Simplified interface
 with gr.Blocks() as demo:
     gr.Markdown("# Customer Service Chatbot")
     chatbot = gr.Chatbot()
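A note on the embedding model, which is constructed in code outside this diff: since the point of this commit is CPU-only operation, the sentence-transformers encoder can be pinned to CPU explicitly. A minimal sketch, assuming the embeddings object passed to FAISS.from_documents above is a HuggingFaceEmbeddings instance (the kwargs here are illustrative, not taken from this commit):

embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,       # "sentence-transformers/all-MiniLM-L6-v2"
    model_kwargs={"device": "cpu"},   # assumption: force the encoder onto CPU
)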
 
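On the model-loading change itself: torch_dtype=torch.float16 is mainly a GPU optimization. On CPU, half precision is usually slower than float32 and some ops lack float16 CPU kernels, and device_map="cpu" additionally pulls in the accelerate package. A minimal sketch of a more conventional CPU load using the same MODEL_NAME; this is an alternative to what the commit does, not part of it:

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float32,   # float32 is the usual dtype for CPU inference
    low_cpu_mem_usage=True,      # load weights incrementally to reduce peak RAM
).to("cpu")
model.eval()                     # inference only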
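One caveat on the new generate call: in transformers, temperature only takes effect when sampling is enabled, so with the default do_sample=False this call decodes greedily and logs a warning that temperature is ignored. Dropping pad_token_id=tokenizer.eos_token_id also reintroduces the "Setting pad_token_id to eos_token_id" warning, since phi-2's tokenizer defines no pad token. A sketch with both made explicit; the parameter values come from the diff, while the do_sample and pad_token_id lines are suggested additions:

outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=True,                        # required for temperature to apply
    temperature=0.1,
    pad_token_id=tokenizer.eos_token_id,   # phi-2 has no dedicated pad token
)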