random2222 committed on
Commit
2d88065
·
verified ·
1 Parent(s): 2ae3591

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -22
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py (CPU-optimized)
2
  import gradio as gr
3
  import os
4
  import torch
@@ -23,8 +22,8 @@ def initialize_system():
23
  if f.endswith(".pdf")]
24
 
25
  text_splitter = RecursiveCharacterTextSplitter(
26
- chunk_size=500, # Smaller chunks for CPU
27
- chunk_overlap=50
28
  )
29
 
30
  texts = []
@@ -42,7 +41,7 @@ def initialize_system():
42
  # Vector store
43
  vector_store = FAISS.from_documents(texts, embeddings)
44
 
45
- # Load model without quantization
46
  tokenizer = AutoTokenizer.from_pretrained(
47
  MODEL_NAME,
48
  trust_remote_code=True,
@@ -53,7 +52,8 @@ def initialize_system():
53
  MODEL_NAME,
54
  trust_remote_code=True,
55
  torch_dtype=torch.float16,
56
- device_map="cpu" # Force CPU
 
57
  )
58
 
59
  return vector_store, model, tokenizer
@@ -61,46 +61,59 @@ def initialize_system():
61
  try:
62
  vector_store, model, tokenizer = initialize_system()
63
  print("✅ System initialized successfully")
 
64
  except Exception as e:
65
  print(f"❌ Initialization failed: {str(e)}")
66
  raise
67
 
68
  def generate_response(query):
69
  try:
70
- docs = vector_store.similarity_search(query, k=1) # Less context
 
71
  context = "\n".join([d.page_content for d in docs])
72
 
 
73
  prompt = f"""<|system|>
74
- Answer using: {context}
75
- - Max 1 sentence
76
- - If unsure: "I'll check with the team"</s>
 
 
77
  <|user|>{query}</s>
78
  <|assistant|>"""
79
 
80
- inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
81
  outputs = model.generate(
82
- **inputs,
83
- max_new_tokens=100,
84
- temperature=0.1
 
 
85
  )
86
 
87
- return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
 
88
 
89
  except Exception as e:
90
  return "Please try again later."
91
 
92
- # Simplified interface
93
- with gr.Blocks() as demo:
94
- gr.Markdown("# Customer Service Chatbot")
95
- chatbot = gr.Chatbot()
96
- msg = gr.Textbox(label="Your question")
97
- clear = gr.ClearButton([msg, chatbot])
 
 
98
 
 
 
99
  def respond(message, history):
100
  response = generate_response(message)
101
  history.append((message, response))
102
  return "", history
103
-
 
104
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
105
 
106
- demo.launch()
 
 
1
  import gradio as gr
2
  import os
3
  import torch
 
22
  if f.endswith(".pdf")]
23
 
24
  text_splitter = RecursiveCharacterTextSplitter(
25
+ chunk_size=1000, # Increased chunk size for better context
26
+ chunk_overlap=200
27
  )
28
 
29
  texts = []
 
41
  # Vector store
42
  vector_store = FAISS.from_documents(texts, embeddings)
43
 
44
+ # Load model with memory optimization
45
  tokenizer = AutoTokenizer.from_pretrained(
46
  MODEL_NAME,
47
  trust_remote_code=True,
 
52
  MODEL_NAME,
53
  trust_remote_code=True,
54
  torch_dtype=torch.float16,
55
+ device_map="auto",
56
+ low_cpu_mem_usage=True
57
  )
58
 
59
  return vector_store, model, tokenizer
 
61
  try:
62
  vector_store, model, tokenizer = initialize_system()
63
  print("✅ System initialized successfully")
64
+ print(f"Memory usage: {torch.cuda.memory_allocated()/1024**3:.1f}GB") if torch.cuda.is_available() else None
65
  except Exception as e:
66
  print(f"❌ Initialization failed: {str(e)}")
67
  raise
68
 
69
  def generate_response(query):
70
  try:
71
+ # Context retrieval
72
+ docs = vector_store.similarity_search(query, k=3)
73
  context = "\n".join([d.page_content for d in docs])
74
 
75
+ # Optimized prompt
76
  prompt = f"""<|system|>
77
+ You are a customer service expert. Answer using:
78
+ {context}
79
+ - Be concise (2-3 sentences)
80
+ - If information is missing: "Let me check with the team"
81
+ </s>
82
  <|user|>{query}</s>
83
  <|assistant|>"""
84
 
85
+ inputs = tokenizer(prompt, return_tensors="pt")
86
  outputs = model.generate(
87
+ inputs.input_ids,
88
+ max_new_tokens=300,
89
+ temperature=0.3,
90
+ do_sample=True,
91
+ pad_token_id=tokenizer.eos_token_id
92
  )
93
 
94
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
95
+ return response.split("<|assistant|>")[-1].strip()
96
 
97
  except Exception as e:
98
  return "Please try again later."
99
 
100
# Enhanced interface: a conversation pane, an input row with a send button,
# and a button that clears both the input box and the conversation.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Enterprise Customer Support")
    with gr.Row():
        chatbot = gr.Chatbot(height=500, label="Conversation")
    with gr.Row():
        msg = gr.Textbox(placeholder="Ask about our services...", scale=7)
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    clear = gr.ClearButton([msg, chatbot])

    def respond(message, history):
        """Handle one chat turn.

        Args:
            message: Text currently in the input box.
            history: List of (user, bot) pairs shown in the chatbot; may be
                empty or None before the first turn.

        Returns:
            A pair ``("", history)``: the empty string clears the input box
            and ``history`` is the updated conversation.
        """
        history = history if history is not None else []
        # Skip blank submissions: they would otherwise trigger a full
        # retrieval + generation pass and add an empty turn to the chat.
        if not message or not message.strip():
            return "", history
        response = generate_response(message)
        history.append((message, response))
        return "", history

    # Both the Send button and pressing Enter in the textbox run the same turn.
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch(server_port=7860)