random2222 committed on
Commit 4d6816c · verified · 1 Parent(s): 2d88065

Update app.py

Files changed (1)
  1. app.py +27 -28
app.py CHANGED
```diff
@@ -22,7 +22,7 @@ def initialize_system():
              if f.endswith(".pdf")]
 
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,  # Increased chunk size for better context
+        chunk_size=1000,
         chunk_overlap=200
     )
 
```
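For context: RecursiveCharacterTextSplitter comes from LangChain, and these two settings control how the PDF text is chunked before embedding. A minimal sketch of their effect (assuming the app's existing LangChain dependency):

```python
# Standalone sketch of the chunking settings above (assumes LangChain).
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_text("lorem ipsum dolor sit amet " * 300)

print(len(chunks))     # number of chunks produced
print(len(chunks[0]))  # each chunk holds at most ~1000 characters
# Consecutive chunks share up to 200 characters, so a passage that
# straddles a chunk boundary is still retrievable from either side.
```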
```diff
@@ -41,17 +41,14 @@ def initialize_system():
     # Vector store
     vector_store = FAISS.from_documents(texts, embeddings)
 
-    # Load model with memory optimization
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_NAME,
-        trust_remote_code=True,
-        padding_side="left"
-    )
+    # Load model and tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    tokenizer.pad_token = tokenizer.eos_token  # Fix padding issue
 
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         trust_remote_code=True,
-        torch_dtype=torch.float16,
+        torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
         device_map="auto",
         low_cpu_mem_usage=True
     )
```
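Two details in this hunk matter. Phi-2-style tokenizers ship without a pad token, so batched tokenization with padding enabled raises an error until one is assigned, and float16 weights only pay off on a GPU, which is why the dtype is now chosen per device. A small sketch (the model name is an assumption taken from the Phi-2 comment later in the diff; the commit never shows MODEL_NAME's value):

```python
# Sketch only; "microsoft/phi-2" is assumed, the diff never shows MODEL_NAME.
import torch
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
print(tok.pad_token)           # None: padding=True would raise a ValueError
tok.pad_token = tok.eos_token  # the fix this commit applies

batch = tok(["hi", "a much longer question"], padding=True, return_tensors="pt")
print(batch.input_ids.shape)   # the shorter sequence is now padded cleanly

# The dtype choice mirrors the diff: half precision on GPU, full on CPU.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
```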
```diff
@@ -61,7 +58,11 @@ def initialize_system():
 try:
     vector_store, model, tokenizer = initialize_system()
     print("✅ System initialized successfully")
-    print(f"Memory usage: {torch.cuda.memory_allocated()/1024**3:.1f}GB") if torch.cuda.is_available() else None
+    if torch.cuda.is_available():
+        print("🚀 Using CUDA")
+        print(f"Memory usage: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
+    else:
+        print("🧠 Using CPU")
 except Exception as e:
     print(f"❌ Initialization failed: {str(e)}")
     raise
```
```diff
@@ -71,18 +72,15 @@ def generate_response(query):
         # Context retrieval
         docs = vector_store.similarity_search(query, k=3)
         context = "\n".join([d.page_content for d in docs])
+
+        # Prompt template optimized for Phi-2
+        prompt = f"""Context:
+        {context}
+
+        Question: {query}
+        Answer:"""
 
-        # Optimized prompt
-        prompt = f"""<|system|>
-        You are a customer service expert. Answer using:
-        {context}
-        - Be concise (2-3 sentences)
-        - If information is missing: "Let me check with the team"
-        </s>
-        <|user|>{query}</s>
-        <|assistant|>"""
-
-        inputs = tokenizer(prompt, return_tensors="pt")
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs.input_ids,
             max_new_tokens=300,
```
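Because the pad token now aliases EOS, passing the attention mask and pad_token_id explicitly at generation time avoids the usual Transformers padding warning. A hedged continuation of the sketch above (the generation kwargs beyond max_new_tokens are illustrative, the diff elides them):

```python
# Continuation of the loading sketch; kwargs beyond max_new_tokens are
# illustrative, since the diff hides the remaining generate() arguments.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,  # explicit, since pad == eos
    max_new_tokens=300,
    pad_token_id=tokenizer.eos_token_id,   # silences the pad-token warning
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response.split("Answer:")[-1].strip())  # keep only the completion
```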
```diff
@@ -92,20 +90,21 @@ def generate_response(query):
         )
 
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response.split("<|assistant|>")[-1].strip()
+        return response.split("Answer:")[-1].strip()
 
     except Exception as e:
-        return "Please try again later."
+        return "Sorry, an error occurred while generating a response."
 
-# Enhanced interface
+# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Enterprise Customer Support")
-    with gr.Row():
-        chatbot = gr.Chatbot(height=500, label="Conversation")
+    gr.Markdown("# 🧠 Enterprise Customer Support Chatbot")
+
+    chatbot = gr.Chatbot(height=500, label="Conversation")
+
     with gr.Row():
         msg = gr.Textbox(placeholder="Ask about our services...", scale=7)
         submit_btn = gr.Button("Send", variant="primary", scale=1)
-
+
     clear = gr.ClearButton([msg, chatbot])
 
     def respond(message, history):
```
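The body of respond sits between these hunks and is not shown. A hypothetical body consistent with the [msg, chatbot] wiring below (not the committed code):

```python
# Hypothetical respond() matching the click/submit signatures below;
# the committed body is hidden between the last two hunks.
def respond(message, history):
    answer = generate_response(message)
    history = history + [(message, answer)]
    return "", history  # clear the textbox, append the exchange to the chat
```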
```diff
@@ -116,4 +115,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
 
-demo.launch(server_port=7860)
+demo.launch(server_port=7860)
```
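Note on the port: 7860 is the port Hugging Face Spaces expects a Gradio app to serve on, so launching with server_port=7860 keeps the same file working both locally (http://localhost:7860) and as a Space.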
 