Spaces:

random2222
/

tryagain

Build error

App Files Community

random2222 committed on Apr 13

Commit

2d88065

verified ·

1 Parent(s): 2ae3591

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -22

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# app.py (CPU-optimized)
 import gradio as gr
 import os
 import torch
@@ -23,8 +22,8 @@ def initialize_system():
                 if f.endswith(".pdf")]
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500,  # Smaller chunks for CPU
-        chunk_overlap=50
     )
     texts = []
@@ -42,7 +41,7 @@ def initialize_system():
     # Vector store
     vector_store = FAISS.from_documents(texts, embeddings)
-    # Load model without quantization
     tokenizer = AutoTokenizer.from_pretrained(
         MODEL_NAME,
         trust_remote_code=True,
@@ -53,7 +52,8 @@ def initialize_system():
         MODEL_NAME,
         trust_remote_code=True,
         torch_dtype=torch.float16,
-        device_map="cpu"  # Force CPU
     )
     return vector_store, model, tokenizer
@@ -61,46 +61,59 @@ def initialize_system():
 try:
     vector_store, model, tokenizer = initialize_system()
     print("✅ System initialized successfully")
 except Exception as e:
     print(f"❌ Initialization failed: {str(e)}")
     raise
 def generate_response(query):
     try:
-        docs = vector_store.similarity_search(query, k=1)  # Less context
         context = "\n".join([d.page_content for d in docs])
         prompt = f"""<|system|>
-        Answer using: {context}
-        - Max 1 sentence
-        - If unsure: "I'll check with the team"</s>
         <|user|>{query}</s>
         <|assistant|>"""
-        inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
         outputs = model.generate(
-            **inputs,
-            max_new_tokens=100,
-            temperature=0.1
         )
-        return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
     except Exception as e:
         return "Please try again later."
-# Simplified interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Customer Service Chatbot")
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="Your question")
-    clear = gr.ClearButton([msg, chatbot])
     def respond(message, history):
         response = generate_response(message)
         history.append((message, response))
         return "", history
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
-demo.launch()

 import gradio as gr
 import os
 import torch
                 if f.endswith(".pdf")]
     text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,  # Increased chunk size for better context
+        chunk_overlap=200
     )
     texts = []
     # Vector store
     vector_store = FAISS.from_documents(texts, embeddings)
+    # Load model with memory optimization
     tokenizer = AutoTokenizer.from_pretrained(
         MODEL_NAME,
         trust_remote_code=True,
         MODEL_NAME,
         trust_remote_code=True,
         torch_dtype=torch.float16,
+        device_map="auto",
+        low_cpu_mem_usage=True
     )
     return vector_store, model, tokenizer
 try:
     vector_store, model, tokenizer = initialize_system()
     print("✅ System initialized successfully")
+    print(f"Memory usage: {torch.cuda.memory_allocated()/1024**3:.1f}GB") if torch.cuda.is_available() else None
 except Exception as e:
     print(f"❌ Initialization failed: {str(e)}")
     raise
 def generate_response(query):
     try:
+        # Context retrieval
+        docs = vector_store.similarity_search(query, k=3)
         context = "\n".join([d.page_content for d in docs])
+        # Optimized prompt
         prompt = f"""<|system|>
+        You are a customer service expert. Answer using:
+        {context}
+        - Be concise (2-3 sentences)
+        - If information is missing: "Let me check with the team"
+        </s>
         <|user|>{query}</s>
         <|assistant|>"""
+        inputs = tokenizer(prompt, return_tensors="pt")
         outputs = model.generate(
+            inputs.input_ids,
+            max_new_tokens=300,
+            temperature=0.3,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
         )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response.split("<|assistant|>")[-1].strip()
     except Exception as e:
         return "Please try again later."
+# Enhanced interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Enterprise Customer Support")
+    with gr.Row():
+        chatbot = gr.Chatbot(height=500, label="Conversation")
+    with gr.Row():
+        msg = gr.Textbox(placeholder="Ask about our services...", scale=7)
+        submit_btn = gr.Button("Send", variant="primary", scale=1)
+    clear = gr.ClearButton([msg, chatbot])
     def respond(message, history):
         response = generate_response(message)
         history.append((message, response))
         return "", history
+    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
+demo.launch(server_port=7860)