random2222 committed on
Commit 7804d65 · verified · 1 Parent(s): 92e169f

Update app.py

Files changed (1):
  app.py +28 -41
app.py CHANGED
@@ -5,28 +5,27 @@ from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Configuration
-DOCS_DIR = "business_docs"
+DOCS_DIR = ".business_docs"
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+MODEL_NAME = "microsoft/phi-3-mini-4k-instruct"  # CPU-optimized model
 
 # System Initialization
 def initialize_system():
     # Validate documents folder
     if not os.path.exists(DOCS_DIR):
-        raise FileNotFoundError(f"📁 Missing business documents folder: {DOCS_DIR}")
+        raise FileNotFoundError(f"Missing documents folder: {DOCS_DIR}")
 
-    # Load and process PDFs
+    # Process PDFs
     pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.endswith(".pdf")]
     if not pdf_files:
-        raise ValueError(f"❌ No PDFs found in {DOCS_DIR}")
+        raise ValueError(f"No PDFs found in {DOCS_DIR}")
 
-    # Process documents
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=800,  # Reduced for free tier memory
-        chunk_overlap=100
+        chunk_size=512,  # Optimized for CPU
+        chunk_overlap=50
     )
 
     documents = []
@@ -35,57 +34,49 @@ def initialize_system():
             loader = PyPDFLoader(pdf_path)
             documents.extend(loader.load_and_split(text_splitter))
         except Exception as e:
-            print(f"⚠️ Error processing {pdf_path}: {str(e)}")
+            print(f"Error processing {pdf_path}: {str(e)}")
 
-    # Create embeddings with explicit settings
+    # Create embeddings
     embeddings = HuggingFaceEmbeddings(
         model_name=EMBEDDING_MODEL,
         model_kwargs={'device': 'cpu'},
-        encode_kwargs={'normalize_embeddings': True},
-        cache_folder="/tmp/sentence_transformers"
+        encode_kwargs={'normalize_embeddings': True}
     )
 
     vector_store = FAISS.from_documents(documents, embeddings)
 
-    # 4-bit quantization config
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.float16,
-    )
-
-    # Load model with error handling
+    # Load CPU-optimized model
     try:
         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
         model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
-            quantization_config=bnb_config,
-            device_map="auto",
-            trust_remote_code=True
+            trust_remote_code=True,
+            torch_dtype=torch.float32,
+            device_map="cpu"
        )
     except Exception as e:
-        raise RuntimeError(f"🤖 Model loading failed: {str(e)}")
+        raise RuntimeError(f"Model loading failed: {str(e)}")
 
     return vector_store, model, tokenizer
 
 # Initialize system
 try:
     vector_store, model, tokenizer = initialize_system()
-    print("🚀 System ready with business documents")
+    print("✅ System ready with business documents")
 except Exception as e:
-    print(f"🔥 Critical error: {str(e)}")
+    print(f"❌ Initialization failed: {str(e)}")
     raise
 
 # Response Generation
 def generate_response(query):
     try:
         # Context retrieval
-        docs = vector_store.similarity_search(query, k=2)  # Reduced context chunks
+        docs = vector_store.similarity_search(query, k=2)
         context = "\n".join([d.page_content for d in docs])
 
-        # Zephyr prompt template
+        # Phi-3 prompt template
         prompt = f"""<|system|>
-Answer ONLY using the business documents. Unknown answers: "I don't have that information."
+Answer ONLY using the business documents. Respond to unknown queries with: "This information is not available in our current documentation."
 
 Context: {context}</s>
 <|user|>
@@ -94,11 +85,11 @@ def generate_response(query):
 """
 
         # Generate response
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
         outputs = model.generate(
             inputs.input_ids,
-            max_new_tokens=256,  # Reduced for faster responses
-            temperature=0.2,
+            max_new_tokens=200,
+            temperature=0.1,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id
         )
@@ -106,18 +97,14 @@ def generate_response(query):
         return response.split("<|assistant|>")[-1].strip()
 
     except Exception as e:
-        return f"⚠️ Error: Please try again. Details: {str(e)[:100]}"
+        return f"Error: Please try again. ({str(e)[:50]})"
 
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 📞 Business Support Assistant")
-
-    with gr.Row():
-        gr.Image(value="https://placehold.co/100x30?text=Company+Logo", width=100)
-        gr.Markdown("Ask questions about our services and policies")
+    gr.Markdown("# 📚 Business Documentation Assistant")
 
-    chatbot = gr.Chatbot(height=350)
-    msg = gr.Textbox(placeholder="Type your question...", label="")
+    chatbot = gr.Chatbot(height=300)
+    msg = gr.Textbox(placeholder="Ask about our services...", label="")
     clear = gr.Button("Clear History")
 
     def respond(message, history):
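The diff cuts off mid-definition at the respond callback. For orientation only, here is a minimal sketch of how such a callback is commonly wired to the msg, chatbot, and clear components created above; the callback body and the launch call are illustrative assumptions, not part of this commit.

    # Hypothetical sketch (not part of this commit): typical wiring for the
    # chat callback inside the gr.Blocks context.
    def respond(message, history):
        answer = generate_response(message)        # RAG answer from app.py
        return "", history + [(message, answer)]   # clear textbox, append turn

    msg.submit(respond, [msg, chatbot], [msg, chatbot])  # send on Enter
    clear.click(lambda: [], None, chatbot)               # reset chat history

demo.launch()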
 
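One caveat a reviewer might flag: the prompt string still uses Zephyr-style tags (<|system|>, </s>, <|user|>) even though the model is now Phi-3, whose chat template is built around <|user|>/<|assistant|>/<|end|> markers. A more robust pattern, sketched below under the assumptions that transformers >= 4.34 is available and that the model's template accepts a system role, is to let the tokenizer render the model-specific format:

# Alternative sketch (not in the commit): derive the prompt from the model's
# own chat template instead of hard-coding tags.
messages = [
    {"role": "system", "content": "Answer ONLY using the business documents."},
    {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # appends the assistant header for generation
    return_tensors="pt",
)
outputs = model.generate(
    input_ids,
    max_new_tokens=200,
    temperature=0.1,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
# Decode only the newly generated tokens, skipping the prompt.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)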