random2222 committed (verified)
Commit 91b268b · 1 Parent(s): 8ca4c0d

Update app.py

Files changed (1):
  1. app.py  +50 -51
app.py CHANGED
Before:

import gradio as gr
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Configuration
DOCS_DIR = "business_docs"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME = "microsoft/phi-2"

# Initialize system components
def initialize_system():
    # Load and process PDFs
    if not os.path.exists(DOCS_DIR):
        raise FileNotFoundError(f"'{DOCS_DIR}' folder not found")

    pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR)
                 if f.endswith(".pdf")]

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )

    texts = []
    for pdf in pdf_files:
        loader = PyPDFLoader(pdf)
        pages = loader.load_and_split(text_splitter)
        texts.extend(pages)

    # Create vector store
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    vector_store = FAISS.from_documents(texts, embeddings)

    # Load Phi-2 model with 4-bit quantization
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        device_map="auto",
        load_in_4bit=True
    )

    return vector_store, model, tokenizer

try:
    vector_store, model, tokenizer = initialize_system()
    print("System ready with business documents loaded")
except Exception as e:
    raise RuntimeError(f"Initialization failed: {str(e)}")

# Response generation
def generate_response(query):
    # Retrieve relevant context
    docs = vector_store.similarity_search(query, k=3)
    context = "\n".join([doc.page_content for doc in docs])

    # Create custom prompt template
    prompt = f"""Instruct: Answer the customer's question using only the provided context.
If you don't know the answer, say 'I need to check with our team about that.'

Context: {context}

Question: {query}

Answer:"""

    # Generate response
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.2,
        repetition_penalty=1.2,
        do_sample=True
    )

    # Decode and clean response
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = full_text.split("Answer:")[-1].strip()
    return answer.split("\n\n")[0]  # Return first paragraph

# Chat interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Customer Care Assistant")
    gr.Markdown("Ask questions about our products/services")

    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Type your question here...")
    clear = gr.Button("Clear History")

    def respond(message, chat_history):
        try:
            response = generate_response(message)
            if not response:
                response = "I need to verify that information. Please contact [email protected]"
        except Exception as e:
            response = "Apologies, I'm experiencing technical difficulties. Please try again later."

        chat_history.append((message, response))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(server_name="0.0.0.0", server_port=7860)
 
After:

import gradio as gr
import os
import torch  # needed below for torch_dtype=torch.float16
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer

# Configuration
DOCS_DIR = "business_docs"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MODEL_NAME = "microsoft/phi-2"

def initialize_system():
    # Verify documents
    if not os.path.exists(DOCS_DIR):
        raise FileNotFoundError(f"Missing {DOCS_DIR} folder")

    pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR)
                 if f.endswith(".pdf")]
    if not pdf_files:
        raise ValueError(f"No PDFs found in {DOCS_DIR}")

    # Process documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,  # Reduced for Phi-2's context window
        chunk_overlap=100
    )

    texts = []
    for pdf in pdf_files:
        loader = PyPDFLoader(pdf)
        pages = loader.load_and_split(text_splitter)
        texts.extend(pages)

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},  # Force CPU for compatibility
        encode_kwargs={'normalize_embeddings': False}
    )

    # Create vector store
    vector_store = FAISS.from_documents(texts, embeddings)

    # Load Phi-2 with 4-bit quantization
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        device_map="auto",
        load_in_4bit=True,
        torch_dtype=torch.float16
    )

    return vector_store, model, tokenizer
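
# Build the document index and the model once at startup so the chat callback can reuse them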
try:
    vector_store, model, tokenizer = initialize_system()
    print("System initialized successfully")
except Exception as e:
    raise RuntimeError(f"Initialization error: {str(e)}")

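# Answer queries with retrieval-augmented generation over the indexed PDFs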
def generate_response(query):
    # Retrieve context
    docs = vector_store.similarity_search(query, k=2)  # Fewer docs for Phi-2
    context = "\n".join([d.page_content for d in docs])

    # Phi-2 specific prompt format
    prompt = f"""Question: {query}
Context: {context}
Instructions:
- Answer only using the context
- Keep responses under 3 sentences
- If unsure, say "I'll need to check with the team"

Answer:"""
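
    # Tokenize the prompt and sample a short, low-temperature completion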
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("Answer:")[-1].strip()

# Simplified Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Customer Service Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your question")
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, history):
        try:
            response = generate_response(message)
        except Exception as e:
            response = "I'm having trouble answering that right now. Please try again later."
        # gr.Chatbot expects the full message history, not a bare string
        history.append((message, response))
        return history

    msg.submit(respond, [msg, chatbot], chatbot)
    msg.submit(lambda: "", None, msg)

demo.launch(server_port=7860)
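
Note on the quantized load: newer transformers releases deprecate (and eventually reject) passing load_in_4bit directly to from_pretrained, expecting an explicit BitsAndBytesConfig instead. If initialization fails on that argument, a minimal sketch of the equivalent call, assuming bitsandbytes is installed and a CUDA device is available, is:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Explicit 4-bit quantization config, equivalent to load_in_4bit=True
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # matches the torch_dtype used above
)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)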
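For a quick sanity check outside the UI, the retrieval-plus-generation path can be called directly once the module-level initialization has run; the query string here is a hypothetical example:

# Hypothetical smoke test; any question covered by the PDFs in business_docs works
print(generate_response("What is your refund policy?"))

This assumes the Space's dependencies include gradio, langchain, langchain-community, pypdf, faiss-cpu, sentence-transformers, transformers, accelerate, bitsandbytes, and torch; the actual requirements.txt is not shown in this commit.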