random2222 committed
Commit 2d8c319 · verified · 1 Parent(s): 569e45d

Update app.py

Files changed (1):
    app.py  +76 -86
app.py CHANGED
@@ -1,119 +1,109 @@
- import os
  import gradio as gr
- import torch
- from langchain_community.document_loaders import PyPDFLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain_community.vectorstores import FAISS
- from transformers import AutoModelForCausalLM, AutoTokenizer

  # Configuration
  DOCS_DIR = "business_docs"
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
- MODEL_NAME = "microsoft/phi-3-mini-4k-instruct"  # CPU-optimized model

- # System Initialization
  def initialize_system():
-     # Validate documents folder
      if not os.path.exists(DOCS_DIR):
-         raise FileNotFoundError(f"Missing documents folder: {DOCS_DIR}")

-     # Process PDFs
-     pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.endswith(".pdf")]
-     if not pdf_files:
-         raise ValueError(f"No PDFs found in {DOCS_DIR}")
-
      text_splitter = RecursiveCharacterTextSplitter(
-         chunk_size=512,  # Optimized for CPU
-         chunk_overlap=50
      )

-     documents = []
-     for pdf_path in pdf_files:
-         try:
-             loader = PyPDFLoader(pdf_path)
-             documents.extend(loader.load_and_split(text_splitter))
-         except Exception as e:
-             print(f"Error processing {pdf_path}: {str(e)}")

-     # Create embeddings
-     embeddings = HuggingFaceEmbeddings(
-         model_name=EMBEDDING_MODEL,
-         model_kwargs={'device': 'cpu'},
-         encode_kwargs={'normalize_embeddings': True}
-     )
-
-     vector_store = FAISS.from_documents(documents, embeddings)

-     # Load CPU-optimized model
-     try:
-         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-         model = AutoModelForCausalLM.from_pretrained(
-             MODEL_NAME,
-             trust_remote_code=True,
-             torch_dtype=torch.float32,
-             device_map="cpu"
-         )
-     except Exception as e:
-         raise RuntimeError(f"Model loading failed: {str(e)}")

      return vector_store, model, tokenizer

- # Initialize system
  try:
      vector_store, model, tokenizer = initialize_system()
-     print("System ready with business documents")
  except Exception as e:
-     print(f"Initialization failed: {str(e)}")
-     raise

- # Response Generation
  def generate_response(query):
-     try:
-         # Context retrieval
-         docs = vector_store.similarity_search(query, k=2)
-         context = "\n".join([d.page_content for d in docs])
-
-         # Phi-3 prompt template
-         prompt = f"""<|system|>
- Answer ONLY using the business documents. Respond to unknown queries with: "This information is not available in our current documentation."
-
- Context: {context}</s>
- <|user|>
- {query}</s>
- <|assistant|>
- """
-
-         # Generate response
-         inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
-         outputs = model.generate(
-             inputs.input_ids,
-             max_new_tokens=200,
-             temperature=0.1,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id
-         )
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return response.split("<|assistant|>")[-1].strip()

-     except Exception as e:
-         return f"Error: Please try again. ({str(e)[:50]})"

- # Gradio Interface
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 📚 Business Documentation Assistant")

-     chatbot = gr.Chatbot(height=300)
-     msg = gr.Textbox(placeholder="Ask about our services...", label="")
      clear = gr.Button("Clear History")

-     def respond(message, history):
-         response = generate_response(message)
-         history.append((message, response))
-         return "", history

      msg.submit(respond, [msg, chatbot], [msg, chatbot])
      clear.click(lambda: None, None, chatbot, queue=False)

- if __name__ == "__main__":
-     demo.launch(server_name="0.0.0.0", server_port=7860)

  import gradio as gr
+ import os
+ from langchain.document_loaders import PyPDFLoader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

  # Configuration
  DOCS_DIR = "business_docs"
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+ MODEL_NAME = "microsoft/phi-2"

+ # Initialize system components
  def initialize_system():
+     # Load and process PDFs
      if not os.path.exists(DOCS_DIR):
+         raise FileNotFoundError(f"'{DOCS_DIR}' folder not found")
+
+     pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR)
+                  if f.endswith(".pdf")]

      text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=1000,
+         chunk_overlap=200
      )

+     texts = []
+     for pdf in pdf_files:
+         loader = PyPDFLoader(pdf)
+         pages = loader.load_and_split(text_splitter)
+         texts.extend(pages)

+     # Create vector store
+     embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+     vector_store = FAISS.from_documents(texts, embeddings)

+     # Load Phi-2 model with 4-bit quantization
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+     model = AutoModelForCausalLM.from_pretrained(
+         MODEL_NAME,
+         trust_remote_code=True,
+         device_map="auto",
+         load_in_4bit=True
+     )

      return vector_store, model, tokenizer

  try:
      vector_store, model, tokenizer = initialize_system()
+     print("System ready with business documents loaded")
  except Exception as e:
+     raise RuntimeError(f"Initialization failed: {str(e)}")

+ # Response generation
  def generate_response(query):
+     # Retrieve relevant context
+     docs = vector_store.similarity_search(query, k=3)
+     context = "\n".join([doc.page_content for doc in docs])
+
+     # Create custom prompt template
+     prompt = f"""Instruct: Answer the customer's question using only the provided context.
+ If you don't know the answer, say 'I need to check with our team about that.'
+
+ Context: {context}
+
+ Question: {query}
+
+ Answer:"""

+     # Generate response
+     inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=300,
+         temperature=0.2,
+         repetition_penalty=1.2,
+         do_sample=True
+     )
+
+     # Decode and clean response
+     full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     answer = full_text.split("Answer:")[-1].strip()
+     return answer.split("\n\n")[0]  # Return first paragraph

+ # Chat interface
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# Customer Care Assistant")
+     gr.Markdown("Ask questions about our products/services")

+     chatbot = gr.Chatbot(height=400)
+     msg = gr.Textbox(label="Type your question here...")
      clear = gr.Button("Clear History")

+     def respond(message, chat_history):
+         try:
+             response = generate_response(message)
+             if not response:
+                 response = "I need to verify that information. Please contact [email protected]"
+         except Exception as e:
+             response = "Apologies, I'm experiencing technical difficulties. Please try again later."
+
+         chat_history.append((message, response))
+         return "", chat_history

      msg.submit(respond, [msg, chatbot], [msg, chatbot])
      clear.click(lambda: None, None, chatbot, queue=False)

+ demo.launch(server_name="0.0.0.0", server_port=7860)
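
A note on the import paths in the updated version: the bare `langchain.document_loaders`, `langchain.embeddings`, and `langchain.vectorstores` modules only resolve on older langchain releases; langchain 0.2 moved these classes into the separate `langchain_community` package, which is what the removed version imported. A minimal sketch of the newer paths, assuming `langchain>=0.2` with `langchain-community` and `pypdf` installed:

```python
# Sketch: post-0.2 import locations (assumes the langchain-community and
# pypdf packages are installed; older langchain releases keep the bare
# `langchain.*` paths used in the diff above).
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
```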
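
Similarly, passing `load_in_4bit=True` directly to `from_pretrained` works on older transformers releases but is deprecated in newer ones, and 4-bit loading requires the `bitsandbytes` package plus a CUDA GPU; on a CPU-only Space the call fails, and a plain float32 load (as in the removed version) is the fallback. A sketch of the newer `BitsAndBytesConfig` route, under those assumptions:

```python
# Sketch: 4-bit quantized loading via BitsAndBytesConfig (assumes a recent
# transformers release, the bitsandbytes package, and a CUDA-capable GPU).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "microsoft/phi-2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype for the 4-bit layers
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)
```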
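
Finally, `TextStreamer` is imported but never used. If the intent was to stream tokens to the container log as they are generated, a minimal sketch of wiring it up, reusing the `model`, `tokenizer`, and `inputs` names from `generate_response`:

```python
# Sketch: print tokens to stdout as they are generated (hypothetical wiring;
# the committed code decodes the full output after generation instead).
from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
outputs = model.generate(**inputs, max_new_tokens=300, streamer=streamer)
```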