random2222 committed on
Commit 7804d65 · verified · 1 Parent(s): 92e169f

Update app.py

Files changed (1):
  app.py +28 -41
app.py CHANGED
@@ -5,28 +5,27 @@ from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Configuration
-DOCS_DIR = "business_docs"
+DOCS_DIR = ".business_docs"
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+MODEL_NAME = "microsoft/phi-3-mini-4k-instruct"  # CPU-optimized model
 
 # System Initialization
 def initialize_system():
     # Validate documents folder
     if not os.path.exists(DOCS_DIR):
-        raise FileNotFoundError(f"📁 Missing business documents folder: {DOCS_DIR}")
+        raise FileNotFoundError(f"Missing documents folder: {DOCS_DIR}")
 
-    # Load and process PDFs
+    # Process PDFs
     pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.endswith(".pdf")]
     if not pdf_files:
-        raise ValueError(f"❌ No PDFs found in {DOCS_DIR}")
+        raise ValueError(f"No PDFs found in {DOCS_DIR}")
 
-    # Process documents
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=800,  # Reduced for free tier memory
-        chunk_overlap=100
+        chunk_size=512,  # Optimized for CPU
+        chunk_overlap=50
     )
 
     documents = []
@@ -35,57 +34,49 @@ def initialize_system():
             loader = PyPDFLoader(pdf_path)
             documents.extend(loader.load_and_split(text_splitter))
         except Exception as e:
-            print(f"⚠️ Error processing {pdf_path}: {str(e)}")
+            print(f"Error processing {pdf_path}: {str(e)}")
 
-    # Create embeddings with explicit settings
+    # Create embeddings
     embeddings = HuggingFaceEmbeddings(
         model_name=EMBEDDING_MODEL,
         model_kwargs={'device': 'cpu'},
-        encode_kwargs={'normalize_embeddings': True},
-        cache_folder="/tmp/sentence_transformers"
+        encode_kwargs={'normalize_embeddings': True}
     )
 
     vector_store = FAISS.from_documents(documents, embeddings)
 
-    # 4-bit quantization config
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.float16,
-    )
-
-    # Load model with error handling
+    # Load CPU-optimized model
     try:
         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
         model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
-            quantization_config=bnb_config,
-            device_map="auto",
-            trust_remote_code=True
+            trust_remote_code=True,
+            torch_dtype=torch.float32,
+            device_map="cpu"
        )
     except Exception as e:
-        raise RuntimeError(f"🤖 Model loading failed: {str(e)}")
+        raise RuntimeError(f"Model loading failed: {str(e)}")
 
     return vector_store, model, tokenizer
 
 # Initialize system
 try:
     vector_store, model, tokenizer = initialize_system()
-    print("🚀 System ready with business documents")
+    print("✅ System ready with business documents")
 except Exception as e:
-    print(f"🔥 Critical error: {str(e)}")
+    print(f"❌ Initialization failed: {str(e)}")
     raise
 
 # Response Generation
 def generate_response(query):
     try:
         # Context retrieval
-        docs = vector_store.similarity_search(query, k=2)  # Reduced context chunks
+        docs = vector_store.similarity_search(query, k=2)
         context = "\n".join([d.page_content for d in docs])
 
-        # Zephyr prompt template
+        # Phi-3 prompt template
         prompt = f"""<|system|>
-Answer ONLY using the business documents. Unknown answers: "I don't have that information."
+Answer ONLY using the business documents. Respond to unknown queries with: "This information is not available in our current documentation."
 
 Context: {context}</s>
 <|user|>
@@ -94,11 +85,11 @@ def generate_response(query):
 """
 
         # Generate response
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
         outputs = model.generate(
             inputs.input_ids,
-            max_new_tokens=256,  # Reduced for faster responses
-            temperature=0.2,
+            max_new_tokens=200,
+            temperature=0.1,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id
         )
@@ -106,18 +97,14 @@ def generate_response(query):
         return response.split("<|assistant|>")[-1].strip()
 
     except Exception as e:
-        return f"⚠️ Error: Please try again. Details: {str(e)[:100]}"
+        return f"Error: Please try again. ({str(e)[:50]})"
 
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 📞 Business Support Assistant")
-
-    with gr.Row():
-        gr.Image(value="https://placehold.co/100x30?text=Company+Logo", width=100)
-        gr.Markdown("Ask questions about our services and policies")
+    gr.Markdown("# 📚 Business Documentation Assistant")
 
-    chatbot = gr.Chatbot(height=350)
-    msg = gr.Textbox(placeholder="Type your question...", label="")
+    chatbot = gr.Chatbot(height=300)
+    msg = gr.Textbox(placeholder="Ask about our services...", label="")
     clear = gr.Button("Clear History")
 
     def respond(message, history):
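The diff cuts off mid-definition at the respond callback. For orientation only, here is a minimal sketch of how such a callback is commonly wired to the msg, chatbot, and clear components created above; the callback body and the launch call are illustrative assumptions, not part of this commit.

    # Hypothetical sketch (not part of this commit): typical wiring for the
    # chat callback inside the gr.Blocks context.
    def respond(message, history):
        answer = generate_response(message)        # RAG answer from app.py
        return "", history + [(message, answer)]   # clear textbox, append turn

    msg.submit(respond, [msg, chatbot], [msg, chatbot])  # send on Enter
    clear.click(lambda: [], None, chatbot)               # reset chat history

demo.launch()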
 
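One caveat a reviewer might flag: the prompt string still uses Zephyr-style tags (<|system|>, </s>, <|user|>) even though the model is now Phi-3, whose chat template is built around <|user|>/<|assistant|>/<|end|> markers. A more robust pattern, sketched below under the assumptions that transformers >= 4.34 is available and that the model's template accepts a system role, is to let the tokenizer render the model-specific format:

# Alternative sketch (not in the commit): derive the prompt from the model's
# own chat template instead of hard-coding tags.
messages = [
    {"role": "system", "content": "Answer ONLY using the business documents."},
    {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # appends the assistant header for generation
    return_tensors="pt",
)
outputs = model.generate(
    input_ids,
    max_new_tokens=200,
    temperature=0.1,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
# Decode only the newly generated tokens, skipping the prompt.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)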