amiguel committed on
Commit 1836de9 · verified · 1 Parent(s): 3472886

Update app.py

Files changed (1):
  app.py +48 -73
app.py CHANGED
@@ -16,38 +16,35 @@ except ImportError:
         "Please install it using: `pip install peft`"
     )
 
+# 🔐 Hardcoded Hugging Face Token
+HF_TOKEN = HF_TOKEN  # Replace with your actual token
+
 # Set page configuration
 st.set_page_config(
-    page_title="WizNerd Insp",
+    page_title="Assistente LGT | Angola",
     page_icon="🚀",
     layout="centered"
 )
 
-# Model names
-BASE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2" #"meta-llama/Meta-Llama-3.1-8B-Instruct", "google-bert/bert-base-uncased" #"meta-llama/Meta-Llama-3.1-8B-Instruct" #"HuggingFaceTB/SmolLM2-360M"
+# Model base and options
+BASE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
 MODEL_OPTIONS = {
-    "Full Fine-Tuned": "amiguel/mistral-angolan-laborlaw", #"amiguel/instruct_BERT-base-uncased_model", #"amiguel/playbook_FT",#"amiguel/SmolLM2-360M-concise-reasoning",
+    "Full Fine-Tuned": "amiguel/mistral-angolan-laborlaw",
     "LoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-lora",
-    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora" # Hypothetical, adjust if needed
+    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora"
 }
 
-# Title with rocket emojis
 st.title("🚀 WizNerd Insp 🚀")
 
-# Configure Avatars
 USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
 BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
 
-# Sidebar configuration
+# Sidebar
 with st.sidebar:
-    st.header("Authentication 🔒")
-    hf_token = st.text_input("Hugging Face Token", type="password",
-                             help="Get your token from https://huggingface.co/settings/tokens")
-
     st.header("Model Selection 🤖")
     model_type = st.selectbox("Choose Model Type", list(MODEL_OPTIONS.keys()), index=0)
     selected_model = MODEL_OPTIONS[model_type]
-
+
     st.header("Upload Documents 📂")
     uploaded_file = st.file_uploader(
         "Choose a PDF or XLSX file",
@@ -55,11 +52,11 @@ with st.sidebar:
         label_visibility="collapsed"
     )
 
-# Initialize chat history
+# Session state
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
-# File processing function
+# File processor
 @st.cache_data
 def process_file(uploaded_file):
     if uploaded_file is None:
@@ -76,51 +73,42 @@ def process_file(uploaded_file):
         st.error(f"📄 Error processing file: {str(e)}")
         return ""
 
-# Model loading function
+# Model loader
 @st.cache_resource
-def load_model(hf_token, model_type, selected_model):
+def load_model(model_type, selected_model):
     try:
-        if not hf_token:
-            st.error("🔐 Authentication required! Please provide a Hugging Face token.")
-            return None
-
-        login(token=hf_token)
-
-        # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, token=hf_token)
-
-        # Load model based on type
+        login(token=HF_TOKEN)
+
+        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, token=HF_TOKEN)
+
         if model_type == "Full Fine-Tuned":
-            # Load full fine-tuned model directly
             model = AutoModelForCausalLM.from_pretrained(
                 selected_model,
                 torch_dtype=torch.bfloat16,
                 device_map="auto",
-                token=hf_token
+                token=HF_TOKEN
             )
         else:
-            # Load base model and apply PEFT adapter
             base_model = AutoModelForCausalLM.from_pretrained(
                 BASE_MODEL_NAME,
                 torch_dtype=torch.bfloat16,
                 device_map="auto",
-                token=hf_token
+                token=HF_TOKEN
             )
             model = PeftModel.from_pretrained(
                 base_model,
                 selected_model,
                 torch_dtype=torch.bfloat16,
-                is_trainable=False, # Inference mode
-                token=hf_token
+                is_trainable=False,
+                token=HF_TOKEN
             )
-
         return model, tokenizer
-
+
     except Exception as e:
        st.error(f"🤖 Model loading failed: {str(e)}")
        return None
 
-# Generation function with KV caching
+# Generation function
 def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
     full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"
 
@@ -147,81 +135,68 @@ def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
     Thread(target=model.generate, kwargs=generation_kwargs).start()
     return streamer
 
-# Display chat messages
+# Display chat history
 for message in st.session_state.messages:
-    try:
-        avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
-        with st.chat_message(message["role"], avatar=avatar):
-            st.markdown(message["content"])
-    except:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-
-# Chat input handling
+    avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
+    with st.chat_message(message["role"], avatar=avatar):
+        st.markdown(message["content"])
+
+# Prompt interaction
 if prompt := st.chat_input("Ask your inspection question..."):
-    if not hf_token:
-        st.error("🔑 Authentication required!")
-        st.stop()
 
-    # Load model if not already loaded or if model type changed
+    # Load model if necessary
     if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
-        model_data = load_model(hf_token, model_type, selected_model)
+        model_data = load_model(model_type, selected_model)
         if model_data is None:
-            st.error("Failed to load model. Please check your token and try again.")
+            st.error("Failed to load model.")
            st.stop()
-
+
        st.session_state.model, st.session_state.tokenizer = model_data
        st.session_state.model_type = model_type
-
+
    model = st.session_state.model
    tokenizer = st.session_state.tokenizer
-
-    # Add user message
+
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
 
-    # Process file
    file_context = process_file(uploaded_file)
-
-    # Generate response with KV caching
+
    if model and tokenizer:
        try:
            with st.chat_message("assistant", avatar=BOT_AVATAR):
                start_time = time.time()
                streamer = generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True)
-
+
                response_container = st.empty()
                full_response = ""
-
+
                for chunk in streamer:
                    cleaned_chunk = chunk.replace("<think>", "").replace("</think>", "").strip()
                    full_response += cleaned_chunk + " "
                    response_container.markdown(full_response + "▌", unsafe_allow_html=True)
-
-                # Calculate performance metrics
+
                end_time = time.time()
                input_tokens = len(tokenizer(prompt)["input_ids"])
                output_tokens = len(tokenizer(full_response)["input_ids"])
                speed = output_tokens / (end_time - start_time)
-
-                # Calculate costs (hypothetical pricing model)
-                input_cost = (input_tokens / 1000000) * 5 # $5 per million input tokens
-                output_cost = (output_tokens / 1000000) * 15 # $15 per million output tokens
+
+                input_cost = (input_tokens / 1_000_000) * 5
+                output_cost = (output_tokens / 1_000_000) * 15
                total_cost_usd = input_cost + output_cost
-                total_cost_aoa = total_cost_usd * 1160 # Convert to AOA (Angolan Kwanza)
-
-                # Display metrics
+                total_cost_aoa = total_cost_usd * 1160
+
                st.caption(
                    f"🔑 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
                    f"🕒 Speed: {speed:.1f}t/s | 💰 Cost (USD): ${total_cost_usd:.4f} | "
                    f"💵 Cost (AOA): {total_cost_aoa:.4f}"
                )
-
+
                response_container.markdown(full_response)
                st.session_state.messages.append({"role": "assistant", "content": full_response})
-
+
        except Exception as e:
            st.error(f"⚡ Generation error: {str(e)}")
        else:
-            st.error("🤖 Model not loaded!")
+            st.error("🤖 Model not loaded!")
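
Note on the new token handling: as committed, `HF_TOKEN = HF_TOKEN` is a self-assignment that raises NameError unless a real value is pasted in first, and hardcoding a live token in app.py would expose it in the repository history. A minimal sketch of a safer pattern, assuming the token is supplied through an environment variable named HF_TOKEN (that name is an assumption, not part of this commit):

import os
import streamlit as st

# Read the token from the environment instead of hardcoding it in source.
# The variable name "HF_TOKEN" is an assumption, not taken from the commit.
HF_TOKEN = os.getenv("HF_TOKEN", "")
if not HF_TOKEN:
    st.error("🔐 No Hugging Face token found. Set the HF_TOKEN environment variable.")
    st.stop()

On a Hugging Face Space, a repository secret is exposed to the app as an environment variable, so this picks the token up without further code changes.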
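The diff elides the middle of `generate_with_kv_cache` (new lines 114-134), but the visible edges (a `generation_kwargs` dict, `Thread(target=model.generate, kwargs=generation_kwargs).start()`, and a returned `streamer` the caller iterates) match the standard `transformers.TextIteratorStreamer` pattern. A sketch under that assumption; `max_new_tokens` and any sampling settings are guesses, not values from the commit:

from threading import Thread
from transformers import TextIteratorStreamer

def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
    full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"

    # The streamer yields decoded text chunks as generate() produces tokens.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=512,   # assumed; not visible in the diff
        use_cache=use_cache,  # reuse the KV cache across decoding steps
        streamer=streamer,
    )

    # Run generation in a background thread so the Streamlit script can
    # iterate the streamer and render chunks as they arrive.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    return streamer

Running generate() on a worker thread is what lets the main script loop over `for chunk in streamer:` and update the chat container incrementally, as the handler in the diff does.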