Avinash109 committed on
Commit 3d4f049 · verified · 1 Parent(s): c036bc9

Update app.py

Files changed (1): app.py +10 -3
app.py CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import datetime
 
@@ -20,12 +20,19 @@ st.session_state.setdefault('messages', [])
 @st.cache_resource
 def load_model():
     model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with the correct model path
+
+    # Define BitsAndBytesConfig for 8-bit quantization
+    quantization_config = BitsAndBytesConfig(
+        load_in_8bit=True,  # Enable 8-bit loading
+        llm_int8_enable_fp32_cpu_offload=True  # Optional: enables offloading to CPU
+    )
+
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
+        quantization_config=quantization_config,
         torch_dtype=torch.float16,
-        device_map="auto",
-        load_in_8bit=True  # Optional: Use if supported for reduced memory usage
+        device_map="auto"
     )
     return tokenizer, model
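The change moves 8-bit loading out of the from_pretrained() kwargs and into an explicit BitsAndBytesConfig, the form recent transformers releases expect, and enables optional fp32 CPU offload. For context, below is a minimal sketch of how the cached load_model() might be consumed in the app's chat loop (the hunk header shows st.session_state.setdefault('messages', []) already exists). The generate_reply helper, the st.chat_input prompt text, and max_new_tokens=512 are illustrative assumptions, not part of this commit; the sketch assumes app.py's existing imports (streamlit as st).

# Sketch only: a plausible consumer of load_model(); generate_reply and its
# defaults are assumptions for illustration, not code from this commit.
def generate_reply(tokenizer, model, messages, max_new_tokens=512):
    # Qwen2.5-Instruct ships a chat template; render the history into a prompt.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens so only the newly generated reply is decoded.
    reply_ids = output_ids[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)

tokenizer, model = load_model()
if user_input := st.chat_input("Message the model"):  # hypothetical prompt text
    st.session_state.messages.append({"role": "user", "content": user_input})
    reply = generate_reply(tokenizer, model, st.session_state.messages)
    st.session_state.messages.append({"role": "assistant", "content": reply})

One design note on the diff itself: with load_in_8bit=True, torch_dtype=torch.float16 should only govern the modules bitsandbytes leaves unquantized (such as layer norms); the linear weights are stored as int8, which is what reduces the 32B model's memory footprint.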