Spaces:

Avinash109
/

qwen2.5

Sleeping

Avinash109 committed on Nov 12, 2024

Commit

3d4f049

verified ·

1 Parent(s): c036bc9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import datetime
@@ -20,12 +20,19 @@ st.session_state.setdefault('messages', [])
 @st.cache_resource
 def load_model():
     model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with the correct model path
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16,
-        device_map="auto",
-        load_in_8bit=True  # Optional: Use if supported for reduced memory usage
     )
     return tokenizer, model

 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import datetime
 @st.cache_resource
 def load_model():
     model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"  # Replace with the correct model path
+    # Define BitsAndBytesConfig for 8-bit quantization
+    quantization_config = BitsAndBytesConfig(
+        load_in_8bit=True,                        # Enable 8-bit loading
+        llm_int8_enable_fp32_cpu_offload=True     # Optional: Enables offloading to CPU
+    )
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
+        quantization_config=quantization_config,
         torch_dtype=torch.float16,
+        device_map="auto"
     )
     return tokenizer, model