DesiredName committed on
Commit
51e3565
·
verified ·
1 Parent(s): 9481fa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -1,9 +1,25 @@
1
  from fastapi import FastAPI
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import uvicorn
4
 
5
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
6
- model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  app = FastAPI()
9
 
 
1
  from fastapi import FastAPI
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
  import uvicorn
4
 
5
+ bnb_config = BitsAndBytesConfig(
6
+ load_in_4bit=True, # Enable 4-bit quantization
7
+ bnb_4bit_quant_type="nf4", # Use the 4-bit NormalFloat (NF4) data type
8
+ bnb_4bit_compute_dtype="float16", # Compute in float16 instead of the float32 default (faster)
9
+ bnb_4bit_use_double_quant=True # Nested quantization: also quantize the quantization constants
10
+ )
11
+
12
+ model = AutoModelForCausalLM.from_pretrained(
13
+ "Qwen/Qwen-7B-Chat",
14
+ quantization_config=bnb_config,
15
+ device_map="auto", # Auto-distribute across CPU/GPU
16
+ trust_remote_code=True # Required for Qwen!
17
+ )
18
+
19
+ tokenizer = AutoTokenizer.from_pretrained(
20
+ "Qwen/Qwen-7B-Chat",
21
+ trust_remote_code=True
22
+ )
23
 
24
  app = FastAPI()
25