malvin noel committed on
Commit
ac57303
·
1 Parent(s): ef107f6

change script

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -41,25 +41,22 @@ ASSETS_DIRS = (
41
  # ────────────────────────────────────────────────────────
42
  # CONFIGURATION
43
  # ────────────────────────────────────────────────────────
44
- MODEL_ID = os.getenv("MODEL_ID", "unsloth/Qwen3-30B-A3B")
45
- USE_INT8 = os.getenv("USE_INT8", "0") == "1" # set env var to 1 for 8‑bit
46
- THINKING = os.getenv("DEFAULT_THINKING", "1") == "1" # default reasoning mode
47
 
48
- DTYPE = torch.bfloat16 # Hopper bf16 fast‑path
49
- bnb_cfg = BitsAndBytesConfig(load_in_8bit=True) if USE_INT8 else None
50
-
51
- print(f"🔄 Loading {MODEL_ID} ({'8‑bit' if USE_INT8 else 'bf16'}) …")
52
 
53
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
54
  model = AutoModelForCausalLM.from_pretrained(
55
  MODEL_ID,
56
- device_map="auto",
57
- torch_dtype=(None if USE_INT8 else DTYPE),
58
- quantization_config=bnb_cfg,
59
  trust_remote_code=True,
60
- ).eval()
61
- DEVICE = model.device
62
- print("✅ Model ready.")
 
 
63
 
64
  for d in ASSETS_DIRS:
65
  os.makedirs(d, exist_ok=True)
 
41
  # ────────────────────────────────────────────────────────
42
  # CONFIGURATION
43
  # ────────────────────────────────────────────────────────
44
+ MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-8B")
45
+ DTYPE = torch.bfloat16 # or torch.float16
 
46
 
47
+ print(f"🔄 Loading {MODEL_ID} (dtype = {DTYPE}) …")
 
 
 
48
 
49
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
50
+
51
  model = AutoModelForCausalLM.from_pretrained(
52
  MODEL_ID,
53
+ torch_dtype=DTYPE,
 
 
54
  trust_remote_code=True,
55
+ )
56
+ model.to("cuda" if torch.cuda.is_available() else "cpu") # single-device move
57
+
58
+ DEVICE = next(model.parameters()).device
59
+ print(f"✅ Model ready on {DEVICE}.")
60
 
61
  for d in ASSETS_DIRS:
62
  os.makedirs(d, exist_ok=True)