asadsandhu committed
Commit 0619f86 · 1 Parent(s): ff04da1
Files changed (1)
  1. app.py +8 -5
app.py CHANGED
@@ -30,12 +30,14 @@ bnb_config = BitsAndBytesConfig(
 )
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 generation_model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    quantization_config=bnb_config
-)
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    device_map="auto" if torch.cuda.is_available() else None,
+    quantization_config=bnb_config if torch.cuda.is_available() else None
+).to(device)
 
 # ----------------------
 # RAG Functions
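Read back as one block, the new loading path is sketched below. This is a minimal reconstruction, not the full app.py: the model_id string and the bnb_config contents are placeholders standing in for values defined earlier in the file. One hedged deviation from the diff: the sketch calls .to(device) only on the CPU path, since .to() on a bitsandbytes-quantized or device_map-placed model is redundant on GPU and can raise an error in recent transformers releases.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "some/causal-lm-checkpoint"              # placeholder; app.py defines its own
bnb_config = BitsAndBytesConfig(load_in_4bit=True)  # placeholder; built earlier in app.py

has_cuda = torch.cuda.is_available()
device = torch.device("cuda" if has_cuda else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_id)

generation_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if has_cuda else torch.float32,  # half precision only on GPU
    device_map="auto" if has_cuda else None,                   # let accelerate place shards on GPU
    quantization_config=bnb_config if has_cuda else None,      # bitsandbytes requires CUDA
)
if not has_cuda:
    # Explicit move only on the CPU path; a quantized or device-mapped
    # model is already placed, and .to() on it can raise.
    generation_model = generation_model.to(device)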
@@ -69,7 +71,8 @@ You are a medical assistant trained on clinical reasoning data. Given the follow
     return prompt
 
 def generate_local_answer(prompt, max_new_tokens=512):
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     output = generation_model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
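A matching sketch of the device-safe generation helper follows; the decoding and return step is an assumption, since the hunk is truncated at max_new_tokens.

def generate_local_answer(prompt, max_new_tokens=512):
    # Resolve the device at call time, mirroring the commit, so the helper
    # works whether or not a GPU was visible when the module loaded.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    output = generation_model.generate(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
    )
    # Assumed decoding step: the diff cuts off before the function returns.
    return tokenizer.decode(output[0], skip_special_tokens=True)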