dasomaru committed on
Commit
f0633ef
·
verified ·
1 Parent(s): a91f908

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -19,13 +19,16 @@ model = AutoModelForCausalLM.from_pretrained(
19
  @spaces.GPU(duration=300)
20
  def generate_response(query):
21
  # 🚀 generate_response 함수 안에서 매번 로드
22
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
23
- model = AutoModelForCausalLM.from_pretrained(
24
- model_name,
25
- torch_dtype=torch.float16,
26
- device_map="auto", # ✅ 중요: 자동으로 GPU 할당
27
- trust_remote_code=True,
28
- )
 
 
 
29
 
30
  # 1. 검색
31
  top_k = 5
 
19
  @spaces.GPU(duration=300)
20
  def generate_response(query):
21
  # 🚀 generate_response 함수 안에서 매번 로드
22
+ # tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
23
+ # model = AutoModelForCausalLM.from_pretrained(
24
+ # model_name,
25
+ # torch_dtype=torch.float16,
26
+ # device_map="auto", # ✅ 중요: 자동으로 GPU 할당
27
+ # trust_remote_code=True,
28
+ # )
29
+ tokenizer = AutoTokenizer.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
30
+ model = AutoModelForCausalLM.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
31
+ model.to("cuda")
32
 
33
  # 1. 검색
34
  top_k = 5