dasomaru committed on
Commit
576739c
·
verified ·
1 Parent(s): 336f693

Update generator/llm_inference.py

Browse files
Files changed (1) hide show
  1. generator/llm_inference.py +30 -27
generator/llm_inference.py CHANGED
@@ -1,27 +1,30 @@
1
- from transformers import pipeline
2
-
3
- # 1. ๋ชจ๋ธ ๋กœ๋“œ (์ตœ์ดˆ 1๋ฒˆ๋งŒ ๋กœ๋“œ๋จ)
4
- generator = pipeline(
5
- "text-generation",
6
- model="dasomaru/gemma-3-4bit-it-demo", # ๋„ค๊ฐ€ ์—…๋กœ๋“œํ•œ ๋ชจ๋ธ ์ด๋ฆ„
7
- tokenizer="dasomaru/gemma-3-4bit-it-demo",
8
- device=0, # CUDA:0 ์‚ฌ์šฉ (GPU). CPU๋งŒ ์žˆ์œผ๋ฉด device=-1
9
- max_new_tokens=512,
10
- temperature=0.7,
11
- top_p=0.9,
12
- repetition_penalty=1.1
13
- )
14
-
15
- # 2. ๋‹ต๋ณ€ ์ƒ์„ฑ ํ•จ์ˆ˜
16
- def generate_answer(prompt: str) -> str:
17
- """
18
- ์ž…๋ ฅ๋ฐ›์€ ํ”„๋กฌํ”„ํŠธ๋กœ๋ถ€ํ„ฐ ๋ชจ๋ธ์ด ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•œ๋‹ค.
19
- """
20
- print(f"๐Ÿ”ต Prompt Length: {len(prompt)} characters") # ์ถ”๊ฐ€!
21
- outputs = generator(
22
- prompt,
23
- do_sample=True,
24
- top_k=50,
25
- num_return_sequences=1
26
- )
27
- return outputs[0]["generated_text"].strip()
 
 
 
 
1
+ from transformers import pipeline
2
+ import spaces
3
+
4
# 1. Load the model once at import time (module-level side effect; the
#    pipeline is constructed a single time when this module is first imported).
# NOTE(review): device=0 pins CUDA:0 — on a CPU-only host this will fail;
#    use device=-1 there. On ZeroGPU Spaces the GPU is typically only
#    available inside @spaces.GPU-decorated functions — confirm this
#    import-time device pin actually works in that environment.
generator = pipeline(
    "text-generation",
    model="dasomaru/gemma-3-4bit-it-demo",       # model repo uploaded by the author
    tokenizer="dasomaru/gemma-3-4bit-it-demo",   # same repo supplies the tokenizer
    device=0,               # use CUDA:0 (GPU); set device=-1 if only CPU is available
    max_new_tokens=512,     # default generation length cap for every call
    temperature=0.7,        # sampling defaults baked into the pipeline
    top_p=0.9,
    repetition_penalty=1.1
)
15
+
16
# 2. Answer-generation entry point.
@spaces.GPU(duration=300)
def generate_answer(prompt: str) -> str:
    """Run the module-level text-generation pipeline on *prompt*.

    Samples one sequence (top-k 50) and returns the pipeline's
    ``generated_text`` field with surrounding whitespace stripped.
    """
    # Debug aid: log how large the incoming prompt is.
    print(f"๐Ÿ”ต Prompt Length: {len(prompt)} characters")
    sampling_kwargs = {
        "do_sample": True,
        "top_k": 50,
        "num_return_sequences": 1,
    }
    result = generator(prompt, **sampling_kwargs)
    first = result[0]
    return first["generated_text"].strip()
30
+