Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import random
|
@@ -12,7 +13,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
12 |
REPO = "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF"
|
13 |
FILE = "smollm2-1.7b-instruct-q4_k_m.gguf"
|
14 |
|
15 |
-
TIMEOUT =
|
16 |
|
17 |
MAX_SEED = 9007199254740991
|
18 |
|
@@ -68,13 +69,17 @@ def generate(prompt, temperature, top_p, top_k, repetition_penalty, max_tokens,
|
|
68 |
finally:
|
69 |
timer.cancel()
|
70 |
|
|
|
|
|
|
|
|
|
71 |
# Initialize
|
72 |
model_base = "Any"
|
73 |
model_quant = "Any Quant"
|
74 |
|
75 |
with gr.Blocks() as demo:
|
76 |
-
gr.Markdown("# 👁️🗨️
|
77 |
-
gr.Markdown("• ⚡ A text generation inference for
|
78 |
gr.Markdown("• ⚠️ **WARNING!** The inference is very slow due to the model being **HUGE**; it takes about 10 seconds before it starts generating. Please avoid high max token parameters and sending large amounts of text. Note it uses CPU because running it on GPU overloads the model.")
|
79 |
gr.Markdown(f"• 🔗 Link to models: [{model_base}]({model_base}) (BASE), [{model_quant}]({model_quant}) (QUANT)")
|
80 |
|
|
|
1 |
+
# Imports
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
import random
|
|
|
13 |
REPO = "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF"
|
14 |
FILE = "smollm2-1.7b-instruct-q4_k_m.gguf"
|
15 |
|
16 |
+
TIMEOUT = 60
|
17 |
|
18 |
MAX_SEED = 9007199254740991
|
19 |
|
|
|
69 |
finally:
|
70 |
timer.cancel()
|
71 |
|
72 |
+
@spaces.GPU(duration=15)
|
73 |
+
def gpu():
|
74 |
+
return
|
75 |
+
|
76 |
# Initialize
|
77 |
model_base = "Any"
|
78 |
model_quant = "Any Quant"
|
79 |
|
80 |
with gr.Blocks() as demo:
|
81 |
+
gr.Markdown("# 👁️🗨️ LM")
|
82 |
+
gr.Markdown("• ⚡ A text generation inference for any quant models.")
|
83 |
gr.Markdown("• ⚠️ **WARNING!** The inference is very slow due to the model being **HUGE**; it takes about 10 seconds before it starts generating. Please avoid high max token parameters and sending large amounts of text. Note it uses CPU because running it on GPU overloads the model.")
|
84 |
gr.Markdown(f"• 🔗 Link to models: [{model_base}]({model_base}) (BASE), [{model_quant}]({model_quant}) (QUANT)")
|
85 |
|