Staticaliza committed on
Commit
eef102c
·
verified ·
1 Parent(s): 278edce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import os
3
  import random
@@ -12,7 +13,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
12
  REPO = "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF"
13
  FILE = "smollm2-1.7b-instruct-q4_k_m.gguf"
14
 
15
- TIMEOUT = 30
16
 
17
  MAX_SEED = 9007199254740991
18
 
@@ -68,13 +69,17 @@ def generate(prompt, temperature, top_p, top_k, repetition_penalty, max_tokens,
68
  finally:
69
  timer.cancel()
70
 
 
 
 
 
71
  # Initialize
72
  model_base = "Any"
73
  model_quant = "Any Quant"
74
 
75
  with gr.Blocks() as demo:
76
- gr.Markdown("# 👁️‍🗨️ WizardLM")
77
- gr.Markdown("• ⚡ A text generation inference for one of the best open-source text models: WizardLM-2-8x22B.")
78
  gr.Markdown("• ⚠️ **WARNING!** The inference is very slow due to the model being **HUGE**; it takes about 10 seconds before it starts generating. Please avoid high max token parameters and sending large amounts of text. Note it uses CPU because running it on GPU overloads the model.")
79
  gr.Markdown(f"• 🔗 Link to models: [{model_base}]({model_base}) (BASE), [{model_quant}]({model_quant}) (QUANT)")
80
 
 
1
+ # Imports
2
  import gradio as gr
3
  import os
4
  import random
 
13
  REPO = "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF"
14
  FILE = "smollm2-1.7b-instruct-q4_k_m.gguf"
15
 
16
+ TIMEOUT = 60
17
 
18
  MAX_SEED = 9007199254740991
19
 
 
69
  finally:
70
  timer.cancel()
71
 
72
+ @spaces.GPU(duration=15)
73
+ def gpu():
74
+ return
75
+
76
  # Initialize
77
  model_base = "Any"
78
  model_quant = "Any Quant"
79
 
80
  with gr.Blocks() as demo:
81
+ gr.Markdown("# 👁️‍🗨️ LM")
82
+ gr.Markdown("• ⚡ A text generation inference for any quant models.")
83
  gr.Markdown("• ⚠️ **WARNING!** The inference is very slow due to the model being **HUGE**; it takes about 10 seconds before it starts generating. Please avoid high max token parameters and sending large amounts of text. Note it uses CPU because running it on GPU overloads the model.")
84
  gr.Markdown(f"• 🔗 Link to models: [{model_base}]({model_base}) (BASE), [{model_quant}]({model_quant}) (QUANT)")
85