kaizen9 commited on
Commit
61fc039
·
1 Parent(s): 33f0dc1
Files changed (2) hide show
  1. app.py +19 -18
  2. requirements.txt +2 -0
app.py CHANGED
@@ -2,23 +2,24 @@ import os
2
  import gradio as gr
3
  from openai import OpenAI
4
 
5
- # Pick up secrets from HF Space
6
  BASE = os.getenv("HF_ENDPOINT_URL", "").rstrip("/")
7
  API_KEY = os.getenv("HF_TOKEN")
8
- MODEL_ID = "kaizen9/qsft_30_6000_v2"
9
 
10
- client = OpenAI(
11
- base_url=f"{BASE}/v1",
12
- api_key=API_KEY,
13
- )
14
 
15
  def build_messages(history, user_msg, system_msg):
16
  msgs = []
17
- if system_msg.strip():
18
  msgs.append({"role": "system", "content": system_msg.strip()})
19
  for u, a in history:
20
- if u: msgs.append({"role": "user", "content": u})
21
- if a: msgs.append({"role": "assistant", "content": a})
 
 
22
  msgs.append({"role": "user", "content": user_msg})
23
  return msgs
24
 
@@ -32,7 +33,6 @@ def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
32
  max_tokens=int(max_tokens),
33
  stream=True,
34
  )
35
-
36
  partial = ""
37
  for chunk in stream:
38
  delta = chunk.choices[0].delta
@@ -40,23 +40,24 @@ def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
40
  partial += delta.content
41
  yield partial
42
 
43
- with gr.Blocks() as demo:
44
- gr.Markdown("# QSFT Chat UI")
45
-
46
  system_box = gr.Textbox(
47
  label="System prompt",
48
  value="You are a helpful assistant.",
49
  lines=2,
50
  )
51
- temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
52
- topp = gr.Slider(0.0, 1.0, 0.95, step=0.01, label="Top-p")
53
- maxt = gr.Slider(16, 4096, 512, step=16, label="Max tokens")
 
54
 
55
  gr.ChatInterface(
56
  fn=chat_fn,
57
  additional_inputs=[system_box, temp, topp, maxt],
58
- retry_btn=True,
59
- undo_btn=True,
 
60
  )
61
 
62
  if __name__ == "__main__":
 
2
import gradio as gr
from openai import OpenAI

# Endpoint configuration is injected via the Space's environment / secrets.
BASE = os.getenv("HF_ENDPOINT_URL", "").rstrip("/")
API_KEY = os.getenv("HF_TOKEN")
# Model id is overridable through the environment; falls back to the default checkpoint.
MODEL_ID = os.getenv("MODEL_ID", "kaizen9/qsft_30_6000_v2")

# Fail fast at import time with an actionable message when secrets are missing.
if not (BASE and API_KEY):
    raise RuntimeError("Set HF_ENDPOINT_URL and HF_TOKEN in Settings → Repository secrets.")

# One shared client; the endpoint exposes an OpenAI-compatible /v1 API.
client = OpenAI(base_url=f"{BASE}/v1", api_key=API_KEY)
13
 
14
def build_messages(history, user_msg, system_msg):
    """Assemble an OpenAI chat ``messages`` list from the Gradio state.

    ``history`` is a sequence of (user, assistant) turn pairs; empty/None
    entries are dropped. A non-blank ``system_msg`` is stripped and placed
    first, and ``user_msg`` always becomes the final user turn.
    """
    conversation = []
    prompt = system_msg.strip() if system_msg else ""
    if prompt:
        conversation.append({"role": "system", "content": prompt})
    for user_turn, assistant_turn in history:
        conversation.extend(
            {"role": role, "content": text}
            for role, text in (("user", user_turn), ("assistant", assistant_turn))
            if text
        )
    conversation.append({"role": "user", "content": user_msg})
    return conversation
25
 
 
33
  max_tokens=int(max_tokens),
34
  stream=True,
35
  )
 
36
  partial = ""
37
  for chunk in stream:
38
  delta = chunk.choices[0].delta
 
40
  partial += delta.content
41
  yield partial
42
 
43
with gr.Blocks(title="QSFT Chat") as demo:
    # Page header.
    gr.Markdown("# QSFT Chat\nTalk to your HF Inference Endpoint via OpenAI /v1.")

    # System prompt plus sampling controls; wired into the chat as extra inputs.
    system_box = gr.Textbox(
        lines=2,
        value="You are a helpful assistant.",
        label="System prompt",
    )
    with gr.Row():
        temp = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature")
        topp = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.01, label="Top-p")
        maxt = gr.Slider(minimum=16, maximum=4096, value=512, step=16, label="Max tokens")

    # Streaming chat surface; chat_fn yields partial completions.
    gr.ChatInterface(
        fn=chat_fn,
        additional_inputs=[system_box, temp, topp, maxt],
        submit_btn="Send",
        stop_btn="Stop",
        multimodal=False,
    )
62
 
63
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio>=4.44.0
2
  openai>=1.40.0
 
 
 
1
  gradio>=4.44.0
2
  openai>=1.40.0
3
+ httpx>=0.27.0
4
+