kaizen9 commited on
Commit
511336b
·
1 Parent(s): 6220bad
Files changed (1) hide show
  1. app.py +51 -108
app.py CHANGED
@@ -1,120 +1,63 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
- from typing import List, Dict, Optional
4
 
5
- # Your endpoint root (no trailing /v1 here; the client adds it for chat)
6
- ENDPOINT_URL = "https://x6leavj4hgm2fdyx.us-east-2.aws.endpoints.huggingface.cloud/v1/"
7
- def respond(
8
- user_msg: str,
9
- history: List[Dict[str, str]],
10
- system_message: str,
11
- max_tokens: int,
12
- temperature: float,
13
- top_p: float,
14
- hf_token: Optional[gr.OAuthToken], # from LoginButton (kept)
15
- pat_override: str, # NEW: user-pasted PAT (password field)
16
- ):
17
- """
18
- Use PAT override if provided; otherwise fall back to LoginButton token.
19
- NOTE: OAuth token from LoginButton usually lacks `inference.endpoints.infer.write`,
20
- so for Inference Endpoints you almost always need to paste a PAT here.
21
- """
22
 
23
- # Choose a token: prefer user-supplied PAT with endpoints write scope
24
- token = pat_override.strip() or (getattr(hf_token, "token", None) if hf_token else None)
25
- if not token:
26
- yield "🔒 Please click **Login** OR paste a **Hugging Face PAT** with `inference.endpoints.infer.write`."
27
- return
28
 
29
- client = InferenceClient(base_url=ENDPOINT_URL, token=token)
 
 
 
 
 
 
 
 
30
 
31
- # Build messages (OpenAI-style)
32
- messages = []
33
- if system_message:
34
- messages.append({"role": "system", "content": system_message})
35
- messages.extend(history or [])
36
- messages.append({"role": "user", "content": user_msg})
 
 
 
 
37
 
38
- # Try OpenAI-compatible chat route first: /v1/chat/completions
39
- try:
40
- out = ""
41
- for chunk in client.chat_completion(
42
- messages=messages,
43
- max_tokens=max_tokens,
44
- temperature=temperature,
45
- top_p=top_p,
46
- stream=True,
47
- ):
48
- tok = ""
49
- if getattr(chunk, "choices", None) and getattr(chunk.choices[0], "delta", None):
50
- tok = chunk.choices[0].delta.content or ""
51
- out += tok
52
- yield out
53
- return
54
- except Exception as e_chat:
55
- chat_err = str(e_chat)
56
 
57
- # Fallback to plain generation (for non-OpenAI runtimes)
58
- try:
59
- def to_prompt(msgs: List[Dict[str, str]]) -> str:
60
- lines = []
61
- for m in msgs:
62
- role = m.get("role", "user")
63
- content = m.get("content", "")
64
- tag = {"system": "SYSTEM", "user": "USER"}.get(role, "ASSISTANT")
65
- lines.append(f"[{tag}] {content}")
66
- lines.append("[ASSISTANT]")
67
- return "\n".join(lines)
68
 
69
- prompt = to_prompt(messages)
70
- out = ""
71
- for tok in client.text_generation(
72
- prompt,
73
- max_new_tokens=max_tokens,
74
- temperature=temperature,
75
- top_p=top_p,
76
- stream=True,
77
- return_full_text=False,
78
- ):
79
- piece = getattr(tok, "token", tok)
80
- if isinstance(piece, dict) and "text" in piece:
81
- piece = piece["text"]
82
- out += str(piece)
83
- yield out
84
- except Exception as e_gen:
85
- yield (
86
- "❗ Endpoint call failed.\n\n"
87
- f"• Chat API error: {chat_err}\n"
88
- f"• Text-generation fallback error: {e_gen}\n\n"
89
- "Most likely cause: the token used does NOT have `inference.endpoints.infer.write`.\n"
90
- "Paste a PAT with that scope in the sidebar."
91
- )
92
 
93
- # --- UI ---
94
- chat = gr.ChatInterface(
95
- respond,
96
- type="messages",
97
- additional_inputs=[
98
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
99
- gr.Slider(1, 4096, value=512, step=1, label="Max new tokens"),
100
- gr.Slider(0.0, 4.0, value=0.7, step=0.1, label="Temperature"),
101
- gr.Slider(0.0, 1.0, value=0.95, step=0.05, label="Top-p"),
102
- # NEW: secure PAT override
103
- gr.Textbox(value="", label="HF PAT (with `inference.endpoints.infer.write`)", type="password"),
104
- ],
105
- )
106
-
107
- with gr.Blocks() as demo:
108
- with gr.Sidebar():
109
- gr.Markdown("### Hugging Face Login (optional)")
110
- gr.LoginButton()
111
- gr.Markdown(
112
- "**Important:** Inference Endpoints require a PAT with\n"
113
- "`inference.endpoints.infer.write`. The Login token usually does **not** have this.\n"
114
- "Paste a PAT in the password field if you see 403 errors."
115
- )
116
- gr.Markdown(f"**Endpoint**: `{ENDPOINT_URL}`")
117
- chat.render()
118
 
119
  if __name__ == "__main__":
120
  demo.launch()
 
1
import os

import gradio as gr
from openai import OpenAI

# Endpoint root and access token come from the HF Space secrets.
# BASE is normalized (no trailing slash) so f"{BASE}/v1" is always well-formed.
BASE = os.getenv("HF_ENDPOINT_URL", "").rstrip("/")
API_KEY = os.getenv("HF_TOKEN")
MODEL_ID = "kaizen9/qsft_30_6000_v2"

# Fail fast with an actionable message. Without this, an empty BASE produces
# base_url="/v1" (opaque connection errors on the first chat), and a missing
# API_KEY makes OpenAI() raise a misleading error about OPENAI_API_KEY.
if not BASE or not API_KEY:
    raise RuntimeError(
        "Missing configuration: set HF_ENDPOINT_URL and HF_TOKEN "
        "as Space secrets before launching."
    )

# OpenAI-compatible client pointed at the Inference Endpoint's /v1 route.
client = OpenAI(
    base_url=f"{BASE}/v1",
    api_key=API_KEY,
)
 
14
 
15
def build_messages(history, user_msg, system_msg):
    """Convert Gradio pair-style history into an OpenAI chat message list.

    Args:
        history: Sequence of (user_turn, assistant_turn) pairs, or None.
            Falsy entries in a pair are skipped (e.g. a pending reply).
        user_msg: The new user message appended last.
        system_msg: Optional system prompt; blank/None adds no system message.

    Returns:
        List of {"role": ..., "content": ...} dicts in conversation order.
    """
    msgs = []
    # Guard against None as well as blank strings (original crashed on None).
    if system_msg and system_msg.strip():
        msgs.append({"role": "system", "content": system_msg.strip()})
    # `history or []` tolerates None on the first turn.
    for user_turn, assistant_turn in history or []:
        if user_turn:
            msgs.append({"role": "user", "content": user_turn})
        if assistant_turn:
            msgs.append({"role": "assistant", "content": assistant_turn})
    msgs.append({"role": "user", "content": user_msg})
    return msgs
24
 
25
def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
    """Stream a chat completion from the endpoint, yielding the growing reply.

    Gradio ChatInterface handler: `message`/`history` come from the chat box,
    the remaining args from `additional_inputs`. Yields the accumulated
    assistant text after each streamed chunk so the UI updates incrementally.
    """
    stream = client.chat.completions.create(
        model=MODEL_ID,
        messages=build_messages(history, message, system_message),
        # Sliders may hand back ints/strings depending on Gradio version;
        # coerce explicitly so the API always gets the expected types.
        temperature=float(temperature),
        top_p=float(top_p),
        max_tokens=int(max_tokens),
        stream=True,
    )

    reply = ""
    for chunk in stream:
        delta = chunk.choices[0].delta
        # Only yield when new content arrived — role-only / empty deltas
        # would otherwise flash an empty assistant message in the UI.
        if delta and delta.content:
            reply += delta.content
            yield reply
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
with gr.Blocks() as demo:
    gr.Markdown("# QSFT Chat UI")

    # Controls wired into the chat handler via `additional_inputs`;
    # ChatInterface renders them beneath the chat box.
    system_box = gr.Textbox(
        label="System prompt",
        value="You are a helpful assistant.",
        lines=2,
    )
    temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
    topp = gr.Slider(0.0, 1.0, 0.95, step=0.01, label="Top-p")
    maxt = gr.Slider(16, 4096, 512, step=16, label="Max tokens")

    # NOTE(review): the original passed retry_btn=True / undo_btn=True.
    # Those parameters expected `str | None` in Gradio 4.x and were removed
    # in Gradio 5, so passing them raises TypeError — dropped here.
    gr.ChatInterface(
        fn=chat_fn,
        additional_inputs=[system_box, temp, topp, maxt],
    )

if __name__ == "__main__":
    demo.launch()