import gradio as gr
from huggingface_hub import InferenceClient
from datetime import datetime
import os
import uuid
# ---- System Prompt ----
with open("system_prompt.txt", "r") as f:
    SYSTEM_PROMPT = f.read()
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
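# Hugging Face Inference API client for the hosted chat model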
client = InferenceClient(MODEL_NAME)
# ---- Setup logging ----
LOG_DIR = "chat_logs"
os.makedirs(LOG_DIR, exist_ok=True)
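# One session id per app process; every chat in this run appends to the same log file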
session_id = str(uuid.uuid4())
def log_chat(session_id, user_msg, bot_msg):
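    """Append a timestamped user/assistant exchange to this session's log file."""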
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_path = os.path.join(LOG_DIR, f"{session_id}.txt")
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(f"[{timestamp}] User: {user_msg}\n")
        f.write(f"[{timestamp}] Bot: {bot_msg}\n\n")
# ---- Respond Function with Logging ----
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
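    """Rebuild the conversation as an OpenAI-style message list and stream the model's reply."""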
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    response = ""
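    # Stream the completion, yielding the partial response as each token arrives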
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
        yield response
    # Save the full exchange after the stream ends
    log_chat(session_id, message, response)
# ---- Gradio Interface ----
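# additional_inputs map, in order, to the extra parameters of respond()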
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="BoundrAI",
)
if __name__ == "__main__":
    demo.launch()