import gradio as gr
from huggingface_hub import InferenceClient
from datetime import datetime
import os
import uuid

# ---- System Prompt ----
with open("system_prompt.txt", "r", encoding="utf-8") as f:
    SYSTEM_PROMPT = f.read()

MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(MODEL_NAME)
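
# Depending on the model and your account, the serverless Inference API may
# require an access token; InferenceClient falls back to a cached login or
# the HF_TOKEN environment variable when no token is passed explicitly.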

# ---- Setup logging ----
LOG_DIR = "chat_logs"
os.makedirs(LOG_DIR, exist_ok=True)
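# Note: this id is generated once at import time, so every conversation served
# by this process appends to the same log file. Per-user logging would need an
# id created per request (e.g. held in gr.State).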
session_id = str(uuid.uuid4())

def log_chat(session_id, user_msg, bot_msg):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_path = os.path.join(LOG_DIR, f"{session_id}.txt")
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(f"[{timestamp}] User: {user_msg}\n")
        f.write(f"[{timestamp}] Bot: {bot_msg}\n\n")

# ---- Respond Function with Logging ----
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

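    # History arrives as (user, assistant) pairs in ChatInterface's tuple-style
    # history format; replay prior turns so the model sees the full conversation.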
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

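    # Stream the completion, yielding the accumulated text after each token so
    # the Gradio UI renders the reply as it is generated.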
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

    # Save full message after stream ends
    log_chat(session_id, message, response)

# ---- Gradio Interface ----
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="BoundrAI"
)

if __name__ == "__main__":
    demo.launch()
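
# A minimal sketch of exercising respond() directly, assuming the Inference
# API is reachable; the generator yields progressively longer partial replies:
#
#   reply = ""
#   for partial in respond("Hello!", [], SYSTEM_PROMPT, 256, 0.7, 0.95):
#       reply = partial
#   print(reply)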