File size: 1,853 Bytes
9cfe82c
 
ee43960
9cfe82c
ed6f3c7
9cfe82c
 
ed6f3c7
 
d88b860
ed6f3c7
 
 
d88b860
ed6f3c7
 
4b4b581
 
ed6f3c7
 
d88b860
ed6f3c7
 
 
ee43960
ed6f3c7
d88b860
ed6f3c7
d88b860
9cfe82c
 
59414a8
 
ed6f3c7
 
9cfe82c
ed6f3c7
d88b860
ee43960
ed6f3c7
9cfe82c
ed6f3c7
9cfe82c
 
 
ed6f3c7
4df4931
9cfe82c
4b4b581
9cfe82c
 
ee43960
a32839b
9cfe82c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import gradio as gr
from huggingface_hub import InferenceClient
from collections import defaultdict

# Hugging Face inference client bound to the model this app talks to.
_MODEL_ID = "Futuresony/future_ai_12_10_2024.gguf"
client = InferenceClient(_MODEL_ID)

# Chat transcript store: each key maps to a list of (user_message, ai_response)
# tuples; an unseen key starts out as an empty list.
session_histories = defaultdict(list)

def format_chat_history(history):
    """Render past turns as a plain-text transcript.

    Args:
        history: Iterable of ``(user_msg, bot_response)`` pairs, oldest first.

    Returns:
        One ``"User: ...\\nAI: ..."`` stanza per pair, newline-separated, with
        leading/trailing whitespace stripped. An empty history yields ``""``.
    """
    # Join once instead of growing a string with += inside the loop.
    transcript = "".join(
        f"User: {user_msg}\nAI: {bot_response}\n"
        for user_msg, bot_response in history
    )
    return transcript.strip()

def _history_to_pairs(history):
    """Normalize the chat history argument into ``(user, ai)`` tuples.

    Accepts pair-style entries (``[user, ai]``) as well as message-style
    dicts carrying ``"role"`` and ``"content"`` keys. ``None`` parts become
    empty strings so they are not rendered as the text ``"None"``.
    """
    def _text(value):
        return "" if value is None else value

    pairs = []
    pending_user = None  # user turn not yet matched with an assistant reply
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = _text(entry.get("content"))
            if role == "user":
                if pending_user is not None:
                    # Two user turns in a row: keep the first with no reply.
                    pairs.append((pending_user, ""))
                pending_user = content
            elif role == "assistant":
                pairs.append((_text(pending_user), content))
                pending_user = None
            # Entries with any other role are not part of the transcript.
        else:
            user_msg, bot_response = entry
            pairs.append((_text(user_msg), _text(bot_response)))
    if pending_user is not None:
        pairs.append((pending_user, ""))
    return pairs


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the AI reply for one chat turn.

    The prompt is the system message, the formatted prior turns, and the new
    user message followed by an ``AI:`` cue.

    Prior turns come from the ``history`` argument itself. Keying a global
    store on ``id(history)`` is not a stable session identifier: ``id`` is
    only unique while the object is alive, so a lookup could miss the
    conversation or, when an id is reused, return a different one. The store
    also grew without bound.

    Args:
        message: The new user message.
        history: Prior turns of this conversation, as pairs or role/content
            dicts (see ``_history_to_pairs``).
        system_message: Text placed at the top of the prompt.
        max_tokens: Forwarded as ``max_new_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Returns:
        The generated text with surrounding whitespace stripped.
    """
    formatted_history = format_chat_history(_history_to_pairs(history))
    full_input = f"{system_message}\n\n{formatted_history}\nUser: {message}\nAI:"

    response = client.text_generation(
        full_input,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    return response.strip()

# Gradio chat UI. The extra widgets below are handed to `respond` as
# additional inputs, in the order they appear in the list.
_system_message_box = gr.Textbox(
    value="You are an AI assistant that remembers previous conversations.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1, maximum=2048, value=250, step=1, label="Max new tokens"
)
_temperature_slider = gr.Slider(
    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
)
_top_p_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
)

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()