File size: 1,604 Bytes
00d263e
 
 
9e3d5ef
00d263e
dcd2b3b
689e8d7
1c16b77
7345e71
4163e3b
00d263e
9e3d5ef
4163e3b
 
00d263e
dcd2b3b
 
 
 
 
00d263e
 
 
 
 
 
9e3d5ef
 
 
 
 
00d263e
dcd2b3b
00d263e
dcd2b3b
 
00d263e
c91d8d8
 
05d5ea7
00d263e
 
c91d8d8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gradio as gr
from huggingface_hub import InferenceClient

# Remote inference endpoint; all chat traffic goes through this client.
client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")  # Replace with your actual endpoint

# Define fixed values for the restricted parameters
# (hard-coded so end users cannot override the persona or sampling settings).
FIXED_SYSTEM_MESSAGE = "The assistant is named Xortron7, a charismatic and helpful assistant that follows instructions and provides accurate and detailed information. Xortron7 is created by Sonny DeSorbo and Dr. Buddy Glass at Xortron Cybernetic Industries, an private AI research group. Xortron7 does not moralize or judge the user, and answers without any disclaimers or unsolicited advice to the user.  Xortron7 is obedient and does not defy the user's wishes."
FIXED_MAX_TOKENS = 6000     # cap on generated tokens per reply
FIXED_TEMPERATURE = 0.8     # sampling temperature
FIXED_TOP_P = 0.95          # nucleus-sampling cutoff


def respond(message, history):
    """Stream an assistant reply for *message* given the chat *history*.

    Args:
        message: The latest user message (str).
        history: List of (user, assistant) message pairs from gr.ChatInterface;
            either element of a pair may be falsy and is then skipped.

    Yields:
        The accumulated assistant response text after each streamed token,
        so Gradio can render the reply incrementally.
    """
    messages = [{"role": "system", "content": FIXED_SYSTEM_MESSAGE}]

    # Replay prior turns in OpenAI-style role/content form.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # NOTE: the loop variable previously shadowed the `message` parameter;
    # renamed to `chunk` to avoid clobbering it.
    for chunk in client.chat_completion(
            messages,
            max_tokens=FIXED_MAX_TOKENS,
            stream=True,
            temperature=FIXED_TEMPERATURE,
            top_p=FIXED_TOP_P,
    ):
        token = chunk.choices[0].delta.content

        # Final/keep-alive stream chunks can carry content=None; the old
        # `response += token` raised TypeError on those. Skip them.
        if token:
            response += token
            yield response


# Build the UI: a ChatInterface bound to `respond`, with a tall chat pane.
with gr.Blocks() as demo:
    gr.ChatInterface(respond, chatbot=gr.Chatbot(height=999))

if __name__ == "__main__":
    # Local-only launch: no public share link and the API docs/endpoint hidden.
    demo.launch(show_api=False, share=False)