Spaces:

wifix199
/

LumiVoice

Running

File size: 2,315 Bytes

f152a90
b207a62
c75f1ad
c088d8d
b207a62
c088d8d
b207a62
 
c088d8d
 
 
 
 
 
b90bc2b
64b5005
b207a62
 
 
 
 
 
 
 
 
 
c088d8d
 
 
 
 
b207a62
 
 
 
 
c088d8d
 
 
 
 
 
 
 
 
 
 
 
 
 
b207a62
 
 
 
 
c088d8d
b207a62
 
c088d8d
 
b207a62
 
 
 
c088d8d
b207a62
 
f152a90
c088d8d
 
f152a90
 
b207a62

import gradio as gr
from huggingface_hub import InferenceClient
import os

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/en/guides/inference
"""

# Retrieve the Hugging Face token; fail fast at import time when it is missing.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("Please set the HF_TOKEN environment variable with your Hugging Face API token.")

# InferenceClient expects a Hub repo id ("<namespace>/<name>") or a full
# endpoint URL. The "models/" prefix is a URL path segment of the hosted API,
# not part of the repo id, so it must not be included here.
# NOTE(review): chat_completion() is normally used with a chat/instruct-tuned
# checkpoint — confirm this base model is the intended one.
client = InferenceClient("meta-llama/Llama-3.2-1B", token=hf_token)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply for ``message`` using the module-level ``client``.

    Builds a role/content message list (system message, prior turns, then the
    new user message), requests a streamed chat completion, and yields the
    reply text accumulated so far after each received chunk.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; a
            falsy half of a pair is skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Generation limit, forwarded as ``max_tokens``.
        temperature: Sampling temperature, forwarded unchanged.
        top_p: Nucleus-sampling threshold, forwarded unchanged.

    Yields:
        The reply accumulated so far. If the request or the stream raises,
        a single ``"Error during inference: ..."`` string is yielded instead.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_input, assistant_response in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_response:
            messages.append({"role": "assistant", "content": assistant_response})

    messages.append({"role": "user", "content": message})

    response = ""

    try:
        stream = client.chat_completion(
            messages=messages,
            # chat_completion() takes `max_tokens`; passing `max_new_tokens`
            # raises TypeError (unexpected keyword argument).
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for chunk in stream:
            # Some chunks may carry no choices at all; nothing to append then.
            if not chunk.choices:
                continue
            # Streamed text lives on choices[0].delta.content and may be None.
            token = chunk.choices[0].delta.content
            response += token or ""
            yield response
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        yield f"Error during inference: {e}"

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI: `respond` receives the message and history, followed by the values
# of the additional inputs below in this order (system message, max tokens,
# temperature, top-p).
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.01, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.01,
            label="Top-p (nucleus sampling)",
        ),
    ],
    # Title/description name the model the client is actually configured with
    # (Llama 3.2 1B); they previously said "Llama 2".
    title="Chat with Llama 3.2",
    description="A chat interface using the Llama 3.2 1B model via the Hugging Face Inference API.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()