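# NOTE: hosting-page metadata captured together with this file; kept as
# comments so the module still parses as Python.
# VoiceAIWithDrNovaQuantum
#
# Sleeping
#
# File size: 5,342 Bytes
#
# fbd3a0e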

import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
from huggingface_hub import InferenceClient
import re
from streaming_stt_nemo import Model
import torch
import random
import pandas as pd
from datetime import datetime
import base64
import io
import json

# Language code the speech-to-text engines are keyed by.
default_lang = "en"
# One streaming_stt_nemo Model per language code, built once when the module loads.
engines = {code: Model(code) for code in (default_lang,)}

def transcribe(audio):
    """Transcribe a recorded audio file to text.

    Args:
        audio: The audio file to transcribe, handed straight to the
            engine's ``stt_file`` method.

    Returns:
        The first element of the ``stt_file`` result.
    """
    # Use the shared default language key rather than repeating the "en"
    # literal, so the lookup cannot drift from how ``engines`` is populated.
    model = engines[default_lang]
    return model.stt_file(audio)[0]

# Hugging Face access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Ordered (label keyword, Hub repo id) pairs; the first keyword found in the
# model label decides which repo is used.
_MODEL_REPOS = (
    ("Mixtral", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
    ("Llama", "meta-llama/Meta-Llama-3-8B-Instruct"),
    # NOTE(review): the UI dropdown offers "Mistral 7B v0.3" while this repo
    # is v0.2 -- confirm which version is intended.
    ("Mistral", "mistralai/Mistral-7B-Instruct-v0.2"),
)
# Repo used for "Phi" labels and for any label that matches no keyword above.
_DEFAULT_REPO = "microsoft/Phi-3-mini-4k-instruct"


def client_fn(model):
    """Return an ``InferenceClient`` for the model named by a UI label.

    Args:
        model: Model label (e.g. ``"Mixtral 8x7B"``); matched by substring.

    Returns:
        An ``InferenceClient`` bound to the matching repo, or to the Phi-3
        mini repo when no keyword matches.
    """
    repo_id = next(
        (repo for keyword, repo in _MODEL_REPOS if keyword in model),
        _DEFAULT_REPO,
    )
    # Pass the token read from the environment; it was previously loaded but
    # never used. ``None`` leaves the client's default authentication in place.
    return InferenceClient(repo_id, token=HF_TOKEN)

def randomize_seed_fn(seed: int) -> int:
    """Return a freshly drawn random seed in the inclusive range 0..999999.

    The incoming ``seed`` value is not used; a new one is always drawn.
    """
    return random.randint(0, 999999)

# Prompt prefix placed in front of every user request. It sets the assistant
# persona and ends with the [USER] tag, so the request text follows it directly.
system_instructions1 = """
[SYSTEM] Answer as Real Jarvis JARVIS, Made by 'Tony Stark.' 
Keep conversation friendly, short, clear, and concise. 
Avoid unnecessary introductions and answer the user's questions directly. 
Respond in a normal, conversational manner while being friendly and helpful.
[USER]
"""

# Initialize an empty DataFrame to store the history.
# Module-level state: one row per interaction, holding when it happened,
# the request text and the generated response.
history_df = pd.DataFrame(columns=['Timestamp', 'Request', 'Response'])

def save_history():
    """Write the in-memory chat history to ``chat_history.json`` as a list of records."""
    target = 'chat_history.json'
    history_df.to_json(path_or_buf=target, orient='records')

def load_history():
    """Reload the chat history from ``chat_history.json`` into ``history_df``.

    Returns:
        The refreshed ``history_df``. An empty frame with the
        ``Timestamp``/``Request``/``Response`` columns is used when the file
        is missing, cannot be parsed as JSON, or holds no records.
    """
    global history_df
    columns = ['Timestamp', 'Request', 'Response']
    try:
        # convert_dates=False keeps 'Timestamp' as the string that was saved.
        # By default pandas parses columns whose name starts with "timestamp"
        # into datetimes, which are then written back as epoch numbers on the
        # next save and end up mixed with the string timestamps of new rows.
        loaded = pd.read_json(
            'chat_history.json', orient='records', convert_dates=False
        )
    except (FileNotFoundError, ValueError):
        # ValueError covers an empty or corrupt file; start from a clean history.
        loaded = pd.DataFrame(columns=columns)
    if loaded.empty:
        # A file holding "[]" parses to a frame without columns; restore the schema.
        loaded = pd.DataFrame(columns=columns)
    history_df = loaded
    return history_df

def models(text, model="Mixtral 8x7B", seed=42):
    """Generate the assistant's reply to ``text`` and record the exchange.

    Args:
        text: The user's request; inserted between the system prompt and
            the ``[JARVIS]`` tag.
        model: Model label passed to ``client_fn`` to pick the inference client.
        seed: Passed to ``randomize_seed_fn``; that function's result is the
            seed actually sent with the generation request.

    Returns:
        The generated reply text. The exchange is also appended to
        ``history_df`` and saved via ``save_history``.
    """
    global history_df

    seed = int(randomize_seed_fn(seed))
    client = client_fn(model)

    formatted_prompt = system_instructions1 + text + "[JARVIS]"
    stream = client.text_generation(
        formatted_prompt,
        max_new_tokens=300,
        seed=seed,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Join the streamed token texts in one pass, leaving out "</s>" tokens.
    output = "".join(
        chunk.token.text for chunk in stream if chunk.token.text != "</s>"
    )

    # Add the current interaction to the history DataFrame.
    new_row = pd.DataFrame({
        # Stored as a formatted string rather than a datetime object.
        'Timestamp': [datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
        'Request': [text],
        'Response': [output],
    })
    history_df = pd.concat([history_df, new_row], ignore_index=True)
    save_history()

    return output

async def respond(audio, model, seed):
    """Turn a recorded request into a spoken reply.

    Transcribes ``audio``, generates a reply with ``models`` and has
    ``edge_tts`` write the spoken reply to a temporary file.

    Args:
        audio: The recorded request, passed to ``transcribe``.
        model: Model label forwarded to ``models``.
        seed: Seed value forwarded to ``models``.

    Returns:
        A tuple ``(path, update)``: the path of the synthesized audio file and
        a Gradio update that sets ``interactive=True`` on the target component.
    """
    user = transcribe(audio)
    reply = models(user, model, seed)
    communicate = edge_tts.Communicate(reply)
    # Only reserve a path here; the handle is closed before edge_tts writes to
    # the same path. delete=False keeps the file so it can be returned.
    # NOTE(review): edge_tts presumably writes MP3 data regardless of the
    # suffix -- confirm that the ".wav" extension is intended.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    # gr.update is the component-agnostic form; the per-component
    # gr.Audio.update classmethod is not available in Gradio 4.
    return tmp_path, gr.update(interactive=True)

def display_history():
    """Return the chat history as reloaded by ``load_history`` for the UI table."""
    refreshed = load_history()
    return refreshed

def download_history():
    """Build an HTML download link that embeds the chat history as CSV.

    The CSV text is base64-encoded into a ``data:`` URL, so the link needs
    no separate file on disk.
    """
    # With no target given, to_csv returns the CSV text directly.
    csv_text = history_df.to_csv(index=False)
    encoded = base64.b64encode(csv_text.encode()).decode()
    href = f'data:text/csv;base64,{encoded}'
    anchor = f'<a href="{href}" download="chat_history.csv">Download Chat History</a>'
    return gr.HTML(anchor)

# Markdown/HTML header rendered at the top of the page via gr.Markdown.
DESCRIPTION = """ # <center><b>JARVIS⚡</b></center>
        ### <center>A personal Assistant of Tony Stark for YOU
        ### <center>Voice Chat with your personal Assistant</center>
        """

# Build the voice-chat UI and wire its events.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        select = gr.Dropdown([
            'Mixtral 8x7B',
            'Llama 3 8B',
            'Mistral 7B v0.3',
            'Phi 3 mini',
        ],
        value="Mistral 7B v0.3",
        label="Model"
        )
        # Hidden slider: its value is still passed to the handler as an input.
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=999999,
            step=1,
            value=0,
            visible=False
        )

    input_audio = gr.Audio(label="User", sources="microphone", type="filepath")
    output_audio = gr.Audio(label="AI", type="filepath", autoplay=True)

    # Add a DataFrame to display the history
    history_display = gr.DataFrame(label="Query History")

    # Add a download button for the history
    download_button = gr.Button("Download History")
    download_link = gr.HTML()

    def process_audio(audio, model, seed):
        """Run one voice exchange and return updates for the three outputs.

        Returns values for, in order, the reply audio, the input audio
        component and the history table.
        """
        if audio is None:
            # No recording to process (e.g. the input was cleared): leave all
            # three outputs untouched instead of failing in transcription.
            return gr.update(), gr.update(), gr.update()
        response, audio_update = asyncio.run(respond(audio, model, seed))
        return response, audio_update, display_history()

    input_audio.change(
        fn=process_audio,
        inputs=[input_audio, select, seed],
        outputs=[output_audio, input_audio, history_display]
    )

    # Connect the download button to the download function
    download_button.click(fn=download_history, outputs=[download_link])

    # Load history when the page is refreshed
    demo.load(fn=display_history, outputs=[history_display])

if __name__ == "__main__":
    # Enable the request queue (max_size=200), then start the app with share=True.
    queued_app = demo.queue(max_size=200)
    queued_app.launch(share=True)