File size: 4,943 Bytes
29a4795
38c463d
 
 
29a4795
38c463d
 
 
29a4795
38c463d
 
 
 
 
29a4795
 
38c463d
 
 
 
 
 
 
 
29a4795
38c463d
 
 
 
29a4795
38c463d
 
 
 
 
 
 
 
 
 
 
 
 
29a4795
38c463d
 
 
 
 
29a4795
38c463d
29a4795
 
38c463d
 
 
 
 
 
 
 
29a4795
 
38c463d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29a4795
38c463d
 
 
 
29a4795
38c463d
 
 
 
 
 
29a4795
38c463d
 
 
 
 
 
 
 
29a4795
38c463d
 
 
 
29a4795
38c463d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29a4795
 
38c463d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import gradio as gr
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# Azure Inference setup.
# Fail fast with a clear message when credentials are missing; otherwise
# AzureKeyCredential(None) raises a confusing TypeError at import time.
url = os.getenv("Azure_Endpoint")
_raw_key = os.getenv("Azure_API_KEY")
if not url:
    raise RuntimeError("Environment variable 'Azure_Endpoint' is not set")
if not _raw_key:
    raise RuntimeError("Environment variable 'Azure_API_KEY' is not set")
api_key = AzureKeyCredential(_raw_key)

# Initialize the ChatCompletionsClient.
# NOTE: streaming is a per-request option (client.complete(..., stream=True)),
# not a constructor setting. The constructor-level `stream=True` was removed
# because responses below are consumed non-streaming via
# `response.choices[0].message.content`.
client = ChatCompletionsClient(
    endpoint=url,
    credential=api_key,
)

# Get and print model information (optional) — best-effort: some endpoints
# do not expose model metadata, so failures are logged and ignored.
try:
    model_info = client.get_model_info()
    print("Model name:", model_info.model_name)
    print("Model type:", model_info.model_type)
    print("Model provider name:", model_info.model_provider_name)
except Exception as e:
    print("Could not get model info:", str(e))

# Default sampling parameters exposed as UI sliders below.
default_temperature = 0.8
default_max_tokens = 2048
default_top_p = 0.1

# Example prompts that users can try from the UI.
example_prompts = [
    "I have $20,000 in my savings account, where I receive a 4% profit per year and payments twice a year. Can you please tell me how long it will take for me to become a millionaire?",
    "I have total $500 create a plan with travel and food",
    "I have $1000 and 5 years. Is it better to invest in a stock paying $15 quarterly dividends or in a 5% annual savings account?"
]
            
def get_azure_response(message, chat_history, temperature, max_tokens, top_p):
    """
    Send the conversation to the Azure-hosted model and return its reply.

    message: the user's newest input string.
    chat_history: list of (user, assistant) pairs from the Gradio chatbot.
    temperature / max_tokens / top_p: sampling parameters from the UI sliders.

    Returns the assistant's reply text, or an "Error: ..." string on failure.
    """
    # Flatten the (user, assistant) pairs into the role/content format the
    # Azure endpoint expects; empty assistant turns are skipped.
    history_messages = []
    for user_turn, bot_turn in chat_history:
        history_messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            history_messages.append({"role": "assistant", "content": bot_turn})

    messages = (
        [{"role": "system", "content": "You are a helpful AI assistant."}]
        + history_messages
        + [{"role": "user", "content": message}]
    )

    payload = {
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    try:
        print("Sending request to Azure...")
        response = client.complete(payload)
    except Exception as e:
        print(f"Error getting response: {str(e)}")
        return f"Error: {str(e)}"

    # Log token accounting for cost/debug visibility.
    usage = response.usage
    print(f"Usage - Prompt tokens: {usage.prompt_tokens}, "
          f"Completion tokens: {usage.completion_tokens}, "
          f"Total tokens: {usage.total_tokens}")

    return response.choices[0].message.content

# Create the Gradio interface.
# All components are declared inside the Blocks context; event wiring at the
# bottom connects the textbox/button to the handler functions.
with gr.Blocks(title="Phi-4-mini Chatbot") as demo:
    gr.Markdown("Chat with the Phi-4 mini model hosted on Azure AI")
    
    # Create a chatbot component.
    # NOTE(review): tuple-style history [(user, assistant), ...] is assumed
    # here and in get_azure_response — this is the gradio Chatbot default
    # unless type="messages" is set; confirm against the installed version.
    chatbot = gr.Chatbot(height=300)
    msg = gr.Textbox(label="Type your message here", placeholder="Ask me anything...", lines=1)
    clear = gr.Button("Clear Conversation")
    
    # Add examples section: clicking an example fills the message textbox.
    with gr.Accordion("Try these examples", open=True):
        examples = gr.Examples(
            examples=example_prompts,
            inputs=msg
        )
    
    # Add model parameter controls; slider defaults come from the
    # module-level default_* constants so "Clear" can restore them.
    with gr.Accordion("Model Parameters", open=False):
        temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=default_temperature, step=0.1, 
                                label="Temperature (higher = more creative, lower = more focused)")
        max_tokens_slider = gr.Slider(minimum=100, maximum=4096, value=default_max_tokens, step=100, 
                                      label="Max Tokens (maximum length of response)")
        top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=default_top_p, step=0.1, 
                                label="Top P (diversity of response)")
    
    # Simplified chat function that handles both sending and receiving messages
    def chat(message, history, temperature, max_tokens, top_p):
        """Handle one user turn: query Azure, append the exchange to history.

        Returns ("", updated_history) — the empty string clears the input box.
        Blank/whitespace-only messages are ignored (history unchanged).
        """
        if not message.strip():
            return "", history
        
        # Get response from Azure (blocking call; Gradio runs it off the UI thread).
        response = get_azure_response(message, history, temperature, max_tokens, top_p)
        
        # Add the exchange to history as a (user, assistant) tuple.
        history.append((message, response))
        
        return "", history  # Clear the input field after sending
    
    # Function to clear the conversation and restore slider defaults.
    def clear_conversation():
        """Reset chat history and all three sliders to their default values."""
        return [], default_temperature, default_max_tokens, default_top_p
    
    # Set up event handlers - simplified approach:
    # Enter in the textbox sends the message; the button resets everything.
    msg.submit(chat, [msg, chatbot, temp_slider, max_tokens_slider, top_p_slider], [msg, chatbot])
    clear.click(clear_conversation, None, [chatbot, temp_slider, max_tokens_slider, top_p_slider])

# Launch the app
demo.launch(debug=True)  # Set share=True to generate a public URL for testing