EduuGomes committed on
Commit
d725876
·
verified ·
1 Parent(s): 0858032

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -26
app.py CHANGED
@@ -1,26 +1,177 @@
1
- # Install transformers from source - only needed for versions <= v4.34
2
- # pip install git+https://github.com/huggingface/transformers.git
3
- # pip install accelerate
4
-
5
- import torch
6
- from transformers import pipeline
7
-
8
- pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
9
-
10
- # We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
11
- messages = [
12
- {
13
- "role": "system",
14
- "content": "You are a friendly chatbot who always responds in the style of a pirate",
15
- },
16
- {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
17
- ]
18
- prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
19
- outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
20
- print(outputs[0]["generated_text"])
21
- # <|system|>
22
- # You are a friendly chatbot who always responds in the style of a pirate.</s>
23
- # <|user|>
24
- # How many helicopters can a human eat in one sitting?</s>
25
- # <|assistant|>
26
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from openai import OpenAI
3
+ import os
4
+ from datetime import datetime
5
+
6
# Title and tagline rendered at the top of the page.
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# The API token is read from the HF_TOKEN environment variable (None when unset).
ACCESS_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
16
+
17
# Selectable models, grouped by vendor/family. Insertion order is preserved
# and determines the order of the flattened list below.
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Every model id in one flat list, categories concatenated in dict order.
ALL_MODELS = sum(MODEL_CATEGORIES.values(), [])
59
+
60
def get_model_info(model_name):
    """Return a short Markdown summary for a ``org/model`` identifier.

    Args:
        model_name: Model id expected in the form ``"<organization>/<model>"``.

    Returns:
        A Markdown string. When ``model_name`` does not split into exactly
        two ``/``-separated parts, only the raw name is reported with an
        "Unknown" format. Otherwise the organization, the model name and a
        parameter-count label parsed from the model name (``"Unknown"`` when
        no size token is found) are reported.
    """
    import re

    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts

    # Size token: optional mixture-of-experts multiplier ("8x"), a number,
    # then a B (billion) or M (million) suffix in either case. The lookahead
    # stops a digit followed by a word (e.g. "3mini") from counting as a size.
    size_match = re.search(
        r'((?:\d+x)?\d+(?:\.\d+)?)([BbMm])(?![A-Za-z])', model
    )
    if size_match:
        size = size_match.group(1) + size_match.group(2).upper()
    else:
        size = "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
69
+
70
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    selected_model
):
    """Stream a chat completion and yield the growing chat history.

    Args:
        message: The new user message.
        history: Prior conversation as ``(user_msg, assistant_msg)`` pairs;
            ``None`` is treated as an empty conversation.
        system_message: Content of the leading system message.
        max_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        frequency_penalty: Frequency penalty forwarded to the API.
        seed: Sampling seed; ``-1`` (or ``None``) means "no fixed seed".
        selected_model: Model id to query; falls back to ``ALL_MODELS[0]``
            when empty.

    Yields:
        The chat history as a list of pairs whose last entry is
        ``(message, partial_response)``, updated as content arrives. On
        failure the last entry carries an ``"Error: ..."`` text appended to
        whatever was already received. For a blank ``message`` the
        unchanged history is yielded once and no request is made.
    """
    history = list(history or [])

    # A blank message has nothing to answer; do not spend an API call on it.
    if not message or not message.strip():
        yield history
        return

    # -1 is the UI's "random" sentinel; the API expects None for that.
    if seed is None or int(seed) == -1:
        seed = None
    else:
        seed = int(seed)

    # Prepare messages
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    model_to_use = selected_model or ALL_MODELS[0]

    new_history = history + [(message, "")]
    current_response = ""
    # Show the user's message right away, even if the stream yields nothing.
    yield new_history
    try:
        for chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=int(max_tokens),
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            # Some chunks carry no choices; indexing them would raise and
            # the handler below would then clobber a good reply.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                current_response += delta
                new_history[-1] = (message, current_response)
                yield new_history
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing,
        # keeping any text that was already streamed.
        err = f"Error: {e}"
        if current_response:
            err = f"{current_response}\n\n{err}"
        new_history[-1] = (message, err)
        yield new_history
116
+
117
# Page layout: settings in the left column, chat in the right column.
with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")

    with gr.Row():
        with gr.Column(scale=2):
            # Model picker and the info panel describing the current choice.
            selected_model = gr.Dropdown(
                label="Select Model",
                choices=ALL_MODELS,
                value=ALL_MODELS[0],
            )
            model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))

            def update_info(model_name):
                """Return the info text for the newly selected model."""
                return get_model_info(model_name)

            # Refresh the info panel whenever the dropdown value changes.
            selected_model.change(
                fn=update_info,
                inputs=[selected_model],
                outputs=[model_info],
            )

            # Generation settings forwarded to `respond`.
            system_message = gr.Textbox(
                label="System Prompt",
                value="You are a helpful assistant.",
                lines=2,
            )

            max_tokens = gr.Slider(
                minimum=1, maximum=4096, value=512, label="Max New Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"
            )
            freq_penalty = gr.Slider(
                minimum=-2.0, maximum=2.0, value=0.0, step=0.1,
                label="Frequency Penalty",
            )
            seed = gr.Slider(
                minimum=-1, maximum=65535, value=-1, step=1,
                label="Seed (-1 random)",
            )

        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            msg = gr.Textbox(
                placeholder="Type your message here...", show_label=False
            )
            send_btn = gr.Button("Send")

    # The Send button and pressing Enter in the textbox trigger the same
    # handler; the argument order matches the `respond` signature.
    chat_event = dict(
        fn=respond,
        inputs=[
            msg, chatbot, system_message,
            max_tokens, temperature, top_p,
            freq_penalty, seed, selected_model,
        ],
        outputs=[chatbot],
        queue=True,
    )
    send_btn.click(**chat_event)
    msg.submit(**chat_event)

demo.launch()