ginipick committed on
Commit
da4dba6
·
verified ·
1 Parent(s): c3dadf6

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -280
app.py DELETED
@@ -1,280 +0,0 @@
1
- import spaces
2
- import json
3
- import subprocess
4
- import os
5
- from llama_cpp import Llama
6
- from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
7
- from llama_cpp_agent.providers import LlamaCppPythonProvider
8
- from llama_cpp_agent.chat_history import BasicChatHistory
9
- from llama_cpp_agent.chat_history.messages import Roles
10
- import gradio as gr
11
- from huggingface_hub import hf_hub_download
12
-
13
# Lazily created llama.cpp model handle and the name of the model it holds;
# both are assigned by respond() the first time a reply is requested.
llm = None
llm_model = None

# File name of the GGUF weights inside the Hugging Face repository.
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Fetch the model weights into ./models when the module is imported.
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
)

print(f"Downloaded model path: {model_path}")
27
-
28
# Custom stylesheet passed to gr.Blocks(css=...). The rules are kept verbatim
# and unindented so the string content is exactly the original declarations.
css = """
.bubble-wrap {
padding-top: calc(var(--spacing-xl) * 3) !important;
}
.message-row {
justify-content: space-evenly !important;
width: 100% !important;
max-width: 100% !important;
margin: calc(var(--spacing-xl)) 0 !important;
padding: 0 calc(var(--spacing-xl) * 3) !important;
}
.flex-wrap.user {
border-bottom-right-radius: var(--radius-lg) !important;
}
.flex-wrap.bot {
border-bottom-left-radius: var(--radius-lg) !important;
}
.message.user{
padding: 10px;
}
.message.bot{
text-align: right;
width: 100%;
padding: 10px;
border-radius: 10px;
}
.message-bubble-border {
border-radius: 6px !important;
}
.message-buttons {
justify-content: flex-end !important;
}
.message-buttons-left {
align-self: end !important;
}
.message-buttons-bot, .message-buttons-user {
right: 10px !important;
left: auto !important;
bottom: 2px !important;
}
.dark.message-bubble-border {
border-color: #343140 !important;
}
.dark.user {
background: #1e1c26 !important;
}
.dark.assistant.dark, .dark.pending.dark {
background: #16141c !important;
}
"""
78
-
79
def get_messages_formatter_type(model_name):
    """Return the llama-cpp-agent prompt format to use for ``model_name``.

    Args:
        model_name: Model file or repository name. Matching is a
            case-sensitive substring test.

    Returns:
        ``MessagesFormatterType.CHATML`` when the name contains ``"Mistral"``
        or ``"BitSix"``.

    Raises:
        ValueError: If the name matches none of the supported models.
    """
    # Mistral-family models are mapped to the ChatML format here.
    # NOTE(review): confirm ChatML matches this checkpoint's chat template.
    if "Mistral" in model_name or "BitSix" in model_name:
        return MessagesFormatterType.CHATML
    raise ValueError(f"Unsupported model: {model_name}")
84
-
85
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user, assistant)`` pairs. A side that is
            ``None`` (for example a UI placeholder row) is left out of the
            prompt.
        system_message: System prompt given to the agent.
        max_tokens: Value assigned to the sampler's ``max_tokens``.
        temperature: Value assigned to the sampler's ``temperature``.
        top_p: Value assigned to the sampler's ``top_p``.
        top_k: Value assigned to the sampler's ``top_k``.
        repeat_penalty: Value assigned to the sampler's ``repeat_penalty``.

    Yields:
        The reply text accumulated so far, once per streamed chunk.

    Raises:
        ValueError: If ``MISTRAL_MODEL_NAME`` is not a supported model.
    """
    global llm
    global llm_model

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Check the expected location of the model file.
    model_path = os.path.join("./models", MISTRAL_MODEL_NAME)
    print(f"Model path: {model_path}")

    if not os.path.exists(model_path):
        print(f"Warning: Model file not found at {model_path}")
        # Listing a missing directory would raise and bury the warning above.
        available = os.listdir("./models") if os.path.isdir("./models") else []
        print(f"Available files in ./models: {available}")

    # Load the model on first use and keep it for later calls.
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        llm = Llama(
            model_path=model_path,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = MISTRAL_MODEL_NAME

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay the earlier turns into the agent's chat history.
    messages = BasicChatHistory()
    for turn in history or []:
        user_text, assistant_text = turn[0], turn[1]
        if user_text is not None:
            messages.add_message({
                'role': Roles.user,
                'content': user_text,
            })
        if assistant_text is not None:
            messages.add_message({
                'role': Roles.assistant,
                'content': assistant_text,
            })

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the cumulative text so the caller can redraw the whole reply.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
163
-
164
# HTML card used as the chatbot's initial assistant message. The markup is
# kept verbatim and unindented: the Chatbot renders message text as Markdown,
# where leading indentation can change how a line is interpreted.
PLACEHOLDER = """
<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
<figure style="margin: 0;max-width: 200px;min-height: 300px;">
<img src="https://huggingface.co/spaces/ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503/resolve/main/llama.jpg" alt="Logo" style="width: 100%; height: 100%; border-radius: 8px;">
</figure>
<div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">
<h2 style="text-align: left; font-size: 1.5rem; font-weight: 700; margin-bottom: 0.5rem;">Ginigen Private-BitSix</h2>
<p style="text-align: left; font-size: 16px; line-height: 1.5; margin-bottom: 15px;">The Ginigen Private-BitSix framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.</p>
<div style="display: flex; justify-content: space-between; align-items: center;">
<div style="display: flex; flex-flow: column; justify-content: space-between;">
<span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(229, 70, 77, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #f88181; margin-bottom: 2.5px;">
Private BitSix Mistral Small 3.1 24B Instruct
</span>
</div>
<div style="display: flex; justify-content: flex-end; align-items: center;">
<a href="https://discord.gg/openfreeai" target="_blank" rel="noreferrer" style="padding: .5rem;">
<svg width="24" height="24" fill="currentColor" xmlns="http://www.w3.org/2000/svg" viewBox="0 5 30.67 23.25">
<title>Discord</title>
<path d="M26.0015 6.9529C24.0021 6.03845 21.8787 5.37198 19.6623 5C19.3833 5.48048 19.0733 6.13144 18.8563 6.64292C16.4989 6.30193 14.1585 6.30193 11.8336 6.64292C11.6166 6.13144 11.2911 5.48048 11.0276 5C8.79575 5.37198 6.67235 6.03845 4.6869 6.9529C0.672601 12.8736 -0.41235 18.6548 0.130124 24.3585C2.79599 26.2959 5.36889 27.4739 7.89682 28.2489C8.51679 27.4119 9.07477 26.5129 9.55525 25.5675C8.64079 25.2265 7.77283 24.808 6.93587 24.312C7.15286 24.1571 7.36986 23.9866 7.57135 23.8161C12.6241 26.1255 18.0969 26.1255 23.0876 23.8161C23.3046 23.9866 23.5061 24.1571 23.7231 24.312C22.8861 24.808 22.0182 25.2265 21.1037 25.5675C21.5842 26.5129 22.1422 27.4119 22.7621 28.2489C25.2885 27.4739 27.8769 26.2959 30.5288 24.3585C31.1952 17.7559 29.4733 12.0212 26.0015 6.9529ZM10.2527 20.8402C8.73376 20.8402 7.49382 19.4608 7.49382 17.7714C7.49382 16.082 8.70276 14.7025 10.2527 14.7025C11.7871 14.7025 13.0425 16.082 13.0115 17.7714C13.0115 19.4608 11.7871 20.8402 10.2527 20.8402ZM20.4373 20.8402C18.9183 20.8402 17.6768 19.4608 17.6768 17.7714C17.6768 16.082 18.8873 14.7025 20.4373 14.7025C21.9717 14.7025 23.2271 16.082 23.1961 17.7714C23.1961 19.4608 21.9872 20.8402 20.4373 20.8402Z"></path>
</svg>
</a>
</div>
</div>
</div>
</div>
"""
190
-
191
def _drop_placeholder(history):
    """Return ``history`` without the leading placeholder row, if present."""
    if history and history[0][0] is None:
        return history[1:]
    return history


def _stream_reply(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Adapt respond() to a Chatbot output.

    respond() yields the reply as plain text, but the Chatbot component is
    given a list of ``[user, assistant]`` pairs. Each partial reply is
    therefore appended to the existing turns as a new pair.
    """
    past_turns = list(history or [])
    for partial in respond(
        message,
        past_turns,
        system_message,
        max_tokens,
        temperature,
        top_p,
        top_k,
        repeat_penalty,
    ):
        yield past_turns + [[message, partial]]


_theme = gr.themes.Soft(
    primary_hue="violet",
    secondary_hue="violet",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    body_background_fill_dark="#16141c",
    block_background_fill_dark="#16141c",
    block_border_width="1px",
    block_title_background_fill_dark="#1e1c26",
    input_background_fill_dark="#292733",
    button_secondary_background_fill_dark="#24212b",
    border_color_accent_dark="#343140",
    border_color_primary_dark="#343140",
    background_fill_secondary_dark="#16141c",
    color_accent_soft_dark="transparent",
    code_background_fill_dark="#292733",
)

# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened listing; confirm the Row/Column/Accordion structure is intended.
with gr.Blocks(theme=_theme, css=css) as demo:

    gr.Markdown("# Ginigen Private AI")
    gr.Markdown("6BIT 양자화로 모델 크기는 줄이고 성능은 유지하는 프라이버시 중심 AI 솔루션.")

    chatbot = gr.Chatbot(
        value=[[None, PLACEHOLDER]],
        height=600,
        avatar_images=(None, "https://huggingface.co/spaces/ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503/resolve/main/llama.jpg"),
        show_copy_button=True,
        show_label=False,
        elem_id="chatbot",
    )

    with gr.Row():
        with gr.Column(scale=4):
            with gr.Row():
                msg = gr.Textbox(
                    show_label=False,
                    placeholder="메시지를 입력하세요...",
                    container=False,
                    scale=8,
                )
                submit_btn = gr.Button("전송", variant="primary", scale=1)

        with gr.Column(scale=1):
            # NOTE(review): "inside tags" names no tag; the tag names may
            # have been lost from this prompt — confirm the intended text.
            system_message = gr.Textbox(
                value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.",
                label="시스템 메시지",
                lines=5,
            )

            with gr.Accordion("고급 설정", open=False):
                max_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="최대 토큰 수")
                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
                top_k = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
                repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")

            clear_btn = gr.Button("대화 초기화")

    # Event handlers
    reply_inputs = [
        msg,
        chatbot,
        system_message,
        max_tokens,
        temperature,
        top_p,
        top_k,
        repeat_penalty,
    ]

    # The send button and pressing Enter in the textbox run the same chain:
    # drop the placeholder row, stream the reply, then clear the textbox.
    for trigger in (submit_btn.click, msg.submit):
        trigger(
            fn=_drop_placeholder,
            inputs=chatbot,
            outputs=chatbot,
            queue=False,
        ).then(
            fn=_stream_reply,
            inputs=reply_inputs,
            outputs=chatbot,
            queue=True,
        ).then(
            fn=lambda: "",
            inputs=None,
            outputs=msg,
        )

    # Reset the conversation back to the placeholder card.
    clear_btn.click(lambda: [[None, PLACEHOLDER]], None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()