ginipick committed on
Commit
632d6e5
·
verified ·
1 Parent(s): 7a7a507

Create app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +230 -0
app-backup.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import json
3
+ import subprocess
4
+ import os
5
+ from llama_cpp import Llama
6
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
7
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
8
+ from llama_cpp_agent.chat_history import BasicChatHistory
9
+ from llama_cpp_agent.chat_history.messages import Roles
10
+ import gradio as gr
11
+ from huggingface_hub import hf_hub_download
12
+
13
# Cached llama.cpp model handle and the name of the model it currently holds.
# Both start empty and are filled in by respond() the first time it runs.
llm = None
llm_model = None

# Define the model name and path.
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Download the model into ./models (runs at import time).
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
)

print(f"Downloaded model path: {model_path}")
27
+
28
# Custom stylesheet passed to gr.ChatInterface(css=...) to restyle the chat
# bubbles, message rows and action buttons (including dark-mode colours).
css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}
.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}
.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}
.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}
.message.user{
    padding: 10px;
}
.message.bot{
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.message-buttons {
    justify-content: flex-end !important;
}
.message-buttons-left {
    align-self: end !important;
}
.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}
.dark.message-bubble-border {
    border-color: #343140 !important;
}
.dark.user {
    background: #1e1c26 !important;
}
.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
"""
78
+
79
def get_messages_formatter_type(model_name):
    """Return the chat-prompt formatter to use for *model_name*.

    Args:
        model_name: Model file or repository name to inspect.

    Returns:
        ``MessagesFormatterType.CHATML`` when the name contains ``"Mistral"``
        or ``"BitSix"`` (case-sensitive substring match).

    Raises:
        ValueError: If the name matches neither supported family.
    """
    if "Mistral" in model_name or "BitSix" in model_name:
        # Mistral-family models are driven with the ChatML format here.
        return MessagesFormatterType.CHATML
    raise ValueError(f"Unsupported model: {model_name}")
84
+
85
def _iter_history_turns(history):
    """Yield ``(role_name, content)`` pairs from a Gradio chat history.

    Accepts both history layouts Gradio can hand to a chat function:
    ``(user, assistant)`` pairs and ``{"role": ..., "content": ...}`` dicts.
    Entries whose content is ``None`` (e.g. a reply that is still pending)
    and dict entries with a role other than user/assistant are skipped.
    """
    for entry in history or []:
        if isinstance(entry, dict):
            role_name = entry.get("role")
            content = entry.get("content")
            if role_name in ("user", "assistant") and content is not None:
                yield role_name, content
            continue
        user_text, assistant_text = entry[0], entry[1]
        if user_text is not None:
            yield "user", user_text
        if assistant_text is not None:
            yield "assistant", assistant_text


@spaces.GPU(duration=120)
def respond(
    message,
    history: list,
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a model reply to *message* for the Gradio chat interface.

    Args:
        message: The new user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role", "content"}`` dicts.
        system_message: System prompt handed to the agent.
        max_tokens: Value assigned to the sampler's ``max_tokens``.
        temperature: Value assigned to the sampler's ``temperature``.
        top_p: Value assigned to the sampler's ``top_p``.
        top_k: Value assigned to the sampler's ``top_k``.
        repeat_penalty: Value assigned to the sampler's ``repeat_penalty``.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    global llm
    global llm_model

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Check the model file path.
    model_path = os.path.join("./models", MISTRAL_MODEL_NAME)

    print(f"Model path: {model_path}")

    if not os.path.exists(model_path):
        print(f"Warning: Model file not found at {model_path}")
        # Only list the directory when it exists, so the diagnostic itself
        # cannot fail with FileNotFoundError.
        if os.path.isdir("./models"):
            print(f"Available files in ./models: {os.listdir('./models')}")

    # Load the model once and reuse it across calls.
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        llm = Llama(
            model_path=model_path,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = MISTRAL_MODEL_NAME

    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True,
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    messages = BasicChatHistory()

    # Replay earlier turns into the agent's history in their original order.
    role_by_name = {"user": Roles.user, "assistant": Roles.assistant}
    for role_name, content in _iter_history_turns(history):
        messages.add_message({
            'role': role_by_name[role_name],
            'content': content,
        })

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the cumulative text each time so the UI shows the growing reply.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
163
+
164
# HTML card passed to gr.Chatbot(placeholder=...): logo, product blurb, model
# badge and a Discord link.
PLACEHOLDER = """
<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 6px; border-width: 1px; border-color: #e5e7eb; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
    <figure style="margin: 0;max-width: 200px;min-height: 300px;">
        <img src="https://huggingface.co/spaces/ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503/resolve/main/llama.jpg" alt="Logo" style="width: 100%; height: 100%; border-radius: 8px;">
    </figure>
    <div style="padding: .5rem 1.5rem;display: flex;flex-direction: column;justify-content: space-evenly;">
        <h2 style="text-align: left; font-size: 1.5rem; font-weight: 700; margin-bottom: 0.5rem;">Ginigen Private-BitSix</h2>
        <p style="text-align: left; font-size: 16px; line-height: 1.5; margin-bottom: 15px;">The Ginigen Private-BitSix framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.</p>
        <div style="display: flex; justify-content: space-between; align-items: center;">
            <div style="display: flex; flex-flow: column; justify-content: space-between;">
                <span style="display: inline-flex; align-items: center; border-radius: 0.375rem; background-color: rgba(229, 70, 77, 0.1); padding: 0.1rem 0.75rem; font-size: 0.75rem; font-weight: 500; color: #f88181; margin-bottom: 2.5px;">
                    Private BitSix Mistral Small 3.1 24B Instruct
                </span>
            </div>
            <div style="display: flex; justify-content: flex-end; align-items: center;">
                <a href="https://discord.gg/openfreeai" target="_blank" rel="noreferrer" style="padding: .5rem;">
                    <svg width="24" height="24" fill="currentColor" xmlns="http://www.w3.org/2000/svg" viewBox="0 5 30.67 23.25">
                        <title>Discord</title>
                        <path d="M26.0015 6.9529C24.0021 6.03845 21.8787 5.37198 19.6623 5C19.3833 5.48048 19.0733 6.13144 18.8563 6.64292C16.4989 6.30193 14.1585 6.30193 11.8336 6.64292C11.6166 6.13144 11.2911 5.48048 11.0276 5C8.79575 5.37198 6.67235 6.03845 4.6869 6.9529C0.672601 12.8736 -0.41235 18.6548 0.130124 24.3585C2.79599 26.2959 5.36889 27.4739 7.89682 28.2489C8.51679 27.4119 9.07477 26.5129 9.55525 25.5675C8.64079 25.2265 7.77283 24.808 6.93587 24.312C7.15286 24.1571 7.36986 23.9866 7.57135 23.8161C12.6241 26.1255 18.0969 26.1255 23.0876 23.8161C23.3046 23.9866 23.5061 24.1571 23.7231 24.312C22.8861 24.808 22.0182 25.2265 21.1037 25.5675C21.5842 26.5129 22.1422 27.4119 22.7621 28.2489C25.2885 27.4739 27.8769 26.2959 30.5288 24.3585C31.1952 17.7559 29.4733 12.0212 26.0015 6.9529ZM10.2527 20.8402C8.73376 20.8402 7.49382 19.4608 7.49382 17.7714C7.49382 16.082 8.70276 14.7025 10.2527 14.7025C11.7871 14.7025 13.0425 16.082 13.0115 17.7714C13.0115 19.4608 11.7871 20.8402 10.2527 20.8402ZM20.4373 20.8402C18.9183 20.8402 17.6768 19.4608 17.6768 17.7714C17.6768 16.082 18.8873 14.7025 20.4373 14.7025C21.9717 14.7025 23.2271 16.082 23.1961 17.7714C23.1961 19.4608 21.9872 20.8402 20.4373 20.8402Z"></path>
                    </svg>
                </a>
            </div>
        </div>
    </div>
</div>
"""
190
+
191
+ demo = gr.ChatInterface(
192
+ fn=respond,
193
+ title="Ginigen Private AI",
194
+ description="6BIT ์–‘์žํ™”๋กœ ๋ชจ๋ธ ํฌ๊ธฐ๋Š” ์ค„์ด๊ณ  ์„ฑ๋Šฅ์€ ์œ ์ง€ํ•˜๋Š” ํ”„๋ผ์ด๋ฒ„์‹œ ์ค‘์‹ฌ AI ์†”๋ฃจ์…˜.",
195
+ theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
196
+ body_background_fill_dark="#16141c",
197
+ block_background_fill_dark="#16141c",
198
+ block_border_width="1px",
199
+ block_title_background_fill_dark="#1e1c26",
200
+ input_background_fill_dark="#292733",
201
+ button_secondary_background_fill_dark="#24212b",
202
+ border_color_accent_dark="#343140",
203
+ border_color_primary_dark="#343140",
204
+ background_fill_secondary_dark="#16141c",
205
+ color_accent_soft_dark="transparent",
206
+ code_background_fill_dark="#292733",
207
+ ),
208
+ css=css,
209
+ examples=[
210
+ ["์•ˆ๋…•ํ•˜์„ธ์š”, ์ €๋Š” AI์— ๊ด€์‹ฌ์ด ๋งŽ์Šต๋‹ˆ๋‹ค. ์–‘์žํ™”๋ž€ ๋ฌด์—‡์ธ๊ฐ€์š”?"],
211
+ ["๋ฏธ์ŠคํŠธ๋ž„ ๋ชจ๋ธ์˜ ํŠน์ง•์€ ๋ฌด์—‡์ธ๊ฐ€์š”?"],
212
+ ["๊ธด ์ปจํ…์ŠคํŠธ(context)๋ฅผ ์ฒ˜๋ฆฌํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ์„ค๋ช…ํ•ด ์ฃผ์„ธ์š”."]
213
+ ],
214
+ additional_inputs=[
215
+ gr.Textbox(
216
+ value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.",
217
+ label="์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€",
218
+ lines=5
219
+ ),
220
+ gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="์ตœ๋Œ€ ํ† ํฐ ์ˆ˜"),
221
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
222
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
223
+ gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
224
+ gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
225
+ ],
226
+ chatbot=gr.Chatbot(placeholder=PLACEHOLDER, type="messages")
227
+ )
228
+
229
+ if __name__ == "__main__":
230
+ demo.launch()