import spaces
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
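# Global cache so the model is loaded once and reused across requests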
llm = None
llm_model = None
# ๋ชจ๋ธ ์ด๋ฆ„๊ณผ ๊ฒฝ๋กœ๋ฅผ ์ •์˜
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
# ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models"
)
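# hf_hub_download caches the file locally and returns its path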
print(f"Downloaded model path: {model_path}")
css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}

.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}

.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}

.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}

.message.user {
    padding: 10px;
}

.message.bot {
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}

.message-bubble-border {
    border-radius: 6px !important;
}

.message-buttons {
    justify-content: flex-end !important;
}

.message-buttons-left {
    align-self: end !important;
}

.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}

.dark.message-bubble-border {
    border-color: #343140 !important;
}

.dark.user {
    background: #1e1c26 !important;
}

.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
"""
def get_messages_formatter_type(model_name):
    if "Mistral" in model_name or "BitSix" in model_name:
        return MessagesFormatterType.CHATML  # Mistral-family models here use the ChatML format
    else:
        raise ValueError(f"Unsupported model: {model_name}")
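# Allocate a GPU (HF Spaces ZeroGPU) for up to 120 seconds per call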
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[dict],  # history entries arrive as dicts rather than tuples
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    global llm
    global llm_model
    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)
    # Check the local model file path
    model_path_local = os.path.join("./models", MISTRAL_MODEL_NAME)
    print(f"Model path: {model_path_local}")
    if not os.path.exists(model_path_local):
        print(f"Warning: Model file not found at {model_path_local}")
        print(f"Available files in ./models: {os.listdir('./models')}")
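    # (Re)load the model only when it is absent or a different model is requested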
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        llm = Llama(
            model_path=model_path_local,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        llm_model = MISTRAL_MODEL_NAME
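    # Expose the loaded model through llama-cpp-agent's provider interface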
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True
    messages = BasicChatHistory()
    # With gr.Chatbot(type="messages"), each history entry is a dict of the
    # form {'role': 'user' | 'assistant', 'content': <text>}
    for msn in history:
        role = Roles.user if msn.get('role') == 'user' else Roles.assistant
        messages.add_message({
            'role': role,
            'content': msn.get('content', '')
        })
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False
    )
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
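# Gradio chat UI; the sampling controls below are passed to respond() as additional inputs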
demo = gr.ChatInterface(
    fn=respond,
    title="Ginigen Private AI",
    description="A privacy-focused AI solution that uses 6-bit quantization to shrink the model while preserving performance: The Ginigen Private-BitSix framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.",
    theme=gr.themes.Soft(
        primary_hue="violet",
        secondary_hue="violet",
        neutral_hue="gray",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
    ).set(
        body_background_fill_dark="#16141c",
        block_background_fill_dark="#16141c",
        block_border_width="1px",
        block_title_background_fill_dark="#1e1c26",
        input_background_fill_dark="#292733",
        button_secondary_background_fill_dark="#24212b",
        border_color_accent_dark="#343140",
        border_color_primary_dark="#343140",
        background_fill_secondary_dark="#16141c",
        color_accent_soft_dark="transparent",
        code_background_fill_dark="#292733",
    ),
    css=css,
    examples=[
        ["Hello, I'm very interested in AI. What is quantization?"],
        ["What are the characteristics of Mistral models?"],
        ["Please explain how to handle long contexts."]
    ],
    additional_inputs=[
        gr.Textbox(
            value="You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.",
            label="System message",
            lines=5
        ),
        gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    ],
    chatbot=gr.Chatbot(type="messages")
)
if __name__ == "__main__":
    demo.launch()