Spaces:

rubenroy
/

Zurich-14B

Running

App Files Files Community

Zurich-14B / app.py

rubenroy

Update app.py

f196196 verified 7 months ago

raw

history blame

4.78 kB

	import gradio as gr
	import spaces
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	import torch
	from threading import Thread
	from typing import Iterator

	# Prompts longer than this many tokens are trimmed before generation.
	MAX_INPUT_TOKEN_LENGTH = 4096

	# Identifier handed to from_pretrained() for both the tokenizer and the model.
	model_name = "rubenroy/Zurich-14B-GCv2-5m"

	# Both objects are created once at import time and reused by every request.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)

	@spaces.GPU
	def generate(
	    message: str,
	    chat_history: list[tuple[str, str]],
	    temperature=0.7,
	    top_p=0.9,
	    top_k=50,
	    max_new_tokens=512,
	    repetition_penalty=1.1,
	) -> Iterator[str]:
	    """Stream a reply to ``message`` from the Zurich model.

	    Args:
	        message: The newest user message.
	        chat_history: Earlier turns as (user, assistant) pairs, oldest first.
	        temperature: Sampling temperature; a value of 0 (or below) switches
	            generation to non-sampling mode (``do_sample=False``).
	        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
	        top_k: Top-k cutoff forwarded to ``model.generate``.
	        max_new_tokens: Upper bound on the number of generated tokens.
	        repetition_penalty: Repetition penalty forwarded to ``model.generate``.

	    Yields:
	        The full response text accumulated so far, once per streamed chunk.
	    """
	    # Rebuild the conversation in the role/content format the chat template takes.
	    conversation = []
	    for user, assistant in chat_history:
	        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
	    conversation.append({"role": "user", "content": message})

	    # add_generation_prompt=True makes the template finish with the opening of
	    # an assistant turn, so the model answers instead of continuing the user's
	    # message.
	    input_ids = tokenizer.apply_chat_template(
	        conversation,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    )

	    # Keep only the most recent tokens when the prompt is too long; slicing from
	    # the end preserves the newest message and the generation prompt.
	    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
	        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
	        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")

	    input_ids = input_ids.to(model.device)

	    # Normalise the UI-supplied values once, so the same typed values drive
	    # both the do_sample decision and the arguments given to generate().
	    # NOTE(review): sliders may deliver whole numbers as ints - confirm.
	    temperature = float(temperature)

	    # NOTE(review): the streamer is built with a 10 s timeout; presumably the
	    # loop below raises if no text arrives in that window - confirm against
	    # the TextIteratorStreamer documentation.
	    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
	    generate_kwargs = dict(
	        input_ids=input_ids,
	        streamer=streamer,
	        max_new_tokens=int(max_new_tokens),
	        do_sample=temperature > 0,
	        top_p=float(top_p),
	        top_k=int(top_k),
	        temperature=temperature,
	        repetition_penalty=float(repetition_penalty),
	    )

	    # Generation runs in a background thread while this generator drains the
	    # streamer and forwards partial text to the caller.
	    t = Thread(target=model.generate, kwargs=generate_kwargs)
	    t.start()

	    outputs = []
	    for text in streamer:
	        outputs.append(text)
	        yield "".join(outputs)

	# Header markup passed to gr.HTML at the top of the page: a <style> block
	# followed by a gradient banner showing the title "Zurich" and the subtitle
	# "GammaCorpus v2-5m".
	# NOTE(review): the .model-btn / .model-section rules and the Font Awesome
	# stylesheet are not referenced by the markup inside this string - confirm
	# nothing else relies on them before trimming.
	TITLE_HTML = """
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
	<style>
	.model-btn {
	background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%);
	color: white !important;
	padding: 0.75rem 1rem;
	border-radius: 0.5rem;
	text-decoration: none !important;
	font-weight: 500;
	transition: all 0.2s ease;
	font-size: 0.9rem;
	display: flex;
	align-items: center;
	justify-content: center;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	}
	.model-btn:hover {
	background: linear-gradient(135deg, #1d4ed8 0%, #1e40af 100%);
	box-shadow: 0 4px 6px rgba(0,0,0,0.2);
	}
	.model-section {
	flex: 1;
	max-width: 450px;
	background: rgba(255, 255, 255, 0.05);
	padding: 1.5rem;
	border-radius: 1rem;
	border: 1px solid rgba(255, 255, 255, 0.1);
	backdrop-filter: blur(10px);
	transition: all 0.3s ease;
	}
	</style>

	<div style="background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%); padding: 1.5rem; border-radius: 1.5rem; text-align: center; margin: 1rem auto; max-width: 1200px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);">
	<div style="margin-bottom: 1.5rem;">
	<h1 style="font-size: 2.5rem; font-weight: 800; margin: 0; background: linear-gradient(135deg, #60a5fa 0%, #93c5fd 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">Zurich</h1>
	<p style="font-size: 1.25rem; color: #94a3b8; margin: 0;">GammaCorpus v2-5m</p>
	</div>
	</div>
	"""

	# Starter prompts offered in the chat UI; each row holds just the message text.
	examples = [
	    [prompt]
	    for prompt in (
	        "Explain quantum computing in simple terms",
	        "Write a short story about a time traveler",
	        "Explain the process of photosynthesis",
	        "Tell me an interesting fact about Palm trees",
	    )
	]

	# Page layout: banner, a collapsed panel of generation controls, then the chat.
	with gr.Blocks() as demo:
	    gr.HTML(TITLE_HTML)

	    # The sliders are created here and handed to the chat interface below as
	    # its additional inputs, in the same order as generate()'s extra parameters.
	    with gr.Accordion("Generation Settings", open=False):
	        with gr.Row():
	            with gr.Column():
	                temperature = gr.Slider(
	                    minimum=0.0,
	                    maximum=2.0,
	                    value=0.7,
	                    step=0.1,
	                    label="Temperature",
	                    info="Higher values make the output more random",
	                )
	                top_p = gr.Slider(
	                    minimum=0.0,
	                    maximum=1.0,
	                    value=0.9,
	                    step=0.05,
	                    label="Top P",
	                    info="Controls nucleus sampling",
	                )
	                top_k = gr.Slider(
	                    minimum=1,
	                    maximum=100,
	                    value=50,
	                    step=1,
	                    label="Top K",
	                    info="Limits vocabulary choices per step",
	                )
	            with gr.Column():
	                max_new_tokens = gr.Slider(
	                    minimum=1,
	                    maximum=2048,
	                    value=512,
	                    step=1,
	                    label="Max New Tokens",
	                    info="Limits response length",
	                )
	                repetition_penalty = gr.Slider(
	                    minimum=1.0,
	                    maximum=2.0,
	                    value=1.1,
	                    step=0.1,
	                    label="Repetition Penalty",
	                    info="Discourages repeated phrases",
	                )

	    generation_controls = [temperature, top_p, top_k, max_new_tokens, repetition_penalty]
	    chatbot = gr.ChatInterface(
	        fn=generate,
	        additional_inputs=generation_controls,
	        examples=examples,
	    )

	demo.launch(share=True)