# Gradio demo: WizardLM-13B (GGML q4_0) running on CPU via the rustformers
# llm-rs bindings.  (Removed scraped Hugging Face Spaces page header:
# "Spaces: / Sleeping / Sleeping".)
import gradio as gr
from huggingface_hub import space_info
from llm_rs import AutoModel, GenerationConfig, KnownModels, Precision, SessionConfig
# Hub repository and 4-bit-quantized GGML weights file for WizardLM-13B.
repo_name = "svjack/ggml"
file_name = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"

# Example prompts pre-filled in the Gradio examples widget.
# NOTE(review): the emoji characters below look mojibake'd in this copy of the
# file — verify against the original source before shipping.
examples = [
    "How to promote Chinese traditional culture ?",
    "Explain the meaning of word Ottoman",
    "Explain the meaning of π¨",
    "Use following emojis to generate a short description of a scene , the emojis are π¨π©π₯βοΈ",
    "Use following emojis to generate a short description of a scene , the emojis are π²π₯π¨π¦",
]

# Two threads / batch size 2: sized for the free 2-vCPU Space hardware.
session_config = SessionConfig(threads=2, batch_size=2)

# Download (or reuse a cached copy of) the weights from the Hub and load them
# with the rustformers Llama backend.  This runs at import time and may take
# a while on first start.
model = AutoModel.from_pretrained(
    repo_name,
    model_file=file_name,
    session_config=session_config,
    verbose=True,
    model_type=KnownModels.Llama,
)
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Generate a streamed answer for *instruction*.

    Parameters mirror the UI controls: sampling ``temperature``, nucleus
    ``top_p``, ``top_k`` shortlist size, ``max_new_tokens`` budget, and the
    RNG ``seed``.  Yields the accumulated response text after every generated
    token so Gradio can render the output incrementally.
    """
    # Alpaca-style instruction template expected by WizardLM checkpoints.
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
Answer:"""
    generation_config = GenerationConfig(
        seed=seed,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    response = ""
    streamer = model.stream(prompt=prompt, generation_config=generation_config)
    for new_text in streamer:
        response += new_text
        # Yield the full text so far (not just the delta): gr.Markdown
        # re-renders the whole value on each update.
        yield response
# ---------------------------------------------------------------------------
# Gradio UI: a question box, collapsible generation-parameter controls, and a
# streaming markdown output, all wired to `process_stream`.
# NOTE(review): indentation in the scraped source was mangled; the widget
# nesting below is reconstructed from the obvious layout — confirm against the
# original file.  A dead triple-quoted block (commented-out markdown about the
# svjack/chatglm3-few-shot Space) was removed.
# ---------------------------------------------------------------------------
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    gr.Markdown(
        """<h1><center> Wizardlm-13b on CPU in Rust π¦</center></h1>
This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) on 2 CPU cores.
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            # Sampling controls, collapsed by default.
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of top-k tokens β 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
    with gr.Row():
        submit = gr.Button("Submit")
    with gr.Row():
        with gr.Box():
            gr.Markdown("**Wizardlm-13b**")
            output_7b = gr.Markdown()
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )
    # Both the Submit button and pressing Enter in the textbox trigger
    # generation; `process_stream` is a generator, so output streams.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
# Embed a companion Space (ChatGLM3 few-shot emoji-to-prompt demo) below the
# main UI via an iframe.
with demo:
    gr.HTML(
        '''
<div style="justify-content: center; display: flex;">
<iframe
src="https://svjack-chatglm3-few-shot-demo.hf.space/?input_list_index=1"
frameborder="0"
width="1400"
height="768"
></iframe>
</div>
'''
    )

# Small queue with a single worker: the 13B model saturates both CPU cores,
# so requests are served one at a time.
demo.queue(max_size=4, concurrency_count=1).launch(debug=True)