import gradio as gr
from llm_rs import AutoModel, SessionConfig, GenerationConfig, Precision, KnownModels
from huggingface_hub import space_info

repo_name = "svjack/ggml"
file_name = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"

examples = [
    "How to promote Chinese traditional culture?",
    "Explain the meaning of the word Ottoman",
    "Explain the meaning of 👨",
    "Use the following emojis to generate a short description of a scene; the emojis are 👨👩🔥❄️",
    "Use the following emojis to generate a short description of a scene; the emojis are 🌲🔥👨💦",
]

# Run inference on 2 CPU threads with a small batch size (CPU-only Space).
session_config = SessionConfig(threads=2, batch_size=2)
model = AutoModel.from_pretrained(
    repo_name,
    model_file=file_name,
    session_config=session_config,
    verbose=True,
    model_type=KnownModels.Llama,
)

'''
# Local alternative: load the same GGML file from disk instead of the Hub.
model_path = "/Users/svjack/Library/Application Support/nomic.ai/GPT4All/wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"
model = AutoModel.from_pretrained(model_path, model_type=KnownModels.Llama)
'''


def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    # Alpaca-style instruction template expected by WizardLM.
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
Answer:"""
    generation_config = GenerationConfig(
        seed=seed,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    response = ""
    streamer = model.stream(prompt=prompt, generation_config=generation_config)
    # Yield the accumulated text so Gradio can render the output incrementally.
    for new_text in streamer:
        response += new_text
        yield response


with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    gr.Markdown(
        """

## WizardLM-13B on CPU in Rust 🦀

This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) on 2 CPU cores.
"""
    )

    '''
    markdown_exp_size = "##"
    lora_repo = "svjack/chatglm3-few-shot"
    lora_repo_link = "svjack/chatglm3-few-shot/?input_list_index=1"
    emoji_info = space_info(lora_repo).__dict__["cardData"]["emoji"]
    space_cnt = 1
    task_name = "[---Emojis to Image Prompt---]"
    gr.Markdown(
        value=f"{markdown_exp_size} {task_name} few shot prompt in ChatGLM3 Few Shot space repo (click submit to activate) : [{lora_repo_link}](https://huggingface.co/spaces/{lora_repo_link}) {emoji_info}",
        visible=True,
        elem_id="selected_space",
    )
    '''

    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of the top-k tokens. Set to 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
            with gr.Row():
                submit = gr.Button("Submit")
            with gr.Row():
                with gr.Box():
                    gr.Markdown("**WizardLM-13B**")
                    output_7b = gr.Markdown()

    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )

    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )

with demo:
    gr.HTML(
        '''
        '''
    )

demo.queue(max_size=4, concurrency_count=1).launch(debug=True)
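
# Optional: a minimal, commented-out smoke test for running the model without
# the Gradio UI. This is a sketch that reuses only APIs already used above
# (model.stream and GenerationConfig); the sample instruction and sampling
# values are illustrative assumptions, not part of the original app.
'''
test_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Explain the meaning of the word Ottoman
### Response:
Answer:"""
for token in model.stream(
    prompt=test_prompt,
    generation_config=GenerationConfig(
        seed=42, temperature=0.8, top_p=0.95, top_k=40, max_new_tokens=64
    ),
):
    print(token, end="", flush=True)
'''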