Spaces:

artificialguybr
/

OPENHERMES-V2.5-DEMO

Runtime error

App Files Community

artificialguybr committed on Oct 19, 2023

Commit

dcf6e59

1 Parent(s): 39d4f12

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -123

app.py CHANGED Viewed

@@ -1,134 +1,72 @@
-import os
 import gradio as gr
-import mdtex2html
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, MistralConfig
-# Initialize model and tokenizer
 model_name_or_path = "teknium/OpenHermes-2-Mistral-7B"
-model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
-                                             device_map="auto",
-                                             trust_remote_code=False,
-                                             load_in_8bit=True,
-                                             revision="main")
-tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
-config = MistralConfig()
-# Text parsing function
-def _parse_text(text):
-    lines = text.split("\n")
-    lines = [line for line in lines if line != ""]
-    count = 0
-    for i, line in enumerate(lines):
-        if "```" in line:
-            count += 1
-            items = line.split("`")
-            if count % 2 == 1:
-                lines[i] = f'<pre><code class="language-{items[-1]}">'
-            else:
-                lines[i] = f"<br></code></pre>"
-        else:
-            if i > 0:
-                if count % 2 == 1:
-                    line = line.replace("`", r"\`")
-                    line = line.replace("<", "&lt;")
-                    line = line.replace(">", "&gt;")
-                    line = line.replace(" ", "&nbsp;")
-                    line = line.replace("*", "&ast;")
-                    line = line.replace("_", "&lowbar;")
-                    line = line.replace("-", "&#45;")
-                    line = line.replace(".", "&#46;")
-                    line = line.replace("!", "&#33;")
-                    line = line.replace("(", "&#40;")
-                    line = line.replace(")", "&#41;")
-                    line = line.replace("$", "&#36;")
-                lines[i] = "<br>" + line
-    text = "".join(lines)
-    return text
-# Demo launching function
-def _launch_demo(args, model, tokenizer, config):
-    def predict(_query, _chatbot, _task_history):
-        print(f"User: {_parse_text(_query)}")
-        _chatbot.append((_parse_text(_query), ""))
-        # Prepare the chat template
-        messages = [
-            {"role": "system", "content": "You are Hermes 2."},
-            {"role": "user", "content": _query}
-        ]
-        # Tokenize using the chat template
-        gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
-        # Debug: Print the type and value of gen_input
-        print("Debug: ", type(gen_input), gen_input)
-        # If gen_input is a dictionary, move it to CUDA
-        if isinstance(gen_input, dict):
-            gen_input = {k: v.to('cuda') for k, v in gen_input.items()}
-        else:
-            gen_input = gen_input.to('cuda')
-        # Generate a response using the model
-        generated_ids = model.generate(**gen_input, max_length=300) if isinstance(gen_input, dict) else model.generate(gen_input, max_length=300)
-        # Decode the generated IDs to text
-        full_response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-        # Update the chatbot state
-        _chatbot[-1] = (_parse_text(_query), _parse_text(full_response))
-        yield _chatbot
-        print(f"History: {_task_history}")
-        _task_history.append((_query, full_response))
-        print(f"OpenHermes: {_parse_text(full_response)}")
-    def regenerate(_chatbot, _task_history):
-        if not _task_history:
-            yield _chatbot
-            return
-        item = _task_history.pop(-1)
-        _chatbot.pop(-1)
-        yield from predict(item[0], _chatbot, _task_history)
-    def reset_user_input():
-        return gr.update(value="")
-    def reset_state(_chatbot, _task_history):
-        _task_history.clear()
-        _chatbot.clear()
-        import gc
-        gc.collect()
-        torch.cuda.empty_cache()
-        return _chatbot
-    with gr.Blocks() as demo:
-        gr.Markdown("""
-    ## OpenHermes V2 - Mistral 7B: Mistral 7B Based by Teknium!
-    **Space created by [@artificialguybr](https://twitter.com/artificialguybr). Model by [@Teknium1](https://twitter.com/Teknium1).Thanks HF for GPU!**
-    **OpenHermes V2 Mistral 7B was trained on 900,000 instructions, and surpasses all previous versions of Hermes 13B and below, and matches 70B on some benchmarks!**
-    """)
-        chatbot = gr.Chatbot(label='OpenHermes-V2', elem_classes="control-height", queue=True)
-        query = gr.Textbox(lines=2, label='Input')
-        task_history = gr.State([])
         with gr.Row():
-            submit_btn = gr.Button("🚀 Submit")
-            empty_btn = gr.Button("🧹 Clear History")
-            regen_btn = gr.Button("🤔️ Regenerate")
-        submit_btn.click(predict, [query, chatbot, task_history], [chatbot], show_progress=True, queue=True)  # Enable queue
-        submit_btn.click(reset_user_input, [], [query], queue=False) #No queue for resetting
-        empty_btn.click(reset_state, [chatbot, task_history], outputs=[chatbot], show_progress=True, queue=False) #No queue for clearing
-        regen_btn.click(regenerate, [chatbot, task_history], [chatbot], show_progress=True, queue=True)  # Enable queue
-    demo.queue(max_size=20)
-    demo.launch()
-# Main execution
-if __name__ == "__main__":
-    _launch_demo(None, model, tokenizer, config)

 import gradio as gr
+import re
+from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name_or_path = "teknium/OpenHermes-2-Mistral-7B"
+model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
+def make_prediction(prompt, max_tokens=None, temperature=None, top_p=None, top_k=None, repetition_penalty=None):
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    out = model.generate(input_ids, max_length=max_tokens, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty)
+    text = tokenizer.decode(out[0], skip_special_tokens=True)
+    yield text
+def clear_chat(chat_history_state, chat_message):
+    chat_history_state = []
+    chat_message = ''
+    return chat_history_state, chat_message
+def user(message, history):
+    history = history or []
+    history.append([message, ""])
+    return "", history
+def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
+    history = history or []
+    if system_message.strip():
+        messages = " "+"system\n" + system_message.strip() + "\n" + "\n".join(["\n".join([" "+"user\n"+item[0]+"", " assistant\n"+item[1]+""]) for item in history])
+    else:
+        messages = " "+"system\n" + BASE_SYSTEM_MESSAGE + "\n" + "\n".join(["\n".join([" "+"user\n"+item[0]+"", " assistant\n"+item[1]+""]) for item in history])
+    messages = messages.rstrip()
+    messages = messages.rstrip()
+    if temperature == 0:
+        top_p = 1
+        top_k = -1
+    prediction = make_prediction(messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty)
+    for tokens in prediction:
+        tokens = re.findall(r'(.*?)(\s|$)', tokens)
+        for subtoken in tokens:
+            subtoken = "".join(subtoken)
+            answer = subtoken
+            history[-1][1] += answer
+            yield history, history, ""
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown(f"""## Mistral-7B-OpenOrca Playground Space!""")
+    with gr.Row():
+        chatbot = gr.Chatbot(elem_id="chatbot")
+    with gr.Row():
+        message = gr.Textbox(label="What do you want to chat about?", placeholder="Ask me anything.", lines=3)
+    with gr.Row():
+        submit = gr.Button(value="Send message", variant="secondary")
+        clear = gr.Button(value="New topic", variant="secondary")
+    with gr.Accordion("Show Model Parameters", open=False):
         with gr.Row():
+            with gr.Column():
+                max_tokens = gr.Slider(20, 2500, step=20, value=500)
+                temperature = gr.Slider(0.0, 2.0, step=0.1, value=0.4)
+                top_p = gr.Slider(0.0, 1.0, step=0.05, value=0.95)
+                top_k = gr.Slider(1, 100, step=1, value=40)
+                repetition_penalty = gr.Slider(1.0, 2.0, step=0.1, value=1.1)
+        system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, lines=5)
+    chat_history_state = gr.State()
+    clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message], queue=False)
+    clear.click(lambda: None, None, chatbot, queue=False)
+    submit_click_event = submit.click(fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True).then(fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repetition_penalty], outputs=[chatbot, chat_history_state, message], queue=True)
+    demo.queue(max_size=128, concurrency_count=48)