Spaces:

EduuGomes
/

CachoeiraBot

Running

App Files Files Community

EduuGomes committed on May 31

Commit

d725876

verified ·

1 Parent(s): 0858032

Update app.py

Browse files

Files changed (1) hide show

app.py +177 -26

app.py CHANGED Viewed

@@ -1,26 +1,177 @@
-# Install transformers from source - only needed for versions <= v4.34
-# pip install git+https://github.com/huggingface/transformers.git
-# pip install accelerate
-import torch
-from transformers import pipeline
-pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
-# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
-messages = [
-    {
-        "role": "system",
-        "content": "You are a friendly chatbot who always responds in the style of a pirate",
-    },
-    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
-]
-prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
-print(outputs[0]["generated_text"])
-# <|system|>
-# You are a friendly chatbot who always responds in the style of a pirate.</s>
-# <|user|>
-# How many helicopters can a human eat in one sitting?</s>
-# <|assistant|>
-# ...

+import gradio as gr
+from openai import OpenAI
+import os
+from datetime import datetime
+# App title and description
+APP_TITLE = "NO GPU, Multi LLMs Uses"
+APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"
+# Load environment variables
+ACCESS_TOKEN = os.getenv("HF_TOKEN")
+client = OpenAI(
+    base_url="https://api-inference.huggingface.co/v1/",
+    api_key=ACCESS_TOKEN,
+)
+# Model categories for better organization
+MODEL_CATEGORIES = {
+    "Qwen": [
+        "Qwen/Qwen2.5-72B-Instruct",
+        "Qwen/Qwen2.5-3B-Instruct",
+        "Qwen/Qwen2.5-0.5B-Instruct",
+        "Qwen/Qwen2.5-Coder-32B-Instruct",
+    ],
+    "Meta LLaMa": [
+        "meta-llama/Llama-3.3-70B-Instruct",
+        "meta-llama/Llama-3.1-70B-Instruct",
+        "meta-llama/Llama-3.0-70B-Instruct",
+        "meta-llama/Llama-3.2-3B-Instruct",
+        "meta-llama/Llama-3.2-1B-Instruct",
+        "meta-llama/Llama-3.1-8B-Instruct",
+    ],
+    "Mistral": [
+        "mistralai/Mistral-Nemo-Instruct-2407",
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+        "mistralai/Mistral-7B-Instruct-v0.3",
+        "mistralai/Mistral-7B-Instruct-v0.2",
+    ],
+    "Microsoft Phi": [
+        "microsoft/Phi-3.5-mini-instruct",
+        "microsoft/Phi-3-mini-128k-instruct",
+        "microsoft/Phi-3-mini-4k-instruct",
+    ],
+    "Other Models": [
+        "NousResearch/Hermes-3-Llama-3.1-8B",
+        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        "HuggingFaceH4/zephyr-7b-beta",
+        "HuggingFaceTB/SmolLM2-360M-Instruct",
+        "tiiuae/falcon-7b-instruct",
+        "01-ai/Yi-1.5-34B-Chat",
+    ]
+}
+# Flatten the model list
+ALL_MODELS = [m for models in MODEL_CATEGORIES.values() for m in models]
+def get_model_info(model_name):
+    parts = model_name.split('/')
+    if len(parts) != 2:
+        return f"**Model:** {model_name}\n**Format:** Unknown"
+    org, model = parts
+    import re
+    size_match = re.search(r'(\d+\.?\d*)B', model)
+    size = size_match.group(1) + "B" if size_match else "Unknown"
+    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
+def respond(
+    message,
+    history,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    frequency_penalty,
+    seed,
+    selected_model
+):
+    # Prepare messages
+    if seed == -1:
+        seed = None
+    messages = [{"role": "system", "content": system_message}]
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})
+    model_to_use = selected_model or ALL_MODELS[0]
+    new_history = list(history) + [(message, "")]
+    current_response = ""
+    try:
+        for chunk in client.chat.completions.create(
+            model=model_to_use,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        ):
+            delta = chunk.choices[0].delta.content
+            if delta:
+                current_response += delta
+                new_history[-1] = (message, current_response)
+                yield new_history
+    except Exception as e:
+        err = f"Error: {e}"
+        new_history[-1] = (message, err)
+        yield new_history
+with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
+    gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Model selection via Dropdown
+            selected_model = gr.Dropdown(
+                choices=ALL_MODELS,
+                value=ALL_MODELS[0],
+                label="Select Model"
+            )
+            model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))
+            def update_info(model_name):
+                return get_model_info(model_name)
+            selected_model.change(
+                fn=update_info,
+                inputs=[selected_model],
+                outputs=[model_info]
+            )
+            # Conversation settings
+            system_message = gr.Textbox(
+                value="You are a helpful assistant.",
+                label="System Prompt",
+                lines=2
+            )
+            max_tokens = gr.Slider(1, 4096, value=512, label="Max New Tokens")
+            temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
+            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
+            freq_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
+            seed = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 random)")
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot()
+            msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
+            send_btn = gr.Button("Send")
+            send_btn.click(
+                fn=respond,
+                inputs=[
+                    msg, chatbot, system_message,
+                    max_tokens, temperature, top_p,
+                    freq_penalty, seed, selected_model
+                ],
+                outputs=[chatbot],
+                queue=True
+            )
+            msg.submit(
+                fn=respond,
+                inputs=[
+                    msg, chatbot, system_message,
+                    max_tokens, temperature, top_p,
+                    freq_penalty, seed, selected_model
+                ],
+                outputs=[chatbot],
+                queue=True
+            )
+    demo.launch()