gemma-2-2b-it

Sleeping

App Files Files Community

BryanBradfo committed on Apr 5

Commit

ade8c92

1 Parent(s): 43339e8

Try adding icons

Browse files

Files changed (1) hide show

app.py +271 -173

app.py CHANGED Viewed

@@ -1,182 +1,280 @@
 import os
 import queue
-import gradio as gr
 from threading import Thread
-from typing import List, Tuple
-############################################################
-# EXAMPLE: Dummy function to simulate streaming generation #
-############################################################
-def stream_generate(message: str, history: List[Tuple[str, str]]):
-    """
-    A sample streaming generator. Replace with your actual
-    model code & logic. This uses a simple placeholder
-    approach that yields tokens from a static text for demo.
-    """
-    # Simulate conversation: user -> system -> user -> ...
-    # We'll just produce a contrived "thinking" response.
-    # In your real code, you'd do something like:
-    # for token in your_model_stream:
-    #     yield token
-    # or wrap your generation with a queue and catch queue.Empty
-    response_text = "This is an example answer to your query.\nFeel free to replace me with real model output!"
-    output = ""
-    for char in response_text:
-        output += char
-        yield output
-def respond(message: str, chat_history: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]], str]:
-    """
-    Called by the UI to get the next response from the chatbot.
-    Returns updated chat history and an empty string to clear the user input.
-    """
-    # Add the user’s message to conversation
-    chat_history.append((message, ""))
-    # We’ll stream the response from our generator
     outputs = []
     try:
-        for partial_text in stream_generate(message, chat_history):
-            outputs = chat_history[:-1] + [(message, partial_text)]
-            # Update the chatbot in real-time:
-            yield outputs, ""
     except queue.Empty:
-        # End of stream. Avoid traceback.
-        pass
-    # Once done, finalize the last user -> bot pair
-    final_answer = outputs[-1][1] if outputs else "No response."
-    chat_history[-1] = (message, final_answer)
-    yield chat_history, ""
-############################################################
-# GRADIO BLOCKS UI                                         #
-############################################################
-def launch_app():
-    # A custom CSS snippet to style backgrounds, suggestions, bubbles, etc.
-    custom_css = """
-    :root {
-        --gradient-start: #66AEEF; /* lighter top */
-        --gradient-end:   #F0F8FF; /* very light at bottom */
-    }
-    html, body {
-        margin: 0;
-        padding: 0;
-        background: linear-gradient(to bottom, var(--gradient-start), var(--gradient-end));
-        font-family: "Helvetica", sans-serif;
-        color: #333;
-    }
-    h1 {
-        text-align: center;
-        color: #fff;
-        margin-top: 1.2em;
-    }
-    /* Chatbot styling */
-    .gradio-container {
-        max-width: 800px;
-        margin: 0 auto;
-        padding-bottom: 2rem;
-    }
-    .chatbot {
-        background-color: #F8FDFF !important;
-    }
-    .chatbot .message {
-        border-radius: 8px;
-        margin: 6px;
-        padding: 10px;
-        line-height: 1.4;
-        position: relative;
-    }
-    .chatbot .user .chat-avatar {
-        background: url('user.png') center center no-repeat;
-        background-size: cover;
-    }
-    .chatbot .bot .chat-avatar {
-        background: url('gemma.png') center center no-repeat;
-        background-size: cover;
-    }
-    /* Example suggestions row */
-    .examples-row {
-        display: flex;
-        gap: 8px;
-        flex-wrap: wrap;
-        justify-content: center;
-        margin-bottom: 20px;
-    }
-    .examples-row button {
-        background-color: #EAF4FF;
-        border: 1px solid #66AEEF;
-        border-radius: 8px;
-        padding: 8px 14px;
-        color: #333;
-        cursor: pointer;
-    }
-    .examples-row button:hover {
-        background-color: #D8ECFE;
-    }
-    """
-    with gr.Blocks(css=custom_css) as demo:
-        gr.Markdown("<h1>Hi, I'm Gemma-2 (2B) </h1>")
-        with gr.Row():
-            # We store conversation in a state variable
-            chat_state = gr.State([])
-        # A row of clickable suggestions
-        with gr.Row(elem_id="examples-row", classes="examples-row"):
-            # For each suggestion, we have a button
-            examples = [
-                "Hello there! How are you doing?",
-                "Can you explain briefly what Python is?",
-                "Explain the plot of Cinderella in a sentence.",
-                "How many hours does it take a man to eat a Helicopter?",
-                "Write a 100-word article on 'Benefits of Open-Source in AI research'"
-            ]
-            # We'll create a small function for each button to set the textbox value
-            def set_example_text(example):
-                return example
-            suggestion_buttons = []
-            for ex in examples:
-                btn = gr.Button(ex)
-                btn.click(fn=set_example_text, inputs=[], outputs=[],
-                          _js=f"(x) => {{ document.querySelector('#user_input').value = '{ex}'; }}")
-                suggestion_buttons.append(btn)
-        # Our custom chatbot interface
-        chatbot = gr.Chatbot(
-            label="Gemma Chat",
-            elem_id="chat_window",
-            height=400,
-            avatar_images=("user.png","gemma.png"),
-            # optionally show_copy_button=True,
-        )
-        # A row with user input + submit
-        with gr.Row():
-            user_input = gr.Textbox(
-                label="Your message:",
-                placeholder="Type something...",
-                lines=2,
-                elem_id="user_input"
-            )
-            submit_btn = gr.Button("Send")
-        # Link the `respond` function to handle the conversation
-        submit_btn.click(
-            fn=respond,
-            inputs=[user_input, chat_state],
-            outputs=[chatbot, user_input],
-            queue=True
-        )
-    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
 if __name__ == "__main__":
-    launch_app()

 import os
 import queue
+from collections.abc import Iterator
 from threading import Thread
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+DESCRIPTION = """\
+<h1 style="text-align: center;">Hi, I'm Gemma 2 (2B) 👋</h1>
+This is a demo of <strong>google/gemma-2-2b-it</strong> fine-tuned for instruction following. For more details, please check <a href="https://huggingface.co/blog/gemma2" target="_blank">the post</a>.
+👉 Looking for a larger and more powerful version? Try the 27B version in <a href="https://huggingface.co/chat/models/google/gemma-2-27b-it" target="_blank">HuggingChat</a> and the 9B version in <a href="https://huggingface.co/spaces/huggingface-projects/gemma-2-9b-it" target="_blank">this Space</a>.
+"""
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+model_id = "google/gemma-2-2b-it"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+model.config.sliding_window = 4096
+model.eval()
+@spaces.GPU
+def generate(
+    message: str,
+    chat_history: list[dict],
+    max_new_tokens: int = 1024,
+    temperature: float = 0.6,
+    top_p: float = 0.9,
+    top_k: int = 50,
+    repetition_penalty: float = 1.2,
+) -> Iterator[str]:
+    """Generate text from the model and stream tokens back to the UI."""
+    conversation = chat_history.copy()
+    conversation.append({"role": "user", "content": message})
+    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    input_ids = input_ids.to(model.device)
+    # Stream out tokens
+    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        {"input_ids": input_ids},
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty,
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
     outputs = []
     try:
+        for text in streamer:
+            outputs.append(text)
+            yield "".join(outputs)
     except queue.Empty:
+        # The streamer timed out waiting for the next chunk (timeout=20.0); stop quietly to avoid a traceback
+        return
+# Below is the custom Gemini-themed CSS,
+# extended to add user/bot icons and style the built-in "examples" as clickable bubbles.
+gemini_css = """
+:root {
+  --gradient-start: #66AEEF; /* lighter top */
+  --gradient-end:   #F0F8FF; /* very light at bottom */
+}
+/* Overall page & container background gradient */
+html, body, .gradio-container {
+  margin: 0;
+  padding: 0;
+  background: linear-gradient(to bottom, var(--gradient-start), var(--gradient-end));
+  font-family: "Helvetica", sans-serif;
+  color: #333; /* dark gray for better contrast */
+}
+/* Make anchor (link) text a clearly visible dark blue */
+a, a:visited {
+  color: #02497A !important;
+  text-decoration: underline;
+}
+/* Center the top headings in the description */
+.gradio-container h1 {
+  margin-top: 0.8em;
+  margin-bottom: 0.5em;
+  text-align: center;
+  color: #fff; /* White text on top gradient for pop */
+}
+/* Chat container background: a very light blue so it's distinct from the outer gradient */
+.chat-interface, .chat-interface .wrap {
+  background-color: #F8FDFF !important;
+}
+/* Remove harsh frames around chat messages */
+.chat-message {
+  border: none !important;
+  position: relative;
+}
+/* Icons for user and bot messages */
+.chat-message.user::before {
+  content: '';
+  display: inline-block;
+  background: url('user.png') center center no-repeat;
+  background-size: cover;
+  width: 24px;
+  height: 24px;
+  margin-right: 8px;
+  vertical-align: middle;
+}
+.chat-message.bot::before {
+  content: '';
+  display: inline-block;
+  background: url('gemma.png') center center no-repeat;
+  background-size: cover;
+  width: 24px;
+  height: 24px;
+  margin-right: 8px;
+  vertical-align: middle;
+}
+/* User bubble: a deeper blue with white text */
+.chat-message.user {
+  background-color: #0284C7 !important;
+  color: #FFFFFF !important;
+  border-radius: 8px;
+  padding: 8px 12px;
+  margin: 6px 0;
+}
+/* Bot bubble: very light blue with darker text */
+.chat-message.bot {
+  background-color: #EFF8FF !important;
+  color: #333 !important;
+  border-radius: 8px;
+  padding: 8px 12px;
+  margin: 6px 0;
+}
+/* Chat input area */
+.chat-input textarea {
+  background-color: #FFFFFF;
+  color: #333;
+  border: 1px solid #66AEEF;
+  border-radius: 6px;
+  padding: 8px;
+}
+/* Sliders & other controls */
+form.sliders input[type="range"] {
+  accent-color: #66AEEF;
+}
+form.sliders label {
+  color: #333;
+}
+.gradio-button, .chat-send-btn {
+  background-color: #0284C7 !important;
+  color: #FFFFFF !important;
+  border-radius: 5px;
+  border: none;
+  cursor: pointer;
+}
+.gradio-button:hover, .chat-send-btn:hover {
+  background-color: #026FA6 !important;
+}
+/* Style the example "pill" buttons (the built-in ChatInterface examples) */
+.gr-examples {
+  display: flex !important;
+  flex-wrap: wrap;
+  gap: 16px;
+  justify-content: center;
+  margin-bottom: 1em !important;
+}
+.gr-examples button.example {
+  background-color: #EFF8FF !important;
+  border: 1px solid #66AEEF !important;
+  border-radius: 8px !important;
+  color: #333 !important;
+  padding: 10px 16px !important;
+  cursor: pointer !important;
+  transition: background-color 0.2s !important;
+}
+.gr-examples button.example:hover {
+  background-color: #E0F2FF !important;
+}
+/* Additional spacing / small tweaks */
+#duplicate-button {
+  margin: auto;
+  background: #1565c0;
+  border-radius: 100vh;
+  color: #fff;
+}
+"""
+demo = gr.ChatInterface(
+    fn=generate,
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.6,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
+    stop_btn=None,
+    examples=[
+        ["Hello there! How are you doing?"],
+        ["Can you explain briefly to me what is the Python programming language?"],
+        ["Explain the plot of Cinderella in a sentence."],
+        ["How many hours does it take a man to eat a Helicopter?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
+    ],
+    cache_examples=False,
+    type="messages",
+    description=DESCRIPTION,
+    css=gemini_css,
+    fill_height=True,
+)
 if __name__ == "__main__":
+    # Enable Gradio's request queue (max_size=20), then launch the app
+    demo.queue(max_size=20).launch()