import inspect
from typing import get_type_hints, Callable, Any

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Load Model and Tokenizer ---
model_id = "unsloth/SmolLM2-135M-Instruct-GGUF"
filename = "SmolLM2-135M-Instruct-Q8_0.gguf"

tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
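# Note: loading a GGUF file through transformers requires the `gguf` package
# (`pip install gguf`); the quantized weights are dequantized back into regular
# torch tensors at load time, so this trades disk footprint for RAM.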

# --- System Prompt Template ---
SYSTEM_PROMPT = """You are a helpful AI assistant. Your job is to provide clear and concise responses based on the user's input.
Keep your answers straightforward and avoid unnecessary information."""

def parse_docstring(func):
    doc = inspect.getdoc(func)
    if not doc:
        return {"title": "Untitled", "description": ""}
    lines = doc.splitlines()
    title = next((line.replace("Title:", "").strip() for line in lines if line.startswith("Title:")), "Untitled")
    description = "\n".join(line.strip() for line in lines if line.startswith("Description:"))
    description = description.replace("Description:", "").strip()
    return {"title": title, "description": description}

def gradio_app_with_docs(func: Callable) -> Callable:
    """
    A decorator that automatically builds and launches a Gradio interface
    based on function type hints.

    Args:
        func: A callable with type-hinted parameters and return type.

    Returns:
        The wrapped function with a `.launch()` method to start the app.
    """
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)
    metadata = parse_docstring(func)
    def _map_type(t: type) -> gr.Component:
        if t == str:
            return gr.Textbox(label="Input")
        elif t == int:
            return gr.Number(precision=0)
        elif t == float:
            return gr.Number()
        elif t == bool:
            return gr.Checkbox()
        elif hasattr(t, "__origin__") and t.__origin__ == list:
            elem_type = t.__args__[0]
            if elem_type == str:
                return gr.Dropdown(choices=["Option1", "Option2"])
            else:
                raise ValueError(f"Unsupported list element type: {elem_type}")
        else:
            raise ValueError(f"Unsupported type: {t}")

    # Build inputs
    inputs = []
    for name, param in sig.parameters.items():
        if name == "self":
            continue
        param_type = type_hints.get(name, Any)
        component = _map_type(param_type)
        component.label = name.replace("_", " ").title()
        inputs.append(component)

    # Build outputs
    return_type = type_hints.get("return", Any)
    outputs = _map_type(return_type)

    # Wrap the function with a Gradio interface, embedded in a Blocks layout;
    # .render() attaches the Interface to the enclosing Blocks context
    with gr.Blocks() as demo:
        gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
        gr.Interface(fn=func, inputs=inputs, outputs=outputs).render()

    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Expose the Blocks app through a `.launch()` attribute on the wrapper
    wrapper.launch = lambda: demo.launch()
    return wrapper
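
# Usage sketch (hypothetical function, for illustration only):
#
#   @gradio_app_with_docs
#   def echo(message: str) -> str:
#       """
#       Title: Echo
#       Description: Returns the message unchanged.
#       """
#       return message
#
#   echo.launch()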

@gradio_app_with_docs
def generate_response(prompt: str) -> str:
    """
    Title: Super Tiny GGUF Model on CPU
    Description: A simple app to test out the potential of a small GGUF LLM.

    Args:
        prompt (str): A simple prompt.

    Returns:
        str: Simplified response.
    """
    # Apply system prompt + user input via the model's chat template
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True  # append the assistant turn header so the model completes it
    )
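    # For SmolLM2 the rendered prompt is ChatML-style, roughly:
    #   <|im_start|>system\n...<|im_end|>\n<|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n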
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        # To sample instead of greedy decoding, also set do_sample=True:
        # do_sample=True, temperature=0.7, top_p=0.9
    )
    # Decode only the newly generated tokens; outputs[0] also contains the prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

if __name__ == "__main__":
    generate_response.launch()