wolfofbackstreet committed
Commit b1e40ab · 1 Parent(s): f17f776

Update app.py

Files changed (1)
app.py +33 -25
app.py CHANGED
@@ -3,12 +3,16 @@ from typing import get_type_hints, Callable, Any
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
+# --- Load Model and Tokenizer ---
 model_id = "unsloth/SmolLM2-135M-Instruct-GGUF"
 filename = "SmolLM2-135M-Instruct-Q8_0.gguf"
 
 tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
 model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
 
+# --- System Prompt Template ---
+SYSTEM_PROMPT = """You are a helpful AI assistant. Your job is to provide clear and concise responses based on the user's input.
+Keep your answers straightforward and avoid unnecessary information."""
 
 def parse_docstring(func):
     doc = inspect.getdoc(func)
@@ -22,6 +26,7 @@ def parse_docstring(func):
 
     return {"title": title, "description": description}
 
+
 def gradio_app_with_docs(func: Callable) -> Callable:
     sig = inspect.signature(func)
     type_hints = get_type_hints(func)
@@ -30,14 +35,12 @@ def gradio_app_with_docs(func: Callable) -> Callable:
     """
    A decorator that automatically builds and launches a Gradio interface
    based on function type hints.
-
    Args:
        func: A callable with type-hinted parameters and return type.
-
    Returns:
        The wrapped function with a `.launch()` method to start the app.
    """
-    # Infer Gradio components from type hints
+
    def _map_type(t: type) -> gr.Component:
        if t == str:
            return gr.Textbox(label="Input")
@@ -47,7 +50,7 @@ def gradio_app_with_docs(func: Callable) -> Callable:
             return gr.Number()
         elif t == bool:
             return gr.Checkbox()
-        elif hasattr(t, "__origin__") and t.__origin__ == list:  # Handle List[type]
+        elif hasattr(t, "__origin__") and t.__origin__ == list:
             elem_type = t.__args__[0]
             if elem_type == str:
                 return gr.Dropdown(choices=["Option1", "Option2"])
@@ -56,30 +59,24 @@ def gradio_app_with_docs(func: Callable) -> Callable:
         else:
             raise ValueError(f"Unsupported type: {t}")
 
-    # Extract function signature and type hints
-    sig = inspect.signature(func)
-    type_hints = get_type_hints(func)
-
-    # Map parameters to Gradio inputs
+    # Build inputs
     inputs = []
     for name, param in sig.parameters.items():
         if name == "self":
-            continue  # Skip self in class methods
+            continue
         param_type = type_hints.get(name, Any)
         component = _map_type(param_type)
         component.label = name.replace("_", " ").title()
         inputs.append(component)
 
-    # Map return type to Gradio output
+    # Build outputs
     return_type = type_hints.get("return", Any)
     outputs = _map_type(return_type)
 
     # Wrap function with Gradio interface
-    interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
-
     with gr.Blocks() as demo:
         gr.Markdown(f"## {metadata['title']}\n{metadata['description']}")
-        interface = gr.Interface(fn=func, inputs=inputs, outputs=outputs)
+        gr.Interface(fn=func, inputs=inputs, outputs=outputs)
 
     def wrapper(*args, **kwargs):
         return func(*args, **kwargs)
@@ -93,27 +90,38 @@ def generate_response(prompt: str) -> str:
     """
     Title: Super Tiny GGUF Model on CPU
     Description: A Simple app to test out the potentials of small GGUF LLM model.
-
     Args:
         prompt (str): A simple prompt.
-
     Returns:
         str: Simplified response.
     """
-    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")  # Move inputs to CPU
+    # Apply system prompt + user input
+    # full_prompt = f"<|begin_of_text|>System: {SYSTEM_PROMPT}\nUser: {prompt}\nAssistant:"
+
+    # inputs = tokenizer(full_prompt, return_tensors="pt").to("cpu")
+
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": prompt}
+    ]
+
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+        enable_thinking=True  # Switches between thinking and non-thinking modes. Default is True.
+    )
+
+    inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
     outputs = model.generate(
         **inputs,
-        max_new_tokens=50,
-        temperature=0.7,
-        top_p=0.9
+        max_new_tokens=100,
+        # temperature=0.7,
+        # top_p=0.9
     )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # # Example usage
-    # prompt = "Explain quantum computing in simple terms."
-    # response = generate_response(prompt)
-    # print(response)
-
 
 if __name__ == "__main__":
     generate_response.launch()
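Note on the main change: the raw-string prompt is replaced with tokenizer.apply_chat_template, which renders the messages list using the template shipped with the tokenizer (ChatML-style <|im_start|> markers for SmolLM2-Instruct) and appends the assistant header when add_generation_prompt=True. The enable_thinking flag is an extra template variable documented for Qwen3-style templates; templates that never reference it ignore it, so for SmolLM2 it is likely a harmless no-op. A minimal sketch for inspecting the rendered prompt, using the same model files as app.py (GGUF loading in transformers additionally requires the gguf package):

    from transformers import AutoTokenizer

    model_id = "unsloth/SmolLM2-135M-Instruct-GGUF"
    filename = "SmolLM2-135M-Instruct-Q8_0.gguf"
    tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)

    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "Explain quantum computing in simple terms."},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    print(text)  # the exact string that generate_response tokenizes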
 
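Note on the commented-out sampling arguments: model.generate defaults to greedy decoding (do_sample=False), so temperature and top_p had no effect in the old code anyway (recent transformers versions emit a warning about exactly this). Re-enabling them would also require do_sample=True; a sketch of what that would look like, not part of this commit:

    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,   # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
    )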
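Note: tokenizer.decode(outputs[0], skip_special_tokens=True) decodes the full sequence, so the string shown in the Gradio textbox still contains the rendered system and user turns before the reply. A possible follow-up, sketched here with a hypothetical helper (not part of this commit), is to decode only the newly generated tokens:

    def decode_new_tokens(tokenizer, inputs, outputs):
        # outputs[0] holds the prompt tokens followed by the generated tokens;
        # slice off the prompt so only the model's reply is returned.
        prompt_len = inputs["input_ids"].shape[1]
        return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)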