merterbak committed
Commit ba54a13 · verified · 1 Parent(s): dc6c5d6

Update app.py

Files changed (1):
  1. app.py +21 -24
app.py CHANGED
@@ -13,7 +13,6 @@ pipe = pipeline(
     torch_dtype="auto",
     device_map="auto",
 )
-
 def format_conversation_history(chat_history):
     messages = []
     for item in chat_history:
@@ -23,14 +22,13 @@ def format_conversation_history(chat_history):
         content = content[0]["text"] if content and "text" in content[0] else str(content)
         messages.append({"role": role, "content": content})
     return messages
-
-@spaces.GPU(duration=60)
+
+@spaces.GPU()
 def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
     new_message = {"role": "user", "content": input_data}
     system_message = [{"role": "system", "content": system_prompt}] if system_prompt else []
     processed_history = format_conversation_history(chat_history)
     messages = system_message + processed_history + [new_message]
-
     streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {
         "max_new_tokens": max_new_tokens,
@@ -43,24 +41,25 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
     }
     thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
     thread.start()
-    #streaming try #1
-    buffer = ""
-    full_response = ""
+    # simple formatting without harmony because of no tool usage etc. and experienced hf space problems with harmony
+    thinking = ""
+    final = ""
+    started_final = False
     for chunk in streamer:
-        buffer += chunk
-        parts = re.split(r'(\s+)', buffer)
-        if re.match(r'\s+', parts[-1]) is not None:
-            to_append = ''.join(parts)
-            buffer = ""
+        if not started_final:
+            if "assistantfinal" in chunk.lower():
+                split_parts = re.split(r'assistantfinal', chunk, maxsplit=1)
+                thinking += split_parts[0]
+                final += split_parts[1]
+                started_final = True
+            else:
+                thinking += chunk
         else:
-            to_append = ''.join(parts[:-1])
-            buffer = parts[-1]
-        if to_append:
-            full_response += to_append
-            yield full_response
-    if buffer:
-        full_response += buffer
-        yield full_response
+            final += chunk
+        clean_thinking = re.sub(r'^analysis\s*', '', thinking).strip()
+        clean_final = final.strip()
+        formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
+        yield formatted
 
 demo = gr.ChatInterface(
     fn=generate_response,
@@ -84,10 +83,8 @@ demo = gr.ChatInterface(
     ],
     cache_examples=False,
     type="messages",
-    description="""
-    # gpt-oss-20b
-    Wait couple of seconds initially. You can adjust reasoning level in the system prompt like "Reasoning: high.
-    """,
+    description="""# gpt-oss-20b
+    Give it a couple of seconds to start. You can adjust reasoning level in the system prompt like "Reasoning: high. Click to view thinking process. Default is on""",
     fill_height=True,
     textbox=gr.Textbox(
         label="Query Input",
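What the new loop is doing, in isolation: without the harmony formatting library, a gpt-oss response decodes to a raw `analysis ...` reasoning channel followed by an `assistantfinal ...` answer channel, and the committed code routes each streamed chunk into `thinking` or `final` depending on whether the marker has been seen yet. A minimal, runnable sketch of that split (the `split_channels` helper and the sample chunks are illustrative, not part of the commit):

```python
import re

def split_channels(chunks):
    # Route streamed text into the reasoning channel until the
    # "assistantfinal" marker appears, then into the answer channel.
    thinking, final, started_final = "", "", False
    for chunk in chunks:
        if not started_final and "assistantfinal" in chunk:
            before, after = re.split(r"assistantfinal", chunk, maxsplit=1)
            thinking += before
            final += after
            started_final = True
        elif started_final:
            final += chunk
        else:
            thinking += chunk
    # Strip the leading "analysis" channel tag, as the committed code does.
    return re.sub(r"^analysis\s*", "", thinking).strip(), final.strip()

# Hypothetical chunks standing in for what TextIteratorStreamer yields:
chunks = ["analysis The user wants ", "a greeting.", "assistantfinal Hello ", "there!"]
thinking, final = split_channels(chunks)
print(thinking)  # The user wants a greeting.
print(final)     # Hello there!
```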
 
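Two details of the committed loop are worth flagging. The marker test is case-insensitive (`chunk.lower()`) but the `re.split` that follows is case-sensitive, so a differently cased marker would pass the check and then leave `split_parts` with a single element; matching both the same way (for example with `flags=re.IGNORECASE`) would close that gap. The detection also assumes `assistantfinal` arrives within a single chunk; `TextIteratorStreamer` buffers decoded text up to word boundaries, which in practice tends to deliver the marker whole, but a small rolling buffer would make the split robust to arbitrary chunking. On the other changes: yielding the full `formatted` string on every chunk is the usual generator pattern for `gr.ChatInterface`, where each yield replaces the in-progress message rather than appending to it, and dropping `duration=60` from `@spaces.GPU` falls back to ZeroGPU's default allotment, which, if it is still 60 seconds, makes that change behavior-neutral.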