martinvityk committed on
Commit 3012fc9 · 1 Parent(s): e21be92
Files changed (1)
  1. app.py +22 -14
app.py CHANGED
@@ -15,29 +15,37 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
+    prompt_parts = []
+    if system_message:
+        # Prepend system message. Actual handling depends on model's fine-tuning.
+        # For many raw language models, this acts as an initial instruction or context.
+        prompt_parts.append(system_message)
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            prompt_parts.append(f"USER: {user_msg}")
+        if assistant_msg:
+            prompt_parts.append(f"ASSISTANT: {assistant_msg}")
 
-    messages.append({"role": "user", "content": message})
+    prompt_parts.append(f"USER: {message}")
+    prompt_parts.append("ASSISTANT:")  # Model will generate content starting from here
+
+    full_prompt = "\n".join(prompt_parts)
 
     response = ""
 
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
+    for stream_response in client.text_generation(
+        prompt=full_prompt,
+        max_new_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+        # Ensure we are not processing special tokens, if any
+        if not stream_response.token.special:
+            token = stream_response.token.text
+            response += token
+            yield response
 
 
 """