Update app.py
app.py CHANGED
@@ -53,6 +53,7 @@ if torch.cuda.is_available():
 def generate(
     message: str,
     chat_history: list[dict],
+    chat_template: str,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
@@ -61,7 +62,7 @@ def generate(
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
 
-    input_ids = tokenizer.apply_chat_template(conversation, chat_template=CHAT_TEMPLATE, return_tensors="pt")
+    input_ids = tokenizer.apply_chat_template(conversation, chat_template=chat_template, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
@@ -91,6 +92,7 @@ def generate(
 demo = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
+        gr.Textbox(placeholder=CHAT_TEMPLATE, label="Chat template"),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
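For context, here is a minimal sketch of the pattern this commit implements, assuming the surrounding app follows the usual Gradio/Transformers layout (the `generate`, `tokenizer`, `CHAT_TEMPLATE`, and `MAX_INPUT_TOKEN_LENGTH` names come from the diff; the model choice, template contents, and fallback logic are illustrative assumptions). `gr.ChatInterface` passes each component in `additional_inputs`, in order, as an extra positional argument to `fn` after the message and history, which is why the new `gr.Textbox` is listed first in `additional_inputs` and `chat_template: str` is declared immediately after `chat_history`. The template string is then forwarded to `tokenizer.apply_chat_template`, whose `chat_template` keyword accepts a Jinja template to use in place of the tokenizer's built-in one.

```python
# Minimal sketch of the commit's pattern, not the full app: a user-supplied
# Jinja chat template flows from a gr.Textbox, through gr.ChatInterface's
# additional_inputs, into tokenizer.apply_chat_template.
from collections.abc import Iterator

import gradio as gr
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in model (assumption)

# Illustrative default template; a real app would ship the model's own.
CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{{ message['role'] }}: {{ message['content'] }}\n"
    "{% endfor %}"
)

MAX_INPUT_TOKEN_LENGTH = 4096


def generate(
    message: str,
    chat_history: list[dict],
    chat_template: str,  # filled from the gr.Textbox below
) -> Iterator[str]:
    conversation = [*chat_history, {"role": "user", "content": message}]
    # Passing chat_template overrides the template bundled with the tokenizer.
    # The `or CHAT_TEMPLATE` fallback is this sketch's addition: an empty
    # textbox submits "", which would otherwise be used as the template.
    input_ids = tokenizer.apply_chat_template(
        conversation,
        chat_template=chat_template or CHAT_TEMPLATE,
        return_tensors="pt",
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    # Real token-by-token generation elided; report the prompt size instead.
    yield f"Prompt is {input_ids.shape[1]} tokens long."


demo = gr.ChatInterface(
    fn=generate,
    type="messages",  # history arrives as a list of role/content dicts
    additional_inputs=[
        # Extra inputs are passed to fn positionally, after message and
        # history, so this textbox maps onto the chat_template parameter.
        gr.Textbox(placeholder=CHAT_TEMPLATE, label="Chat template"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```

One detail worth noting: `placeholder` only renders hint text and is never submitted as the value, so an untouched textbox yields an empty string; the sketch falls back to `CHAT_TEMPLATE` in that case so `apply_chat_template` always receives a usable template.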
|