hsuwill000 committed on
Commit
f906bd4
·
verified ·
1 Parent(s): c9ac48b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -13,36 +13,36 @@ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
13
 
14
  def respond(message):
15
  try:
16
- # Record the start time
17
  start_time = time.time()
18
 
19
- # 修改 prompt:在對話中加入指令,要求只輸出最終答案,不顯示推理過程
20
- instruction = "請只輸出最終答案,不要展示任何中間推理過程。"
21
- input_text = f"User: {message}\nAssistant: {instruction}\nAssistant:"
22
-
23
- # Generate response
 
 
 
24
  response = pipe(
25
- input_text,
26
- max_length=1024,
27
  truncation=True,
28
  num_return_sequences=1,
29
- temperature=0.2, # 控制生成多样性
30
- top_p=0.1, # 控制生成质量
 
31
  )
32
  generated_text = response[0]['generated_text'].strip()
33
-
34
- # 提取模型輸出中最後一次出現 "Assistant:" 之後的部分(假設模型按照格式輸出)
35
- # 若模型輸出格式不同,可根據實際情況進行調整
36
  if "Assistant:" in generated_text:
37
  reply = generated_text.split("Assistant:")[-1].strip()
38
  else:
39
  reply = generated_text
40
 
41
- # Calculate inference time
42
  inference_time = time.time() - start_time
43
  print(f"Inference time: {inference_time:.4f} seconds")
44
-
45
- # Return as a tuple (user message, bot reply)
46
  return [(message, reply)]
47
 
48
  except Exception as e:
 
13
 
14
  def respond(message):
15
  try:
 
16
  start_time = time.time()
17
 
18
+ # 強化 Prompt 讓模型輸出更合理
19
+ instruction = (
20
+ "請用簡單、準確的語言回答問題,避免冗長和重複內容。\n"
21
+ "User: " + message + "\n"
22
+ "Assistant: "
23
+ )
24
+
25
+ # Generate response with improved settings
26
  response = pipe(
27
+ instruction,
28
+ max_length=200, # 限制最大輸出長度,防止無限重複
29
  truncation=True,
30
  num_return_sequences=1,
31
+ temperature=0.3, # 保持一定創意但減少胡言亂語
32
+ top_p=0.8, # 增加答案多樣性,減少無意義重複
33
+ repetition_penalty=1.5, # 降低重複字詞的機率
34
  )
35
  generated_text = response[0]['generated_text'].strip()
36
+
37
+ # 提取 "Assistant:" 之後的部分
 
38
  if "Assistant:" in generated_text:
39
  reply = generated_text.split("Assistant:")[-1].strip()
40
  else:
41
  reply = generated_text
42
 
 
43
  inference_time = time.time() - start_time
44
  print(f"Inference time: {inference_time:.4f} seconds")
45
+
 
46
  return [(message, reply)]
47
 
48
  except Exception as e: