Macropodus committed on
Commit
1435c91
·
verified ·
1 Parent(s): c3aa1f7

fix traditional

Browse files
Files changed (1) hide show
  1. app.py +21 -12
app.py CHANGED
@@ -6,16 +6,19 @@
6
 
7
 
8
  import traceback
9
- import time
10
  import copy
 
11
  import sys
12
- import re
13
  import os
 
14
  os.environ["MACRO_CORRECT_FLAG_CSC_TOKEN"] = "1"
15
  os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
16
  os.environ["USE_TORCH"] = "1"
17
 
18
  from macro_correct.pytorch_textcorrection.tcTools import preprocess_same_with_training
 
 
 
19
  from macro_correct import correct_basic
20
  from macro_correct import correct_long
21
  from macro_correct import correct
@@ -79,14 +82,27 @@ def cut_sent_by_stay_and_maxlen(text, max_len=126, return_length=True):
79
 
80
  def macro_correct(text):
81
  print(text)
82
- text = preprocess_same_with_training(text)
83
  texts, texts_length = cut_sent_by_stay_and_maxlen(text, return_length=True)
84
  text_str = ""
85
  text_list = []
86
  for t in texts:
87
  print(t)
88
- text_csc = correct_long(t, num_rethink=2, flag_cut=True)
 
89
  print(text_csc)
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  if text_csc:
91
  text_list.extend(text_csc)
92
  text_str += text_csc[0].get("target")
@@ -120,16 +136,9 @@ if __name__ == '__main__':
120
  macro_correct,
121
  inputs='text',
122
  outputs='text',
123
- title="Chinese Spelling Correction Model Macropodus/macbert4mdcspell_v2",
124
  description="Copy or input error Chinese text. Submit and the machine will correct text.",
125
  article="Link to <a href='https://github.com/yongzhuo/macro-correct' style='color:blue;' target='_blank\'>Github REPO: macro-correct</a>",
126
  examples=examples
127
  ).launch()
128
  # ).launch(server_name="0.0.0.0", server_port=8066, share=False, debug=True)
129
-
130
- """
131
- 赤热的阳光烘烤大地,婵鸣撕破树荫的宁净。少年咬下鲜红西瓜,糖汁溶化在沙摊上。孩童举着冰其淋奔跑,浪花打湿嘻闹的脚丫。威风卷起碎花裙摆,牵牛花在篱笆绽方。这个季结,连空气都浸着清谅的甜。
132
- 炽热的阳光烘烤大地,蝉鸣撕破树荫的宁静。少年咬下鲜红西瓜,糖汁溶化在沙滩上。孩童举着冰淇淋奔跑,浪花打湿嬉闹的脚丫。微风卷起碎花裙摆,牵牛花在篱笆绽放。这个季节,连空气都浸着清凉的甜。
133
-
134
- pyinstaller --onefile app_macro_correct_std.py
135
- """
 
6
 
7
 
8
  import traceback
 
9
  import copy
10
+ import time
11
  import sys
 
12
  import os
13
+ import re
14
  os.environ["MACRO_CORRECT_FLAG_CSC_TOKEN"] = "1"
15
  os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
16
  os.environ["USE_TORCH"] = "1"
17
 
18
  from macro_correct.pytorch_textcorrection.tcTools import preprocess_same_with_training
19
+ from macro_correct.pytorch_textcorrection.tcTools import get_errors_for_difflib
20
+ from macro_correct.pytorch_textcorrection.tcTools import cut_sent_by_maxlen
21
+ from macro_correct.pytorch_textcorrection.tcTools import count_flag_zh
22
  from macro_correct import correct_basic
23
  from macro_correct import correct_long
24
  from macro_correct import correct
 
82
 
83
  def macro_correct(text):
84
  print(text)
 
85
  texts, texts_length = cut_sent_by_stay_and_maxlen(text, return_length=True)
86
  text_str = ""
87
  text_list = []
88
  for t in texts:
89
  print(t)
90
+ t_process = preprocess_same_with_training(t)
91
+ text_csc = correct_long(t_process, num_rethink=2, flag_cut=True, limit_length_char=1)
92
  print(text_csc)
93
+ ### 繁简
94
+ if t != t_process:
95
+ t_correct, errors = get_errors_for_difflib(t_process, t)
96
+ errors_new = []
97
+ for err in errors:
98
+ if count_flag_zh(err[0]) and count_flag_zh(err[1]):
99
+ errors_new.append(err + [1])
100
+ if errors_new:
101
+ if text_csc:
102
+ text_csc[0]["errors"] += errors_new
103
+ else:
104
+ text_csc = [{"source": t, "target": t_process, "errors": errors_new}]
105
+ ### 本身的错误
106
  if text_csc:
107
  text_list.extend(text_csc)
108
  text_str += text_csc[0].get("target")
 
136
  macro_correct,
137
  inputs='text',
138
  outputs='text',
139
+ title="Chinese Spelling Correction Model Macropodus/macbert4csc_v2",
140
  description="Copy or input error Chinese text. Submit and the machine will correct text.",
141
  article="Link to <a href='https://github.com/yongzhuo/macro-correct' style='color:blue;' target='_blank\'>Github REPO: macro-correct</a>",
142
  examples=examples
143
  ).launch()
144
  # ).launch(server_name="0.0.0.0", server_port=8066, share=False, debug=True)