blueradiance committed on
Commit
9d959d2
·
verified ·
1 Parent(s): d1de249

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -97,16 +97,26 @@ def mask_department(text):
97
  return text
98
 
99
  def mask_sensitive_numbers(text):
100
- text = re.sub(r"(\d{3})-(\d{4})-(\d{4})", r"\1-****-\3", text)
 
 
101
  text = re.sub(r"(\d{6})[-](\d)\d{6}", r"*******-\2*****", text)
102
- text = re.sub(r"(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})", lambda m: f"{m.group(1)}-****-****-{m.group(4)}", text)
 
 
103
  text = re.sub(r"(\d{1,3})번지", r"***번지", text)
104
  text = re.sub(r"(\d{1,3})동", r"***동", text)
105
  text = re.sub(r"(\d{1,4})호", r"****호", text)
 
106
  text = re.sub(r"[\w\.-]+@[\w\.-]+", r"******@****", text)
107
- text = re.sub(r"(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})", lambda m: f"{m.group(1)}.{m.group(2)}.*.*", text)
108
- return text
 
 
 
109
 
 
 
110
  def sanitize_sensitive_info(text, keyword_string, replace_word):
111
  text = mask_school_names(text)
112
  text = mask_department(text)
 
97
  return text
98
 
99
  def mask_sensitive_numbers(text):
100
+ # 일반 전화번호: 000-0000-0000
101
+ text = re.sub(r"(\d{2,3})-(\d{3,4})-(\d{4})", r"\1-****-\3", text)
102
+ # 주민등록번호
103
  text = re.sub(r"(\d{6})[-](\d)\d{6}", r"*******-\2*****", text)
104
+ # 카드 번호: 0000-0000-0000-0000 또는 공백 또는 없음
105
+ text = re.sub(r"(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})", r"\1-****-****-\4", text)
106
+ # 번지/동/호
107
  text = re.sub(r"(\d{1,3})번지", r"***번지", text)
108
  text = re.sub(r"(\d{1,3})동", r"***동", text)
109
  text = re.sub(r"(\d{1,4})호", r"****호", text)
110
+ # 이메일
111
  text = re.sub(r"[\w\.-]+@[\w\.-]+", r"******@****", text)
112
+ # IP 주소
113
+ text = re.sub(r"(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})", r"\1.\2.*.*", text)
114
+
115
+ # 👇 추가된 부분: 마침표나 하이픈으로 구분된 3~4세트 숫자들
116
+ text = re.sub(r"(?<!\d)(\d{2,4}[-\.]){2,3}\d{2,4}(?!\d)", lambda m: re.sub(r"\d{2,4}", '***', m.group(0)), text)
117
 
118
+ return text
119
+
120
  def sanitize_sensitive_info(text, keyword_string, replace_word):
121
  text = mask_school_names(text)
122
  text = mask_department(text)