blueradiance committed on
Commit
d290d68
·
verified ·
1 Parent(s): 3b51a16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -0
app.py CHANGED
@@ -97,6 +97,21 @@ def mask_department(text):
97
  text = re.sub(r"([가-힣]{2,20}학과)", lambda m: to_chosung(m.group(1)[:-2]) + "학과", text)
98
  return text
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def sanitize_sensitive_info(text, keyword_string, replace_word):
101
  text = mask_school_names(text)
102
  text = mask_department(text)
@@ -120,6 +135,9 @@ def sanitize_sensitive_info(text, keyword_string, replace_word):
120
  text = re.sub(r"([가-힣]+(대로|로|길))\s?(\d+)(호|번길|가)?", r"\1 ***", text)
121
  text = re.sub(r"(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})",
122
  lambda m: f"{m.group(1)}-****-****-{m.group(4)}", text)
 
 
 
123
  return text
124
 
125
  def final_name_remask_exact_only(text, mapping_dict):
 
97
  text = re.sub(r"([가-힣]{2,20}학과)", lambda m: to_chosung(m.group(1)[:-2]) + "학과", text)
98
  return text
99
 
100
+
101
def postprocess_sensitive_patterns(text: str) -> str:
    """Mask IP addresses, card numbers, and phone numbers found in *text*.

    Three regex substitutions are applied in sequence:

    * IPv4-looking dotted quads keep their first two octets:
      ``192.168.35.201`` -> ``192.168.*.*``
    * 16-digit card numbers (groups optionally separated by ``-`` or a
      space) keep their first and last groups:
      ``1234-5678-9012-3456`` -> ``1234-****-****-3456``
    * Hyphenated phone numbers have their middle group hidden:
      ``031-987-6543`` -> ``031-***-6543``

    Args:
        text: The input string to scrub.

    Returns:
        A copy of *text* with every match of the patterns above masked.
    """
    # IP address: 192.168.35.201 -> 192.168.*.*
    text = re.sub(
        r"\b(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\b",
        r"\1.\2.*.*",
        text,
    )

    # Card number: 1234-5678-9012-3456 -> 1234-****-****-3456
    # This must run BEFORE the phone pattern: the phone regex also matches the
    # first three groups of a hyphenated card number ("1234-5678-9012"), which
    # would rewrite it to "1234-***-9012-3456" and leave the card unmasked.
    text = re.sub(
        r"\b(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})\b",
        r"\1-****-****-\4",
        text,
    )

    # Phone number: 031-987-6543 -> 031-***-6543
    # An already-masked card ("1234-****-****-3456") no longer has three
    # consecutive digit groups, so it is not touched here.
    text = re.sub(r"\b(\d{2,4})-(\d{3,4})-(\d{4})\b", r"\1-***-\3", text)

    return text
112
+
113
+
114
+
115
  def sanitize_sensitive_info(text, keyword_string, replace_word):
116
  text = mask_school_names(text)
117
  text = mask_department(text)
 
135
  text = re.sub(r"([가-힣]+(대로|로|길))\s?(\d+)(호|번길|가)?", r"\1 ***", text)
136
  text = re.sub(r"(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})",
137
  lambda m: f"{m.group(1)}-****-****-{m.group(4)}", text)
138
+ # 📌 후처리 추가
139
+ text = postprocess_sensitive_patterns(text)
140
+
141
  return text
142
 
143
  def final_name_remask_exact_only(text, mapping_dict):