Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -97,6 +97,21 @@ def mask_department(text):
|
|
97 |
text = re.sub(r"([가-힣]{2,20}학과)", lambda m: to_chosung(m.group(1)[:-2]) + "학과", text)
|
98 |
return text
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
def sanitize_sensitive_info(text, keyword_string, replace_word):
|
101 |
text = mask_school_names(text)
|
102 |
text = mask_department(text)
|
@@ -120,6 +135,9 @@ def sanitize_sensitive_info(text, keyword_string, replace_word):
|
|
120 |
text = re.sub(r"([가-힣]+(대로|로|길))\s?(\d+)(호|번길|가)?", r"\1 ***", text)
|
121 |
text = re.sub(r"(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})",
|
122 |
lambda m: f"{m.group(1)}-****-****-{m.group(4)}", text)
|
|
|
|
|
|
|
123 |
return text
|
124 |
|
125 |
def final_name_remask_exact_only(text, mapping_dict):
|
|
|
97 |
text = re.sub(r"([가-힣]{2,20}학과)", lambda m: to_chosung(m.group(1)[:-2]) + "학과", text)
|
98 |
return text
|
99 |
|
100 |
+
|
101 |
+
def postprocess_sensitive_patterns(text):
    """Mask IP addresses, card numbers and phone numbers left in *text*.

    Three substitutions are applied in this order:

    1. IPv4-like dotted quads keep the first two octets:
       ``192.168.35.201`` -> ``192.168.*.*``
    2. 16-digit card numbers (groups of four, optionally separated by
       ``-`` or a space) keep the first and last group:
       ``1234-5678-9012-3456`` -> ``1234-****-****-3456``
    3. Hyphenated phone numbers keep the first and last group:
       ``031-987-6543`` -> ``031-***-6543``

    Args:
        text: The string to sanitize.

    Returns:
        The string with the patterns above masked.
    """
    # Digit lookarounds are used instead of ``\b``: Hangul syllables count
    # as word characters, so ``\b`` does not match between a digit and an
    # adjacent Korean particle (e.g. "031-987-6543입니다") and the number
    # would be left unmasked.

    # IP address: 192.168.35.201 -> 192.168.*.*
    text = re.sub(
        r"(?<!\d)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})(?!\d)",
        r"\1.\2.*.*",
        text,
    )

    # Card number: 1234-5678-9012-3456 -> 1234-****-****-3456
    # Must run BEFORE the phone rule: the phone pattern also matches the
    # first three hyphenated groups of a card number and would turn it into
    # "1234-***-9012-3456", leaking the third group and preventing this
    # rule from ever matching.
    text = re.sub(
        r"(?<!\d)(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})(?!\d)",
        r"\1-****-****-\4",
        text,
    )

    # Phone number: 031-987-6543 -> 031-***-6543
    text = re.sub(
        r"(?<!\d)(\d{2,4})-(\d{3,4})-(\d{4})(?!\d)",
        r"\1-***-\3",
        text,
    )

    return text
|
112 |
+
|
113 |
+
|
114 |
+
|
115 |
def sanitize_sensitive_info(text, keyword_string, replace_word):
|
116 |
text = mask_school_names(text)
|
117 |
text = mask_department(text)
|
|
|
135 |
text = re.sub(r"([가-힣]+(대로|로|길))\s?(\d+)(호|번길|가)?", r"\1 ***", text)
|
136 |
text = re.sub(r"(\d{4})[- ]?(\d{4})[- ]?(\d{4})[- ]?(\d{4})",
|
137 |
lambda m: f"{m.group(1)}-****-****-{m.group(4)}", text)
|
138 |
+
# 📌 후처리 추가
|
139 |
+
text = postprocess_sensitive_patterns(text)
|
140 |
+
|
141 |
return text
|
142 |
|
143 |
def final_name_remask_exact_only(text, mapping_dict):
|