blueradiance committed on
Commit
a759d12
·
verified ·
1 Parent(s): ee2c558

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -14
app.py CHANGED
@@ -27,18 +27,22 @@ def mask_school_names(text):
27
  model_name = "Leo97/KoELECTRA-small-v3-modu-ner"
28
  tokenizer = AutoTokenizer.from_pretrained(model_name)
29
  model = AutoModelForTokenClassification.from_pretrained(model_name)
30
- ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)
31
 
32
  def extract_names(text):
33
- results = ner_pipeline(text)
 
 
 
 
 
34
  names = []
35
  for entity in results:
36
- if entity["entity_group"] == "PS":
37
  name = entity["word"].replace("##", "").strip()
38
  if len(name) >= 2 and name not in names:
39
  names.append(name)
40
 
41
- # 붙임형 직함 기반
42
  title_suffixes = [
43
  '대표', '이사', '전무', '상무', '부장', '차장', '과장', '대리', '사원', '실장', '팀장', '소장', '국장', '본부장',
44
  '선생님', '교사', '교장', '교감', '부교장', '조교수', '교수', '연구원', '박사', '석사', '학사',
@@ -52,7 +56,6 @@ def extract_names(text):
52
  if name not in names:
53
  names.append(name)
54
 
55
- # 띄어쓰기 있는 지칭어 형태에서도 이름 추출
56
  honorific_suffixes = [
57
  '어머니', '아버지', '엄마', '아빠', '할머니', '할아버지', '외할머니', '외할아버지',
58
  '형', '누나', '언니', '오빠', '동생', '아들', '딸',
@@ -105,12 +108,3 @@ def refactored_mask_names(original_text, names, start_counter=100):
105
 
106
  def to_chosung(text):
107
  CHOSUNG_LIST = [chr(i) for i in range(0x1100, 0x1113)]
108
- result = ""
109
- for ch in text:
110
- if '가' <= ch <= '힣':
111
- code = ord(ch) - ord('가')
112
- cho = code // 588
113
- result += CHOSUNG_LIST[cho]
114
- else:
115
- result += ch
116
- return result
 
27
  model_name = "Leo97/KoELECTRA-small-v3-modu-ner"
28
  tokenizer = AutoTokenizer.from_pretrained(model_name)
29
  model = AutoModelForTokenClassification.from_pretrained(model_name)
30
+ ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
31
 
32
  def extract_names(text):
33
+ try:
34
+ results = ner_pipeline(text)
35
+ except Exception as e:
36
+ print("NER 오류 발생:", e)
37
+ return []
38
+
39
  names = []
40
  for entity in results:
41
+ if entity.get("entity_group") == "PS":
42
  name = entity["word"].replace("##", "").strip()
43
  if len(name) >= 2 and name not in names:
44
  names.append(name)
45
 
 
46
  title_suffixes = [
47
  '대표', '이사', '전무', '상무', '부장', '차장', '과장', '대리', '사원', '실장', '팀장', '소장', '국장', '본부장',
48
  '선생님', '교사', '교장', '교감', '부교장', '조교수', '교수', '연구원', '박사', '석사', '학사',
 
56
  if name not in names:
57
  names.append(name)
58
 
 
59
  honorific_suffixes = [
60
  '어머니', '아버지', '엄마', '아빠', '할머니', '할아버지', '외할머니', '외할아버지',
61
  '형', '누나', '언니', '오빠', '동생', '아들', '딸',
 
108
 
109
  def to_chosung(text):
110
  CHOSUNG_LIST = [chr(i) for i in range(0x1100, 0x1113)]