yongyeol committed on
Commit
17f24b1
·
verified ·
1 Parent(s): af29c13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -9
app.py CHANGED
@@ -20,7 +20,6 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(mes
20
  FONT_PATH = os.path.join(os.path.dirname(__file__), "NanumGothicCoding.ttf")
21
  MIN_FONT_SIZE = 8
22
 
23
-
24
  def get_vision_client():
25
  b64 = os.getenv("GCP_SERVICE_ACCOUNT_JSON")
26
  if not b64:
@@ -37,6 +36,20 @@ def get_vision_client():
37
  vision_client = get_vision_client()
38
  checker = spell_init()
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def normalize_text(text: str) -> str:
42
  return unicodedata.normalize('NFC', text)
@@ -58,7 +71,6 @@ def preprocess_with_adaptive_threshold(img: Image.Image) -> Image.Image:
58
  bgr = cv2.cvtColor(adap, cv2.COLOR_GRAY2BGR)
59
  return Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
60
 
61
-
62
  def ocr_overlay_and_correct_text(img: Image.Image):
63
  corrected_text = ""
64
  overlay = None
@@ -73,9 +85,14 @@ def ocr_overlay_and_correct_text(img: Image.Image):
73
  )
74
  ann = res.full_text_annotation
75
  raw = ann.text.replace('\n', ' ').strip()
76
- logging.info(f"[OCR] Raw: {raw}")
77
- corrected_text = checker(raw)
78
- logging.info(f"[Spell] Corrected: {corrected_text}")
 
 
 
 
 
79
 
80
  syms = []
81
  for pg in ann.pages:
@@ -142,10 +159,13 @@ def ocr_overlay_and_correct_text(img: Image.Image):
142
 
143
  return overlay, corrected_text
144
 
145
-
146
  def text_correct_fn(text):
147
  raw = normalize_text(text.strip())
148
- corrected = checker(raw)
 
 
 
 
149
  return None, corrected
150
 
151
  def img_correct_fn(blob):
@@ -154,7 +174,6 @@ def img_correct_fn(blob):
154
  img = Image.open(io.BytesIO(blob)).convert('RGB')
155
  return ocr_overlay_and_correct_text(img)
156
 
157
-
158
  with gr.Blocks(
159
  css="""
160
  .gradio-container {background-color: #fafaf5}
@@ -201,6 +220,5 @@ with gr.Blocks(
201
  return None, gr.update(label="사진 촬영 및 업로드", interactive=True, value=None), '', gr.update(interactive=False), None, ''
202
  clear_btn.click(on_clear, None, [state, upload, text_in, img_check_btn, img_out, txt_out])
203
 
204
-
205
  if __name__ == '__main__':
206
  demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
 
20
  FONT_PATH = os.path.join(os.path.dirname(__file__), "NanumGothicCoding.ttf")
21
  MIN_FONT_SIZE = 8
22
 
 
23
  def get_vision_client():
24
  b64 = os.getenv("GCP_SERVICE_ACCOUNT_JSON")
25
  if not b64:
 
36
  vision_client = get_vision_client()
37
  checker = spell_init()
38
 
39
+ # ──────────────────────────────── KoSpellPy 긴 텍스트 안전 처리 ────────────────────────────────
40
+ def chunk_text(text, max_len=500):
41
+ return [text[i:i+max_len] for i in range(0, len(text), max_len)]
42
+
43
+ def safe_kospell_check(text):
44
+ parts = chunk_text(text)
45
+ corrected = []
46
+ for part in parts:
47
+ try:
48
+ corrected.append(checker(part))
49
+ except Exception as e:
50
+ logging.warning(f"[Spell] 일부 텍스트 교정 실패: {e}")
51
+ corrected.append(part) # 오류 발생 시 해당 부분은 원문 사용
52
+ return ' '.join(corrected)
53
 
54
  def normalize_text(text: str) -> str:
55
  return unicodedata.normalize('NFC', text)
 
71
  bgr = cv2.cvtColor(adap, cv2.COLOR_GRAY2BGR)
72
  return Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
73
 
 
74
  def ocr_overlay_and_correct_text(img: Image.Image):
75
  corrected_text = ""
76
  overlay = None
 
85
  )
86
  ann = res.full_text_annotation
87
  raw = ann.text.replace('\n', ' ').strip()
88
+ logging.info(f"[OCR] Raw length: {len(raw)} / Raw: {raw}")
89
+
90
+ try:
91
+ corrected_text = safe_kospell_check(raw)
92
+ logging.info(f"[Spell] Corrected: {corrected_text}")
93
+ except Exception as e:
94
+ logging.error(f"[Spell] 교정 중 오류 발생: {e}")
95
+ corrected_text = raw # 오류 시 원문 반환
96
 
97
  syms = []
98
  for pg in ann.pages:
 
159
 
160
  return overlay, corrected_text
161
 
 
162
  def text_correct_fn(text):
163
  raw = normalize_text(text.strip())
164
+ try:
165
+ corrected = safe_kospell_check(raw)
166
+ except Exception as e:
167
+ logging.error(f"[Spell/TextInput] 교정 중 오류 발생: {e}")
168
+ corrected = raw
169
  return None, corrected
170
 
171
  def img_correct_fn(blob):
 
174
  img = Image.open(io.BytesIO(blob)).convert('RGB')
175
  return ocr_overlay_and_correct_text(img)
176
 
 
177
  with gr.Blocks(
178
  css="""
179
  .gradio-container {background-color: #fafaf5}
 
220
  return None, gr.update(label="사진 촬영 및 업로드", interactive=True, value=None), '', gr.update(interactive=False), None, ''
221
  clear_btn.click(on_clear, None, [state, upload, text_in, img_check_btn, img_out, txt_out])
222
 
 
223
  if __name__ == '__main__':
224
  demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))