File size: 9,273 Bytes
6fe2a61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d759e5
6fe2a61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17f24b1
 
 
 
 
 
 
 
 
 
 
 
 
 
af29c13
6fe2a61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17f24b1
 
 
 
 
 
 
 
6fe2a61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17f24b1
 
 
 
 
6fe2a61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import io
import os
import base64
import json
import logging
import unicodedata
import tempfile
from difflib import SequenceMatcher
from PIL import Image, ImageDraw, ImageFont, ImageOps

import cv2
import numpy as np
import gradio as gr
from google.cloud import vision
from google.oauth2 import service_account
from kospellpy import spell_init

# ──────────────────────────────── 환경 설정 ────────────────────────────────
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')
FONT_PATH = os.path.join(os.path.dirname(__file__), "NanumGothicCoding.ttf")
MIN_FONT_SIZE = 8

def get_vision_client():
    b64 = os.getenv("GCP_SERVICE_ACCOUNT_JSON")
    if not b64:
        logging.warning("GCP_SERVICE_ACCOUNT_JSON 환경변수가 설정되지 않았습니다. 기본 인증을 사용합니다.")
        return vision.ImageAnnotatorClient()
    try:
        info = json.loads(base64.b64decode(b64).decode())
        creds = service_account.Credentials.from_service_account_info(info)
        return vision.ImageAnnotatorClient(credentials=creds)
    except Exception as e:
        logging.error(f"Vision API 인증 실패: {e}")
        raise

vision_client = get_vision_client()
checker = spell_init()

# ──────────────────────────────── KoSpellPy 긴 텍스트 안전 처리 ────────────────────────────────
def chunk_text(text, max_len=500):
    return [text[i:i+max_len] for i in range(0, len(text), max_len)]

def safe_kospell_check(text):
    parts = chunk_text(text)
    corrected = []
    for part in parts:
        try:
            corrected.append(checker(part))
        except Exception as e:
            logging.warning(f"[Spell] 일부 텍스트 교정 실패: {e}")
            corrected.append(part)  # 오류 발생 시 해당 부분은 원문 사용
    return ' '.join(corrected)

def normalize_text(text: str) -> str:
    return unicodedata.normalize('NFC', text)

def compute_font_for_word(vertices):
    ys = [v.y for v in vertices]
    bbox_h = max(ys) - min(ys)
    size = max(MIN_FONT_SIZE, int(bbox_h * 0.4))
    try:
        return ImageFont.truetype(FONT_PATH, size)
    except Exception as e:
        print(f"[WARNING] 폰트 로딩 실패: {e}")
        return ImageFont.load_default()

def preprocess_with_adaptive_threshold(img: Image.Image) -> Image.Image:
    cv_img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
    adap = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 25, 10)
    bgr = cv2.cvtColor(adap, cv2.COLOR_GRAY2BGR)
    return Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

def ocr_overlay_and_correct_text(img: Image.Image):
    corrected_text = ""
    overlay = None
    if img is not None:
        img = ImageOps.exif_transpose(img)
        proc = preprocess_with_adaptive_threshold(img)
        buf = io.BytesIO(); proc.save(buf, format='PNG')

        res = vision_client.document_text_detection(
            image=vision.Image(content=buf.getvalue()),
            image_context={'language_hints': ['ko']}
        )
        ann = res.full_text_annotation
        raw = ann.text.replace('\n', ' ').strip()
        logging.info(f"[OCR] Raw length: {len(raw)} / Raw: {raw}")

        try:
            corrected_text = safe_kospell_check(raw)
            logging.info(f"[Spell] Corrected: {corrected_text}")
        except Exception as e:
            logging.error(f"[Spell] 교정 중 오류 발생: {e}")
            corrected_text = raw  # 오류 시 원문 반환

        syms = []
        for pg in ann.pages:
            for bl in pg.blocks:
                for para in bl.paragraphs:
                    for w in para.words:
                        for s in w.symbols:
                            syms.append({'text': normalize_text(s.text), 'bbox': s.bounding_box.vertices})

        raw_c, corr_c, mapping = list(raw), list(corrected_text), {}
        idx = 0
        for i, ch in enumerate(raw_c):
            if ch.strip():
                mapping[i] = idx
                idx += 1

        sm = SequenceMatcher(None, raw_c, corr_c)
        overlay = img.copy()
        draw = ImageDraw.Draw(overlay)
        col = "#FF3333"

        for tag, i1, i2, j1, j2 in sm.get_opcodes():
            if tag not in ('replace', 'insert'):
                continue
            repl = ''.join(corr_c[j1:j2])
            if tag == 'insert' and repl == ' ':
                repl = 'V'
            valid = (
                [k for k in range(i1, i2) if k in mapping]
                if tag == 'replace'
                else ([max(i1-1, 0)] if max(i1-1, 0) in mapping else [])
            )
            for k in valid:
                sd = mapping[k]
                verts = syms[sd]['bbox']
                xs, ys = [v.x for v in verts], [v.y for v in verts]
                x0, x1, y0, y1 = min(xs), max(xs), min(ys), max(ys)
                ul = y0 + int((y1 - y0) * 0.9)
                draw.line([(x0, ul), (x1, ul)], fill=col, width=3)
            if valid:
                sd = mapping[valid[0]]
                verts = syms[sd]['bbox']
                xs, ys = [v.x for v in verts], [v.y for v in verts]
                x0, x1, y0 = min(xs), max(xs), min(ys)
                if tag == 'insert' and len(repl) == 1 and not repl.isalnum():
                    prev_k = max(i1 - 1, 0)
                    if prev_k in mapping:
                        prev_sd = mapping[prev_k]
                        prev_verts = syms[prev_sd]['bbox']
                        prev_xs = [v.x for v in prev_verts]
                        fx = max(prev_xs + xs)
                        overlay_str = raw_c[prev_k] + repl
                    else:
                        overlay_str, fx = repl, x1
                elif repl == 'V':
                    overlay_str, fx = 'V', x1
                elif not repl.isalnum():
                    overlay_str, fx = repl, x1
                else:
                    overlay_str, fx = repl, x0
                fy = y0
                font = compute_font_for_word(verts)
                draw.text((fx, fy), overlay_str, font=font, fill=col)

    return overlay, corrected_text

def text_correct_fn(text):
    raw = normalize_text(text.strip())
    try:
        corrected = safe_kospell_check(raw)
    except Exception as e:
        logging.error(f"[Spell/TextInput] 교정 중 오류 발생: {e}")
        corrected = raw
    return None, corrected

def img_correct_fn(blob):
    img = None
    if blob:
        img = Image.open(io.BytesIO(blob)).convert('RGB')
    return ocr_overlay_and_correct_text(img)

with gr.Blocks(
    css="""
        .gradio-container {background-color: #fafaf5}
        footer {display: none !important;}
        .gr-box {border: 2px solid black !important;}
        * { font-family: 'Quicksand', ui-sans-serif, sans-serif !important; }
    """,
    theme="dark"
) as demo:
    state = gr.State()
    gr.Markdown("## 📷찰칵! 맞춤법 검사기")
    with gr.Row():
        with gr.Column():
            upload = gr.UploadButton(label='사진 촬영 및 업로드', file_types=['image'], type='binary')
            img_check_btn = gr.Button('✔️검사하기', interactive=False)
        with gr.Column():
            text_in = gr.Textbox(lines=3, placeholder='텍스트를 직접 입력하세요 (선택)', label='💻직접 입력 텍스트')
            text_check_btn = gr.Button('텍스트 검사', interactive=False)

    img_out = gr.Image(type='pil', label='교정 결과')
    txt_out = gr.Textbox(label='교정된 텍스트')
    clear_btn = gr.Button('초기화')

    def on_upload_start():
        return gr.update(label="업로드 중...", interactive=False), gr.update(interactive=False)
    upload.upload(on_upload_start, None, [upload, img_check_btn], queue=False, preprocess=False)

    def on_upload_complete(blob):
        return blob, gr.update(label="업로드 완료", interactive=False), gr.update(interactive=True)
    upload.upload(on_upload_complete, inputs=[upload], outputs=[state, upload, img_check_btn])

    def on_img_check(blob):
        result = img_correct_fn(blob)
        return gr.update(label="사진 촬영 및 업로드", interactive=True, value=None), gr.update(interactive=False), result[0], result[1]
    img_check_btn.click(on_img_check, inputs=[state], outputs=[upload, img_check_btn, img_out, txt_out])

    def enable_text_check(text):
        return gr.update(interactive=bool(text.strip()))
    text_in.change(enable_text_check, inputs=[text_in], outputs=[text_check_btn])

    text_check_btn.click(text_correct_fn, inputs=[text_in], outputs=[img_out, txt_out])

    def on_clear():
        return None, gr.update(label="사진 촬영 및 업로드", interactive=True, value=None), '', gr.update(interactive=False), None, ''
    clear_btn.click(on_clear, None, [state, upload, text_in, img_check_btn, img_out, txt_out])

if __name__ == '__main__':
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))