Spaces:

aliceblue11
/

image_text

Sleeping

App Files Files Community

aliceblue11 committed 14 days ago

Commit

776940e

verified ·

1 Parent(s): e0c9260

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -54

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import gradio as gr
 import base64
 import requests
 import json
@@ -7,6 +7,8 @@ import io
 import os
 from typing import Optional, Tuple
 import re
 class KoreanOCRApp:
     def __init__(self):
@@ -26,22 +28,48 @@ class KoreanOCRApp:
         self.project_id = project_id.strip()
         return "✅ 인증 정보가 설정되었습니다."
-    def encode_image_to_base64(self, image: Image.Image) -> str:
-        """이미지를 base64로 인코딩"""
-        buffer = io.BytesIO()
-        # JPEG 형식으로 저장하여 파일 크기 최적화
         if image.mode == 'RGBA':
-            # RGBA 이미지는 RGB로 변환
             background = Image.new('RGB', image.size, (255, 255, 255))
             background.paste(image, mask=image.split()[-1])
             image = background
-        image.save(buffer, format='JPEG', quality=95)
         image_bytes = buffer.getvalue()
         return base64.b64encode(image_bytes).decode('utf-8')
-    def call_gemini_api_direct(self, image_base64: str) -> str:
-        """Gemini API 직접 호출 (Google AI Studio API 사용)"""
         if not self.api_key:
             return "❌ 먼저 API 키를 설정해주세요."
@@ -100,36 +128,62 @@ class KoreanOCRApp:
             ]
         }
-        try:
-            response = requests.post(url, headers=headers, json=payload, timeout=60)
-            if response.status_code == 401:
-                return "❌ API 키가 유효하지 않습니다. Google AI Studio에서 발급받은 올바른 API 키를 입력해주세요."
-            elif response.status_code == 403:
-                return "❌ API 접근 권한이 없습니다. Gemini API가 활성화되어 있는지 확인해주세요."
-            elif response.status_code == 429:
-                return "❌ API 호출 한도를 초과했습니다. 잠시 후 다시 시도해주세요."
-            response.raise_for_status()
-            result = response.json()
-            if "candidates" in result and len(result["candidates"]) > 0:
-                content = result["candidates"][0]["content"]["parts"][0]["text"]
-                return content.strip()
-            elif "error" in result:
-                return f"❌ API 오류: {result['error'].get('message', '알 수 없는 오류')}"
-            else:
-                return "❌ 텍스트를 추출할 수 없습니다. 이미지에 한국어 텍스트가 포함되어 있는지 확인해주세요."
-        except requests.exceptions.RequestException as e:
-            return f"❌ API 호출 오류: {str(e)}"
-        except json.JSONDecodeError:
-            return "❌ API 응답 파싱 오류가 발생했습니다."
-        except KeyError as e:
-            return f"❌ 예상치 못한 API 응답 형식: {str(e)}"
-        except Exception as e:
-            return f"❌ 알 수 없는 오류: {str(e)}"
     def call_vertex_ai_api(self, image_base64: str) -> str:
         """Vertex AI API 호출 (서비스 계정 키 사용)"""
@@ -216,22 +270,15 @@ class KoreanOCRApp:
                 return image, auth_result
         try:
-            # 이미지 크기 확인 및 조정
-            img_byte_array = io.BytesIO()
-            image.save(img_byte_array, format='JPEG', quality=95)
-            img_size_mb = len(img_byte_array.getvalue()) / (1024 * 1024)
-            if img_size_mb > 4:  # 4MB로 제한을 낮춤
-                # 이미지 크기가 너무 크면 리사이즈
-                max_dimension = 1920
-                image.thumbnail((max_dimension, max_dimension), Image.Resampling.LANCZOS)
             # 이미지를 base64로 인코딩
             image_base64 = self.encode_image_to_base64(image)
             # API 타입에 따라 호출
             if api_type == "Google AI Studio":
-                extracted_text = self.call_gemini_api_direct(image_base64)
             else:
                 extracted_text = self.call_vertex_ai_api(image_base64)
@@ -313,7 +360,7 @@ def create_interface():
                 )
         # API 설정 가이드
-        with gr.Accordion("📖 API 설정 가이드", open=False):
             gr.Markdown("""
             ### Google AI Studio API (권장)
             1. [Google AI Studio](https://aistudio.google.com/)에 접속
@@ -321,6 +368,12 @@ def create_interface():
             3. API 키 생성 및 복사
             4. 위의 "API 키" 필드에 붙여넣기
             ### Vertex AI API (고급 사용자용)
             1. [Google Cloud Console](https://console.cloud.google.com/)에서 프로젝트 생성
             2. Vertex AI API 활성화
@@ -328,13 +381,34 @@ def create_interface():
             4. `gcloud auth application-default login` 또는 Access Token 발급
             5. API 키와 프로젝트 ID 입력
-            ### ⚠️ 주의사항
-            - Google AI Studio는 개인 사용자에게 무료 할당량 제공
-            - Vertex AI는 유료 서비스로 사용량에 따라 과금
-            - API 키는 안전하게 보관하고 공유하지 마세요
             """, elem_classes="warning-box")
-        # 이미지 업로드 및 처리 섹션
         gr.Markdown("## 📤 이미지 업로드 및 텍스트 추출")
         with gr.Row():

+import gradio as gr
 import base64
 import requests
 import json
 import os
 from typing import Optional, Tuple
 import re
+import time
+import random
 class KoreanOCRApp:
     def __init__(self):
         self.project_id = project_id.strip()
         return "✅ 인증 정보가 설정되었습니다."
+    def optimize_image_for_api(self, image: Image.Image) -> Image.Image:
+        """Return a size-limited RGB version of ``image`` for the API call.
+
+        The longest side is capped at 1024 px to reduce token usage, and any
+        transparency is flattened onto a white background so the result can
+        be saved as JPEG. The caller's image object is never modified.
+
+        Args:
+            image: Source PIL image in any mode.
+
+        Returns:
+            An image in mode ``RGB`` whose sides are at most 1024 px. When no
+            resize or conversion is needed, the input object itself is returned.
+        """
+        max_dimension = 1024  # cap on the longest side (smaller image -> fewer tokens)
+        width, height = image.size
+        if width > max_dimension or height > max_dimension:
+            # thumbnail() resizes in place; copy first so the caller's image
+            # (e.g. the one shown in the UI) keeps its original size.
+            image = image.copy()
+            image.thumbnail((max_dimension, max_dimension), Image.Resampling.LANCZOS)
+        # Alpha can live in RGBA, LA, or palette images carrying transparency info.
+        has_alpha = image.mode in ('RGBA', 'LA') or (
+            image.mode == 'P' and 'transparency' in image.info
+        )
+        if has_alpha:
+            # Composite onto white; a plain convert('RGB') would discard the alpha
+            # channel instead of blending with a background.
+            rgba = image.convert('RGBA')
+            background = Image.new('RGB', rgba.size, (255, 255, 255))
+            background.paste(rgba, mask=rgba.getchannel('A'))
+            image = background
+        elif image.mode != 'RGB':
+            image = image.convert('RGB')
+        return image
+    def encode_image_to_base64(self, image: Image.Image) -> str:
+        """Encode ``image`` as a base64 JPEG string.
+
+        The image is first passed through ``optimize_image_for_api`` and saved
+        as JPEG at quality 80. If that output is larger than 3 MB it is
+        re-encoded once at quality 60.
+
+        Args:
+            image: Source PIL image.
+
+        Returns:
+            The JPEG bytes, base64-encoded and decoded to a UTF-8 ``str``.
+        """
+        prepared = self.optimize_image_for_api(image)
+        def _to_jpeg(quality: int) -> bytes:
+            # Serialize the prepared image into an in-memory JPEG.
+            out = io.BytesIO()
+            prepared.save(out, format='JPEG', quality=quality, optimize=True)
+            return out.getvalue()
+        jpeg_bytes = _to_jpeg(80)
+        # Fall back to a stronger compression level when the payload exceeds 3 MB.
+        if len(jpeg_bytes) / (1024 * 1024) > 3:
+            jpeg_bytes = _to_jpeg(60)
+        return base64.b64encode(jpeg_bytes).decode('utf-8')
+    def call_gemini_api_with_retry(self, image_base64: str, max_retries: int = 3, initial_delay: float = 2.0) -> str:
+        """Call the Gemini API, retrying with exponential backoff when rate-limited.
+
+        Args:
+            image_base64: Base64-encoded image to send.
+            max_retries: Total number of attempts before giving up.
+            initial_delay: Base delay in seconds; it doubles on every attempt
+                and gets 0.5-1.5 s of random jitter added.
+
+        Returns:
+            The extracted text on success, otherwise a user-facing error
+            message starting with "❌". This method reports failures through
+            its return value rather than by raising.
+        """
         if not self.api_key:
             return "❌ 먼저 API 키를 설정해주세요."
             ]
         }
+        for attempt in range(max_retries):
+            # Back off only when at least one more attempt remains.
+            can_retry = attempt < max_retries - 1
+            # Exponential backoff plus random jitter.
+            delay = initial_delay * (2 ** attempt) + random.uniform(0.5, 1.5)
+            try:
+                response = requests.post(url, headers=headers, json=payload, timeout=60)
+                if response.status_code == 401:
+                    return "❌ API 키가 유효하지 않습니다. Google AI Studio에서 발급받은 올바른 API 키를 입력해주세요."
+                elif response.status_code == 403:
+                    return "❌ API 접근 권한이 없습니다. Gemini API가 활성화되어 있는지 확인해주세요."
+                elif response.status_code == 429:
+                    if can_retry:
+                        # Actually wait and retry; returning here would abort
+                        # the loop and the promised retry would never run.
+                        time.sleep(delay)
+                        continue
+                    return """❌ API 호출 한도를 초과했습니다.
+📌 해결 방법:
+1. 잠시 기다린 후 다시 시도 (1-2분 권장)
+2. Google AI Studio에서 할당량 확인
+3. 유료 계정으로 업그레이드 고려
+4. 이미지 크기를 줄여서 재시도
+💡 팁: 높은 해상도의 이미지는 더 많은 토큰을 사용합니다."""
+                response.raise_for_status()
+                result = response.json()
+                if "candidates" in result and len(result["candidates"]) > 0:
+                    content = result["candidates"][0]["content"]["parts"][0]["text"]
+                    return content.strip()
+                elif "error" in result:
+                    error_msg = result['error'].get('message', '알 수 없는 오류')
+                    quota_related = "quota" in error_msg.lower() or "limit" in error_msg.lower()
+                    if quota_related and can_retry:
+                        time.sleep(delay)
+                        continue
+                    return f"❌ API 오류: {error_msg}"
+                else:
+                    return "❌ 텍스트를 추출할 수 없습니다. 이미지에 한국어 텍스트가 포함되어 있는지 확인해주세요."
+            except json.JSONDecodeError:
+                # Must precede RequestException: the error raised by
+                # response.json() can subclass both, and the first matching
+                # handler wins.
+                return "❌ API 응답 파싱 오류가 발생했습니다."
+            except requests.exceptions.RequestException as e:
+                if "429" in str(e) and can_retry:
+                    time.sleep(delay)
+                    continue
+                return f"❌ API 호출 오류: {str(e)}"
+            except KeyError as e:
+                return f"❌ 예상치 못한 API 응답 형식: {str(e)}"
+            except Exception as e:
+                return f"❌ 알 수 없는 오류: {str(e)}"
+        return "❌ 최대 재시도 횟수를 초과했습니다. 잠시 후 다시 시도해주세요."
     def call_vertex_ai_api(self, image_base64: str) -> str:
         """Vertex AI API 호출 (서비스 계정 키 사용)"""
                 return image, auth_result
         try:
+            # 이미지 최적화 (토큰 사용량 감소를 위해)
+            image = self.optimize_image_for_api(image)
             # 이미지를 base64로 인코딩
             image_base64 = self.encode_image_to_base64(image)
             # API 타입에 따라 호출
             if api_type == "Google AI Studio":
+                extracted_text = self.call_gemini_api_with_retry(image_base64)
             else:
                 extracted_text = self.call_vertex_ai_api(image_base64)
                 )
         # API 설정 가이드
+        with gr.Accordion("📖 API 설정 가이드 및 할당량 정보", open=False):
             gr.Markdown("""
             ### Google AI Studio API (권장)
             1. [Google AI Studio](https://aistudio.google.com/)에 접속
             3. API 키 생성 및 복사
             4. 위의 "API 키" 필드에 붙여넣기
+            **📊 무료 할당량 (Google AI Studio):**
+            - 분당 15회 요청
+            - 일일 1,500회 요청
+            - 분당 100만 토큰
+            - 일일 5천만 토큰
             ### Vertex AI API (고급 사용자용)
             1. [Google Cloud Console](https://console.cloud.google.com/)에서 프로젝트 생성
             2. Vertex AI API 활성화
             4. `gcloud auth application-default login` 또는 Access Token 발급
             5. API 키와 프로젝트 ID 입력
+            ### ⚠️ 할당량 초과 시 해결 방법
+            1. **잠시 대기**: 1-2분 후 다시 시도
+            2. **이미지 최적화**: 더 작은 크기의 이미지 사용
+            3. **사용량 분산**: 여러 번 나누어서 처리
+            4. **유료 계정**: Google Cloud 유료 계정으로 업그레이드
+            ### 💡 토큰 절약 팁
+            - 이미지 해상도: 1024x1024 이하 권장
+            - 파일 형식: JPEG 사용 (PNG보다 작음)
+            - 불필요한 배경 제거
+            - 텍스트 영역만 크롭하여 업로드
             """, elem_classes="warning-box")
+        # 할당량 상태 표시
+        with gr.Row():
+            gr.Markdown("""
+            ### 📊 현재 상태
+            **무료 할당량 (Google AI Studio):**
+            - ⏱️ 분당 15회 요청 제한
+            - 📅 일일 1,500회 요청 제한
+            - 🔢 고해상도 이미지는 더 많은 토큰 사용
+            **💡 할당량 절약 팁:**
+            - 이미지 크기를 1024x1024 이하로 유지
+            - 텍스트가 있는 부분만 크롭하여 업로드
+            - 연속적인 요청 간 1-2초 간격 유지
+            """, elem_classes="info-box")
         gr.Markdown("## 📤 이미지 업로드 및 텍스트 추출")
         with gr.Row():